In [None]:
# Code that generates the mock sensor data

import json
import random
import datetime
import os
import math

output_path = "./data/turtle_sensors.jsonl"
os.makedirs("./data", exist_ok=True)

NUM_ROWS = 10_000

def generate_temperature(t):
    """Return a mock temperature reading for time step ``t``.

    The value is the sum of three parts:

    * a random baseline drawn uniformly from [17, 22],
    * a sinusoid with a period of 1440 steps and an amplitude of 5,
    * a linear drift that grows from 0 to 4 as ``t`` goes from 0 to NUM_ROWS.

    The sum is clamped to the range [17, 34] and rounded to two decimals.
    """
    baseline = random.uniform(17, 22)  # random floor, re-drawn on every call

    # Position inside the 1440-step cycle, expressed as a fraction in [0, 1).
    phase = (t % 1440) / 1440
    swing = 0.5 * math.sin(2 * math.pi * phase)

    drift = 4 * (t / NUM_ROWS)

    raw = baseline + swing * 10 + drift

    # Keep the reading inside the 17..34 range before rounding.
    bounded = min(max(raw, 17), 34)
    return round(bounded, 2)


def generate_movement(t):
    """Return mock accelerometer and gyroscope values for time step ``t``.

    The noise amplitude depends on how far ``t`` is through the run:
    0.02 for the first 75% of NUM_ROWS, 0.08 up to 90%, and 0.4 afterwards.
    Every axis is uniform noise within +/- amplitude; ``az`` is additionally
    offset by 1 (gravity reference).

    Returns:
        A 6-tuple ``(ax, ay, az, gx, gy, gz)`` with each value rounded to
        four decimals.
    """
    if t >= NUM_ROWS * 0.9:
        amplitude = 0.4  # babies starting to break shells
    elif t >= NUM_ROWS * 0.75:
        amplitude = 0.08  # increasing
    else:
        amplitude = 0.02  # very calm

    def noise():
        # One uniform sample in [-amplitude, amplitude].
        return random.uniform(-amplitude, amplitude)

    # Tuple elements are evaluated left to right, so the draw order is
    # ax, ay, az, gx, gy, gz.
    accel = (noise(), noise(), 1 + noise())  # az carries the gravity reference
    gyro = (noise(), noise(), noise())

    return tuple(round(value, 4) for value in accel + gyro)


# Write the mock readings as JSON Lines: one JSON object per line.

# Seed the RNG so the generated sensor values are the same on every run.
# (Timestamps still start at the current wall-clock time.)
random.seed(42)

with open(output_path, "w", encoding="utf-8") as f:
    start_time = datetime.datetime.now()

    for t in range(NUM_ROWS):
        temp = generate_temperature(t)
        ax, ay, az, gx, gy, gz = generate_movement(t)

        reading = {
            # generate_temperature() treats t as a minute index (t % 1440 is the
            # minute of the day), so consecutive rows are spaced one minute
            # apart to keep the timestamps consistent with the daily cycle.
            "timestamp": (start_time + datetime.timedelta(minutes=t)).isoformat(),
            "temperature_c": temp,
            "accelerometer": {"ax": ax, "ay": ay, "az": az},
            "gyroscope": {"gx": gx, "gy": gy, "gz": gz}
        }

        f.write(json.dumps(reading) + "\n")

print(f"Generated {NUM_ROWS} sensor readings in:", output_path)

In [None]:
# Load the corpus

import json
from sentence_transformers import SentenceTransformer
import dspy

def _reading_to_text(obj):
    """Render one parsed sensor reading (a dict) as a single line of text."""
    accel = obj["accelerometer"]
    gyro = obj["gyroscope"]
    return (
        f"timestamp: {obj['timestamp']}. "
        f"temperature: {obj['temperature_c']}C. "
        f"accelerometer ax={accel['ax']}, "
        f"ay={accel['ay']}, "
        f"az={accel['az']}. "
        f"gyroscope gx={gyro['gx']}, "
        f"gy={gyro['gy']}, "
        f"gz={gyro['gz']}."
    )


# Build the retrieval corpus: one text passage per JSONL record.
corpus = []
with open("./data/turtle_sensors.jsonl", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a stray trailing newline) instead of letting
        # json.loads() fail on them.
        if not line.strip():
            continue
        corpus.append(_reading_to_text(json.loads(line)))

print("Loaded", len(corpus), "sensor readings.")

In [None]:

import dspy

# Build the embedder and the retriever over the sensor corpus.
# NOTE(review): the model name suggests an English-only model ("-en-") while the
# questions asked later are in Portuguese — confirm retrieval quality.
EMBEDDING_MODEL_NAME = "sentence-transformers/static-retrieval-mrl-en-v1"
TOP_K = 10  # value passed as k to the retriever

model = SentenceTransformer(EMBEDDING_MODEL_NAME, device="cpu")
embedder = dspy.Embedder(model.encode)
search = dspy.retrievers.Embeddings(corpus=corpus, embedder=embedder, k=TOP_K)


In [None]:
# Configure the LM

# %pip install python-dotenv
from dotenv import load_dotenv
import os

# Read a local .env file (if one exists) into the process environment.
# Without this call the load_dotenv import has no effect and a key stored
# only in .env is never seen by os.getenv().
load_dotenv()

gemini_api_key = os.getenv("GEMINI_API_KEY")

# Report only whether the key was found; never echo any part of the secret,
# since notebook outputs are saved and shared with the file.
print("Loaded:", "GEMINI_API_KEY" if gemini_api_key else "Not found")

lm = dspy.LM("gemini/gemini-2.5-flash", api_key=gemini_api_key)
dspy.configure(lm=lm)

In [None]:
class SensorRAG(dspy.Module):
    """Retrieval-augmented question answering over the sensor corpus.

    A question is first sent to the module-level ``search`` retriever; the
    returned passages are then handed, together with the question, to a
    chain-of-thought predictor.
    """

    def __init__(self):
        # Predictor inputs are `context` and `question`; its output is `response`.
        signature = "context, question -> response"
        self.answer = dspy.ChainOfThought(signature)

    def forward(self, question):
        """Retrieve passages for ``question`` and return the predictor's result."""
        hits = search(question)
        passages = hits.passages
        return self.answer(context=passages, question=question)


sensor_rag = SensorRAG()

In [None]:
# Question 1: "What was the average temperature?"
# NOTE(review): SensorRAG passes only the retrieved passages as context, so the
# answer presumably reflects those readings rather than the whole file — confirm.
sensor_rag(question="Qual foi a temperatura média?")

In [None]:
# Question 2: "When did peaks of accelerometer activity occur?"
# NOTE(review): SensorRAG passes only the retrieved passages as context, so the
# answer presumably reflects those readings rather than the whole file — confirm.
sensor_rag(question="Quando ocorreram picos de atividade do acelerômetro?")

In [None]:
# Question 3: "What was the maximum temperature recorded?"
# NOTE(review): SensorRAG passes only the retrieved passages as context, so the
# answer presumably reflects those readings rather than the whole file — confirm.
sensor_rag(question="Qual foi a temperatura máxima registrada?")