In [2]:
import sys
from pathlib import Path

# Put the parent of the current working directory on sys.path so that the
# `src.*` imports used later in this notebook resolve.
# NOTE(review): presumably the notebook is launched from a sub-directory of the
# repository (e.g. notebooks/) — confirm.
PROJECT_ROOT = Path.cwd().parent
_project_root_str = str(PROJECT_ROOT)

# Guard the insert so that re-running this cell does not pile up duplicate
# entries on sys.path.
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

print("Added to sys.path:", PROJECT_ROOT)

Added to sys.path: /Users/croberts/practice-telemetry-ml


In [3]:
import pandas as pd

from src.config import RAW_TELEMETRY_PATH

In [4]:
# Read the raw telemetry parquet file located at RAW_TELEMETRY_PATH
# (the path constant imported from src.config).
df_raw = pd.read_parquet(RAW_TELEMETRY_PATH)
# Preview the first rows of the raw frame.
df_raw.head()

Unnamed: 0,device_id,timestamp,sensor_value,temp_c,battery_v,rssi,event_tag
0,D000,2025-12-11 21:32:00,9.833085,19.594316,3.908354,-55.495804,normal
1,D000,2025-12-11 21:47:00,11.005451,19.998744,3.911803,-56.698568,normal
2,D000,2025-12-11 22:02:00,11.217204,20.449461,3.901991,-55.531041,normal
3,D000,2025-12-11 22:17:00,9.57909,20.673206,3.898062,-54.816955,normal
4,D000,2025-12-11 22:32:00,10.063996,22.027144,3.903768,-52.33176,normal


In [5]:
# (rows, columns) of the raw frame.
df_raw.shape

(33667, 7)

In [6]:
# Column labels of the raw frame.
df_raw.columns

Index(['device_id', 'timestamp', 'sensor_value', 'temp_c', 'battery_v', 'rssi',
       'event_tag'],
      dtype='object')

In [7]:
# Build the working frame from the raw one without touching df_raw:
#   1. parse `timestamp` to datetimes (errors="coerce": unparseable values become NaT),
#   2. order rows by device and then by time,
#   3. replace the index with a fresh 0..n-1 range.
df = (
    df_raw
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"], errors="coerce"))
    .sort_values(by=["device_id", "timestamp"])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,device_id,timestamp,sensor_value,temp_c,battery_v,rssi,event_tag
0,D000,2025-12-11 21:32:00,9.833085,19.594316,3.908354,-55.495804,normal
1,D000,2025-12-11 21:47:00,11.005451,19.998744,3.911803,-56.698568,normal
2,D000,2025-12-11 22:02:00,11.217204,20.449461,3.901991,-55.531041,normal
3,D000,2025-12-11 22:17:00,9.57909,20.673206,3.898062,-54.816955,normal
4,D000,2025-12-11 22:32:00,10.063996,22.027144,3.903768,-52.33176,normal


In [8]:
# Pull out (as an independent copy) every row belonging to the device found in
# the first row of df, then show its first ten readings.
first_device_mask = df["device_id"].eq(df["device_id"].iloc[0])
d0 = df.loc[first_device_mask].copy()
d0.loc[:, ["device_id", "timestamp", "sensor_value", "temp_c", "rssi", "event_tag"]].head(10)

Unnamed: 0,device_id,timestamp,sensor_value,temp_c,rssi,event_tag
0,D000,2025-12-11 21:32:00,9.833085,19.594316,-55.495804,normal
1,D000,2025-12-11 21:47:00,11.005451,19.998744,-56.698568,normal
2,D000,2025-12-11 22:02:00,11.217204,20.449461,-55.531041,normal
3,D000,2025-12-11 22:17:00,9.57909,20.673206,-54.816955,normal
4,D000,2025-12-11 22:32:00,10.063996,22.027144,-52.33176,normal
5,D000,2025-12-11 22:47:00,11.015939,20.22063,-55.297635,normal
6,D000,2025-12-11 23:02:00,10.841355,22.603017,-53.80269,normal
7,D000,2025-12-11 23:17:00,11.110428,21.907694,-56.624801,normal
8,D000,2025-12-11 23:32:00,10.695249,22.801558,-53.283713,normal
9,D000,2025-12-11 23:47:00,11.81807,21.876261,-53.872809,normal


In [9]:
# True when the timestamps of the single-device slice d0 never decrease from one row to the next.
d0["timestamp"].is_monotonic_increasing

True

In [10]:
# Signals to derive history features from, and how many rows back to look.
feature_cols = ["sensor_value", "temp_c", "rssi"]
lags = [1, 2, 4]

# Group by device so that a shift never pulls a value across a device boundary.
g = df.groupby("device_id", group_keys=False)

# One new column per (signal, lag) pair: the signal's value k rows earlier
# within the same device.
for col, k in ((name, step) for name in feature_cols for step in lags):
    df[f"{col}_lag_{k}"] = g[col].shift(k)

In [11]:
# Show the raw reading side by side with each of its lagged copies.
cols_to_view = [
    "timestamp",
    "sensor_value",
    "sensor_value_lag_1",
    "sensor_value_lag_2",
    "sensor_value_lag_4",
    "event_tag",
]
# Re-take the slice for the device in the first row of df so it includes the new lag columns.
first_device = df["device_id"].iloc[0]
d0 = df.loc[df["device_id"] == first_device].copy()
d0.loc[:, cols_to_view].head(12)

Unnamed: 0,timestamp,sensor_value,sensor_value_lag_1,sensor_value_lag_2,sensor_value_lag_4,event_tag
0,2025-12-11 21:32:00,9.833085,,,,normal
1,2025-12-11 21:47:00,11.005451,9.833085,,,normal
2,2025-12-11 22:02:00,11.217204,11.005451,9.833085,,normal
3,2025-12-11 22:17:00,9.57909,11.217204,11.005451,,normal
4,2025-12-11 22:32:00,10.063996,9.57909,11.217204,9.833085,normal
5,2025-12-11 22:47:00,11.015939,10.063996,9.57909,11.005451,normal
6,2025-12-11 23:02:00,10.841355,11.015939,10.063996,11.217204,normal
7,2025-12-11 23:17:00,11.110428,10.841355,11.015939,9.57909,normal
8,2025-12-11 23:32:00,10.695249,11.110428,10.841355,10.063996,normal
9,2025-12-11 23:47:00,11.81807,10.695249,11.110428,11.015939,normal


In [12]:
# Spot check one row of d0: the reading and its lag-1 / lag-2 values.
i = 10
check_cols = ["sensor_value", "sensor_value_lag_1", "sensor_value_lag_2"]
d0.iloc[i].loc[check_cols]

sensor_value          11.836893
sensor_value_lag_1     11.81807
sensor_value_lag_2    10.695249
Name: 10, dtype: object

In [13]:
w = 4  # 4 rows = 1 hour at 15-min cadence

for col in feature_cols:
    # Shift by one row within each device first, so the window for a row is
    # built only from rows before it (leakage prevention).
    past = g[col].shift(1)

    # Per-device rolling window over the shifted values; a result is produced
    # only once w non-null values are available.
    windowed = past.groupby(df["device_id"]).rolling(window=w, min_periods=w)

    # Drop the device level that the grouped rolling adds to the index so the
    # result lines up with df's own index on assignment.
    df[f"{col}_roll_mean_{w}"] = windowed.mean().reset_index(level=0, drop=True)
    df[f"{col}_roll_std_{w}"] = windowed.std().reset_index(level=0, drop=True)

In [14]:
# Show the raw reading next to its lag-1 value and the rolling statistics.
cols = [
    "timestamp",
    "sensor_value",
    "sensor_value_lag_1",
    f"sensor_value_roll_mean_{w}",
    f"sensor_value_roll_std_{w}",
    "event_tag",
]
# Re-take the slice for the device in the first row of df so it includes the rolling columns.
first_device = df["device_id"].iloc[0]
d0 = df.loc[df["device_id"] == first_device].copy()
d0.loc[:, cols].head(15)

Unnamed: 0,timestamp,sensor_value,sensor_value_lag_1,sensor_value_roll_mean_4,sensor_value_roll_std_4,event_tag
0,2025-12-11 21:32:00,9.833085,,,,normal
1,2025-12-11 21:47:00,11.005451,9.833085,,,normal
2,2025-12-11 22:02:00,11.217204,11.005451,,,normal
3,2025-12-11 22:17:00,9.57909,11.217204,,,normal
4,2025-12-11 22:32:00,10.063996,9.57909,10.408707,0.822471,normal
5,2025-12-11 22:47:00,11.015939,10.063996,10.466435,0.775356,normal
6,2025-12-11 23:02:00,10.841355,11.015939,10.469057,0.7778,normal
7,2025-12-11 23:17:00,11.110428,10.841355,10.375095,0.672925,normal
8,2025-12-11 23:32:00,10.695249,11.110428,10.75793,0.47586,normal
9,2025-12-11 23:47:00,11.81807,10.695249,10.915743,0.184474,normal


In [15]:
# Sanity check: recompute the rolling mean by hand for one row of d0 and show
# it next to the stored feature value.
i = 10
# The w rows strictly before row i (positions i-w .. i-1). Slicing with w rather
# than a literal 4 keeps this check in step with the window size used to name
# the feature column below.
prev4 = d0["sensor_value"].iloc[i - w:i]
prev4.mean(), d0[f"sensor_value_roll_mean_{w}"].iloc[i]

(np.float64(11.116275547599239), np.float64(11.116275547599239))