In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

# Location of the input CSV, relative to the notebook's working directory.
DATA_PATH = Path("../data/sample_data.csv")

# Read the file with "timestamp" parsed as datetimes, order the rows by that
# column, and relabel them 0..n-1 so positional and label indexing agree.
df = (
    pd.read_csv(DATA_PATH, parse_dates=["timestamp"])
    .sort_values(by="timestamp", ignore_index=True)
)

# Preview the first rows as the cell output.
df.head()


In [None]:
# Hour of day of every sample, taken from the parsed "timestamp" column.
# Stored on `df` because the groupby below keys on it.
df["hour"] = df["timestamp"].dt.hour

# One row per hour of day with the mean of each signal over all samples that
# fall in that hour.
# NOTE(review): "sleep_rate" is the mean of the "sleep" column; it is a
# fraction only if "sleep" is a 0/1 flag -- confirm against the data.
summary = (
    df.groupby("hour")
    .agg(
        activity_mean=("activity", "mean"),
        hr_mean=("hr", "mean"),
        sleep_rate=("sleep", "mean"),
    )
    .reset_index()
)

# A bare `summary` expression is rendered only when it is the LAST statement of
# a cell; plots follow here, so show the table explicitly. `display` is provided
# by the IPython/Jupyter kernel namespace without an import.
display(summary)


def _plot_hourly_mean(hourly, column, ylabel, title):
    """Plot ``hourly[column]`` against ``hourly["hour"]`` on its own figure.

    Parameters
    ----------
    hourly : pandas.DataFrame
        Frame with an "hour" column and the column named by ``column``.
    column : str
        Name of the column to draw on the y-axis.
    ylabel : str
        Label for the y-axis.
    title : str
        Title of the figure.
    """
    fig, ax = plt.subplots()
    ax.plot(hourly["hour"], hourly[column])
    ax.set_xlabel("Hour of day")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.show()


_plot_hourly_mean(
    summary, "activity_mean", "Mean activity", "Activity rhythm (synthetic data)"
)
_plot_hourly_mean(
    summary, "hr_mean", "Mean heart rate", "Heart-rate rhythm (synthetic data)"
)


In [None]:
# Hour-of-day windows used for the day/night ratio. Both ends are inclusive
# (the default of Series.between).
# NOTE(review): hours 7 and 20-23 belong to neither window -- confirm that
# leaving them out is intended.
DAY_HOURS = (8, 19)
NIGHT_HOURS = (0, 6)

# Relies on the "hour" column that an earlier cell adds to `df`.
is_day = df["hour"].between(*DAY_HOURS)
is_night = df["hour"].between(*NIGHT_HOURS)

day_activity = df.loc[is_day, "activity"].mean()
night_activity = df.loc[is_night, "activity"].mean()

# The ratio is undefined when the night mean is exactly zero; report NaN rather
# than an infinite feature value (and a divide-by-zero RuntimeWarning). A NaN
# night mean (no night samples) already propagates to NaN through the division.
if night_activity == 0:
    day_night_activity_ratio = np.nan
else:
    day_night_activity_ratio = day_activity / night_activity

# Single-row frame of whole-recording summary features.
features = pd.DataFrame([{
    "mean_activity": df["activity"].mean(),
    "std_activity": df["activity"].std(),
    "mean_hr": df["hr"].mean(),
    "std_hr": df["hr"].std(),
    "sleep_fraction": df["sleep"].mean(),
    "day_night_activity_ratio": day_night_activity_ratio,
}])

features
