# Manual Multi-Day CSV Loading and Analysis
Specify your list of CSV file paths (one per day) in `csv_paths`, then run all cells.

In [None]:
import pandas as pd
import ast
from pathlib import Path

# 1) Manually list your 5 daily CSV file paths here (one raw string per day);
#    each entry is wrapped in a Path below.
csv_paths = [
    Path(raw_path)
    for raw_path in (
        r"C:\store\git\km-stat-activity\data\real\day1.csv",
        r"C:\store\git\km-stat-activity\data\real\day2.csv",
        r"C:\store\git\km-stat-activity\data\real\day3.csv",
        r"C:\store\git\km-stat-activity\data\real\day4.csv",
        r"C:\store\git\km-stat-activity\data\real\day5.csv",
    )
]

# 2) Read, parse x/y lists and dates for each day
def _parse_coord_list(cell):
    """Convert one raw ``x``/``y`` CSV cell into a Python object.

    ``read_csv`` passes converters the raw cell text, so a missing value
    arrives as an empty string rather than NaN, and
    ``ast.literal_eval("")`` raises ``SyntaxError``.  Blank (or non-string)
    cells are therefore mapped to an empty list; any other text is parsed
    as a Python literal.

    Raises:
        ValueError, SyntaxError: if a non-blank cell is not a valid literal.
    """
    if not isinstance(cell, str) or not cell.strip():
        return []
    return ast.literal_eval(cell)


def _read_day_csv(path):
    """Load one daily CSV and make sure it carries a ``date`` column.

    The ``x`` and ``y`` columns are parsed with ``_parse_coord_list`` and
    ``start_date_time`` / ``end_date_time`` are parsed as datetimes.  When the
    file has no ``date`` column it is derived from ``start_date_time``;
    otherwise the existing column is normalised to ``datetime.date`` values.
    """
    day_frame = pd.read_csv(
        path,
        converters={"x": _parse_coord_list, "y": _parse_coord_list},
        parse_dates=["start_date_time", "end_date_time"],
    )
    if "date" not in day_frame.columns:
        day_frame["date"] = day_frame["start_date_time"].dt.date
    else:
        day_frame["date"] = pd.to_datetime(day_frame["date"]).dt.date
    return day_frame


dfs = []
for path in csv_paths:
    df = _read_day_csv(path)
    dfs.append(df)

# 3) Concatenate all days into one DataFrame (row index is renumbered)
df_all = pd.concat(dfs, ignore_index=True)
print(f"Loaded and concatenated {len(dfs)} files. Total rows: {len(df_all)}")

In [None]:
# 4) Define time segments (08:30-10:00, 11:00-12:30, 13:30-15:30, 15:30-17:00, 17:00-17:30)
def time_segment(ts):
    """Return the label of the working-day window that contains ``ts``.

    ``ts`` is any object exposing ``hour`` and ``minute`` attributes; only
    those two fields are read.  Times that fall outside every window
    (including the gaps between windows) are labelled ``"other"``.
    """
    # Each window is half-open, [start, end), in fractional hours of the day.
    windows = (
        (8.5, 10, "morning_start"),
        (11, 12.5, "pre_lunch"),
        (13.5, 15.5, "post_lunch"),
        (15.5, 17, "afternoon_peak"),
        (17, 17.5, "end_of_day"),
    )
    hour_of_day = ts.hour + ts.minute / 60
    for start, end, label in windows:
        if start <= hour_of_day < end:
            return label
    return "other"

# Label every row with the segment its start time falls into, then show
# which labels actually occur in the loaded data.
start_times = df_all["start_date_time"]
df_all["segment"] = start_times.map(time_segment)
segments_seen = df_all["segment"].unique()
print("Segments assigned. Unique segments:", segments_seen)

In [None]:
# 5) Example: per-date, per-segment mean of several movement features
#    for a single user
user_id = "013d5cac-f09d-48a5-bff1-00d81c91b017"  # Replace with desired profile_guid
features = ["avg_speed", "avg_acceleration", "mouse_idle_ratio", "movement_entropy", "linearity"]

# Keep only the rows belonging to the chosen profile.
is_target_user = df_all["profile_guid"] == user_id
df_user = df_all[is_target_user]

# Average each feature within every (date, segment) pair, then flatten the
# group keys back into ordinary columns for display.
grouped_means = df_user.groupby(["date", "segment"])[features].mean()
summary = grouped_means.reset_index()
display(summary)