# Load and Concatenate Multi-Day Data
This notebook shows how to load multiple CSV files (one per day),
concatenate them into a single DataFrame, and then apply the same time-segment analysis to the combined data.

In [None]:
from pathlib import Path
import pandas as pd
import ast

# 1) Folder holding the per-day CSV exports
data_dir = Path(r"C:\store\git\km-stat-activity\data\real")

# 2) Collect every CSV in that folder, ordered by path
#    (change the glob pattern if the files are named differently)
csv_files = list(data_dir.glob("*.csv"))
csv_files.sort()
print("Found CSV files:", [path.name for path in csv_files])

In [None]:
# 3) Read and parse each daily CSV, collecting one DataFrame per file
def _parse_list_cell(cell):
    """Convert a CSV cell containing a Python-literal list into a real list.

    read_csv passes converters the raw cell text, so an empty cell arrives
    as '' (which ast.literal_eval rejects). Blank or non-string cells
    therefore yield an empty list instead of raising.
    """
    if isinstance(cell, str) and cell.strip():
        return ast.literal_eval(cell)
    return []


dfs = []
# `csv_path` rather than `csv`, so the loop variable cannot shadow the stdlib module name.
for csv_path in csv_files:
    df_day = pd.read_csv(
        csv_path,
        converters={"x": _parse_list_cell, "y": _parse_list_cell},
        parse_dates=["start_date_time", "end_date_time"],
    )
    if "date" in df_day.columns:
        # Normalise an existing 'date' column to datetime.date whatever dtype
        # pandas inferred. Checking `dtype == object` is not reliable: newer
        # pandas versions may infer a dedicated string dtype, which would
        # wrongly send a valid column down the filename fallback.
        df_day["date"] = pd.to_datetime(df_day["date"]).dt.date
    else:
        # No 'date' column: infer the day from the filename prefix,
        # e.g. '2025-04-14_...' -> 2025-04-14
        date_token = csv_path.stem.split("_")[0]
        try:
            df_day["date"] = pd.to_datetime(date_token).date()
        except ValueError as exc:
            raise ValueError(
                f"{csv_path.name}: no 'date' column and the filename prefix "
                f"{date_token!r} is not a parseable date"
            ) from exc
    dfs.append(df_day)

# 4) Concatenate all days
if not dfs:
    # pd.concat([]) would fail with the opaque "No objects to concatenate".
    raise ValueError(
        "No CSV files were loaded; check data_dir and the glob pattern before concatenating."
    )
df_all = pd.concat(dfs, ignore_index=True)
print(f"Combined DataFrame shape: {df_all.shape}")

In [None]:
# 5) Define time segments (as before)
def time_segment(ts):
    """Return the name of the working-day segment that a timestamp falls in.

    Only ``ts.hour`` and ``ts.minute`` are used. Each window is half-open,
    ``[start, end)``, expressed in fractional hours; a time outside every
    window is labelled "other".
    """
    windows = (
        (8.5, 10, "morning_start"),
        (11, 12.5, "pre_lunch"),
        (13.5, 15.5, "post_lunch"),
        (15.5, 17, "afternoon_peak"),
        (17, 17.5, "end_of_day"),
    )
    hour_of_day = ts.hour + ts.minute / 60
    for start, end, label in windows:
        if start <= hour_of_day < end:
            return label
    return "other"

# Label every row with the segment its start time falls in
segment_labels = df_all['start_date_time'].map(time_segment)
df_all['segment'] = segment_labels
print("Segments assigned, unique:", df_all['segment'].unique())

In [None]:
# 6) Example: mean avg_speed per (date, segment) for one user across all days
user = "013d5cac-f09d-48a5-bff1-00d81c91b017"  # replace as needed
is_user = df_all['profile_guid'].eq(user)
df_user = df_all.loc[is_user]
summary_all = df_user.groupby(['date', 'segment'], as_index=False)['avg_speed'].mean()
summary_all