# In [8]:
import os
from pathlib import Path

import pandas as pd
import plotly.express as px

# Configuration
# Relative path to the preprocessed dataset; the script below looks for one
# sub-folder per participant inside it.
DATA_ROOT = Path("../../Data/Experiment_Data/2_PreprocessDataset")
# Activity labels in the order passed to Plotly's ``category_orders``.
ACTIVITY_ORDER = [
    "Other",
    "Shower",
    "Tooth_brushing",
    "Washing_hands",
    "Wiping",
    "Vacuum_Cleaner",
]

# Utility Functions
def read_dataframe(folder: Path, filename: str) -> pd.DataFrame:
    """
    Read one CSV file from a directory and return it as a DataFrame.

    Args:
        folder: Directory that holds the file.
        filename: Name of the CSV file inside ``folder``.

    Returns:
        The parsed table, read with ``pd.read_csv`` defaults.
    """
    csv_path = folder / filename
    frame = pd.read_csv(csv_path)
    return frame

def make_datetime(df: pd.DataFrame) -> pd.Series:
    """
    Assemble one datetime per row from the split date/time columns.

    Args:
        df: Table with 'Year', 'Month', 'Day', 'Hour', 'Min' and 'Sec'
            columns; each is cast to ``int`` before being combined.

    Returns:
        A datetime Series aligned with the index of ``df``.
    """
    # Source column -> component name understood by ``pd.to_datetime``.
    unit_by_column = {
        "Year": "year",
        "Month": "month",
        "Day": "day",
        "Hour": "hour",
        "Min": "minute",
        "Sec": "second",
    }
    components = {
        unit: df[column].astype(int)
        for column, unit in unit_by_column.items()
    }
    return pd.to_datetime(components)

def extract_intervals_from_annotations(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build activity intervals from paired 'Start'/'End' annotation events.

    Rows are scanned in order. A 'Start' row opens an interval, replacing any
    interval that is still open. An 'End' row closes the open interval only
    when its Activity equals the Activity of the open 'Start'. Rows with any
    other Event value, and 'End' rows that do not match, are ignored.

    Args:
        df: Annotation table with 'Event', 'Activity' and 'elapsed_td'
            columns.

    Returns:
        DataFrame with columns 'Activity', 'start_td' and 'end_td', one row
        per completed interval. The columns are present even when no interval
        was completed, so callers can select them without a KeyError.
    """
    columns = ["Activity", "start_td", "end_td"]
    records = []
    open_start = None
    open_activity = None

    # itertuples avoids building a Series per row as iterrows does.
    for row in df.itertuples(index=False):
        if row.Event == "Start":
            open_start = row.elapsed_td
            open_activity = row.Activity
        elif (
            row.Event == "End"
            and open_activity == row.Activity
            and open_start is not None
        ):
            records.append({
                "Activity": open_activity,
                "start_td": open_start,
                "end_td": row.elapsed_td,
            })
            open_start = None
            open_activity = None

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(records, columns=columns)

def extract_intervals_from_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse runs of consecutive identical predictions into intervals.

    Each run starts at the 'elapsed_td' of its first row and ends at the
    'elapsed_td' of the first row of the next run. The last run ends at the
    'elapsed_td' of the final row of ``df``.

    Args:
        df: Prediction table with 'Predict' and 'elapsed_td' columns, in the
            order the runs should be read.

    Returns:
        DataFrame with columns 'Activity', 'start_td' and 'end_td', one row
        per run. The columns are present even when ``df`` has no rows, so
        callers can select them without a KeyError.
    """
    columns = ["Activity", "start_td", "end_td"]
    records = []
    run_label = None
    run_start = None

    # itertuples avoids building a Series per row as iterrows does.
    for row in df.itertuples(index=False):
        label = row.Predict
        td = row.elapsed_td
        if run_label is None:
            # No run is open yet: start one at this row.
            run_label = label
            run_start = td
        elif label != run_label:
            records.append({
                "Activity": run_label,
                "start_td": run_start,
                "end_td": td,
            })
            run_label = label
            run_start = td

    # Close the run that is still open at the end of the table.
    if run_label is not None:
        records.append({
            "Activity": run_label,
            "start_td": run_start,
            "end_td": df["elapsed_td"].iloc[-1],
        })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(records, columns=columns)

def fill_gaps(df_int: pd.DataFrame, full_start: pd.Timedelta, full_end: pd.Timedelta) -> pd.DataFrame:
    """
    Fill the uncovered time between intervals with 'Other' activity.

    Intervals are visited in order of 'start_td'. Whenever an interval starts
    after everything seen so far has ended, an 'Other' interval is inserted to
    cover the hole. A trailing 'Other' interval is added if coverage stops
    before ``full_end``.

    Args:
        df_int: Intervals with 'Activity', 'start_td' and 'end_td' columns.
            May be empty, in which case the whole range is one gap.
        full_start: Start of the range to cover.
        full_end: End of the range to cover.

    Returns:
        DataFrame holding the original intervals plus the inserted 'Other'
        rows, ordered by start. Always has the 'Activity', 'start_td' and
        'end_td' columns, even when no rows are produced.
    """
    columns = ["Activity", "start_td", "end_td"]
    filled = []
    covered_until = full_start

    # An empty frame may carry no columns at all, so sorting by 'start_td'
    # would raise; treat it as "no intervals".
    if df_int.empty:
        ordered = []
    else:
        ordered = df_int.sort_values("start_td").to_dict("records")

    for record in ordered:
        if record["start_td"] > covered_until:
            filled.append({
                "Activity": "Other",
                "start_td": covered_until,
                "end_td": record["start_td"],
            })
        filled.append(record)
        # An interval nested inside an earlier one ends sooner than the
        # coverage already reached; never move the marker backwards, or the
        # next 'Other' filler would overlap a real interval.
        covered_until = max(covered_until, record["end_td"])

    if covered_until < full_end:
        filled.append({
            "Activity": "Other",
            "start_td": covered_until,
            "end_td": full_end,
        })

    if not filled:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(filled)

def add_wall_clock(df_int: pd.DataFrame, zero_dt: pd.Timestamp) -> pd.DataFrame:
    """
    Attach absolute start/end datetimes to elapsed-time intervals.

    Args:
        df_int: Intervals with 'start_td' and 'end_td' timedelta columns.
        zero_dt: Datetime that corresponds to an elapsed time of zero.

    Returns:
        A new DataFrame with 'start_dt' and 'end_dt' columns added; the input
        frame is left unmodified.
    """
    # ``assign`` works on a copy, so the caller's frame is not touched.
    return df_int.assign(
        start_dt=zero_dt + df_int["start_td"],
        end_dt=zero_dt + df_int["end_td"],
    )

def plot_timeline(df_int: pd.DataFrame, title: str) -> None:
    """
    Show the activity intervals as an interactive Plotly Express timeline.

    One bar is drawn per row of ``df_int``, from 'start_dt' to 'end_dt', on
    the row of its 'Activity' and coloured by it. The x axis spans the
    earliest start to the latest end and is labelled every hour counted from
    the earliest start, plus one final label at the latest end.

    Args:
        df_int: Intervals with 'Activity', 'start_dt' and 'end_dt' columns.
        title: Figure title.
    """
    fig = px.timeline(
        df_int,
        x_start="start_dt",
        x_end="end_dt",
        y="Activity",
        color="Activity",
        category_orders={"Activity": ACTIVITY_ORDER},
    )

    span_start = df_int["start_dt"].min()
    span_end = df_int["end_dt"].max()

    # One tick per hour from the first start, then a closing tick at the end.
    one_hour = pd.Timedelta(hours=1)
    tick_vals = []
    cursor = span_start
    while cursor < span_end:
        tick_vals.append(cursor)
        cursor = cursor + one_hour
    tick_vals.append(span_end)
    tick_labels = [tick.strftime("%H:%M") for tick in tick_vals]

    # All x-axis settings in a single update.
    fig.update_xaxes(
        type="date",
        range=[span_start, span_end],
        dtick=3600 * 1000,
        tickformat="%H:%M",
        title="Time of day (HH:MM)",
        tickmode="array",
        tickvals=tick_vals,
        ticktext=tick_labels,
    )

    hover_text = (
        "Activity: %{y}<br>"
        "Start: %{base|%Y-%m-%d %H:%M:%S}<br>"
        "End: %{x|%Y-%m-%d %H:%M:%S}<extra></extra>"
    )
    fig.update_traces(hovertemplate=hover_text)

    fig.update_layout(title=title, yaxis_title=None)
    fig.show()

# Main execution
if __name__ == "__main__":
    # Plot the ground-truth and the predicted activity timelines of one
    # participant as two separate figures.
    participant = "201"
    folder = DATA_ROOT / participant

    # Process annotations: load the CSV, build a per-row datetime, and turn
    # the 'Time' column (interpreted as seconds) into an elapsed timedelta.
    df_anno = read_dataframe(folder, f"{participant}_Annotation_processed.csv")
    df_anno["datetime"] = make_datetime(df_anno)
    df_anno["elapsed_td"] = pd.to_timedelta(df_anno["Time"], unit="s")
    intervals_anno = extract_intervals_from_annotations(df_anno)

    # Determine full session range: prefer the explicit 'Session Start' /
    # 'Session Stop' events (first start, last stop); otherwise fall back to
    # the extent of the extracted intervals.
    # NOTE(review): the fallbacks select 'start_td'/'end_td' from
    # intervals_anno, which assumes at least one interval was extracted.
    if "Session Start" in df_anno.Event.values:
        full_start = df_anno.loc[df_anno.Event == "Session Start", "elapsed_td"].iloc[0]
    else:
        full_start = intervals_anno["start_td"].min()
    if "Session Stop" in df_anno.Event.values:
        full_end = df_anno.loc[df_anno.Event == "Session Stop", "elapsed_td"].iloc[-1]
    else:
        full_end = intervals_anno["end_td"].max()

    # Fill gaps and convert to wall-clock time.
    df_filled_anno = fill_gaps(intervals_anno, full_start, full_end)
    # The datetime of the row with Time == 0 is the reference for elapsed time.
    # NOTE(review): .iloc[0] raises IndexError if no row has Time == 0.
    zero_dt = df_anno.loc[df_anno.Time == 0, "datetime"].iloc[0]
    df_filled_anno = add_wall_clock(df_filled_anno, zero_dt)
    plot_timeline(df_filled_anno, "Ground Truth Timeline")

    # Process predictions: same preparation as for the annotations.
    df_pred = read_dataframe(folder, f"{participant}_Predicted_Activity_processed.csv")
    df_pred["datetime"] = make_datetime(df_pred)
    df_pred["elapsed_td"] = pd.to_timedelta(df_pred["Time"], unit="s")
    intervals_pred = extract_intervals_from_predictions(df_pred)

    # The prediction timeline covers elapsed time 0 up to the largest elapsed
    # time present in the prediction file.
    full_start_pred = pd.Timedelta(0)
    full_end_pred = df_pred["elapsed_td"].max()
    df_filled_pred = fill_gaps(intervals_pred, full_start_pred, full_end_pred)
    # NOTE(review): as above, this requires a prediction row with Time == 0.
    zero_dt_pred = df_pred.loc[df_pred.Time == 0, "datetime"].iloc[0]
    df_filled_pred = add_wall_clock(df_filled_pred, zero_dt_pred)
    plot_timeline(df_filled_pred, "Predicted Activity Timeline")
