In [1]:
import os
import sys
from pathlib import Path

# Locate the project root: the nearest directory, starting from the current
# working directory and walking upwards, that contains a 'src' directory.
start_dir = Path().resolve()
project_root = next(
    (
        candidate
        for candidate in (start_dir, *start_dir.parents)
        # is_dir() rather than exists(): a plain file called 'src' is not a
        # package directory and must not stop the search.
        if (candidate / "src").is_dir()
    ),
    None,
)

if project_root is None:
    # Nothing found all the way up to the filesystem root. Previously the
    # search silently ended at the root, which was then put on sys.path and
    # made the working directory. Leave the process state untouched instead
    # and say so, so that a later import failure is easy to diagnose.
    project_root = start_dir
    print(
        f"WARNING: no 'src' directory found at or above {start_dir}; "
        "working directory and sys.path left unchanged.",
        file=sys.stderr,
    )
else:
    # Make the project importable and make relative paths resolve against
    # the project root.
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))
    os.chdir(project_root)
    print(f"Project root set to: {project_root}")


Project root set to: /Users/araj/Documents/Code/Machine Learning/semeval-task2/semeval-2-emotion-dynamics


In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

plt.style.use("ggplot")

from src.eval.analysis_tools import (
    compute_subtask2a_predictions,
    save_subtask2a_predictions,
    save_eval_summary,
)

# Locations of the cached predictions and the evaluation summary
# (relative paths, resolved against the current working directory).
preds_path = Path("reports/subtask2a_predictions.parquet")
summary_path = Path("reports/eval_summary.csv")

# Compute-and-cache on a miss, otherwise reuse the parquet file on disk.
# NOTE: `metrics` is only bound on the compute path; when the cached file is
# loaded, this cell does not define it.
if not preds_path.exists():
    print("Predictions file not found; computing predictions...")
    pred_df, metrics = compute_subtask2a_predictions()
    save_subtask2a_predictions(pred_df, preds_path)
    save_eval_summary(metrics, summary_path)
else:
    print(f"Loading existing predictions from: {preds_path}")
    pred_df = pd.read_parquet(preds_path)

# Quick size check of whatever was loaded or computed.
print(f"Total prediction rows: {len(pred_df)}")
n_users = pred_df["user_id"].nunique()
print(f"Unique users: {n_users}")


Predictions file not found; computing predictions...
[subtask1] column 'valence' min=-2.0 max=2.0 (nan_count=0)
[subtask1] column 'arousal' min=0.0 max=2.0 (nan_count=0)
[subtask2a] column 'valence' min=-2.0 max=2.0 (nan_count=0)
[subtask2a] column 'arousal' min=0.0 max=2.0 (nan_count=0)
[subtask2a] column 'state_change_valence' min=-4.0 max=4.0 (nan_count=137)
[subtask2a] column 'state_change_arousal' min=-2.0 max=2.0 (nan_count=137)
[subtask2b] column 'valence' min=-2.0 max=2.0 (nan_count=0)
[subtask2b] column 'arousal' min=0.0 max=2.0 (nan_count=0)
[subtask2b] column 'disposition_change_valence' min=-3.666666666666667 max=2.916666666666667 (nan_count=0)
[subtask2b] column 'disposition_change_arousal' min=-2.0 max=2.0 (nan_count=0)
[subtask2b_detailed] column 'valence' min=-2.0 max=2.0 (nan_count=0)
[subtask2b_detailed] column 'arousal' min=0.0 max=2.0 (nan_count=0)
[subtask2b_detailed] column 'mean_valence_half1' min=-1.6363636363636365 max=2.0 (nan_count=0)
[subtask2b_detailed] col

In [3]:
def plot_user_trajectory(pred_df: pd.DataFrame, user_id) -> plt.Figure:
    """
    Draw true vs. predicted valence and arousal trajectories for one user.

    The rows belonging to ``user_id`` are ordered by ``timestamp`` and each
    delta column is accumulated with a running sum, so every curve starts
    from an arbitrary baseline: the shape is meaningful, the absolute level
    is not.

    Parameters
    ----------
    pred_df : pd.DataFrame
        Frame providing the columns ``user_id``, ``timestamp``,
        ``delta_val_true``, ``delta_val_pred``, ``delta_aro_true`` and
        ``delta_aro_pred``.
    user_id
        Value matched (with ``==``) against the ``user_id`` column.

    Returns
    -------
    plt.Figure
        A figure with two stacked panels sharing the x axis: valence on
        top, arousal below.

    Raises
    ------
    ValueError
        If no row of ``pred_df`` matches ``user_id``.
    """
    rows = pred_df.loc[pred_df["user_id"] == user_id]
    if rows.empty:
        raise ValueError(f"No data for user_id={user_id}")
    rows = rows.sort_values("timestamp")

    # One entry per panel:
    # (true column, predicted column, true label, predicted label, y label)
    panels = (
        (
            "delta_val_true",
            "delta_val_pred",
            "True valence",
            "Predicted valence",
            "Valence (cumulative)",
        ),
        (
            "delta_aro_true",
            "delta_aro_pred",
            "True arousal",
            "Predicted arousal",
            "Arousal (cumulative)",
        ),
    )

    # Accumulate all curves before any figure is created, so a missing
    # column fails before matplotlib allocates anything.
    curves = [
        (rows[true_col].cumsum(), rows[pred_col].cumsum())
        for true_col, pred_col, _, _, _ in panels
    ]
    times = rows["timestamp"]

    fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=True)
    for ax, (true_curve, pred_curve), (_, _, true_label, pred_label, y_label) in zip(
        axes, curves, panels
    ):
        ax.plot(times, true_curve, label=true_label)
        ax.plot(times, pred_curve, label=pred_label, linestyle="--")
        ax.set_ylabel(y_label)
        ax.legend(loc="best")

    # The x axis is shared, so only the bottom panel carries the label.
    axes[1].set_xlabel("Time")

    fig.suptitle(f"User {user_id} — emotional trajectories (approximate)")
    fig.tight_layout()
    return fig


In [4]:
from pathlib import Path

# Directory that receives the saved figures; created if it is missing.
fig_dir = Path("reports", "figures")
fig_dir.mkdir(parents=True, exist_ok=True)

# value_counts() orders users by descending row count, so the first
# three index entries are the (at most) three users with the most rows.
user_counts = pred_df["user_id"].value_counts()
top_users = user_counts.head(3).index

print("Generating trajectory plots for users:", list(top_users))

for uid in top_users:
    out_path = fig_dir / f"user_{uid}_trajectory.png"
    fig = plot_user_trajectory(pred_df, uid)
    fig.savefig(out_path, dpi=150)
    # Close each figure once it is on disk so open figures do not pile up.
    plt.close(fig)
    print(f"Saved trajectory figure for user {uid} to: {out_path}")


Generating trajectory plots for users: [40, 2, 33]
Saved trajectory figure for user 40 to: reports/figures/user_40_trajectory.png
Saved trajectory figure for user 2 to: reports/figures/user_2_trajectory.png
Saved trajectory figure for user 33 to: reports/figures/user_33_trajectory.png
