In [1]:
import os
import numpy as np
import pandas as pd
from collections import defaultdict

from pygazeanalyser.detectors import fixation_detection
from utils.fixations import fixation_durations_and_final_clicks
from utils.common import extract_emotion_rating_segments, convert_to_pygaze_compatible_format

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Root folder of the eye-tracking recordings. Built with os.path.join so the
# separator is correct on every OS (a literal backslash is only a separator on Windows).
base_dir = os.path.join("eye-data-sept2025", "EXPERIMENTS_selective")

In [2]:
# Helper imported from utils.fixations (implementation not shown in this notebook).
# Its printed summary reports, per stimulus category, how often the final button
# click matched one of the top-5 longest fixation durations.
fixation_durations_and_final_clicks(base_dir)

  data = pd.read_csv(os.path.join(s, "eye_tracking_data.csv"))



Stimulus category: POSITIVE
Total number of rating trials processed: 500
Top 1 longest fixation duration, with matching button click: 214
Top 2 longest fixation duration, with matching button click: 88
Top 3 longest fixation duration, with matching button click: 48
Top 4 longest fixation duration, with matching button click: 23
Top 5 longest fixation duration, with matching button click: 5
Incorrect (doesn't match any top 5 fixation durations): 122

Stimulus category: NEUTRAL
Total number of rating trials processed: 500
Top 1 longest fixation duration, with matching button click: 186
Top 2 longest fixation duration, with matching button click: 98
Top 3 longest fixation duration, with matching button click: 39
Top 4 longest fixation duration, with matching button click: 18
Top 5 longest fixation duration, with matching button click: 7
Incorrect (doesn't match any top 5 fixation durations): 152

Stimulus category: NEGATIVE
Total number of rating trials processed: 500
Top 1 longest fixat

---

In [3]:
# emotion ratings grouped by stim_cat
def compute_fixation_metrics_collapsed_0(base_dir, save_dir="fixation_metrics_collapsed_0"):
    """
    Compute fixation metrics for emotion rating screens,
    averaged across stimuli within each emotion category.
    Produces both subject-level and group-level summaries.

    Args:
        base_dir (str): Folder holding one sub-folder per subject. Only
            sub-folders that contain "eye_tracking_data.csv" are processed,
            in sorted order; any other entry is ignored.
        save_dir (str): Folder the two result CSVs are written to
            (created if it does not exist).

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(subject_df, group_df)``.
            ``subject_df`` has one row per subject x category with the mean
            (across rating segments) of fixation count, mean fixation
            duration and total fixation duration. ``group_df`` holds the mean
            and sample SD of those subject-level values per category.
            Both frames are empty (but have their columns) when no segment
            produced any fixation.
    """
    data_file = "eye_tracking_data.csv"
    metric_columns = ['fixation_count', 'mean_duration', 'total_duration']
    subject_columns = ['subject_id', 'emotion_category'] + metric_columns

    def _first_label(values):
        """Return the first non-missing, non-blank label in ``values`` (stripped), or None."""
        for value in values:
            if pd.isna(value):
                continue
            label = str(value).strip()
            if label:
                return label
        return None

    # Keep only real subject folders: stray files (e.g. OS metadata) in base_dir
    # would otherwise crash read_csv. Sorting makes the row order reproducible.
    subject_dirs = sorted(
        path
        for path in (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
        if os.path.isfile(os.path.join(path, data_file))
    )

    subject_summaries = []

    for s in subject_dirs:
        subject_id = os.path.splitext(os.path.basename(s))[0]
        # NOTE(review): the captured notebook output shows a pandas warning raised on
        # this read; if it is a mixed-dtype warning, consider passing explicit dtypes.
        df = pd.read_csv(os.path.join(s, data_file))

        # --- CLEAN COLUMN NAMES AND STIM_CAT ---
        df.columns = df.columns.str.strip()
        df['stim_cat'] = df['stim_cat'].astype(str).str.strip().replace({'nan': np.nan})

        # Extract rating segments: {stim_id: {emotion: segment DataFrame}}
        rating_segs = extract_emotion_rating_segments(df)

        # Container: per-segment metrics, keyed by stimulus category
        category_metrics = defaultdict(list)

        for stim_id, emotions_dict in rating_segs.items():
            for seg_df in emotions_dict.values():
                if seg_df.empty:
                    # Nothing recorded for this rating screen; .iloc[0] below would fail.
                    continue

                # Category comes from the segment's first row; if that is missing or
                # blank, fall back to the first usable label among all rows of this stimulus.
                emotion_category = _first_label(seg_df['stim_cat'].iloc[:1])
                if emotion_category is None:
                    stim_rows = df.loc[df['stim_id'] == stim_id, 'stim_cat']
                    emotion_category = _first_label(stim_rows) or "UNKNOWN"

                # --- Calculate fixations ---
                x, y, t = convert_to_pygaze_compatible_format(seg_df)
                _, Efix = fixation_detection(x, y, t)

                # The third field of each end-of-fixation event is used as its duration.
                durations = [f[2] for f in Efix] if Efix else []

                # Segments without any detected fixation contribute nothing to the averages.
                if durations:
                    category_metrics[emotion_category].append({
                        'fixation_count': len(durations),
                        'total_duration': np.sum(durations),
                        'mean_duration': np.mean(durations)
                    })

        # --- Compute subject-level averages per category ---
        for category, metrics_list in category_metrics.items():
            fixation_counts = np.array([m['fixation_count'] for m in metrics_list])
            total_durations = np.array([m['total_duration'] for m in metrics_list])
            mean_durations = np.array([m['mean_duration'] for m in metrics_list])

            subject_summaries.append({
                'subject_id': subject_id,
                'emotion_category': category,
                'fixation_count': fixation_counts.mean(),   # mean across segments
                'mean_duration': mean_durations.mean(),    # mean of per-segment mean durations
                'total_duration': total_durations.mean()   # mean across segments
            })

    # ---- Convert to DataFrame ----
    # Explicit columns keep the frame well-formed (and groupable) even with zero rows.
    subject_df = pd.DataFrame(subject_summaries, columns=subject_columns)
    subject_df = subject_df.astype({col: float for col in metric_columns})

    # ---- Group-level summaries ----
    group_df = (
        subject_df.groupby("emotion_category")
        .agg(
            mean_fixation_count=("fixation_count", "mean"),
            sd_fixation_count=("fixation_count", "std"),
            mean_fixation_duration=("mean_duration", "mean"),
            sd_fixation_duration=("mean_duration", "std"),
            mean_total_duration=("total_duration", "mean"),
            sd_total_duration=("total_duration", "std"),
        )
        .reset_index()
    )

    # ---- Save results ----
    os.makedirs(save_dir, exist_ok=True)
    subject_path = os.path.join(save_dir, "subject_level_fixation_metrics_collapsed.csv")
    group_path = os.path.join(save_dir, "group_level_fixation_metrics_collapsed.csv")

    subject_df.to_csv(subject_path, index=False)
    group_df.to_csv(group_path, index=False)

    print(f"Saved subject-level metrics to: {subject_path}")
    print(f"Saved group-level metrics to: {group_path}")

    return subject_df, group_df


# Run the category-collapsed analysis; this also writes both CSVs into the default save_dir.
subject_df, group_df = compute_fixation_metrics_collapsed_0(base_dir)

  df = pd.read_csv(os.path.join(s, "eye_tracking_data.csv"))


Saved subject-level metrics to: fixation_metrics_collapsed_0\subject_level_fixation_metrics_collapsed.csv
Saved group-level metrics to: fixation_metrics_collapsed_0\group_level_fixation_metrics_collapsed.csv


In [4]:
# Subject-level table from compute_fixation_metrics_collapsed_0 (one row per subject x category).
subject_df

Unnamed: 0,subject_id,emotion_category,fixation_count,mean_duration,total_duration
0,anjana,negative,6.28,203.338979,1243.57376
1,anjana,neutral,3.62,226.884538,704.71872
2,anjana,positive,5.96,192.799673,1035.0902
3,ankita,negative,8.4,213.109716,1788.27898
4,ankita,neutral,8.32,228.731551,1887.77916
5,ankita,positive,7.2,217.46329,1593.27402
6,deepali,negative,7.54,218.850231,1555.76032
7,deepali,neutral,5.86,235.190427,1199.15678
8,deepali,positive,6.3,203.683872,1210.34328
9,jini,negative,10.58,195.504467,2003.2576


In [5]:
# Group-level table from compute_fixation_metrics_collapsed_0 (mean and SD across subjects per category).
group_df

Unnamed: 0,emotion_category,mean_fixation_count,sd_fixation_count,mean_fixation_duration,sd_fixation_duration,mean_total_duration,sd_total_duration
0,negative,8.722,3.811963,209.937243,18.899703,1724.946372,691.603498
1,neutral,5.954,1.613775,218.408119,23.627539,1174.059314,354.664409
2,positive,7.062,2.163915,202.055883,17.175595,1362.45124,422.357447


---

In [6]:
def compute_fixation_metrics_per_rating_screen_0(base_dir, save_dir="fixation_metrics_per_rating_screen_0"):
    """
    Compute fixation metrics per emotion rating screen, grouped by stimulus category.
    Each row in the subject-level CSV corresponds to one subject × stimulus_category × emotion_rating.
    Produces both subject-level and group-level summaries.

    Args:
        base_dir (str): Folder holding one sub-folder per subject. Only
            sub-folders that contain "eye_tracking_data.csv" are processed,
            in sorted order; any other entry is ignored.
        save_dir (str): Folder the two result CSVs are written to
            (created if it does not exist).

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(subject_df, group_df)``.
            ``subject_df`` holds, per subject × stimulus category × emotion
            rating, the mean and population SD (ddof=0) across rating segments
            of fixation count, mean fixation duration and total fixation
            duration. ``group_df`` holds the mean and sample SD across
            subjects of the subject-level means. Both frames are empty (but
            have their columns) when no segment produced any fixation.
    """
    data_file = "eye_tracking_data.csv"
    metric_columns = [
        'mean_fixation_count', 'sd_fixation_count',
        'mean_fixation_duration', 'sd_fixation_duration',
        'mean_total_duration', 'sd_total_duration',
    ]
    subject_columns = ['subject_id', 'stimulus_category', 'emotion_rating'] + metric_columns

    def _first_label(values):
        """Return the first non-missing, non-blank label in ``values`` (stripped), or None."""
        for value in values:
            if pd.isna(value):
                continue
            label = str(value).strip()
            if label:
                return label
        return None

    # Keep only real subject folders: stray files (e.g. OS metadata) in base_dir
    # would otherwise crash read_csv. Sorting makes the row order reproducible.
    subject_dirs = sorted(
        path
        for path in (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
        if os.path.isfile(os.path.join(path, data_file))
    )

    subject_summaries = []

    for s in subject_dirs:
        subject_id = os.path.splitext(os.path.basename(s))[0]
        # NOTE(review): the captured notebook output shows a pandas warning raised on
        # this read; if it is a mixed-dtype warning, consider passing explicit dtypes.
        df = pd.read_csv(os.path.join(s, data_file))

        # --- Clean columns ---
        df.columns = df.columns.str.strip()
        df['stim_cat'] = df['stim_cat'].astype(str).str.strip().replace({'nan': np.nan})

        # Extract rating segments: {stim_id: {emotion_rating: segment DataFrame}}
        rating_segs = extract_emotion_rating_segments(df)

        # Container: per-segment metrics, keyed by stimulus category then emotion rating
        subj_cat_emotion_metrics = defaultdict(lambda: defaultdict(list))

        for stim_id, emotions_dict in rating_segs.items():
            for emotion_rating, seg_df in emotions_dict.items():
                if seg_df.empty:
                    # Nothing recorded for this rating screen; .iloc[0] below would fail.
                    continue

                # Category comes from the segment's first row; if that is missing or
                # blank, fall back to the first usable label among all rows of this stimulus.
                stim_cat = _first_label(seg_df['stim_cat'].iloc[:1])
                if stim_cat is None:
                    stim_rows = df.loc[df['stim_id'] == stim_id, 'stim_cat']
                    stim_cat = _first_label(stim_rows) or "UNKNOWN"

                # --- Calculate fixations for this segment ---
                x, y, t = convert_to_pygaze_compatible_format(seg_df)
                _, Efix = fixation_detection(x, y, t)

                # The third field of each end-of-fixation event is used as its duration.
                durations = [f[2] for f in Efix] if Efix else []

                # Segments without any detected fixation contribute nothing to the averages.
                if durations:
                    subj_cat_emotion_metrics[stim_cat][emotion_rating].append({
                        'fixation_count': len(durations),
                        'total_duration': np.sum(durations),
                        'mean_duration': np.mean(durations)
                    })

        # --- Aggregate metrics per stimulus category × emotion rating ---
        for stim_cat, emotion_dict in subj_cat_emotion_metrics.items():
            for emotion_rating, metrics_list in emotion_dict.items():
                fixation_counts = np.array([m['fixation_count'] for m in metrics_list])
                total_durations = np.array([m['total_duration'] for m in metrics_list])
                mean_durations = np.array([m['mean_duration'] for m in metrics_list])

                subject_summaries.append({
                    'subject_id': subject_id,
                    'stimulus_category': stim_cat,
                    'emotion_rating': emotion_rating,
                    'mean_fixation_count': fixation_counts.mean(),
                    'sd_fixation_count': fixation_counts.std(ddof=0),
                    'mean_fixation_duration': mean_durations.mean(),
                    'sd_fixation_duration': mean_durations.std(ddof=0),
                    'mean_total_duration': total_durations.mean(),
                    'sd_total_duration': total_durations.std(ddof=0)
                })

    # ---- Convert to DataFrame ----
    # Explicit columns keep the frame well-formed (and groupable) even with zero rows.
    subject_df = pd.DataFrame(subject_summaries, columns=subject_columns)
    subject_df = subject_df.astype({col: float for col in metric_columns})

    # ---- Group-level summaries ----
    # The group-level SDs are the spread of subject means, not the mean of subject SDs.
    group_df = (
        subject_df.groupby(['stimulus_category', 'emotion_rating'])
        .agg(
            mean_fixation_count=('mean_fixation_count', 'mean'),
            sd_fixation_count=('mean_fixation_count', 'std'),
            mean_fixation_duration=('mean_fixation_duration', 'mean'),
            sd_fixation_duration=('mean_fixation_duration', 'std'),
            mean_total_duration=('mean_total_duration', 'mean'),
            sd_total_duration=('mean_total_duration', 'std')
        )
        .reset_index()
    )

    # ---- Save CSVs ----
    os.makedirs(save_dir, exist_ok=True)
    subject_path = os.path.join(save_dir, "subject_level_fixation_metrics.csv")
    group_path = os.path.join(save_dir, "group_level_fixation_metrics.csv")

    subject_df.to_csv(subject_path, index=False)
    group_df.to_csv(group_path, index=False)

    print(f"Saved subject-level metrics to: {subject_path}")
    print(f"Saved group-level metrics to: {group_path}")

    return subject_df, group_df


# Run the per-rating-screen analysis; this also writes both CSVs into the default save_dir.
# NOTE: this rebinds subject_df / group_df, replacing the category-collapsed tables from the earlier cell.
subject_df, group_df = compute_fixation_metrics_per_rating_screen_0(base_dir)

  df = pd.read_csv(os.path.join(s, "eye_tracking_data.csv"))


Saved subject-level metrics to: fixation_metrics_per_rating_screen_0\subject_level_fixation_metrics.csv
Saved group-level metrics to: fixation_metrics_per_rating_screen_0\group_level_fixation_metrics.csv


In [7]:
# Group-level table from compute_fixation_metrics_per_rating_screen_0 (stimulus category × emotion rating).
group_df

Unnamed: 0,stimulus_category,emotion_rating,mean_fixation_count,sd_fixation_count,mean_fixation_duration,sd_fixation_duration,mean_total_duration,sd_total_duration
0,negative,anger,8.2,3.876711,220.691525,25.908086,1733.5838,905.925065
1,negative,disgust,9.21,3.664378,213.623969,36.626919,1872.7789,661.973997
2,negative,fear,8.16,3.416691,205.409271,18.069098,1555.99238,633.689158
3,negative,happy,8.74,5.461013,202.607811,27.2616,1686.00301,946.20256
4,negative,sad,9.3,4.140317,207.353637,39.725458,1776.37377,619.650234
5,neutral,anger,5.31,1.700621,213.775032,26.632804,1054.6655,399.359587
6,neutral,disgust,5.68,1.621933,218.537513,53.003824,1111.51563,336.966337
7,neutral,fear,5.91,1.722047,206.909002,21.975006,1116.28997,368.470639
8,neutral,happy,6.5,2.775688,229.147998,45.474572,1312.62459,570.98687
9,neutral,sad,6.37,1.990003,223.671049,34.338265,1275.20088,383.073806


In [8]:
# Subject-level table from compute_fixation_metrics_per_rating_screen_0 (subject × stimulus category × emotion rating).
subject_df

Unnamed: 0,subject_id,stimulus_category,emotion_rating,mean_fixation_count,sd_fixation_count,mean_fixation_duration,sd_fixation_duration,mean_total_duration,sd_total_duration
0,anjana,negative,disgust,5.8,3.867816,182.505756,111.820010,1144.0043,856.790201
1,anjana,negative,sad,5.3,2.865310,253.870761,55.647748,1213.0913,499.401872
2,anjana,negative,happy,6.9,2.736786,199.591596,34.148457,1360.0898,588.138093
3,anjana,negative,anger,6.3,3.769615,210.744844,46.535081,1313.0101,702.400687
4,anjana,negative,fear,7.1,2.773085,169.981940,29.075631,1187.6733,442.087997
...,...,...,...,...,...,...,...,...,...
145,vaishnavi,neutral,disgust,8.7,7.100000,176.121802,36.276703,1585.3478,1590.659282
146,vaishnavi,neutral,fear,7.7,5.060632,204.739655,96.223735,1290.2600,753.766150
147,vaishnavi,neutral,anger,6.0,3.577709,211.947928,73.627863,1200.1783,764.719877
148,vaishnavi,neutral,happy,13.6,9.318798,337.654370,314.071814,2653.5230,1569.391790


# Stats: within-subject paired t-tests between stimulus categories

In [9]:
from scipy.stats import ttest_rel
from itertools import combinations

def run_pairwise_ttests(df, metric_cols=None, subject_col="subject_id", stimulus_categories=None):
    """
    Run within-subject paired t-tests between stimulus categories for each emotion rating.

    Observations are paired by subject: for every category pair the two samples
    are aligned on ``subject_col`` before testing, so the row order of ``df``
    does not matter. If ``df`` has no ``subject_col`` column, values are paired
    by row position instead (only the number of rows is checked).

    Args:
        df (pd.DataFrame): Subject-level table with the columns
            'emotion_rating', 'stimulus_category' and every name in
            ``metric_cols`` (plus ``subject_col`` for subject-wise pairing).
        metric_cols (list of str, optional): Columns to test. Defaults to
            ['mean_fixation_count', 'mean_fixation_duration', 'mean_total_duration'].
        subject_col (str, optional): Column identifying the subject.
            Defaults to 'subject_id'.
        stimulus_categories (list of str, optional): Categories to compare
            pairwise. Defaults to ['positive', 'neutral', 'negative'].

    Returns:
        pd.DataFrame: Columns: emotion_rating, metric, cat1, cat2, t_stat, p_value.
            p-values are not corrected for multiple comparisons.

    Raises:
        ValueError: If, for some emotion rating, the two categories of a pair
            do not cover exactly the same subjects, a subject occurs more than
            once within a category, or (positional fallback) the two samples
            differ in length.
    """
    if metric_cols is None:
        metric_cols = ['mean_fixation_count', 'mean_fixation_duration', 'mean_total_duration']
    if stimulus_categories is None:
        stimulus_categories = ['positive', 'neutral', 'negative']

    category_pairs = list(combinations(stimulus_categories, 2))
    pair_by_subject = subject_col in df.columns

    results = []

    for emotion in df['emotion_rating'].unique():
        sub_df = df[df['emotion_rating'] == emotion]

        for metric in metric_cols:
            for cat1, cat2 in category_pairs:
                rows1 = sub_df[sub_df['stimulus_category'] == cat1]
                rows2 = sub_df[sub_df['stimulus_category'] == cat2]

                if pair_by_subject:
                    series1 = rows1.set_index(subject_col)[metric]
                    series2 = rows2.set_index(subject_col)[metric]

                    if not (series1.index.is_unique and series2.index.is_unique):
                        raise ValueError(
                            f"Duplicate subjects for {cat1} vs {cat2} in {emotion}"
                        )
                    # Equal length alone is not enough: the same subjects must be on both sides.
                    if set(series1.index) != set(series2.index):
                        raise ValueError(
                            f"Subjects do not match for {cat1} vs {cat2} in {emotion}"
                        )

                    # Put the second sample in the first sample's subject order.
                    vals1 = series1.values
                    vals2 = series2.reindex(series1.index).values
                else:
                    vals1 = rows1[metric].values
                    vals2 = rows2[metric].values

                    # Ensure same length (subjects must match)
                    if len(vals1) != len(vals2):
                        raise ValueError(f"Unequal number of subjects for {cat1} vs {cat2} in {emotion}")

                t_stat, p_val = ttest_rel(vals1, vals2)
                results.append({
                    'emotion_rating': emotion,
                    'metric': metric,
                    'cat1': cat1,
                    'cat2': cat2,
                    't_stat': t_stat,
                    'p_value': p_val
                })

    return pd.DataFrame(results)


# subject_df here is the per-rating-screen subject-level table (as rebound by
# compute_fixation_metrics_per_rating_screen_0), which has the columns the test needs.
# NOTE(review): the reported p-values are not corrected for multiple comparisons.
results_df = run_pairwise_ttests(subject_df)
results_df

Unnamed: 0,emotion_rating,metric,cat1,cat2,t_stat,p_value
0,disgust,mean_fixation_count,positive,neutral,0.761387,0.465907
1,disgust,mean_fixation_count,positive,negative,-3.300104,0.009228
2,disgust,mean_fixation_count,neutral,negative,-3.948827,0.003361
3,disgust,mean_fixation_duration,positive,neutral,-0.88771,0.397799
4,disgust,mean_fixation_duration,positive,negative,-0.786156,0.451977
5,disgust,mean_fixation_duration,neutral,negative,0.679655,0.513822
6,disgust,mean_total_duration,positive,neutral,0.598585,0.564209
7,disgust,mean_total_duration,positive,negative,-4.759933,0.00103
8,disgust,mean_total_duration,neutral,negative,-4.696276,0.001126
9,sad,mean_fixation_count,positive,neutral,0.596053,0.565827
