## Train Results

In [1]:
import pandas as pd

# Load the CSV file (replace 'your_file.csv' with the actual path)
train_results_path = './results/240364_train_fusion_rates_results.csv'
train_id = train_results_path.split('/')[-1].split('_')[0]
print(f"Processing train id: {train_id}")

df = pd.read_csv(train_results_path)

# Clean the 'subject_id' column
df['subject_id'] = df['subject_id'].str.strip("[]").str.replace("'", "", regex=False)

# drop trial_id column
df = df.drop(columns=['trial_id'])

# Convert rate columns to numeric, coercing errors to NaN
rate_cols = ['smartwatch_rate', 'video_rate', 'gt_rate']
df[rate_cols] = df[rate_cols].apply(pd.to_numeric, errors='coerce')

# Drop rows with invalid subject_ids (e.g., '...' or empty)
print(df['subject_id'].unique())
# Compute subject-wise average for each rate
subject_avg = df.groupby('subject_id')[rate_cols].mean().reset_index()

# drop subject_id P10
subject_avg = subject_avg[subject_avg['subject_id'] != 'P10']

# drop index column
subject_avg = subject_avg.reset_index(drop=True)
# Print or save the result

print(subject_avg)

subject_avg.to_csv(f'./results/{train_id}_train_subject_wise_avg_rates.csv', index=False)  # Optional: save to CSV


Processing train id: 240364
['wa1' 'ng5' 'ng8' 'ng3' 'P14' 'ng4' 'ms2' 'P20' 'P5' 'P0' 'P12' 'ng6'
 'P21' 'P16' 'ng1' 'P11' 'P3']
   subject_id  smartwatch_rate  video_rate     gt_rate
0          P0       183.000000   63.000000  228.000000
1         P11       120.000000   54.000000  114.000000
2         P12              NaN         NaN         NaN
3         P14       103.800000   51.600000   63.600000
4         P16       102.000000   78.000000  100.363636
5         P20       102.000000   36.000000  156.000000
6         P21       108.381818   67.109091  112.690909
7          P3       123.000000   45.000000  189.000000
8          P5       102.000000   30.000000   88.000000
9         ms2       111.000000  105.000000  111.000000
10        ng1              NaN         NaN         NaN
11        ng3       116.000000   70.000000  120.000000
12        ng4       112.000000  102.000000  118.000000
13        ng5              NaN         NaN         NaN
14        ng6        99.000000   66.000000   

## Test Result

In [1]:
import pandas as pd
import numpy as np

# Load the CSV file (replace with your actual path if needed)
test_results_path = './results/fusion_test_default/test_results_414469_smartwatch_model_test_fusion_rmse_per_batch.csv'
test_id = test_results_path.split('/')[-1].split('_')[0]
print(f"Processing test id: {test_id}")

test_result_folder = test_results_path.split('/')[-2]

df = pd.read_csv(test_results_path)

# Clean the 'subject_id' column
df['subject_id'] = df['subject_id'].str.strip("[]").str.replace("'", "", regex=False)

# Drop the trial_id column
df = df.drop(columns=['trial_id'])

# Ensure rate columns are numeric
rate_cols = [
    'rmse_cpm',
    'smartwatch_rate', 'video_rate', 'fused_rate', 'gt_rate',
    'smartwatch_depth', 'video_depth', 'fused_depth', 'gt_depth', 'rmse_depth'
]
df[rate_cols] = df[rate_cols].apply(pd.to_numeric, errors='coerce')

# --- NEW: compute simple 50/50 fusion rate per row ---
df['simple_fusion_rate'] = 0.5 * df['smartwatch_rate'] + 0.5 * df['video_rate']

# Drop any rows with invalid or empty subject_ids
df = df[df['subject_id'].notna() & (df['subject_id'] != '')]

# Compute subject-wise averages, now including simple_fusion_rate
subject_avg = (
    df
    .groupby('subject_id', as_index=False)
    .agg({
        'rmse_cpm': 'mean',
        'fused_rate': 'mean',
        'simple_fusion_rate': 'mean',   # ← new
        'smartwatch_rate': 'mean',
        'video_rate': 'mean',
        'gt_rate': 'mean',
        'smartwatch_depth': 'mean',
        'video_depth': 'mean',
        'fused_depth': 'mean',
        'gt_depth': 'mean',
        'rmse_depth': 'mean'
    })
    .rename(columns={
        'rmse_cpm': 'avg_fused_rmse_cpm',
        'fused_rate': 'avg_fused_rate',
        'simple_fusion_rate': 'avg_simple_fusion_rate',  # ← new
        'smartwatch_rate': 'avg_smartwatch_rate',
        'video_rate': 'avg_video_rate',
        'gt_rate': 'avg_gt_rate',
        'smartwatch_depth': 'avg_smartwatch_depth',
        'video_depth': 'avg_video_depth',
        'fused_depth': 'avg_fused_depth',
        'gt_depth': 'avg_gt_depth',
        'rmse_depth': 'avg_rmse_depth'
    })
)

# Drop subject_id P10 if present
subject_avg = subject_avg[subject_avg['subject_id'] != 'P10']

# function to compute rmse between two series
def compute_rmse(a, b):
    return np.sqrt(np.mean((a - b) ** 2))

# Compute per-subject RMSEs, including simple_fusion
rmse_metrics = (
    df
    .groupby('subject_id')
    .apply(lambda g: pd.Series({
        'rmse_smartwatch_cpm':    compute_rmse(g['smartwatch_rate'], g['gt_rate']),
        'rmse_video_cpm':         compute_rmse(g['video_rate'],     g['gt_rate']),
        'rmse_fused_cpm':         compute_rmse(g['fused_rate'],     g['gt_rate']),
        'rmse_simple_fusion_cpm': compute_rmse(g['simple_fusion_rate'], g['gt_rate']),  # ← new
        'rmse_smartwatch_depth':  compute_rmse(g['smartwatch_depth'], g['gt_depth']),
        'rmse_video_depth':       compute_rmse(g['video_depth'],     g['gt_depth']),
        'rmse_fused_depth':       compute_rmse(g['fused_depth'],     g['gt_depth']),
    }))
    .reset_index()
)

# Merge everything
subject_summary = subject_avg.merge(rmse_metrics, on='subject_id').reset_index(drop=True)

# Reorder columns to include the new fusion metrics
subject_summary = subject_summary[
    [
        'subject_id',
        'avg_gt_rate',
        'avg_simple_fusion_rate', 'rmse_simple_fusion_cpm',   # ← new
        'avg_fused_rate',       'rmse_fused_cpm',
        'avg_smartwatch_rate',  'rmse_smartwatch_cpm',
        'avg_video_rate',       'rmse_video_cpm',
        'avg_smartwatch_depth', 'rmse_smartwatch_depth',
        'avg_video_depth',      'rmse_video_depth',
        'avg_fused_depth',      'rmse_fused_depth',
        'avg_gt_depth'
    ]
]

# Print and save
print(subject_summary)

output_path = f'./results/{test_result_folder}/final_{test_id}_test_subject_wise_avg_rates.csv'
subject_summary.to_csv(output_path, index=False)
print(f"Saved summary to {output_path}")


Processing test id: test
   subject_id  avg_gt_rate  avg_simple_fusion_rate  rmse_simple_fusion_cpm  \
0          P0   177.878151              125.961134               63.234073   
1          P1    62.400000               81.900000               19.500000   
2         P11   123.311111              110.442857               14.012738   
3         P12   135.300000              129.400000               29.496101   
4         P14    64.125000               89.175000               25.286459   
5         P15   105.366667               98.100000               19.349648   
6         P16   117.857143              105.857143               19.209373   
7          P2   140.787879              125.272727               18.376958   
8         P20    88.400000               81.400000                7.000000   
9         P21    87.115152               91.078788               14.551612   
10        P22   120.666667              112.000000                8.666667   
11         P3    92.475000             

  df


# Summary for all subjects

In [1]:
import pandas as pd
import numpy as np
import os

# Load the CSV file (replace with your actual path if needed)
test_results_path = './results/2025-06-18_18-54-57/debug_3817147_fusion_weights_test_fusion_rmse_per_batch.csv'
fname = os.path.basename(test_results_path)
test_id = fname.split('_')[1]  # e.g., "3817147"
print(f"Processing test id: {test_id}")

df = pd.read_csv(test_results_path)

# Clean the 'subject_id' column
df['subject_id'] = df['subject_id'].str.strip("[]").str.replace("'", "", regex=False)

# Drop the trial_id column
df = df.drop(columns=['trial_id'])

# Ensure rate columns are numeric (including fused_rate)
rate_cols = ['fused_rate', 'smartwatch_rate', 'video_rate', 'gt_rate']
df[rate_cols] = df[rate_cols].apply(pd.to_numeric, errors='coerce')

# Drop any rows with invalid or empty subject_ids
df = df[df['subject_id'].notna() & (df['subject_id'] != '')]

# Functions to compute error metrics
def compute_rmse(pred, gt):
    return np.sqrt(np.mean((pred - gt) ** 2))

def compute_mae(pred, gt):
    return np.mean(np.abs(pred - gt))

# --- Per-subject summary (existing) ---
subject_metrics = (
    df
    .groupby('subject_id')
    .apply(lambda g: pd.Series({
        'rmse_fused_cpm':      compute_rmse(g['fused_rate'],     g['gt_rate']),
        'rmse_smartwatch_cpm': compute_rmse(g['smartwatch_rate'], g['gt_rate']),
        'rmse_video_cpm':      compute_rmse(g['video_rate'],     g['gt_rate'])
    }))
    .reset_index()
)

# Save per-subject CSV
subject_output = test_results_path.replace(
    'fusion_weights_test_fusion_rmse_per_batch.csv',
    'subject_wise_rmse.csv'
)
subject_metrics.to_csv(subject_output, index=False)
print(f"Saved per-subject RMSE to {subject_output}")

# --- Overall RMSE (all subjects combined) ---
overall = {
    'rmse_fused_cpm':      compute_rmse(df['fused_rate'],     df['gt_rate']),
    'rmse_smartwatch_cpm': compute_rmse(df['smartwatch_rate'], df['gt_rate']),
    'rmse_video_cpm':      compute_rmse(df['video_rate'],     df['gt_rate'])
}
overall_df = pd.DataFrame([overall])

# Save overall CSV
overall_output = test_results_path.replace(
    'fusion_weights_test_fusion_rmse_per_batch.csv',
    'overall_rmse.csv'
)
overall_df.to_csv(overall_output, index=False)
print(f"Saved overall RMSE to {overall_output}")


Processing test id: 3817147
Saved per-subject RMSE to ./results/2025-06-18_18-54-57/debug_3817147_subject_wise_rmse.csv
Saved overall RMSE to ./results/2025-06-18_18-54-57/debug_3817147_overall_rmse.csv


  df


# New Evaluation based on rules and feedback

In [3]:
import pandas as pd
import ast

def rule_rate(ground_truth,comparison):
   result = []
   for i in range(len(comparison)):
      gt = ground_truth[i]
      assistant = comparison[i]
      #check the ground truth:
      if gt >= 100 and gt <= 120: #if ground truth is doing good
         #true negative scenario
         if assistant >= 100 and assistant <= 120:
            result.append('TN')
         else: #false positive scenario
            result.append('FP')
      else: #some form of correction is necessitated
         if gt < 100: #must go faster as according to ground truth
            if assistant < 100:
               result.append('TP')
            else:
               result.append('FN')
         else: #must go slower on compression rate
            if assistant > 120:
               result.append('TP')
            else:
               result.append('FN')
   return result


def rule_depth(ground_truth,comparison):
   result = []
   for i in range(len(comparison)):
      gt = ground_truth[i]
      assistant = comparison[i]



      #if ground truth is doing great
      if gt >= 5 and gt <= 6:
         if assistant >= 5 and assistant <= 6: #If ground truth and assistant both say that the compression depth is great
             result.append('TN')
         else: #If ground truth says no correction but assistant says correction
            result.append('FP')
      else:
         if  gt  < 5: #compression depth is too shallow
            if assistant < 5:
               result.append('TP')
            else:
               result.append('FN')
         else:
            if gt > 6: #depth is too deep
               if assistant > 6:
                  result.append('TP')
               else:
                  result.append('FN')  
   return result


results_dir = './results/fusion_train_2025-07-03_09-41-31'
file_path = f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv'
dataframe = pd.read_csv(file_path)
row_count = len(dataframe)
print("row count: "+ str(row_count) )

column_names = [
    'subject_id', 'trial_id', 'start_idx', 'smartwatch_rates', 
    'video_rates', 'gt_rates', 'smartwatch_depths', 'video_depths', 'gt_depths'
]
dataframe = pd.read_csv(file_path, header=None, names=column_names)

dataframe['Smart Watch Hypothesis Analysis'] = None
dataframe['Video Assistant Hypothesis Analysis'] = None

for index, row in dataframe.iterrows():
   sw_compressions_rate = ast.literal_eval(row["smartwatch_rates"])
   vi_comrpessions_rate = ast.literal_eval(row["video_rates"])
   gt_rate = ast.literal_eval(row["gt_rates"])
   sw_depth = ast.literal_eval(row["smartwatch_depths"])
   vi_dept = ast.literal_eval(row["video_depths"])
   gt_depth = ast.literal_eval(row["gt_depths"])

   sw_hypothesis_rate = rule_rate(gt_rate,sw_compressions_rate)
   sw_hypothesis_depth = rule_depth(gt_depth,sw_depth)
   vi_hypothesis_rate = rule_rate(gt_rate,vi_comrpessions_rate)
   vi_hypothesis_depth = rule_depth(gt_depth,vi_dept)

   print(f"Rate Hypothesis: \n {sw_hypothesis_rate}")
   print(f"Smart Watch Compression List: \n {sw_compressions_rate}")
   print(f"Ground Truth: \n {gt_rate}")
   

   print(f"depth Hypothesis: \n {sw_hypothesis_depth}")
   print(f"Smart Watch Compression List: \n {sw_depth}")
   print(f"Ground Truth: \n {gt_depth}")
   
   
   sw = sw_hypothesis_rate
   sw.append(sw_hypothesis_depth)
   vi = vi_hypothesis_rate
   vi.append(vi_hypothesis_depth)

   dataframe.at[index, 'Smart Watch Hypothesis Analysis'] = sw
   dataframe.at[index, 'Video Assistant Hypothesis Analysis'] = vi
   dataframe.to_csv(f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv',index=False)


print("Updated the CSV File")

row count: 779
Rate Hypothesis: 
 ['TN', 'FN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'FP', 'TN', 'TN']
Smart Watch Compression List: 
 [108.0, 108.0, 102.0, 102.0, 102.0, 102.0, 102.0, 102.0, 96.0, 102.0, 102.0]
Ground Truth: 
 [102.0, 96.0, 108.0, 102.0, 102.0, 108.0, 102.0, 102.0, 102.0, 102.0, 102.0]
depth Hypothesis: 
 ['FN', 'FN', 'FN', 'FN', 'FN', 'TP', 'TP', 'TP', 'TP', 'TP', 'TP', 'TP']
Smart Watch Compression List: 
 [-3.1141271591186523, -1.0423502922058105, 1.1916289329528809, 3.2046289443969727, 5.487999439239502, 7.9385085105896, 10.516573905944824, 13.428730964660645, 16.598222732543945, 20.447240829467773, 24.587739944458008, 28.970897674560547]
Ground Truth: 
 [34.06699752807617, 35.70750045776367, 32.58250045776367, 34.77888488769531, 29.540000915527344, 29.25, 27.66666603088379, 25.0, 26.916250228881836, 26.708749771118164, 27.959999084472656, 27.041250228881836]
Rate Hypothesis: 
 ['TP', 'TP', 'TP', 'TP', 'TP', 'FN', 'FN', 'TP', 'TP', 'TP']
Smart Watch Compression List

In [5]:
import pandas as pd
import ast

def rule_rate(ground_truth, comparison):
    result = []
    for i in range(len(comparison)):
        gt = ground_truth[i]
        assistant = comparison[i]
        if gt >= 100 and gt <= 120:
            if assistant >= 100 and assistant <= 120:
                result.append('TN')
            else:
                result.append('FP')
        else:
            if gt < 100:
                if assistant < 100:
                    result.append('TP')
                else:
                    result.append('FN')
            else:
                if assistant > 120:
                    result.append('TP')
                else:
                    result.append('FN')
    return result

def rule_depth(ground_truth, comparison):
    result = []
    for i in range(len(comparison)):
        gt = ground_truth[i]
        assistant = comparison[i]
        if gt >= 5 and gt <= 6:
            if assistant >= 5 and assistant <= 6:
                result.append('TN')
            else:
                result.append('FP')
        else:
            if gt < 5:
                if assistant < 5:
                    result.append('TP')
                else:
                    result.append('FN')
            else:
                if assistant > 6:
                    result.append('TP')
                else:
                    result.append('FN')
    return result

def compute_metrics(labels):
    tp = labels.count('TP')
    fp = labels.count('FP')
    tn = labels.count('TN')
    fn = labels.count('FN')

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    feedback_acc = (tp + tn) / len(labels) if len(labels) > 0 else 0.0

    return precision, recall, f1, feedback_acc

results_dir = './results/fusion_train_2025-07-03_09-41-31'
file_path = f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv'
dataframe = pd.read_csv(file_path)

column_names = [
    'subject_id', 'trial_id', 'start_idx', 'smartwatch_rates', 
    'video_rates', 'gt_rates', 'smartwatch_depths', 'video_depths', 'gt_depths'
]
# sometimes csv may have extra columns, so only get the first 9 columns
dataframe = pd.read_csv(file_path, usecols=column_names)
dataframe.columns = column_names

dataframe['Smart Watch Hypothesis Analysis'] = None
dataframe['Video Assistant Hypothesis Analysis'] = None

# Store per-subject labels
subject_labels = {}

for index, row in dataframe.iterrows():
    sw_compressions_rate = ast.literal_eval(row["smartwatch_rates"])
    vi_compressions_rate = ast.literal_eval(row["video_rates"])
    gt_rate = ast.literal_eval(row["gt_rates"])
    sw_depth = ast.literal_eval(row["smartwatch_depths"])
    vi_depth = ast.literal_eval(row["video_depths"])
    gt_depth = ast.literal_eval(row["gt_depths"])

    sw_hypothesis_rate = rule_rate(gt_rate, sw_compressions_rate)
    sw_hypothesis_depth = rule_depth(gt_depth, sw_depth)

    subject_id = row["subject_id"]

    # Combine rate and depth hypotheses for this subject
    if subject_id not in subject_labels:
        subject_labels[subject_id] = []

    subject_labels[subject_id].extend(sw_hypothesis_rate)
    subject_labels[subject_id].extend(sw_hypothesis_depth)

    dataframe.at[index, 'Smart Watch Hypothesis Analysis'] = sw_hypothesis_rate + sw_hypothesis_depth

# Write updated batch CSV
dataframe.to_csv(f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv', index=False)
print("Updated the CSV File with hypotheses per trial.")

# Prepare per-subject summary metrics
subject_metrics = []

for subject_id, labels in subject_labels.items():
    precision, recall, f1, feedback_acc = compute_metrics(labels)
    subject_metrics.append({
        'subject_id': subject_id,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'feedback_accuracy': feedback_acc
    })

# Create DataFrame and save to new CSV
metrics_df = pd.DataFrame(subject_metrics)
metrics_df.to_csv(f'{results_dir}/subject_summary_metrics.csv', index=False)
print("Created per-subject summary metrics CSV.")


Updated the CSV File with hypotheses per trial.
Created per-subject summary metrics CSV.


In [1]:
import pandas as pd
import ast

def rule_rate(ground_truth, comparison):
    result = []
    for i in range(len(comparison)):
        gt = ground_truth[i]
        assistant = comparison[i]
        if gt >= 100 and gt <= 120:
            if assistant >= 100 and assistant <= 120:
                result.append('TN')
            else:
                result.append('FP')
        else:
            if gt < 100:
                if assistant < 100:
                    result.append('TP')
                else:
                    result.append('FN')
            else:
                if assistant > 120:
                    result.append('TP')
                else:
                    result.append('FN')
    return result

def rule_depth(ground_truth, comparison):
    result = []
    for i in range(len(comparison)):
        gt = ground_truth[i]
        assistant = comparison[i]
        if gt >= 5 and gt <= 6:
            if assistant >= 5 and assistant <= 6:
                result.append('TN')
            else:
                result.append('FP')
        else:
            if gt < 5:
                if assistant < 5:
                    result.append('TP')
                else:
                    result.append('FN')
            else:
                if assistant > 6:
                    result.append('TP')
                else:
                    result.append('FN')
    return result

def compute_metrics(labels):
    tp = labels.count('TP')
    fp = labels.count('FP')
    tn = labels.count('TN')
    fn = labels.count('FN')

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall    = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1        = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    feedback_acc = (tp + tn) / len(labels) if len(labels) > 0 else 0.0

    return precision, recall, f1, feedback_acc

results_dir = './results/fusion_train_2025-07-03_09-41-31'
file_path   = f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv'
df = pd.read_csv(file_path, usecols=[
    'subject_id', 'trial_id', 'start_idx',
    'smartwatch_rates', 'video_rates', 'gt_rates',
    'smartwatch_depths','video_depths','gt_depths'
])
df.columns = [
    'subject_id','trial_id','start_idx',
    'smartwatch_rates','video_rates','gt_rates',
    'smartwatch_depths','video_depths','gt_depths'
]

# New per-row columns
df['Smart Watch Rate Analysis']    = None
df['Smart Watch Depth Analysis']   = None
df['Video Assistant Rate Analysis']= None
df['Video Assistant Depth Analysis']= None

# Per-subject accumulators, separated
subject_labels_sw_rate  = {}
subject_labels_sw_depth = {}
subject_labels_vi_rate  = {}
subject_labels_vi_depth = {}

for idx, row in df.iterrows():
    sw_rates = ast.literal_eval(row["smartwatch_rates"])
    vi_rates = ast.literal_eval(row["video_rates"])
    gt_rates = ast.literal_eval(row["gt_rates"])
    sw_deps  = ast.literal_eval(row["smartwatch_depths"])
    vi_deps  = ast.literal_eval(row["video_depths"])
    gt_deps  = ast.literal_eval(row["gt_depths"])

    # get per-row labels
    sw_labels_rate  = rule_rate(gt_rates, sw_rates)
    sw_labels_depth = rule_depth(gt_deps, sw_deps)
    vi_labels_rate  = rule_rate(gt_rates, vi_rates)
    vi_labels_depth = rule_depth(gt_deps, vi_deps)

    sid = row["subject_id"]
    # init
    subject_labels_sw_rate.setdefault(sid, []).extend(sw_labels_rate)
    subject_labels_sw_depth.setdefault(sid,[]).extend(sw_labels_depth)
    subject_labels_vi_rate.setdefault(sid, []).extend(vi_labels_rate)
    subject_labels_vi_depth.setdefault(sid,[]).extend(vi_labels_depth)

    # write back to dataframe
    df.at[idx, 'Smart Watch Rate Analysis']    = sw_labels_rate
    df.at[idx, 'Smart Watch Depth Analysis']   = sw_labels_depth
    df.at[idx, 'Video Assistant Rate Analysis']= vi_labels_rate
    df.at[idx, 'Video Assistant Depth Analysis']= vi_labels_depth

# save enhanced per-batch CSV
df.to_csv(f'{results_dir}/new_evaluation_debug_752743_train_fusion_debug_per_batch.csv', index=False)
print("Updated the CSV File with separate rate/depth hypotheses per trial.")

# --- per-subject summary metrics ---
subject_metrics = []

for sid in subject_labels_sw_rate:
    # SW rate
    sw_r_labels = subject_labels_sw_rate[sid]
    sw_r_prec, sw_r_rec, sw_r_f1, sw_r_acc = compute_metrics(sw_r_labels)
    # SW depth
    sw_d_labels = subject_labels_sw_depth[sid]
    sw_d_prec, sw_d_rec, sw_d_f1, sw_d_acc = compute_metrics(sw_d_labels)
    # VI rate
    vi_r_labels = subject_labels_vi_rate[sid]
    vi_r_prec, vi_r_rec, vi_r_f1, vi_r_acc = compute_metrics(vi_r_labels)
    # VI depth
    vi_d_labels = subject_labels_vi_depth[sid]
    vi_d_prec, vi_d_rec, vi_d_f1, vi_d_acc = compute_metrics(vi_d_labels)

    subject_metrics.append({
        'subject_id':                 sid,
        # smartwatch rate
        'sw_rate_precision':          sw_r_prec,
        'sw_rate_recall':             sw_r_rec,
        'sw_rate_f1':                 sw_r_f1,
        'sw_rate_feedback_accuracy':  sw_r_acc,
        # smartwatch depth
        'sw_depth_precision':         sw_d_prec,
        'sw_depth_recall':            sw_d_rec,
        'sw_depth_f1':                sw_d_f1,
        'sw_depth_feedback_accuracy': sw_d_acc,
        # video rate
        'vi_rate_precision':          vi_r_prec,
        'vi_rate_recall':             vi_r_rec,
        'vi_rate_f1':                 vi_r_f1,
        'vi_rate_feedback_accuracy':  vi_r_acc,
        # video depth
        'vi_depth_precision':         vi_d_prec,
        'vi_depth_recall':            vi_d_rec,
        'vi_depth_f1':                vi_d_f1,
        'vi_depth_feedback_accuracy': vi_d_acc,
    })

metrics_df = pd.DataFrame(subject_metrics)
metrics_df.to_csv(f'{results_dir}/subject_summary_metrics.csv', index=False)
print("Created per-subject summary metrics CSV with separate rate/depth.")


Updated the CSV File with separate rate/depth hypotheses per trial.
Created per-subject summary metrics CSV with separate rate/depth.


In [3]:
import pandas as pd
import ast

# (Re-use your rule_rate, rule_depth, compute_metrics functions)

results_dir = './results/fusion_train_2025-07-03_09-41-31'
file_path   = f'{results_dir}/debug_752743_train_fusion_debug_per_batch.csv'

df = pd.read_csv(file_path, usecols=[
    'subject_id','smartwatch_rates','video_rates','gt_rates',
    'smartwatch_depths','video_depths','gt_depths'
])

all_sw_rate_labels   = []
all_sw_depth_labels  = []
all_vi_rate_labels   = []
all_vi_depth_labels  = []

for _, row in df.iterrows():
    sw_rates = ast.literal_eval(row["smartwatch_rates"])
    vi_rates = ast.literal_eval(row["video_rates"])
    gt_rates = ast.literal_eval(row["gt_rates"])
    sw_deps  = ast.literal_eval(row["smartwatch_depths"])
    vi_deps  = ast.literal_eval(row["video_depths"])
    gt_deps  = ast.literal_eval(row["gt_depths"])

    # label each frame
    all_sw_rate_labels  .extend( rule_rate(gt_rates, sw_rates) )
    all_vi_rate_labels  .extend( rule_rate(gt_rates, vi_rates) )
    all_sw_depth_labels .extend( rule_depth(gt_deps, sw_deps) )
    all_vi_depth_labels .extend( rule_depth(gt_deps, vi_deps) )

# now compute once for each modality
sw_rate_prec, sw_rate_rec, sw_rate_f1, sw_rate_acc   = compute_metrics(all_sw_rate_labels)
vi_rate_prec, vi_rate_rec, vi_rate_f1, vi_rate_acc   = compute_metrics(all_vi_rate_labels)
sw_dep_prec, sw_dep_rec, sw_dep_f1, sw_dep_acc       = compute_metrics(all_sw_depth_labels)
vi_dep_prec, vi_dep_rec, vi_dep_f1, vi_dep_acc       = compute_metrics(all_vi_depth_labels)

print("=== Micro-averaged over all subjects ===")
print(f"Smartwatch rate   : precision={sw_rate_prec:.3f}, recall={sw_rate_rec:.3f}, F1={sw_rate_f1:.3f}, acc={sw_rate_acc:.3f}")
print(f"Video rate        : precision={vi_rate_prec:.3f}, recall={vi_rate_rec:.3f}, F1={vi_rate_f1:.3f}, acc={vi_rate_acc:.3f}")
print(f"Smartwatch depth  : precision={sw_dep_prec:.3f}, recall={sw_dep_rec:.3f}, F1={sw_dep_f1:.3f}, acc={sw_dep_acc:.3f}")
print(f"Video depth       : precision={vi_dep_prec:.3f}, recall={vi_dep_rec:.3f}, F1={vi_dep_f1:.3f}, acc={vi_dep_acc:.3f}")


=== Micro-averaged over all subjects ===
Smartwatch rate   : precision=0.817, recall=0.522, F1=0.637, acc=0.672
Video rate        : precision=0.601, recall=0.585, F1=0.593, acc=0.557
Smartwatch depth  : precision=0.995, recall=0.960, F1=0.977, acc=0.955
Video depth       : precision=0.994, recall=0.814, F1=0.895, acc=0.810
