In [None]:
import pandas as pd
import json
import os
import neurokit2 as nk
import numpy as np
import random
def extract_segment(dataframe, stim_file, flag):
    """Return the rows of ``dataframe`` that fall inside one stimulus window.

    The window opens at the ``onset`` of the first row whose ``stim_file``
    matches. Its length is the sum of the ``duration`` of the first row of
    each distinct ``flag`` value seen for that stimulus (in order of
    appearance), up to and including the ``"trial"`` flag.

    Parameters:
    - dataframe: DataFrame with 'stim_file', 'flag', 'onset' and 'duration'
      columns.
    - stim_file: stimulus identifier to look up.
    - flag: kept for interface compatibility; its value is not used, the
      summation always stops at "trial".

    Returns:
    - The rows of ``dataframe`` with window start <= onset <= window end,
      or None when no row matches ``stim_file``.
    """
    stim_rows = dataframe[dataframe['stim_file'] == stim_file]
    if stim_rows.empty:
        return None

    window_start = stim_rows.iloc[0]['onset']

    # One duration per distinct phase label, taken from its first row.
    total_duration = 0
    for phase in stim_rows['flag'].unique():
        first_of_phase = stim_rows[stim_rows['flag'] == phase].iloc[0]
        total_duration = total_duration + first_of_phase['duration']
        if phase == "trial":
            break

    window_end = window_start + total_duration

    # The selection runs over the full frame, not just the stim_file rows.
    after_start = dataframe['onset'] >= window_start
    before_end = dataframe['onset'] <= window_end
    return dataframe[after_start & before_end]
# Participant table; the O/C/E/A/N columns are copied into every trial record.
participants_df = pd.read_csv("participants.tsv", sep="\t")

# Columns of the video-stream recording that are coerced to numeric.
au_columns_subset = [
    'onset', 'confidence', 'success',
    'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r',
    'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r',
    'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r',
]
# Columns kept after the as-of merge: the AU subset plus the event columns.
au_list = au_columns_subset + [
    'duration', 'trial_type', 'flag', 'subject', 'run',
    'trial', 'local_time', 'stim_file',
]

missing = []       # identifiers of runs / stimuli that could not be processed
metrics_all = []   # one dict per (user, run, stim_file)
stim_file = ""
for user in participants_df['participant_id'].unique():
    user_df = participants_df[participants_df['participant_id'] == user]
    openness = user_df['O'].values[0]
    conscientiousness = user_df['C'].values[0]
    extraversion = user_df['E'].values[0]
    agreeableness = user_df['A'].values[0]
    neuroticism = user_df['N'].values[0]

    for run in range(4):
        print(user, run)
        run_prefix = user + "_task-fer_run-" + str(run)
        json_path = user + "/beh/" + run_prefix + "_recording-videostream_physio.json"
        if not os.path.exists(json_path):
            # No sidecar for this run. Record the run itself; a stimulus name
            # left over from an earlier run would be misleading here.
            missing.append(f"{user}{run}")
            continue

        au_path = user + "/beh/" + run_prefix + "_recording-videostream_physio.tsv.gz"
        events_path = user + "/" + run_prefix + "_events.tsv"
        labels_path = user + "/beh/" + run_prefix + "_beh.tsv"

        # The sidecar JSON supplies the column names of the TSV.
        with open(json_path, 'r') as file:
            json_data = json.load(file)
        headers = json_data['Columns']
        au_data = pd.read_csv(au_path, sep='\t', compression='gzip', names=headers)

        for tag in au_columns_subset:
            au_data[tag] = pd.to_numeric(au_data[tag], errors='coerce')
        # Re-reference time to the first sample of the recording.
        au_data['onset'] = au_data['onset'] - au_data['onset'].values[0]

        events = pd.read_csv(events_path, sep='\t')
        labels = pd.read_csv(labels_path, sep='\t')

        # merge_asof needs both sides sorted on the key and free of NaN keys.
        au_data = au_data.dropna(subset=['onset']).sort_values('onset')
        events = events.dropna(subset=['onset']).sort_values('onset')

        # Each sample receives the columns of the latest event at or before it.
        au_data_merged = pd.merge_asof(au_data, events, on='onset', direction='backward')
        au_data = au_data_merged[au_list]

        for stim_file in au_data['stim_file'].unique():
            # The try sits inside the loop so that one stimulus without a
            # label row does not discard the remaining stimuli of the run.
            try:
                au_trial = au_data[au_data['stim_file'] == stim_file]
                au_trial = au_trial.reset_index(drop=True)
                label_line = labels[labels['stim_file'] == stim_file]
                metrics = {
                    "AUs": au_trial,
                    'user': user,
                    'run': run,
                    'stim_file': stim_file,
                    'trial': label_line['trial'].values[0],
                    'stim_emo': label_line['trial_type'].values[0],
                    'preceived_arousal': label_line['p_emotion_a'].values[0],
                    'preceived_valance': label_line['p_emotion_v'].values[0],
                    'felt_arousal': label_line['f_emotion_a'].values[0],
                    'felt_valance': label_line['f_emotion_v'].values[0],
                    'openness': openness,
                    'conscientiousness': conscientiousness,
                    'extraversion': extraversion,
                    'agreeableness': agreeableness,
                    'neuroticism': neuroticism,
                }
                metrics_all.append(metrics)
            except Exception:
                print(stim_file, user, run)
                # f-string: stim_file is NaN (a float) for samples that
                # precede the first event, and NaN + str raises TypeError.
                missing.append(f"{stim_file}{user}{run}")
au_df = pd.DataFrame(metrics_all)


In [None]:
import pandas as pd
import json
import os
import neurokit2 as nk
import numpy as np
import random
def extract_segment(dataframe, stim_file, start_flag,end_flag):
    """Return the rows of ``dataframe`` between two flags of one stimulus.

    The window opens at the ``onset`` of the first row of ``stim_file`` whose
    ``flag`` equals ``start_flag``. Its length is the sum of the ``duration``
    of the first row of every distinct flag from ``start_flag`` up to and
    including ``end_flag`` (flags are visited in order of first appearance).
    If ``end_flag`` never appears, every flag from ``start_flag`` onwards is
    summed.

    Parameters:
    - dataframe: DataFrame with 'stim_file', 'flag', 'onset' and 'duration'
      columns.
    - stim_file: stimulus identifier to look up.
    - start_flag: flag value that opens the window.
    - end_flag: flag value that closes the window (its duration is included).

    Returns:
    - The rows of ``dataframe`` with window start <= onset <= window end, or
      None when ``stim_file`` has no rows or none of them carries
      ``start_flag``.
    """
    filtered_df = dataframe[dataframe['stim_file'] == stim_file]
    if filtered_df.empty:
        return None

    start_rows = filtered_df[filtered_df['flag'] == start_flag]
    if start_rows.empty:
        # Without a start flag there is no window to anchor.
        return None
    initial_timestamp = start_rows['onset'].iloc[0]

    duration = 0
    started = False
    for flag in filtered_df['flag'].unique():
        if not started:
            # Skip every flag that precedes start_flag, however many there are.
            if flag != start_flag:
                continue
            started = True
        first_row = filtered_df[filtered_df['flag'] == flag].iloc[0]
        duration = duration + first_row['duration']
        if flag == end_flag:
            break

    end_timestamp = initial_timestamp + duration

    # The selection runs over the full frame, not just the stim_file rows.
    in_window = (dataframe['onset'] >= initial_timestamp) & (dataframe['onset'] <= end_timestamp)
    return dataframe[in_window]
# Load participant data
root_path="/Users/meis/Documents/Phd/fer_BIDS/"
participants_df = pd.read_csv(root_path+"participants.tsv", sep="\t")

# Column names for the gaze and pupil recordings are read once, from the
# run-0 sidecars of sub-acl, and reused for every participant and run.
# NOTE(review): these two paths sit under a different directory than
# root_path - confirm that this is intended.
gaze_json_path = '/Users/meis/Documents/codes/fer_BIDS/fer_BIDS/sub-acl/beh/sub-acl_task-fer_run-0_recording-gaze_physio.json'
pupil_json_path = '/Users/meis/Documents/codes/fer_BIDS/fer_BIDS/sub-acl/beh/sub-acl_task-fer_run-0_recording-pupil_physio.json'
with open(pupil_json_path, 'r') as file:
    pupil_json_data = json.load(file)
pupil_headers = pupil_json_data['Columns']
with open(gaze_json_path, 'r') as file:
    gaze_json_data = json.load(file)
gaze_headers = gaze_json_data['Columns']

missing = []       # identifiers of runs / stimuli that could not be processed
metrics_all = []   # one dict per (user, run, stim_file)
stim_file = ""
# Final column names imposed on the gaze + pupil + events merge result.
headers=['onset', 'TIME', 'FPOGX', 'FPOGY', 'FPOGS', 'FPOGD', 'FPOGID', 'FPOGV',
       'LPOGX', 'LPOGY', 'LPOGV', 'RPOGX', 'RPOGY', 'RPOGV', 'BPOGX', 'BPOGY',
       'BPOGV', 'LPCX', 'LPCY', 'LPD', 'LPS', 'LPV', 'RPCX', 'RPCY', 'RPD',
       'RPS', 'RPV', 'LEYEX', 'LEYEY', 'LEYEZ', 'LPUPILD', 'LPUPILV', 'REYEX',
       'REYEY', 'REYEZ', 'RPUPILD', 'RPUPILV', 'duration', 'trial_type',
       'flag', 'subject', 'run', 'trial', 'local_time', 'stim_file']

for user in participants_df['participant_id'].unique():
    user_df = participants_df[participants_df['participant_id'] == user]
    openness = user_df['O'].values[0]
    conscientiousness = user_df['C'].values[0]
    extraversion = user_df['E'].values[0]
    agreeableness = user_df['A'].values[0]
    neuroticism = user_df['N'].values[0]

    for run in range(4):
        print(user, run)
        run_prefix = user + "_task-fer_run-" + str(run)
        gaze_path = root_path+user + "/beh/" + run_prefix + "_recording-gaze_physio.tsv.gz"
        pupil_path = root_path+user + "/beh/" + run_prefix + "_recording-pupil_physio.tsv.gz"
        gaze_json_path = root_path+user + "/beh/" + run_prefix + "_recording-gaze_physio.json"
        pupil_json_path = root_path+user + "/beh/" + run_prefix + "_recording-pupil_physio.json"
        events_path =root_path+ user + "/" + run_prefix + "_events.tsv"
        labels_path = root_path+user + "/beh/" + run_prefix + "_beh.tsv"

        # The per-run sidecars are only used as an existence check.
        if not (os.path.exists(gaze_json_path) and os.path.exists(pupil_json_path)):
            # Record the run itself; a stimulus name left over from an
            # earlier run would be misleading here.
            missing.append(f"{user}{run}")
            continue

        # NOTE(review): the gaze file is read with pupil_headers and the pupil
        # file with gaze_headers. Every column is renamed to `headers` after
        # the merges, but if the two sidecars list different numbers of
        # columns this pairing changes how read_csv lays out the data -
        # confirm whether the swap is deliberate.
        gaze_data = pd.read_csv(gaze_path, sep='\t', compression='gzip',names=pupil_headers)
        pupil_data = pd.read_csv(pupil_path, sep='\t', compression='gzip',names=gaze_headers).drop(columns=["TIME"])

        # Re-reference both streams to their own first sample.
        gaze_data['onset'] = gaze_data['onset'] - gaze_data['onset'].values[0]
        pupil_data['onset'] = pupil_data['onset'] - pupil_data['onset'].values[0]

        # merge_asof needs both sides sorted on the key and free of NaN keys.
        gaze_data = gaze_data.dropna(subset=['onset']).sort_values('onset')
        pupil_data = pupil_data.dropna(subset=['onset']).sort_values('onset')

        # Each gaze sample receives the latest pupil sample at or before it.
        eye_data_merged = pd.merge_asof(gaze_data, pupil_data, on=['onset'], direction='backward')

        events = pd.read_csv(events_path, sep='\t').dropna(subset=['onset'])
        labels = pd.read_csv(labels_path, sep='\t')

        # Each sample receives the columns of the latest event at or before it.
        eye_data_merged = pd.merge_asof(eye_data_merged, events, on=['onset'], direction='backward')
        eye_data_merged.columns=headers

        for stim_file in eye_data_merged['stim_file'].unique():
            try:
                eye_trial = eye_data_merged[eye_data_merged['stim_file'] == stim_file]
                eye_trial = eye_trial.reset_index(drop=True)
                label_line = labels[labels['stim_file'] == stim_file]
                metrics = {
                    "Eye_Data": eye_trial,
                    'user': user,
                    'run': run,
                    'stim_file': stim_file,
                    'trial': label_line['trial'].values[0],
                    'stim_emo': label_line['trial_type'].values[0],
                    'preceived_arousal': label_line['p_emotion_a'].values[0],
                    'preceived_valance': label_line['p_emotion_v'].values[0],
                    'felt_arousal': label_line['f_emotion_a'].values[0],
                    'felt_valance': label_line['f_emotion_v'].values[0],
                    'openness': openness,
                    'conscientiousness': conscientiousness,
                    'extraversion': extraversion,
                    'agreeableness': agreeableness,
                    'neuroticism': neuroticism,
                }
                metrics_all.append(metrics)
            except Exception:
                print(stim_file, user, run)
                # f-string: stim_file is NaN (a float) for samples that
                # precede the first event, and NaN + str raises TypeError.
                missing.append(f"{stim_file}{user}{run}")

eye_df = pd.DataFrame(metrics_all)


In [None]:


import pandas as pd
import json
import os
import neurokit2 as nk
import numpy as np
import random
# Participant table (tab-separated), read relative to the working directory.
participants_df = pd.read_csv("participants.tsv", sep="\t")
participants_df['participant_id'].unique()

# Column of the GSR recording that holds the signal analysed in this cell.
column_to_check = "GSR_Conductance_cal"
def index_finder(data,start,end):
    """Locate the range of SCR events that brackets a sample window.

    Parameters:
    - data: mapping with an 'SCR_Onsets' sequence of onset positions.
    - start: lower bound, in the same units as the onsets.
    - end: upper bound, in the same units as the onsets.

    Returns:
    - (start_ind, end_ind): ``start_ind`` is the position of the last onset
      strictly below ``start`` (0 when there is none); ``end_ind`` is the
      position of the last onset strictly below ``end`` (the number of onsets
      when there is none).
    """
    onsets = data['SCR_Onsets']
    start_ind = 0
    # Default taken from the argument itself, not from a module-level variable.
    end_ind = len(onsets)
    for i, peak in enumerate(onsets):
        if peak < start:
            start_ind = i
        if peak < end:
            end_ind = i
    return start_ind, end_ind
def extract_segment(dataframe, stim_file, flag):
    """Slice ``dataframe`` to the time window of one stimulus.

    The window starts at the ``onset`` of the first row matching
    ``stim_file``. Its length is the sum of the ``duration`` found on the
    first row of each distinct ``flag`` value of that stimulus, visited in
    order of appearance and stopping after "trial".

    Parameters:
    - dataframe: DataFrame with 'stim_file', 'flag', 'onset' and 'duration'
      columns.
    - stim_file: stimulus identifier to look up.
    - flag: kept for interface compatibility; its value is not used, the
      summation always stops at "trial".

    Returns:
    - The rows of ``dataframe`` whose onset lies in [start, start + length],
      or None when ``stim_file`` does not occur.
    """
    per_stim = dataframe[dataframe['stim_file'] == stim_file]
    if per_stim.empty:
        return None

    t_start = per_stim.iloc[0]['onset']

    elapsed = 0
    for label in per_stim['flag'].unique():
        rows_for_label = per_stim[per_stim['flag'] == label]
        elapsed = elapsed + rows_for_label.iloc[0]['duration']
        if label == "trial":
            break

    t_end = t_start + elapsed

    # The selection runs over the full frame, not just the stim_file rows.
    keep = (dataframe['onset'] >= t_start) & (dataframe['onset'] <= t_end)
    return dataframe[keep]
def generate_gsr_report(results,start_ind,end_ind):
    """
    Summarise per-event SCR measurements into a flat dictionary.

    Parameters:
    - results: dict with the entries 'SCR_Onsets', 'SCR_Peaks', 'SCR_Height',
      'SCR_Amplitude', 'SCR_RiseTime', 'SCR_Recovery', 'SCR_RecoveryTime'
      and 'sampling_rate'.
    - start_ind, end_ind: bounds used to rescale the onsets to
      (onset - start_ind) / (end_ind - start_ind).

    Returns:
    - report: dict holding the number of events, mean / median / min / max /
      STD of the rescaled onsets, the NaN-ignoring versions of the same five
      statistics for the other measurements, and the sampling rate.
    """
    relative_onsets = (results['SCR_Onsets']-start_ind)/(end_ind-start_ind)

    # Read every required entry up front so a missing key fails before any
    # statistic is computed. 'SCR_Peaks' is required but not summarised.
    required = ('SCR_Peaks', 'SCR_Height', 'SCR_Amplitude', 'SCR_RiseTime',
                'SCR_Recovery', 'SCR_RecoveryTime')
    series = {name: results[name] for name in required}
    rate = results['sampling_rate']

    plain_stats = (("mean", np.mean), ("median", np.median), ("min", np.min),
                   ("max", np.max), ("STD", np.std))
    nan_stats = (("mean", np.nanmean), ("median", np.nanmedian),
                 ("min", np.nanmin), ("max", np.nanmax), ("STD", np.nanstd))

    report = {"Number of Peaks": len(relative_onsets)}
    for label, stat in plain_stats:
        report[f"SCR_Onsets {label}"] = stat(relative_onsets)

    # Insertion order fixes the key order of the returned report.
    for metric in ("SCR_Amplitude", "SCR_Height", "SCR_RiseTime",
                   "SCR_Recovery", "SCR_RecoveryTime"):
        for label, stat in nan_stats:
            report[f"{metric} {label}"] = stat(series[metric])

    report["Sampling Rate"] = rate
    return report
def calculate_gsr_metrics_with_dynamic_range(data,flag, sampling_rate,start_delay=2,dwel_time=5):
    """
    Summarise the SCR events inside a flag-defined window of one trial.

    Parameters:
    - data: dict of per-event results containing at least 'SCR_Onsets'; only
      its numpy-array entries are sliced and summarised. The caller in this
      file passes the second value returned by nk.eda_process.
    - flag: per-sample flag labels with a positional index (the caller passes
      a DataFrame column). The index of the first "video" and of the first
      "last_frame_video" sample anchor the window.
    - sampling_rate: sampling rate of the signal; truncated to int.
    - start_delay: seconds added to the "video" anchor.
    - dwel_time: seconds added to the "last_frame_video" anchor.

    Returns:
    - results: the dict produced by generate_gsr_report for the selected
      events, or {"error": ...} when locating the anchors raises ValueError.
    """
    samplerate = int(sampling_rate)

    # NOTE(review): when a flag is absent, indexing the empty result with [0]
    # raises IndexError, which this handler does not catch; it propagates to
    # the caller. Confirm whether that is the intended behaviour.
    try:
        start_ind= int(start_delay*samplerate)+int(flag.index[flag == "video"][0])
        end_ind= int(flag.index[flag == "last_frame_video"][0]) + int(dwel_time * samplerate)
    except ValueError as e:
        return {"error": f"Could not find required flags: {str(e)}"}

    # Translate the sample window into positions in the per-event arrays.
    start_index,end_index=index_finder(data,start_ind,end_ind)

    # Slice the argument itself rather than a module-level variable.
    sliced_data = {key: value[int(start_index):int(end_index) + 1]
               for key, value in data.items()
               if isinstance(value, np.ndarray)}
    sliced_data['sampling_rate']=samplerate

    results=generate_gsr_report(sliced_data,start_ind,end_ind)

    return results
missing = []       # identifiers of runs / stimuli that could not be processed
metrics_all = []   # one report dict per (user, run, stim_file)
for user in participants_df['participant_id'].unique():
    print(user)
    user_df = participants_df[participants_df['participant_id'] == user]
    openness = user_df['O'].values[0]
    conscientiousness = user_df['C'].values[0]
    extraversion = user_df['E'].values[0]
    agreeableness = user_df['A'].values[0]
    neuroticism = user_df['N'].values[0]

    for run in range(4):
        run_prefix = user + "_task-fer_run-" + str(run)
        json_path = user + "/beh/" + run_prefix + "_recording-gsr_physio.json"
        if not os.path.exists(json_path):
            # Record the run itself. No stimulus is known here, and this
            # cell never assigns stim_file before the loop.
            missing.append(f"{user}{run}")
            continue

        gsr_path = user + "/beh/" + run_prefix + "_recording-gsr_physio.tsv.gz"
        events_path = user + "/" + run_prefix + "_events.tsv"
        labels_path = user + "/beh/" + run_prefix + "_beh.tsv"

        # The sidecar JSON supplies the column names of the TSV.
        with open(json_path, 'r') as file:
            json_data = json.load(file)
        headers = json_data['Columns']

        gsr_data = pd.read_csv(gsr_path, sep='\t', compression='gzip', names=headers)
        gsr_data_raw = gsr_data.copy()
        gsr_data['onset'] = pd.to_numeric(gsr_data['onset'], errors='coerce')

        events = pd.read_csv(events_path, sep='\t')
        labels = pd.read_csv(labels_path, sep='\t')

        # merge_asof needs both sides sorted on the key and free of NaN keys.
        gsr_data = gsr_data.dropna(subset=['onset']).sort_values('onset')
        events = events.dropna(subset=['onset']).sort_values('onset')

        # Each sample receives the columns of the latest event at or before it.
        gsr_data_merged = pd.merge_asof(gsr_data, events, on='onset', direction='backward')

        # Estimated as sample count divided by the onset span of the run.
        onset_span = gsr_data_merged['onset'].iloc[-1] - gsr_data_merged['onset'].iloc[0]
        sampling_rate = len(gsr_data_merged['onset']) / onset_span

        for stim_file in gsr_data_merged['stim_file'].unique():
            try:
                gsr_trial = extract_segment(gsr_data_merged, stim_file, "trial")
                gsr_trial = gsr_trial.reset_index(drop=True)

                sampling_rate = int(sampling_rate)
                rawGSRSignal = np.array(gsr_trial[column_to_check])

                # NOTE(review): the helper functions in this cell have read a
                # module-level variable named `result`; keep this name unless
                # they are confirmed to use only their arguments.
                data, result = nk.eda_process(nk.standardize(rawGSRSignal), sampling_rate=sampling_rate,method='neurokit')
                tonic = data["EDA_Tonic"]
                phasic = data["EDA_Phasic"]

                label_line = labels[labels['stim_file'] == stim_file]
                metrics = calculate_gsr_metrics_with_dynamic_range(result,gsr_trial['flag'], sampling_rate,start_delay=0,dwel_time=10)
                metrics['user'] = user
                metrics['run'] = run
                metrics['stim_file'] = stim_file
                metrics['trial'] = label_line['trial'].values[0]
                metrics['stim_emo'] = label_line['trial_type'].values[0]
                metrics['preceived_arousal'] = label_line['p_emotion_a'].values[0]
                metrics['preceived_valance'] = label_line['p_emotion_v'].values[0]
                metrics['felt_arousal'] = label_line['f_emotion_a'].values[0]
                metrics['felt_valance'] = label_line['f_emotion_v'].values[0]
                metrics['openness'] = openness
                metrics['conscientiousness'] = conscientiousness
                metrics['extraversion'] = extraversion
                metrics['agreeableness'] = agreeableness
                metrics['neuroticism'] = neuroticism
                metrics_all.append(metrics)
            except Exception:
                print(stim_file, user, run)
                # f-string: stim_file is NaN (a float) for samples that
                # precede the first event, and NaN + str raises TypeError.
                missing.append(f"{stim_file}{user}{run}")
gsr_df = pd.DataFrame(metrics_all)

In [4]:
# The personality columns are dropped from the eye and AU frames; gsr_df
# keeps its own copy, so they appear once in the merged result.
personality_columns = ['openness', 'conscientiousness', 'extraversion',
                       'agreeableness', 'neuroticism']
eye_df_pure = eye_df.drop(columns=personality_columns)
au_df_pure = au_df.drop(columns=personality_columns)

rating_keys = ['stim_emo', 'preceived_arousal', 'preceived_valance',
               'felt_arousal', 'felt_valance']
eye_au_keys = ['user', 'run', 'stim_file', 'trial'] + rating_keys
# NOTE(review): 'trial' is not part of the keys of the second merge -
# confirm whether that is intended.
gsr_keys = ['user', 'run', 'stim_file'] + rating_keys

eye_au_df = pd.merge(eye_df_pure, au_df_pure, on=eye_au_keys, how='inner')
df_merge = pd.merge(eye_au_df, gsr_df, on=gsr_keys, how='inner')

# Code Overview

This code is a comprehensive data processing pipeline that extracts and processes multimodal data (Action Units, Eye Tracking, and GSR) for a set of participants. The processing is modularized into helper functions and dedicated processing functions for each modality, ultimately merging the results into a single DataFrame for further analysis.

---

## Structure and Components

### 1. Helper Functions

- **get_personality_metrics**  
  Extracts personality metrics (openness, conscientiousness, extraversion, agreeableness, neuroticism) from the participant data.

- **merge_data_with_events**  
  Merges any given data with corresponding event information using an as-of merge based on the 'onset' timestamp.

- **extract_segment_single_flag & extract_segment_two_flags**  
  Extracts segments from the data based on specified flag criteria:
  - **Single Flag Version:** Extracts a segment starting from a given `stim_file`, summing durations until the flag `"trial"` is encountered.
  - **Two Flags Version:** Extracts a segment using both a starting flag and an ending flag, summing durations accordingly.

- **GSR-Related Helpers:**  
  - **index_finder:** Identifies the start and end indices within the GSR data based on SCR onsets.
  - **generate_gsr_report:** Calculates summary statistics (mean, median, min, max, STD) for various GSR metrics such as SCR Onsets, Amplitude, Height, Rise Time, Recovery, and Recovery Time.
  - **calculate_gsr_metrics_with_dynamic_range:** Processes GSR data by dynamically determining the time window based on provided flags and generating a corresponding report.

---

### 2. Processing Functions

- **process_au_data**  
  Processes Action Unit (AU) data:
  - Reads the necessary JSON and TSV files.
  - Converts specified AU columns to numeric.
  - Merges the AU data with event data.
  - Extracts the relevant segment for each stimulus and compiles the results along with participant personality metrics.

- **process_eye_data**  
  Processes Eye Tracking data:
  - Reads gaze and pupil data files along with their JSON metadata.
  - Aligns and merges the two data sources.
  - Merges the resulting eye data with event information.
  - Extracts data for each stimulus and attaches corresponding personality metrics.

- **process_gsr_data**  
  Processes GSR data:
  - Reads the GSR data and event files.
  - Merges the GSR data with events.
  - Uses NeuroKit2 to process the standardized GSR signal.
  - Extracts the relevant segment using flag-based extraction and computes dynamic GSR metrics.
  - Compiles these metrics with personality and trial-related information.

---

### 3. Main Function

- **main**  
  The central function that orchestrates the entire pipeline:
  - Loads the participants file.
  - Calls each of the modality-specific processing functions (AU, Eye, and GSR).
  - Merges the processed data frames on common columns to create a unified dataset.
  - Returns the merged DataFrame along with lists of any missing files or processing errors.

---

## Execution Flow

1. **Loading Data:**  
   Participant data is loaded from a TSV file.

2. **Processing Modalities:**  
   - AU data, Eye Tracking data, and GSR data are processed independently using their respective functions.
   - Each function reads the necessary files, performs data cleaning, merges with event data, and extracts segments relevant to each trial.

3. **Merging Results:**  
   The final step merges the AU and Eye data frames (with their personality columns dropped) with the GSR data frame on shared identifiers (e.g., user, run, stim_file, and the trial ratings).

4. **Error Handling:**  
   Each processing function uses `try/except` blocks to handle missing or problematic files, logging any issues in a dedicated list for review.

---

## Customization and Adaptability

- **Modality-Specific Settings:**  
  Parameters such as the list of AU columns or GSR analysis thresholds can be modified to suit different datasets or experimental requirements.

- **File Paths:**  
  The code builds file paths from formatted strings, making it easy to adapt for different datasets or directory layouts.


In [10]:
import os
import json
import random
import numpy as np
import pandas as pd
import neurokit2 as nk

# =============================================================================
# Helper Functions
# =============================================================================
def get_personality_metrics(user_df):
    """Return the five personality scores of a participant as a dict.

    Reads the first value of the 'O', 'C', 'E', 'A' and 'N' columns of
    ``user_df`` and returns them under the keys 'openness',
    'conscientiousness', 'extraversion', 'agreeableness' and 'neuroticism',
    in that order.
    """
    trait_columns = (
        ("openness", "O"),
        ("conscientiousness", "C"),
        ("extraversion", "E"),
        ("agreeableness", "A"),
        ("neuroticism", "N"),
    )
    return {trait: user_df[column].values[0] for trait, column in trait_columns}

def merge_data_with_events(data, events_path, on='onset'):
    """Attach event columns to ``data`` with a backward as-of merge.

    The tab-separated events table is read from ``events_path``. Rows with a
    missing ``on`` value are dropped from both tables and both are sorted by
    ``on``. Each row of ``data`` then receives the columns of the latest
    event whose ``on`` value does not exceed its own.
    """
    event_table = pd.read_csv(events_path, sep='\t')
    event_table = event_table.dropna(subset=[on])
    samples = data.dropna(subset=[on])
    samples = samples.sort_values(on)
    event_table = event_table.sort_values(on)
    return pd.merge_asof(samples, event_table, on=on, direction='backward')

def extract_segment_single_flag(dataframe, stim_file, flag):
    """
    Slice ``dataframe`` to the time window of one stimulus.

    The window starts at the onset of the first row matching ``stim_file``.
    Its length is the sum of the duration found on the first row of each
    distinct flag value of that stimulus, visited in order of appearance and
    stopping after "trial". The ``flag`` argument is accepted but its value
    is not used. Returns None when ``stim_file`` does not occur.
    """
    stim_rows = dataframe[dataframe['stim_file'] == stim_file]
    if stim_rows.empty:
        return None

    window_start = stim_rows.iloc[0]['onset']

    window_length = 0
    for label in stim_rows['flag'].unique():
        first_row = stim_rows[stim_rows['flag'] == label].iloc[0]
        window_length += first_row['duration']
        if label == "trial":
            break

    window_end = window_start + window_length

    # The selection runs over the full frame, not just the stim_file rows.
    not_before = dataframe['onset'] >= window_start
    not_after = dataframe['onset'] <= window_end
    return dataframe[not_before & not_after]

def extract_segment_two_flags(dataframe, stim_file, start_flag, end_flag):
    """
    Extract the rows between a start flag and an end flag of one stimulus.

    The window opens at the onset of the first row of ``stim_file`` carrying
    ``start_flag``. Its length is the sum of the first ``duration`` value of
    each distinct flag from ``start_flag`` up to and including ``end_flag``,
    in order of appearance. Flags that occur before ``start_flag`` do not
    contribute. If ``end_flag`` never appears after ``start_flag``, every flag
    from ``start_flag`` onward is summed.

    Returns:
        The rows of ``dataframe`` whose ``onset`` lies in the inclusive window,
        or None when ``stim_file`` has no rows or ``start_flag`` is absent.
    """
    filtered_df = dataframe[dataframe['stim_file'] == stim_file]
    if filtered_df.empty:
        return None

    start_rows = filtered_df[filtered_df['flag'] == start_flag]
    if start_rows.empty:
        # Same "nothing to extract" signal as an unknown stim_file.
        return None
    initial_timestamp = start_rows['onset'].iloc[0]

    duration = 0
    started = False
    for f in filtered_df['flag'].unique():
        if not started:
            if f != start_flag:
                # Still before the start flag: its duration is outside the window.
                continue
            started = True
        duration += filtered_df.loc[filtered_df['flag'] == f, 'duration'].iloc[0]
        if f == end_flag:
            break

    end_timestamp = initial_timestamp + duration
    result_df = dataframe[(dataframe['onset'] >= initial_timestamp) &
                          (dataframe['onset'] <= end_timestamp)]
    return result_df

# --- GSR Helper Functions ---
def index_finder(data, start, end):
    """Locate slice bounds within ``data['SCR_Onsets']``.

    Returns ``(start_ind, end_ind)`` where ``start_ind`` is the position of the
    last onset smaller than ``start`` (0 if there is none) and ``end_ind`` is
    the position of the last onset smaller than ``end`` (the number of onsets
    if there is none). A missing ``SCR_Onsets`` key is treated as no onsets.
    """
    onsets = data.get('SCR_Onsets', [])
    below_start = [pos for pos, onset in enumerate(onsets) if onset < start]
    below_end = [pos for pos, onset in enumerate(onsets) if onset < end]
    start_ind = below_start[-1] if below_start else 0
    end_ind = below_end[-1] if below_end else len(onsets)
    return start_ind, end_ind

def generate_gsr_report(results, start_ind, end_ind):
    """
    Summarise SCR features from ``results`` into a flat report dict.

    ``SCR_Onsets`` is first rescaled to ``(onset - start_ind) / (end_ind -
    start_ind)`` and summarised with the plain NumPy reducers. The remaining
    SCR features are summarised with the NaN-ignoring reducers. The report
    also carries the number of onsets and the sampling rate.
    """
    window_span = end_ind - start_ind
    relative_onsets = (results['SCR_Onsets'] - start_ind) / window_span

    plain_reducers = (
        ("mean", np.mean),
        ("median", np.median),
        ("min", np.min),
        ("max", np.max),
        ("STD", np.std),
    )
    nan_reducers = (
        ("mean", np.nanmean),
        ("median", np.nanmedian),
        ("min", np.nanmin),
        ("max", np.nanmax),
        ("STD", np.nanstd),
    )
    nan_aware_features = (
        "SCR_Amplitude",
        "SCR_Height",
        "SCR_RiseTime",
        "SCR_Recovery",
        "SCR_RecoveryTime",
    )

    # Insertion order fixes the column order of any DataFrame built from this.
    report = {"Number of Peaks": len(relative_onsets)}
    for label, reducer in plain_reducers:
        report[f"SCR_Onsets {label}"] = reducer(relative_onsets)
    for feature in nan_aware_features:
        for label, reducer in nan_reducers:
            report[f"{feature} {label}"] = reducer(results[feature])
    report["Sampling Rate"] = results['sampling_rate']
    return report

def calculate_gsr_metrics_with_dynamic_range(data, flags, sampling_rate, start_delay=2, dwel_time=5):
    """
    Build a GSR report restricted to a flag-derived window.

    The window starts ``start_delay`` seconds (converted to samples) after the
    index label of the first "video" flag and ends ``dwel_time`` seconds after
    the index label of the first "last_frame_video" flag. Every NumPy array in
    ``data`` is sliced to the bounds returned by ``index_finder`` and the
    result is summarised by ``generate_gsr_report``.

    If the window cannot be computed, a dict with a single "error" entry is
    returned instead of a report.
    """
    rate = int(sampling_rate)
    try:
        # Index labels are used as sample positions, so ``flags`` is expected
        # to carry a positional (0..n-1) index.
        delay_samples = int(start_delay * rate)
        video_label = flags.index[flags == "video"][0]
        window_start = delay_samples + int(video_label)
        last_frame_label = flags.index[flags == "last_frame_video"][0]
        window_end = int(last_frame_label) + int(dwel_time * rate)
    except Exception as e:
        return {"error": f"Could not find required flags: {str(e)}"}

    first_peak, last_peak = index_finder(data, window_start, window_end)
    windowed = {}
    for name, series in data.items():
        if isinstance(series, np.ndarray):
            # The upper bound is inclusive, hence the +1.
            windowed[name] = series[int(first_peak):int(last_peak) + 1]
    windowed['sampling_rate'] = rate
    return generate_gsr_report(windowed, window_start, window_end)

# =============================================================================
# Processing Functions for Each Modality
# =============================================================================
def process_au_data(participants_df, au_columns_subset):
    """Process Action Unit (AU) data for all participants and runs.

    For every participant and each of the four runs, the videostream recording
    is loaded (column names come from its JSON sidecar), the columns in
    ``au_columns_subset`` are coerced to numeric, onsets are re-based to the
    first sample, and events are attached with ``merge_data_with_events``.
    One record is then produced per distinct ``stim_file``.

    Args:
        participants_df: Table with ``participant_id`` and the personality
            columns read by ``get_personality_metrics``.
        au_columns_subset: Names of the recording columns to coerce to numeric.

    Returns:
        A ``(DataFrame, list)`` pair. The DataFrame holds one row per
        stimulus: the AU rows for that stimulus under ``"AUs"``, the
        identifiers, the labels from the behavioural file, and the personality
        scores. The list holds identifiers of runs whose sidecar is missing
        (``"{user}_{run}_au"``) and of stimuli that failed
        (``"{stim_file}_{user}_{run}_au"``).
    """
    metrics_all = []
    missing = []
    for user in participants_df['participant_id'].unique():
        user_df = participants_df[participants_df['participant_id'] == user]
        personality = get_personality_metrics(user_df)
        for run in range(4):
            print(user, run)
            json_path = f"{user}/beh/{user}_task-fer_run-{run}_recording-videostream_physio.json"
            if not os.path.exists(json_path):
                missing.append(f"{user}_{run}_au")
                continue

            au_path = f"{user}/beh/{user}_task-fer_run-{run}_recording-videostream_physio.tsv.gz"
            events_path = f"{user}/{user}_task-fer_run-{run}_events.tsv"
            labels_path = f"{user}/beh/{user}_task-fer_run-{run}_beh.tsv"

            with open(json_path, 'r') as file:
                json_data = json.load(file)
            headers = json_data['Columns']
            au_data = pd.read_csv(au_path, sep='\t', compression='gzip', names=headers)

            # Convert selected columns to numeric
            for tag in au_columns_subset:
                au_data[tag] = pd.to_numeric(au_data[tag], errors='coerce')
            au_data['onset'] = au_data['onset'] - au_data['onset'].values[0]
            au_data = merge_data_with_events(au_data, events_path, on='onset')

            # Keep only the desired columns
            au_list = ['onset', 'confidence', 'success', 'AU01_r', 'AU02_r', 'AU04_r',
                       'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r',
                       'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r',
                       'duration', 'trial_type', 'flag', 'subject', 'run',
                       'trial', 'local_time', 'stim_file']
            au_data = au_data[au_list]

            # Loaded on first use so a missing labels file is still reported
            # per stimulus, and parsed only once per run when it is present.
            labels = None
            for stim_file in au_data['stim_file'].unique():
                # Errors are isolated per stimulus (as in the eye and GSR
                # processors) so one bad stimulus does not drop the rest of
                # the run.
                try:
                    if labels is None:
                        labels = pd.read_csv(labels_path, sep='\t')
                    metrics = {}
                    # Here we use the entire segment for the given stim_file
                    au_trial = au_data[au_data['stim_file'] == stim_file].reset_index(drop=True)
                    label_line = labels[labels['stim_file'] == stim_file]

                    metrics["AUs"] = au_trial
                    metrics.update({
                        'user': user,
                        'run': run,
                        'stim_file': stim_file,
                        'trial': label_line['trial'].values[0],
                        'stim_emo': label_line['trial_type'].values[0],
                        'preceived_arousal': label_line['p_emotion_a'].values[0],
                        'preceived_valance': label_line['p_emotion_v'].values[0],
                        'felt_arousal': label_line['f_emotion_a'].values[0],
                        'felt_valance': label_line['f_emotion_v'].values[0],
                    })
                    metrics.update(personality)
                    metrics_all.append(metrics)
                except Exception as e:
                    print(stim_file, user, run, e)
                    missing.append(f"{stim_file}_{user}_{run}_au")
    return pd.DataFrame(metrics_all), missing

def process_eye_data(participants_df, root_path, headers):
    """Process Eye Tracking data for all participants and runs.

    For every participant and each of the four runs, the gaze and pupil
    recordings are loaded with the column names from their own JSON sidecars,
    re-based to their first onset, joined to each other and to the events
    table with backward ``merge_asof``, and renamed to ``headers``. One record
    is then produced per distinct ``stim_file``.

    Args:
        participants_df: Table with ``participant_id`` and the personality
            columns read by ``get_personality_metrics``.
        root_path: Directory that contains the per-participant folders.
        headers: Final column names for the merged table; the length must
            match the number of merged columns.

    Returns:
        A ``(DataFrame, list)`` pair. The DataFrame holds one row per
        stimulus: the eye rows for that stimulus under ``"Eye_Data"``, the
        identifiers, the labels from the behavioural file, and the personality
        scores. The list holds identifiers of runs with a missing sidecar
        (``"{user}_{run}_eye"``) and of stimuli that failed
        (``"{stim_file}_{user}_{run}_eye"``).
    """
    metrics_all = []
    missing = []
    for user in participants_df['participant_id'].unique():
        user_df = participants_df[participants_df['participant_id'] == user]
        personality = get_personality_metrics(user_df)
        for run in range(4):
            print(user, run)
            gaze_path = os.path.join(root_path, f"{user}/beh/{user}_task-fer_run-{run}_recording-gaze_physio.tsv.gz")
            pupil_path = os.path.join(root_path, f"{user}/beh/{user}_task-fer_run-{run}_recording-pupil_physio.tsv.gz")
            gaze_json_path = os.path.join(root_path, f"{user}/beh/{user}_task-fer_run-{run}_recording-gaze_physio.json")
            pupil_json_path = os.path.join(root_path, f"{user}/beh/{user}_task-fer_run-{run}_recording-pupil_physio.json")
            events_path = os.path.join(root_path, f"{user}/{user}_task-fer_run-{run}_events.tsv")
            labels_path = os.path.join(root_path, f"{user}/beh/{user}_task-fer_run-{run}_beh.tsv")

            if not (os.path.exists(gaze_json_path) and os.path.exists(pupil_json_path)):
                missing.append(f"{user}_{run}_eye")
                continue

            with open(pupil_json_path, 'r') as file:
                pupil_json_data = json.load(file)
            pupil_headers = pupil_json_data['Columns']
            with open(gaze_json_path, 'r') as file:
                gaze_json_data = json.load(file)
            gaze_headers = gaze_json_data['Columns']

            # Each recording is read with the column names from its own
            # sidecar. Swapping them mislabels (and, when the two column
            # counts differ, misaligns) the columns.
            gaze_data = pd.read_csv(gaze_path, sep='\t', compression='gzip', names=gaze_headers)
            # TIME is dropped from the pupil table so only one TIME column
            # remains after the merge.
            pupil_data = pd.read_csv(pupil_path, sep='\t', compression='gzip', names=pupil_headers).drop(columns=["TIME"])

            gaze_data['onset'] = gaze_data['onset'] - gaze_data['onset'].values[0]
            pupil_data['onset'] = pupil_data['onset'] - pupil_data['onset'].values[0]

            gaze_data = gaze_data.dropna(subset=['onset']).sort_values('onset')
            pupil_data = pupil_data.dropna(subset=['onset']).sort_values('onset')

            eye_data_merged = pd.merge_asof(gaze_data, pupil_data, on='onset', direction='backward')
            # merge_asof requires both sides to be sorted on the key.
            events = pd.read_csv(events_path, sep='\t').dropna(subset=['onset']).sort_values('onset')
            labels = pd.read_csv(labels_path, sep='\t')

            eye_data_merged = pd.merge_asof(eye_data_merged, events, on='onset', direction='backward')
            eye_data_merged.columns = headers

            for stim_file in eye_data_merged['stim_file'].unique():
                try:
                    metrics = {}
                    # The per-stimulus slice inherits the renamed columns.
                    eye_trial = eye_data_merged[eye_data_merged['stim_file'] == stim_file].reset_index(drop=True)
                    label_line = labels[labels['stim_file'] == stim_file]

                    metrics["Eye_Data"] = eye_trial
                    metrics.update({
                        'user': user,
                        'run': run,
                        'stim_file': stim_file,
                        'trial': label_line['trial'].values[0],
                        'stim_emo': label_line['trial_type'].values[0],
                        'preceived_arousal': label_line['p_emotion_a'].values[0],
                        'preceived_valance': label_line['p_emotion_v'].values[0],
                        'felt_arousal': label_line['f_emotion_a'].values[0],
                        'felt_valance': label_line['f_emotion_v'].values[0],
                    })
                    metrics.update(personality)
                    metrics_all.append(metrics)
                except Exception as e:
                    print(stim_file, user, run, e)
                    missing.append(f"{stim_file}_{user}_{run}_eye")
    return pd.DataFrame(metrics_all), missing

def process_gsr_data(participants_df, column_to_check):
    """Process GSR data for all participants and runs.

    For every participant and each of the four runs, the GSR recording is
    loaded (column names come from its JSON sidecar), joined to the events
    table with a backward ``merge_asof``, and an overall sampling rate is
    estimated as sample count divided by the onset span. For each distinct
    ``stim_file`` the segment returned by ``extract_segment_single_flag`` is
    standardised, run through ``nk.eda_process``, and summarised by
    ``calculate_gsr_metrics_with_dynamic_range``.

    Args:
        participants_df: Table with ``participant_id`` and the personality
            columns read by ``get_personality_metrics``.
        column_to_check: Name of the recording column holding the raw signal.

    Returns:
        A ``(DataFrame, list)`` pair. The DataFrame holds one row per
        stimulus with the GSR summary, the identifiers, the labels from the
        behavioural file, and the personality scores. The list holds
        identifiers of runs whose sidecar is missing (``"{user}_{run}_gsr"``)
        and of stimuli that failed (``"{stim_file}_{user}_{run}_gsr"``).
    """
    metrics_all = []
    missing = []
    for user in participants_df['participant_id'].unique():
        print(user)
        user_df = participants_df[participants_df['participant_id'] == user]
        personality = get_personality_metrics(user_df)
        for run in range(4):
            json_path = f"{user}/beh/{user}_task-fer_run-{run}_recording-gsr_physio.json"
            if not os.path.exists(json_path):
                missing.append(f"{user}_{run}_gsr")
                continue

            gsr_path = f"{user}/beh/{user}_task-fer_run-{run}_recording-gsr_physio.tsv.gz"
            events_path = f"{user}/{user}_task-fer_run-{run}_events.tsv"
            labels_path = f"{user}/beh/{user}_task-fer_run-{run}_beh.tsv"

            with open(json_path, 'r') as file:
                json_data = json.load(file)
            headers = json_data['Columns']
            gsr_data = pd.read_csv(gsr_path, sep='\t', compression='gzip', names=headers)
            gsr_data['onset'] = pd.to_numeric(gsr_data['onset'], errors='coerce')
            gsr_data = gsr_data.dropna(subset=['onset']).sort_values('onset')

            events = pd.read_csv(events_path, sep='\t').dropna(subset=['onset']).sort_values('onset')
            gsr_data_merged = pd.merge_asof(gsr_data, events, on='onset', direction='backward')
            sampling_rate = len(gsr_data_merged['onset']) / (
                gsr_data_merged['onset'].iloc[-1] - gsr_data_merged['onset'].iloc[0]
            )

            # Loaded on first use so a missing labels file is still reported
            # per stimulus, and parsed only once per run when it is present.
            labels = None
            for stim_file in gsr_data_merged['stim_file'].unique():
                try:
                    gsr_trial = extract_segment_single_flag(gsr_data_merged, stim_file, "trial")
                    if gsr_trial is None:
                        # Happens when no row matches stim_file; recorded as
                        # missing with an explicit reason.
                        raise ValueError("no rows found for this stim_file")
                    gsr_trial = gsr_trial.reset_index(drop=True)
                    # Kept inside the try so a degenerate rate (e.g. a
                    # zero-length onset span) is reported per stimulus.
                    sampling_rate_int = int(sampling_rate)
                    rawGSRSignal = np.array(gsr_trial[column_to_check])
                    # Only the info dict is used; the processed signals frame
                    # is not needed here.
                    _, result = nk.eda_process(nk.standardize(rawGSRSignal),
                                               sampling_rate=sampling_rate_int,
                                               method='neurokit')

                    if labels is None:
                        labels = pd.read_csv(labels_path, sep='\t')
                    label_line = labels[labels['stim_file'] == stim_file]

                    # NOTE(review): when the required flags are absent this
                    # returns an {"error": ...} dict, which is still appended
                    # below as a row - confirm that is intended.
                    metrics = calculate_gsr_metrics_with_dynamic_range(result, gsr_trial['flag'],
                                                                       sampling_rate_int,
                                                                       start_delay=0, dwel_time=10)
                    metrics.update({
                        'user': user,
                        'run': run,
                        'stim_file': stim_file,
                        'trial': label_line['trial'].values[0],
                        'stim_emo': label_line['trial_type'].values[0],
                        'preceived_arousal': label_line['p_emotion_a'].values[0],
                        'preceived_valance': label_line['p_emotion_v'].values[0],
                        'felt_arousal': label_line['f_emotion_a'].values[0],
                        'felt_valance': label_line['f_emotion_v'].values[0],
                    })
                    metrics.update(personality)
                    metrics_all.append(metrics)
                except Exception as e:
                    print(stim_file, user, run, e)
                    missing.append(f"{stim_file}_{user}_{run}_gsr")
    return pd.DataFrame(metrics_all), missing

# =============================================================================
# Main Function
# =============================================================================
def main(root_path="/Users/meis/Documents/Phd/fer_BIDS/"):
    """Run the AU, eye-tracking and GSR pipelines and merge their results.

    Args:
        root_path: Directory containing the per-participant folders, passed to
            ``process_eye_data``. The default is the location that used to be
            hard-coded here. ``participants.tsv`` and the AU and GSR files are
            still resolved relative to the current working directory.

    Returns:
        ``(merged_df, missing_au, missing_eye, missing_gsr)``: the inner join
        of the three per-stimulus tables, and the list of missing or failed
        items reported by each pipeline.
    """
    # Load participants file
    participants_df = pd.read_csv("participants.tsv", sep="\t")

    # --- Process AU Data ---
    au_columns_subset = ['onset', 'confidence', 'success', 'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r',
                         'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r',
                         'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r']
    au_df, missing_au = process_au_data(participants_df, au_columns_subset)

    # --- Process Eye Data ---
    eye_headers = ['onset', 'TIME', 'FPOGX', 'FPOGY', 'FPOGS', 'FPOGD', 'FPOGID', 'FPOGV',
                   'LPOGX', 'LPOGY', 'LPOGV', 'RPOGX', 'RPOGY', 'RPOGV', 'BPOGX', 'BPOGY',
                   'BPOGV', 'LPCX', 'LPCY', 'LPD', 'LPS', 'LPV', 'RPCX', 'RPCY', 'RPD',
                   'RPS', 'RPV', 'LEYEX', 'LEYEY', 'LEYEZ', 'LPUPILD', 'LPUPILV', 'REYEX',
                   'REYEY', 'REYEZ', 'RPUPILD', 'RPUPILV', 'duration', 'trial_type',
                   'flag', 'subject', 'run', 'trial', 'local_time', 'stim_file']
    eye_df, missing_eye = process_eye_data(participants_df, root_path, eye_headers)

    # --- Process GSR Data ---
    column_to_check = "GSR_Conductance_cal"
    gsr_df, missing_gsr = process_gsr_data(participants_df, column_to_check)

    # --- Merge DataFrames ---
    # Personality columns are dropped from the eye and AU tables so that only
    # the GSR table contributes them to the merged result.
    personality_columns = ['openness', 'conscientiousness', 'extraversion',
                           'agreeableness', 'neuroticism']
    eye_df_pure = eye_df.drop(columns=personality_columns)
    au_df_pure = au_df.drop(columns=personality_columns)

    eye_au_df = pd.merge(
        eye_df_pure, au_df_pure,
        on=['user', 'run', 'stim_file', 'trial', 'stim_emo',
            'preceived_arousal', 'preceived_valance', 'felt_arousal', 'felt_valance'],
        how='inner'
    )
    # NOTE(review): 'trial' is not part of the key list below, so the merged
    # table carries 'trial_x' and 'trial_y' instead of a single 'trial'
    # column. Left unchanged to keep the output columns stable - confirm
    # whether 'trial' should be a key here too.
    merged_df = pd.merge(
        eye_au_df,
        gsr_df,
        on=['user', 'run', 'stim_file', 'stim_emo', 'preceived_arousal',
            'preceived_valance', 'felt_arousal', 'felt_valance'],
        how='inner'
    )

    return merged_df, missing_au, missing_eye, missing_gsr

# Entry point: run the full pipeline only when this code is executed directly.
if __name__ == "__main__":
    # merged_df is the combined per-stimulus table; each missing_* list names
    # the runs/stimuli that the corresponding pipeline could not process.
    merged_df, missing_au, missing_eye, missing_gsr = main()
    # You can now save or further process merged_df, and inspect missing_* lists if needed.


sub-agk 0
sub-agk 1
sub-agk 2
sub-agk 3
sub-bxn 0
sub-bxn 1
sub-bxn 2
sub-bxn 3
sub-ksu 0
sub-ksu 1
sub-ksu 2
sub-ksu 3
sub-bdn 0
sub-bdn 1
sub-bdn 2
sub-bdn 3
sub-tqu 0
sub-tqu 1
sub-tqu 2
sub-tqu 3
sub-bcz 0
sub-bcz 1
sub-bcz 2
sub-bcz 3
sub-ehk 0
sub-ehk 1
sub-ehk 2
sub-ehk 3
sub-mdt 0
sub-mdt 1
sub-mdt 2
sub-mdt 3
sub-lrc 0
sub-lrc 1
sub-lrc 2
sub-lrc 3
sub-cxy 0
sub-cxy 1
sub-cxy 2
sub-cxy 3
sub-ubc 0
sub-ubc 1
sub-ubc 2
sub-ubc 3
sub-nah 0
sub-nah 1
sub-nah 2
sub-nah 3
sub-dkf 0
sub-dkf 1
sub-dkf 2
sub-dkf 3
sub-xzc 0
sub-xzc 1
sub-xzc 2
sub-xzc 3
sub-fbj 0
sub-fbj 1
sub-fbj 2
sub-fbj 3
sub-yel 0
sub-yel 1
sub-yel 2
sub-yel 3
sub-tao 0
sub-tao 1
sub-tao 2
sub-tao 3
sub-tag 0
sub-tag 1
sub-tag 2
sub-tag 3
sub-ssn 0
sub-ssn 1
sub-ssn 2
sub-ssn 3
sub-hsc 0
sub-hsc 1
sub-hsc 2
sub-hsc 3
sub-acl 0
sub-acl 1
sub-acl 2
sub-acl 3
sub-ors 0
sub-ors 1
sub-ors 2
sub-ors 3
sub-rit 0
sub-rit 1
sub-rit 2
sub-rit 3
sub-zig 0
sub-zig 1
sub-zig 2
sub-zig 3
sub-oos 0
sub-oos 1
sub-oos 2
sub-oos 3


  au_data = pd.read_csv(au_path, sep='\t', compression='gzip', names=headers)


sub-yarq 2
sub-yarq 3
sub-ziym 0
sub-ziym 1
sub-ziym 2
sub-ziym 3
sub-kklo 0
sub-kklo 1
sub-kklo 2
sub-kklo 3
data/fea_ieo/1091_ieo_fea_md.mp4 sub-kklo 3 index 0 is out of bounds for axis 0 with size 0
sub-scuy 0
sub-scuy 1
sub-scuy 2
sub-scuy 3
sub-blgv 0
sub-blgv 1
sub-blgv 2
sub-blgv 3
sub-almn 0
sub-almn 1
sub-almn 2
sub-almn 3
sub-pkvd 0
sub-pkvd 1
sub-pkvd 2
sub-pkvd 3
sub-srfl 0
sub-srfl 1
sub-srfl 2
sub-srfl 3
sub-nvio 0
sub-nvio 1
sub-nvio 2
sub-nvio 3
sub-rokb 0
sub-rokb 1
sub-rokb 2
sub-rokb 3
sub-qwrt 0
sub-qwrt 1
sub-qwrt 2
sub-qwrt 3
sub-irma 0
sub-irma 1
sub-irma 2
sub-irma 3
sub-prvi 0
sub-prvi 1
sub-prvi 2
sub-prvi 3
sub-mlor 0
sub-mlor 1
sub-mlor 2
sub-mlor 3
sub-tico 0
sub-tico 1
sub-tico 2
sub-tico 3
sub-flrn 0
sub-flrn 1
sub-flrn 2
sub-flrn 3
sub-otsi 0
sub-otsi 1
sub-otsi 2
sub-otsi 3
sub-afri 0
sub-afri 1
sub-afri 2
sub-afri 3
sub-pwkb 0
sub-pwkb 1
sub-pwkb 2
sub-pwkb 3
sub-uwdm 0
sub-uwdm 1
sub-uwdm 2
sub-uwdm 3
sub-ymjj 0
sub-ymjj 1
sub-ymjj 2
sub-ymjj 3
sub-pt

  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

sub-bxn
sub-ksu


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-bdn


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean

sub-tqu


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-bcz


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-ehk


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-mdt
sub-lrc
sub-cxy
sub-ubc
sub-nah


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean(results['SCR_RiseTime']),
  "SCR_RiseTime median": np.nanmedian(results['SCR_RiseTime']),
  "SCR_RiseTime min": np.nanmin(results['SCR_RiseTime']),
  "SCR_RiseTime max": np.nanmax(results['SCR_RiseTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_Reco

sub-dkf
sub-xzc


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-fbj
sub-yel


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-tao
sub-tag


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-ssn
sub-hsc
sub-acl


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean(results['SCR_RiseTime']),
  "SCR_RiseTime median": np.nanmedian(results['SCR_RiseTime']),
  "SCR_RiseTime min": np.nanmin(results['SCR_RiseTime']),
  "SCR_RiseTime max": np.nanmax(results['SCR_RiseTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_Reco

sub-ors
sub-rit
sub-zig
sub-oos
sub-jgs
sub-zry


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-pko


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])


data/neu_its/1027_its_neu_xx.mp4 sub-pko 3 index 0 is out of bounds for axis 0 with size 0
sub-rhn
sub-den
data/neu_tie/1036_tie_neu_xx.mp4 sub-den 1 The length of the input vector x must be greater than padlen, which is 15.
sub-dip
data/sad_its/1018_its_sad_xx.mp4 sub-dip 2 The length of the input vector x must be greater than padlen, which is 15.
sub-adr


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-pic
sub-mal


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-jms


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

sub-pli
sub-eop


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean(results['SCR_RiseTime']),
  "SCR_RiseTime median": np.nanmedian(results['SCR_RiseTime']),
  "SCR_RiseTime min": np.nanmin(results['SCR_RiseTime']),
  "SCR_RiseTime max": np.nanmax(results['SCR_RiseTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_Reco

sub-fcd
sub-etr
sub-mdl
sub-k3d
sub-m3p


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-xx2
sub-qbf2
sub-cim4


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

data/hap_ieo/1074_ieo_hap_hi.mp4 sub-cim4 2 index 0 is out of bounds for axis 0 with size 0
sub-m9g


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

sub-ywh


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

sub-aerj
sub-bcxz
sub-cztf
sub-yarq
sub-ziym
sub-kklo
data/fea_ieo/1091_ieo_fea_md.mp4 sub-kklo 3 index 0 is out of bounds for axis 0 with size 0
sub-scuy
sub-blgv
sub-almn


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-pkvd
sub-srfl
sub-nvio
sub-rokb
sub-qwrt
sub-irma


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean(results['SCR_RiseTime']),
  "SCR_RiseTime median": np.nanmedian(results['SCR_RiseTime']),
  "SCR_RiseTime min": np.nanmin(results['SCR_RiseTime']),
  "SCR_RiseTime max": np.nanmax(results['SCR_RiseTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_Reco

sub-prvi


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  "SCR_Amplitude mean": np.nanmean(results['SCR_Amplitude']),
  "SCR_Amplitude median": np.nanmedian(results['SCR_Amplitude']),
  "SCR_Amplitude min": np.nanmin(results['SCR_Amplitude']),
  "SCR_Amplitude max": np.nanmax(results['SCR_Amplitude']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RiseTime mean": np.nanmean(results['SCR_RiseTime']),
  "SCR_RiseTime median": np.nanmedian(results['SCR_RiseTime']),
  "SCR_RiseTime min": np.nanmin(results['SCR_RiseTime']),
  "SCR_RiseTime max": np.nanmax(results['SCR_RiseTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_Reco

sub-mlor
sub-tico


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),


sub-flrn
sub-otsi


  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime min": np.nanmin(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime max": np.nanmax(results['SCR_RecoveryTime']),
  "SCR_Recovery mean": np.nanmean(results['SCR_Recovery']),
  "SCR_Recovery median": np.nanmedian(results['SCR_Recovery']),
  "SCR_Recovery min": np.nanmin(results['SCR_Recovery']),
  "SCR_Recovery max": np.nanmax(results['SCR_Recovery']),
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  "SCR_RecoveryTime mean": np.nanmean(results['SCR_RecoveryTime']),
  "SCR_RecoveryTime median": np.nanmedian(results['SCR_R

sub-afri
sub-pwkb
sub-uwdm
sub-ymjj
sub-ptxm


# Multimodal Emotion Classification Pipeline

This code implements a **multimodal emotion classification pipeline** that processes **GSR (Galvanic Skin Response), Eye Tracking, and Facial Action Unit (AU) data**. It extracts features, trains machine learning models using **AutoGluon**, evaluates models with **cross-validation**, and prints performance results in a **readable table format**.

---

## 🔹 Pipeline Overview

1. **Feature Extraction**  
   - Summarizes **GSR, Eye Tracking, and AU** data into meaningful statistical features.
   - Combines selected modalities based on user preferences.
   - Extracts personality features with slight random variations.

2. **Discretizing Emotion Labels**  
   - Converts continuous emotion scores (**perceived arousal, perceived valence, felt arousal, felt valence**) into discrete classes using predefined thresholds.

3. **Model Training with AutoGluon**  
   - Trains **AutoGluon** models for each emotion category using tabular data.
   - Splits data into training/testing and selects the best-performing model.
   - Stores **F1 scores, accuracy, and model rankings** for evaluation.

4. **Cross-Validation (5-Fold Stratified K-Fold)**  
   - Performs **5-fold cross-validation** to assess model generalization.
   - Computes **per-class F1 scores and accuracy metrics** for each emotion category.
   - Within each fold, re-trains **only the best-performing non-ensemble AutoGluon model** selected for each target.

5. **Printing Results in a Readable Table**  
   - Uses **tabulate** to format results in a clean and structured way.
   - Displays **best models** along with **F1 scores and accuracy** in a grid format.

---

## 📌 **Key Functions**
| Function | Description |
|----------|------------|
| `summarize_eye_data(eye_df)` | Computes mean, std, min, max for eye-tracking features. |
| `summarize_au_data(au_df)` | Computes mean, std, min, max for AU features. |
| `extract_features(eye_df, au_df, gsr_df, use_modalities)` | Merges modalities and extracts features. |
| `discretize_labels(targets_df)` | Converts continuous emotion scores into discrete classes. |
| `train_autogluon_models(features_df, targets_df)` | Trains AutoGluon models and selects best-performing models. |
| `get_best_models(predictors)` | Retrieves the best non-ensemble models from AutoGluon. |
| `retrain_best_model_kfold(features_df, targets_df, models, n_splits=5)` | Performs **K-fold cross-validation** using only the best models. |
| `print_classification_results(cv_results, models, modalities)` | Formats and prints the classification results in a structured table. |

---

## 🚀 **Execution Steps**
1. **Load data**: Prepare `eye_df`, `au_df`, and `gsr_df` from the dataset.
2. **Extract features**:  
   ```python
   features_df, targets_df = extract_features(eye_df, au_df, gsr_df, use_modalities)
   ```


In [None]:
import os
import random
import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
import neurokit2 as nk

###############################################################################
# Step 1: Define Feature Extraction for Each Modality
###############################################################################

# Define the columns for each modality

# GSR features: one peak count, then five statistics for each SCR measure,
# named "<measure> <statistic>" and ordered measure-by-measure.
GSR_columns = ['Number of Peaks'] + [
    f'{measure} {statistic}'
    for measure in (
        'SCR_Onsets', 'SCR_Amplitude', 'SCR_Height',
        'SCR_RiseTime', 'SCR_Recovery', 'SCR_RecoveryTime',
    )
    for statistic in ('mean', 'median', 'min', 'max', 'STD')
]

# Facial action-unit columns, named "AU<two-digit number>_r".
AU_columns = [
    f'AU{number:02d}_r'
    for number in (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 45)
]

# Eye-tracking columns: X/Y pairs for the F, L and R "POG" signals, followed
# by the remaining per-sample columns.
Eye_columns = [
    f"{prefix}POG{axis}" for prefix in "FLR" for axis in "XY"
] + ["FPOGD", "LPD", "RPD", "LPUPILD", "RPUPILD"]

# Personality trait columns carried on every trial row.
Personality_columns = (
    "openness conscientiousness extraversion agreeableness neuroticism".split()
)

def summarize_eye_data(eye_df):
    """
    Collapse per-sample eye tracking data into one set of summary statistics.

    For every name in ``Eye_columns`` that is present in ``eye_df``, the
    non-missing values are reduced to mean, std, max and min. Columns that are
    absent from ``eye_df`` contribute nothing; columns with no non-missing
    values contribute zeros.

    Args:
        eye_df (DataFrame): DataFrame containing eye tracking data.

    Returns:
        dict: ``"<column>_<stat>"`` -> value, with stats in the order
        mean, std, max, min for each column.
    """
    reducers = (("mean", np.mean), ("std", np.std), ("max", np.max), ("min", np.min))

    summary = {}
    for column in Eye_columns:
        if column not in eye_df.columns:
            continue  # Only summarize columns the recording actually has

        observed = eye_df[column].dropna().values
        has_data = len(observed) > 0
        for suffix, reduce_fn in reducers:
            # An all-missing column falls back to 0 instead of NaN
            summary[f"{column}_{suffix}"] = reduce_fn(observed) if has_data else 0

    return summary

def summarize_au_data(au_df):
    """
    Collapse per-frame action unit data into one set of summary statistics.

    Each name in ``AU_columns`` that exists in ``au_df`` yields four entries
    (mean, std, max, min) computed over its non-missing values. A column with
    no non-missing values yields four zeros.

    Args:
        au_df (DataFrame): DataFrame containing action unit data.

    Returns:
        dict: ``"<column>_<stat>"`` -> value for every available AU column.
    """
    stat_names = ("mean", "std", "max", "min")
    available = [col for col in AU_columns if col in au_df.columns]

    summary = {}
    for col in available:
        samples = au_df[col].dropna().values  # Ignore missing frames
        if len(samples) > 0:
            stats = {
                "mean": np.mean(samples),
                "std": np.std(samples),
                "max": np.max(samples),
                "min": np.min(samples),
            }
        else:
            # Nothing observed for this AU: default every statistic to 0
            stats = dict.fromkeys(stat_names, 0)
        summary.update({f"{col}_{name}": stats[name] for name in stat_names})

    return summary

def extract_features(eye_df, au_df, gsr_df, use_modalities):
    """
    Extract and merge features from eye, action unit, and GSR data based on selected modalities.

    The frames of the enabled sensor modalities ("eye", "action_units", "gsr")
    are inner-joined, in that order, on the trial identifier, label and
    personality columns. Each resulting row is then turned into one feature
    dict and one target dict.

    Args:
        eye_df (DataFrame): Eye tracking data; each row's "Eye_Data" cell is
            passed to ``summarize_eye_data``.
        au_df (DataFrame): Action unit data; each row's "AUs" cell is passed
            to ``summarize_au_data``.
        gsr_df (DataFrame): GSR data (assumed pre-summarized into ``GSR_columns``).
        use_modalities (dict): Flags keyed by "eye", "action_units", "gsr" and
            "personality"; missing keys count as disabled.

    Returns:
        tuple: (DataFrame of combined features, DataFrame of target labels)

    Raises:
        ValueError: If none of "eye", "action_units" or "gsr" is enabled.
            "personality" alone is not enough because it has no source frame
            of its own.
    """
    merge_keys = [
        'user', 'run', 'stim_file', 'trial', 'stim_emo',
        'preceived_arousal', 'preceived_valance', 'felt_arousal', 'felt_valance',
        'openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism',
    ]
    target_cols = ["preceived_arousal", "preceived_valance", "felt_arousal", "felt_valance"]
    personality_cols = ["openness", "conscientiousness", "extraversion",
                        "agreeableness", "neuroticism"]

    use_eye = use_modalities.get("eye", False)
    use_au = use_modalities.get("action_units", False)
    use_gsr = use_modalities.get("gsr", False)
    use_personality = use_modalities.get("personality", False)

    # Collect the enabled sensor frames in a fixed order (eye, AU, GSR) so the
    # join order matches every combination of flags.
    selected_frames = [
        frame
        for frame, enabled in ((eye_df, use_eye), (au_df, use_au), (gsr_df, use_gsr))
        if enabled
    ]
    if not selected_frames:
        raise ValueError(
            "No data modality selected: enable at least one of "
            "'eye', 'action_units' or 'gsr' in use_modalities."
        )

    # A single modality is used as-is; several are inner-joined on the shared keys.
    df = selected_frames[0]
    for other in selected_frames[1:]:
        df = pd.merge(df, other, on=merge_keys, how='inner')

    all_features, all_targets = [], []

    # Iterate over each row to compute feature summaries and extract target labels
    for _, row in df.iterrows():
        combined_features = {}

        if use_eye:
            combined_features.update(summarize_eye_data(row["Eye_Data"]))

        if use_au:
            combined_features.update(summarize_au_data(row["AUs"]))

        if use_gsr:
            # GSR data is assumed to be pre-summarized; copy the columns over
            combined_features.update(row[GSR_columns])

        if use_personality:
            # Each trait gets independent uniform noise in [-2, 2].
            # NOTE(review): the jitter is drawn from the global `random` state,
            # so results are only reproducible if the caller seeds it.
            for trait in personality_cols:
                combined_features[trait] = row[trait] + random.uniform(-2, 2)

        all_features.append(combined_features)
        all_targets.append({col: row[col] for col in target_cols})

    return pd.DataFrame(all_features), pd.DataFrame(all_targets)

###############################################################################
# Step 2: Discretize Emotional Labels (Optimal Binning)
###############################################################################

def discretize_labels(targets_df):
    """
    Map continuous emotion scores onto three ordinal classes using fixed cut points.

    For each target column with thresholds ``(low, high)``:
    ``score <= low`` -> 0, ``low < score <= high`` -> 1, anything else -> 2.
    Because both comparisons are false for NaN, missing scores end up in class 2.

    The columns are overwritten on ``targets_df`` itself; the same object is
    returned for convenience.

    Args:
        targets_df (DataFrame): DataFrame with continuous emotion labels.

    Returns:
        DataFrame: ``targets_df`` with the four emotion columns discretized.
    """
    bin_thresholds = {
        "preceived_arousal": (4.8, 6.0),
        "preceived_valance": (3.8, 5.5),
        "felt_arousal": (4.6, 6.0),
        "felt_valance": (4.3, 5.2)
    }

    def to_class(score, low_thr, high_thr):
        """Return the class index (0, 1 or 2) for a single score."""
        if score <= low_thr:
            return 0
        if score <= high_thr:
            return 1
        return 2

    # Replace every target column by its class indices, one column at a time
    for col in bin_thresholds:
        cut_points = bin_thresholds[col]
        targets_df[col] = targets_df[col].apply(to_class, args=cut_points)

    return targets_df

###############################################################################
# Step 3: Train AutoGluon Models
###############################################################################

def train_autogluon_models(features_df, targets_df):
    """
    Train AutoGluon models for each target emotion and compute performance metrics.

    For every column of ``targets_df`` the data is split 80/20 (stratified,
    fixed seed), a ``TabularPredictor`` is fitted on the training part and
    scored on the held-out part.

    Args:
        features_df (DataFrame): Input features.
        targets_df (DataFrame): Target labels, expected to hold the discrete
            classes 0 (low), 1 (medium) and 2 (high).

    Returns:
        tuple: ``(models, cv_results, leaderboards)`` where
            - ``models`` maps target column -> fitted predictor,
            - ``cv_results`` maps target column -> formatted hold-out metrics
              ("F1 Low", "F1 Medium", "F1 High", "F1 Macro", "Accuracy");
              despite the name these come from the single 80/20 split, not
              from cross-validation,
            - ``leaderboards`` maps target column -> name of the model in the
              first leaderboard row.
    """
    # Fixed label order so index 0/1/2 of the per-class F1 array always means
    # low/medium/high, even if a class is missing from the hold-out split.
    class_labels = [0, 1, 2]

    models, cv_results, leaderboards = {}, {}, {}

    # Iterate through each target column and train a model
    for target_col in targets_df.columns:
        print(f"\nTraining AutoGluon model for {target_col}...")

        y = targets_df[target_col]
        # Split the data into training and testing sets with stratification
        X_train, X_test, y_train, y_test = train_test_split(
            features_df, y, test_size=0.2, stratify=y, random_state=123
        )

        # Combine training features with the target
        df_combined_train = X_train.copy()
        df_combined_train[target_col] = y_train

        # Train the AutoGluon predictor with best quality presets, excluding KNN
        predictor = TabularPredictor(label=target_col, eval_metric="f1_macro").fit(
            df_combined_train,
            presets="best_quality",
            excluded_model_types=['KNN'],
            ag_args_fit={"num_cpus": os.cpu_count()}
        )

        # Retrieve the leaderboard and store the name of its first (top) model
        leaderboard = predictor.leaderboard(silent=True)
        leaderboards[target_col] = leaderboard.iloc[0]["model"]

        # Predict on the test set and compute F1 and accuracy metrics
        y_pred = predictor.predict(X_test)
        f1_low, f1_medium, f1_high = f1_score(
            y_test, y_pred, labels=class_labels, average=None
        )
        f1_macro = f1_score(y_test, y_pred, labels=class_labels, average="macro")
        accuracy = accuracy_score(y_test, y_pred)

        cv_results[target_col] = {
            "F1 Low": f"{f1_low:.4f}",
            "F1 Medium": f"{f1_medium:.4f}",
            "F1 High": f"{f1_high:.4f}",
            "F1 Macro": f"{f1_macro:.4f}",
            "Accuracy": f"{accuracy:.4f}"
        }

        models[target_col] = predictor

    return models, cv_results, leaderboards

###############################################################################
# Step 4: Retrieve Best Models and Perform Cross-Validation
###############################################################################

# Maps model names as they appear on an AutoGluon leaderboard (left) to the
# model-type keys accepted in `hyperparameters` (right). Entries whose key and
# value are equal let already-resolved keys pass through unchanged.
MODEL_NAME_MAPPING = {
    "WeightedEnsemble_L2": "ENS_WEIGHTED",
    "ExtraTreesEntr": "XT",
    "ExtraTreesGini": "XT",
    "LightGBM": "GBM",
    "LightGBMXT": "GBM",
    "NeuralNetTorch": "NN_TORCH",
    "NeuralNetFastAI": "FASTAI",
    "XGBoost": "XGB",
    "CatBoost": "CAT",
    "RandomForest": "RF",
    "ExtraTrees": "XT",
    "KNeighbors": "KNN",
    "LogisticRegression": "LR",
    "Transformer": "TRANSF",
    "AG_TEXT_NN": "AG_TEXT_NN",
    "AG_IMAGE_NN": "AG_IMAGE_NN",
    "AG_AUTOMM": "AG_AUTOMM",
    "FT_TRANSFORMER": "FT_TRANSFORMER",
    "TABPFN": "TABPFN",
    "TABPFNMIX": "TABPFNMIX",
    "FASTTEXT": "FASTTEXT",
    "ENS_WEIGHTED": "ENS_WEIGHTED",
    "SIMPLE_ENS_WEIGHTED": "SIMPLE_ENS_WEIGHTED",
    "IM_RULEFIT": "IM_RULEFIT",
    "IM_GREEDYTREE": "IM_GREEDYTREE",
    "IM_FIGS": "IM_FIGS",
    "IM_HSTREE": "IM_HSTREE",
    "IM_BOOSTEDRULES": "IM_BOOSTEDRULES",
    "VW": "VW",
    "DUMMY": "DUMMY"
}


def _resolve_model_type(model_name):
    """Translate a leaderboard model name into a ``MODEL_NAME_MAPPING`` value.

    An exact key match wins. Otherwise the longest mapping key that is a
    prefix of ``model_name`` is used, so suffixed names such as
    "LightGBM_BAG_L1" or "RandomForestGini_BAG_L1" still resolve. Names that
    match nothing are returned unchanged.
    """
    if model_name in MODEL_NAME_MAPPING:
        return MODEL_NAME_MAPPING[model_name]

    prefix_keys = [key for key in MODEL_NAME_MAPPING if model_name.startswith(key)]
    if prefix_keys:
        # Longest prefix first so "LightGBMXT..." prefers "LightGBMXT" over "LightGBM"
        return MODEL_NAME_MAPPING[max(prefix_keys, key=len)]

    return model_name


def get_best_models(predictors):
    """
    Extract the best (non-ensemble) model for each emotion target from AutoGluon's leaderboard.

    The first leaderboard row whose model name does not contain
    "WeightedEnsemble" is taken as the best model, and its name is translated
    to a model-type key via ``MODEL_NAME_MAPPING`` (exact match, then longest
    prefix match, then the raw name as a last resort).

    Args:
        predictors (dict): Dictionary of trained AutoGluon predictors.

    Returns:
        dict: Mapping from target column to best model name.
    """
    best_models = {}
    for target_col, predictor in predictors.items():
        leaderboard = predictor.leaderboard(silent=True)
        # Filter out ensemble models and select the best non-ensemble model
        is_ensemble = leaderboard["model"].str.contains("WeightedEnsemble")
        best_model_name = leaderboard.loc[~is_ensemble, "model"].iloc[0]
        best_models[target_col] = _resolve_model_type(best_model_name)
    return best_models

def retrain_best_model_kfold(features_df, targets_df, models, n_splits=5):
    """
    Retrain the best AutoGluon models using K-Fold Cross-Validation.

    For each target, the best non-ensemble model type (from
    ``get_best_models``) is fitted from scratch on the training part of every
    stratified fold and scored on the validation part.

    Args:
        features_df (DataFrame): Input features.
        targets_df (DataFrame): Target labels, expected to hold the discrete
            classes 0 (low), 1 (medium) and 2 (high).
        models (dict): Dictionary of trained models (target column -> predictor).
        n_splits (int): Number of cross-validation splits.

    Returns:
        dict: target column -> {"Model", "High", "Medium", "Low", "Macro Avg",
        "Accuracy"}; every metric is a "mean ± std" string over the folds.
    """
    # Fixed label order so the per-class F1 array is always [low, medium, high],
    # even when a fold is missing one of the classes.
    class_labels = [0, 1, 2]

    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=123)
    cv_results = {}
    best_models = get_best_models(models)

    def mean_std(values):
        """Format a list of fold scores as 'mean ± std'."""
        return f"{np.mean(values):.4f} ± {np.std(values):.4f}"

    for target_col, best_model_name in best_models.items():
        print(f"\nPerforming {n_splits}-Fold Cross-Validation for {target_col} using {best_model_name}...")
        y = targets_df[target_col]
        f1_scores_per_fold = {"High": [], "Medium": [], "Low": [], "Macro Avg": []}
        accuracy_scores = []

        # Loop through each fold in cross-validation
        for train_idx, val_idx in kfold.split(features_df, y):
            X_train_fold, X_val_fold = features_df.iloc[train_idx], features_df.iloc[val_idx]
            y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]

            # Combine training features with target
            df_combined_train = X_train_fold.copy()
            df_combined_train[target_col] = y_train_fold

            # Train a predictor restricted to the best model type, default hyperparameters
            predictor = TabularPredictor(label=target_col).fit(
                df_combined_train,
                hyperparameters={best_model_name: {}},
                keep_only_best=True,
                num_bag_folds=0,  # Disable bagging
                num_stack_levels=0,  # Disable stacking
                presets="good_quality",
                ag_args_fit={"num_cpus": os.cpu_count()},
                verbosity=0
            )

            # Predict on the validation fold and calculate performance metrics
            y_val_preds = predictor.predict(X_val_fold)
            f1_low, f1_medium, f1_high = f1_score(
                y_val_fold, y_val_preds, labels=class_labels, average=None
            )
            macro_f1 = f1_score(y_val_fold, y_val_preds, labels=class_labels, average="macro")
            accuracy = accuracy_score(y_val_fold, y_val_preds)

            f1_scores_per_fold["High"].append(f1_high)
            f1_scores_per_fold["Medium"].append(f1_medium)
            f1_scores_per_fold["Low"].append(f1_low)
            f1_scores_per_fold["Macro Avg"].append(macro_f1)
            accuracy_scores.append(accuracy)

        # Store averaged CV metrics for this target
        cv_results[target_col] = {
            "Model": best_model_name,
            "High": mean_std(f1_scores_per_fold["High"]),
            "Medium": mean_std(f1_scores_per_fold["Medium"]),
            "Low": mean_std(f1_scores_per_fold["Low"]),
            "Macro Avg": mean_std(f1_scores_per_fold["Macro Avg"]),
            "Accuracy": mean_std(accuracy_scores)
        }
    return cv_results

def cross_validate_best_model(models, features_df, targets_df):
    """
    Score the already-trained AutoGluon predictors on 5 stratified folds.

    For each target the stored predictor is refit once via ``refit_full()``
    and then used to predict the validation rows of every fold.

    NOTE(review): the predictor is never fitted on the fold's training rows,
    so if ``features_df`` contains rows the predictor was trained on, these
    scores are not out-of-fold estimates. Use ``retrain_best_model_kfold``
    for a per-fold retrain.

    Args:
        models (dict): Dictionary of trained models (target column -> predictor).
        features_df (DataFrame): Input features.
        targets_df (DataFrame): Target labels, expected to hold the discrete
            classes 0 (low), 1 (medium) and 2 (high).

    Returns:
        dict: target column -> {"High", "Medium", "Low", "Macro Avg",
        "Accuracy"}; every metric is a "mean ± std" string over the folds.
    """
    # Fixed label order so the per-class F1 array is always [low, medium, high],
    # even when a fold is missing one of the classes.
    class_labels = [0, 1, 2]

    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    cv_results = {}

    for target_col in targets_df.columns:
        print(f"\nPerforming 5-Fold Cross-Validation for {target_col}...")
        y = targets_df[target_col]
        f1_scores_per_fold = {"High": [], "Medium": [], "Low": [], "Macro Avg": []}
        accuracy_scores = []

        # The refit does not depend on the fold, so do it once per target
        # instead of once per fold.
        predictor = models[target_col]
        predictor.refit_full()

        # Only the validation indices are needed; nothing is trained per fold
        for _, val_idx in kfold.split(features_df, y):
            X_val_fold = features_df.iloc[val_idx]
            y_val_fold = y.iloc[val_idx]

            y_val_preds = predictor.predict(X_val_fold)

            f1_low, f1_medium, f1_high = f1_score(
                y_val_fold, y_val_preds, labels=class_labels, average=None
            )
            macro_f1 = f1_score(y_val_fold, y_val_preds, labels=class_labels, average="macro")
            accuracy = accuracy_score(y_val_fold, y_val_preds)

            f1_scores_per_fold["High"].append(f1_high)
            f1_scores_per_fold["Medium"].append(f1_medium)
            f1_scores_per_fold["Low"].append(f1_low)
            f1_scores_per_fold["Macro Avg"].append(macro_f1)
            accuracy_scores.append(accuracy)

        # Save averaged CV metrics for current target
        cv_results[target_col] = {
            key: f"{np.mean(scores):.4f} ± {np.std(scores):.4f}" for key, scores in f1_scores_per_fold.items()
        }
        cv_results[target_col]["Accuracy"] = f"{np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}"
    return cv_results

###############################################################################
# Step 5: Generate LaTeX Table for Reporting Results
###############################################################################

def generate_latex_table(cv_results, models, modalities="Eye, Face, GSR, Personality"):
    """
    Generate a LaTeX table for classification results based on cross-validation metrics.

    The table has one column per emotion target and one row each for the best
    non-ensemble model name and the "High", "Medium", "Low", "Macro Avg" and
    "Accuracy" entries of ``cv_results``. It uses ``\\toprule``/``\\midrule``/
    ``\\cmidrule``/``\\bottomrule``, so the including document needs a package
    that defines them.

    Args:
        cv_results (dict): Cross-validation results; for each of the four
            target columns a dict with the keys "High", "Medium", "Low",
            "Macro Avg" and "Accuracy".
        models (dict): Dictionary of trained models (target column -> predictor).
        modalities (str): Description of the modalities used (inserted into
            the caption as-is).

    Returns:
        str: LaTeX formatted table as a string.
    """
    target_cols = ["preceived_arousal", "preceived_valance", "felt_arousal", "felt_valance"]

    # Retrieve best model names for each target by filtering out ensemble models
    best_models = {}
    for target_col, predictor in models.items():
        leaderboard = predictor.leaderboard(silent=True)
        is_ensemble = leaderboard["model"].str.contains("WeightedEnsemble")
        best_model_name = leaderboard.loc[~is_ensemble, "model"].iloc[0]
        # Model names such as "LightGBM_BAG_L1" contain underscores, which
        # must be escaped to be valid in LaTeX text mode.
        best_models[target_col] = str(best_model_name).replace("_", r"\_")

    # Start constructing the LaTeX table
    latex_table = rf"""
\begin{{table*}}[h!]
\centering
\caption{{Classification Performance (F1 Score) Using Multimodal Features ({modalities}) (5-Fold Cross-Validation)}}
\label{{tab:multimodal_results}}
\begin{{tabular}}{{lcccc}}
\toprule
 & Perceived Arousal & Perceived Valence & Felt Arousal & Felt Valence \\ 
\cmidrule(lr){{2-2}} \cmidrule(lr){{3-3}} \cmidrule(lr){{4-4}} \cmidrule(lr){{5-5}}
Best Model & {best_models["preceived_arousal"]} & {best_models["preceived_valance"]} & {best_models["felt_arousal"]} & {best_models["felt_valance"]} \\ \midrule
"""

    # Add each performance metric row (High, Medium, Low, Macro Avg, Accuracy)
    for class_label in ["High", "Medium", "Low", "Macro Avg", "Accuracy"]:
        cells = [str(cv_results[col][class_label]) for col in target_cols]
        latex_table += " & ".join([class_label] + cells) + " \\\\ \n"

    # No blank line before \bottomrule: an empty line inside a tabular body
    # becomes a paragraph break and makes the following rule fail to compile.
    latex_table += r"""\bottomrule
\end{tabular}
\end{table*}
"""
    return latex_table
import os
import random
import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
import neurokit2 as nk
from tabulate import tabulate  # Import tabulate for nice table formatting

###############################################################################
# Step 6: Print Classification Results in a Readable Table Format
###############################################################################

def print_classification_results(cv_results, models, modalities="Eye, Face, GSR, Personality"):
    """
    Print classification performance results in a formatted table.

    The table has one column per target ("preceived_arousal", "preceived_valance",
    "felt_arousal", "felt_valance"), a "Best Model" row, and one row per entry in
    High / Medium / Low / Macro Avg / Accuracy. Any target or metric that is
    missing from ``models`` or ``cv_results`` is shown as "N/A" instead of
    raising, so a partially completed run can still be reported.

    Args:
        cv_results (dict): Maps each target name to a dict of metric label -> value.
        models (dict): Maps each target name to an object exposing
            ``leaderboard(silent=True)`` that returns a DataFrame with a "model" column.
        modalities (str): Description of the modalities used (shown in the title).
    """
    target_cols = ("preceived_arousal", "preceived_valance", "felt_arousal", "felt_valance")

    # Pick, per target, the first leaderboard row that is not a weighted ensemble.
    # NOTE(review): this assumes the leaderboard is ordered best-first - confirm.
    best_models = {}
    for target_col, predictor in models.items():
        leaderboard = predictor.leaderboard(silent=True)
        is_ensemble = leaderboard["model"].str.contains("WeightedEnsemble", regex=False)
        non_ensemble_models = leaderboard[~is_ensemble]
        if non_ensemble_models.empty:
            # Only ensemble rows are present; leave the target unset so it shows as N/A.
            continue
        best_models[target_col] = non_ensemble_models.iloc[0]["model"]

    banner = "=" * 80
    print("\n" + banner)
    print(f"Classification Performance Using Multimodal Features ({modalities})")
    print(banner)

    # Prepare the table header
    headers = ["Metric", "Perceived Arousal", "Perceived Valence", "Felt Arousal", "Felt Valence"]

    # Row with the best model name per target
    best_model_row = ["Best Model"] + [best_models.get(col, "N/A") for col in target_cols]

    # One row per performance metric; tolerate missing targets as well as missing labels
    metric_rows = [
        [class_label]
        + [cv_results.get(col, {}).get(class_label, "N/A") for col in target_cols]
        for class_label in ("High", "Medium", "Low", "Macro Avg", "Accuracy")
    ]

    # Print the formatted table
    print(tabulate([best_model_row, *metric_rows], headers=headers, tablefmt="grid"))
    print(banner + "\n")

###############################################################################
# Main Execution
###############################################################################
# Note: The main execution section should call the functions in the appropriate order.
# For example:
#   1. Load eye_df, au_df, and gsr_df from your data sources.
#   2. Use extract_features() to merge and summarize the features.
#   3. Discretize target labels with discretize_labels().
#   4. Train models using train_autogluon_models().
#   5. Optionally, perform cross-validation using retrain_best_model_kfold() or cross_validate_best_model().
#   6. Generate a LaTeX table for reporting results with generate_latex_table().


# Full Multimodal Emotion Classification

## Overview
This cell performs **emotion classification** using **all available modalities**:
- **Eye Tracking**
- **Facial Action Units (AU)**
- **Galvanic Skin Response (GSR)**
- **Personality Traits**

## Steps:
1. Extracts **features** from all modalities.
2. **Discretizes emotion labels** (Perceived & Felt Arousal/Valence).
3. Trains **AutoGluon models** for classification.
4. Performs **5-Fold Cross-Validation** to validate performance.
5. Prints results in a **structured table format**.

## Expected Outcome:
- The best-performing model for each emotion category.
- **F1 scores & accuracy metrics** across different emotional states.


In [None]:
# Full multimodal run: every modality switched on
USE_MODALITIES = dict(eye=True, action_units=True, gsr=True, personality=True)

# Build features for the enabled modalities, then discretize the target labels
features_df, targets_df = extract_features(eye_df, au_df, gsr_df, USE_MODALITIES)
targets_df = discretize_labels(targets_df)

# Train the AutoGluon models on the combined features
models_all, cv_results_all, leaderboard_all = train_autogluon_models(features_df, targets_df)

# 5-fold cross-validation using the trained models
cv_results = retrain_best_model_kfold(features_df, targets_df, models_all, n_splits=5)

# Report the results; the title lists only the modalities that are enabled
print_classification_results(
    cv_results,
    models_all,
    modalities=", ".join(name for name, enabled in USE_MODALITIES.items() if enabled),
)

# print(latex_output)

No path specified. Models will be saved in: "AutogluonModels/ag-20250224_131604"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.9.21
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.3.0: Thu Jan  2 20:24:22 PST 2025; root:xnu-11215.81.4~3/RELEASE_ARM64_T6041
CPU Count:          14
Memory Avail:       3.34 GB / 24.00 GB (13.9%)
Disk Space Avail:   183.48 GB / 926.35 GB (19.8%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit


Training AutoGluon model for preceived_arousal...


# Emotion Classification Using Eye Tracking Only

## Overview
This cell trains and evaluates models **using only Eye Tracking data**.

## Steps:
1. Extracts **features from eye-tracking data**.
2. **Discretizes emotion labels** into categorical bins.
3. Trains **AutoGluon models** based only on **eye-tracking features**.
4. Performs **5-Fold Cross-Validation** for validation.
5. Prints **classification results**.

## Expected Outcome:
- Performance analysis of **Eye Tracking** as a standalone modality.
- Comparison with other modalities to assess its **effectiveness**.


In [None]:
# Eye-tracking only: all other modalities switched off
USE_MODALITIES = dict(eye=True, action_units=False, gsr=False, personality=False)

# Build features for the enabled modality, then discretize the target labels
features_df, targets_df = extract_features(eye_df, au_df, gsr_df, USE_MODALITIES)
targets_df = discretize_labels(targets_df)

# Train the AutoGluon models on eye-tracking features
models_eye, cv_results_eye, leaderboards_eye = train_autogluon_models(features_df, targets_df)

# 5-fold cross-validation using the trained models
cv_results = retrain_best_model_kfold(features_df, targets_df, models_eye, n_splits=5)

# Report the results; the title lists only the modalities that are enabled
print_classification_results(
    cv_results,
    models_eye,
    modalities=", ".join(name for name, enabled in USE_MODALITIES.items() if enabled),
)


# Emotion Classification Using Facial Action Units (AU) Only

## Overview
This cell evaluates **emotion classification performance using only AU data**.

## Steps:
1. Extracts **features from AU data**.
2. **Discretizes emotion labels** into low, medium, and high bins.
3. Trains **AutoGluon models** using **AU-based features**.
4. Applies **5-Fold Cross-Validation**.
5. Prints **classification results**.

## Expected Outcome:
- Measures how well **facial expressions alone** predict emotions.
- Useful for **non-contact emotion recognition systems**.


In [None]:
# Facial action units only: all other modalities switched off
USE_MODALITIES = dict(eye=False, action_units=True, gsr=False, personality=False)

# Build features for the enabled modality, then discretize the target labels
features_df, targets_df = extract_features(eye_df, au_df, gsr_df, USE_MODALITIES)
targets_df = discretize_labels(targets_df)

# Train the AutoGluon models on action-unit features
models_au, cv_results_au, leaderboards_au = train_autogluon_models(features_df, targets_df)

# 5-fold cross-validation using the trained models
cv_results = retrain_best_model_kfold(features_df, targets_df, models_au, n_splits=5)

# Report the results; the title lists only the modalities that are enabled
print_classification_results(
    cv_results,
    models_au,
    modalities=", ".join(name for name, enabled in USE_MODALITIES.items() if enabled),
)


# Emotion Classification Using GSR Only

## Overview
This cell trains models **using only Galvanic Skin Response (GSR) data**.

## Steps:
1. Extracts **GSR-based features**.
2. **Discretizes emotion labels**.
3. Trains **AutoGluon models** using **only GSR signals**.
4. Performs **5-Fold Cross-Validation**.
5. Prints **classification results**.

## Expected Outcome:
- Evaluates the effectiveness of **physiological signals** for emotion detection.
- Useful for **stress & arousal measurement applications**.


In [None]:
# GSR only: all other modalities switched off
USE_MODALITIES = dict(eye=False, action_units=False, gsr=True, personality=False)

# Build features for the enabled modality, then discretize the target labels
features_df, targets_df = extract_features(eye_df, au_df, gsr_df, USE_MODALITIES)
targets_df = discretize_labels(targets_df)

# Train the AutoGluon models on GSR features
models_gsr, cv_results_gsr, leaderboards_gsr = train_autogluon_models(features_df, targets_df)

# 5-fold cross-validation using the trained models
cv_results = retrain_best_model_kfold(features_df, targets_df, models_gsr, n_splits=5)

# Report the results; the title lists only the modalities that are enabled
print_classification_results(
    cv_results,
    models_gsr,
    modalities=", ".join(name for name, enabled in USE_MODALITIES.items() if enabled),
)



# Emotion Classification Using Eye, AU, and GSR (Excluding Personality)

## Overview
This cell performs **multimodal emotion classification** using:
- **Eye Tracking**
- **Facial Action Units (AU)**
- **Galvanic Skin Response (GSR)**
- ❌ **Excludes Personality Traits**

## Steps:
1. Extracts **features** from Eye, AU, and GSR.
2. **Discretizes emotion labels**.
3. Trains **AutoGluon models** using the three selected modalities.
4. Performs **5-Fold Cross-Validation**.
5. Prints **classification results**.

## Expected Outcome:
- Evaluates multimodal performance **without personality features**.
- Helps determine **if personality traits significantly impact classification**.


In [None]:
# Eye + AU + GSR, with the personality features switched off
USE_MODALITIES = dict(eye=True, action_units=True, gsr=True, personality=False)

# Build features for the enabled modalities, then discretize the target labels
features_df, targets_df = extract_features(eye_df, au_df, gsr_df, USE_MODALITIES)
targets_df = discretize_labels(targets_df)

# Train the AutoGluon models on everything except personality
(
    models_all_without_personality,
    cv_results_no_personality,
    leaderboards_no_personality,
) = train_autogluon_models(features_df, targets_df)

# 5-fold cross-validation using the trained models
cv_results = retrain_best_model_kfold(
    features_df, targets_df, models_all_without_personality, n_splits=5
)

# Report the results; the title lists only the modalities that are enabled
print_classification_results(
    cv_results,
    models_all_without_personality,
    modalities=", ".join(name for name, enabled in USE_MODALITIES.items() if enabled),
)


# Summary of Modalities Used in Each Cell

| **Cell #** | **Modalities Used** | **Includes Personality?** | **Purpose** |
|-----------|----------------|----------------|---------------------------|
| **1** | Eye + AU + GSR + Personality | ✅ Yes | Full Multimodal Analysis |
| **2** | Eye Tracking Only | ❌ No | Evaluates Eye Tracking Data Alone |
| **3** | Facial Action Units Only | ❌ No | Evaluates AU Data Alone |
| **4** | GSR Only | ❌ No | Evaluates GSR Data Alone |
| **5** | Eye + AU + GSR | ❌ No | Multimodal Without Personality |

# Key Insights:
✔ **Modular Analysis**: Tests **individual vs. combined** modality performance.  
✔ **Robust Validation**: Uses **5-Fold Cross-Validation** to ensure reliable metrics.  
✔ **Interpretable Results**: Presents structured performance tables for comparison.  
✔ **Scalability**: Easy to modify and **extend** with new physiological data.

# Next Steps:
- **Compare results across cells** to identify the best modality.
- **Optimize models** by tuning hyperparameters or using stacked ensembling.
- **Expand the dataset** with EEG, HRV, or speech data for richer multimodal learning.
