In [1]:
import json
import os
import pandas as pd
import numpy as np
import scipy.stats
from scipy import stats, ndimage
from scipy.signal import butter, lfilter, medfilt

# Recording-window configuration -- change these to match the data being processed.
# Both values are lengths on the resampled time grid: every trial is interpolated
# onto np.arange(baselineLength + trialLength) of its zero-based timestamps.
# NOTE(review): presumably milliseconds -- confirm against the timestamp units.

trialLength = 3300  # samples kept per trial once the baseline has been removed
baselineLength = 1000  # leading samples of each trial that are discarded as baseline

In [2]:
def calculate_column_wise_average(df, first_data_col=5, n_data_cols=3300):
    """Average every data column of ``df`` across its rows.

    The data columns are selected by position: ``n_data_cols`` columns starting
    at ``first_data_col``. The defaults reproduce the original fixed window
    (columns 5 up to, but excluding, 3305), i.e. 5 leading non-data columns
    followed by 3300 data columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose leading columns are skipped and whose remaining columns
        hold numeric samples.
    first_data_col : int, optional
        Position of the first column to average.
    n_data_cols : int, optional
        Maximum number of columns to average; fewer are used if ``df`` is
        narrower than the requested window.

    Returns
    -------
    pandas.Series
        One mean per selected column, indexed by column label. NaN entries are
        skipped (pandas default), so an all-NaN or empty column yields NaN.
    """
    # Positional slicing (rather than re-selecting by label) keeps the result
    # correct even when column labels are duplicated.
    data_block = df.iloc[:, first_data_col:first_data_col + n_data_cols]
    return data_block.mean(axis=0)

In [3]:
# Group-level accumulators: process_participant() appends one block of rows per
# participant to each of these module-level tables.
all_participants_data = pd.DataFrame()
all_ext_participants_data = pd.DataFrame()

# Root folder containing one "P<ID>" sub-folder per participant. Set the
# PPS_DATA_DIR environment variable to point the notebook at another machine's
# copy of the data without editing the code; the original location remains the
# default. os.path.join(..., "") guarantees a trailing separator, so file names
# may be appended to ``path`` by plain string concatenation.
path = os.path.join(os.environ.get("PPS_DATA_DIR", "/Users/dhewitt/Data/pps/"), "")

def _interpolate_gaze_x(trial_df, n_samples):
    """Resample one trial's x gaze component onto the grid 0..n_samples-1.

    Timestamps are expressed relative to the trial's first sample before
    interpolating, so trials recorded at different sampling rates end up on a
    common time axis.
    """
    elapsed = trial_df['timestamp'] - trial_df['timestamp'].iloc[0]
    # 'gaze_direction' holds one tuple per sample; expanding it puts the
    # components in columns 0, 1, 2. Only component 0 (x) is analysed.
    gaze_x = trial_df['gaze_direction'].apply(pd.Series)[0]
    return np.interp(np.arange(n_samples), elapsed, gaze_x)


def _find_runs(flags):
    """Return inclusive (start, end) positions of every run of consecutive True values."""
    hits = np.flatnonzero(flags)
    if hits.size == 0:
        return []
    # A gap is any place where two successive hit positions are not adjacent.
    gaps = np.flatnonzero(np.diff(hits) != 1)
    starts = hits[np.concatenate(([0], gaps + 1))]
    ends = hits[np.concatenate((gaps, [hits.size - 1]))]
    return list(zip(starts.tolist(), ends.tolist()))


def _remove_artifacts(gaze, min_dur, max_dur):
    """Interpolate over long outlier runs and drop trials dominated by them.

    A sample is an outlier when it lies more than 2 standard deviations from
    its own trial's mean. Runs of consecutive outliers whose duration
    (end position - start position) is at least ``min_dur`` are replaced by
    linear interpolation; a trial containing any run longer than ``max_dur``
    is removed. The returned frame keeps the original row labels, so the
    surviving rows can still be matched to their trial.
    """
    trial_mean = gaze.mean(axis=1)
    trial_std = gaze.std(axis=1)
    outliers = gaze.sub(trial_mean, axis=0).abs().gt(2 * trial_std, axis=0).to_numpy()

    cleaned = gaze.to_numpy(dtype=float, copy=True)
    rejected = set()
    for row_pos, row_label in enumerate(gaze.index):
        for start, end in _find_runs(outliers[row_pos]):
            duration = end - start
            if duration < min_dur:
                continue  # short excursions are left untouched
            cleaned[row_pos, start:end + 1] = np.nan
            if duration > max_dur:
                rejected.add(row_label)

    repaired = pd.DataFrame(cleaned, index=gaze.index, columns=gaze.columns)
    repaired = repaired.interpolate(method='linear', axis=1, limit_direction='both')
    return repaired.drop(index=sorted(rejected))


def process_participant(participantID, min_artifact_dur=150, max_artifact_dur=1025,
                        median_kernel=5):
    """Clean one participant's gaze data and export per-condition average traces.

    Steps:
      1. Load the per-trial recordings (pickle) and the trial metadata (CSV)
         from ``<path>/P<ID>/``.
      2. Interpolate each trial's x gaze component onto a regular grid of
         ``baselineLength + trialLength`` samples.
      3. Interpolate over long outlier runs and reject trials with very long
         ones (see ``_remove_artifacts``).
      4. Median-filter each trial along time, multiply by 1000 and discard the
         first ``baselineLength`` samples.
      5. Average the remaining samples over trials for each combination of
         ``start_new_trial_condition`` (MiddleLow / Left / Right) and
         ``condorder`` (1-4) and write two CSV files:
         ``P<ID>_GazeDirectionProcessed_noAv.csv`` (all twelve averages) and
         ``P<ID>_GazeDirectionProcessedExtAv.csv`` (the six ``condorder`` 2 and
         4 averages).
      6. Append both tables, tagged with the participant ID, to the module-level
         ``all_participants_data`` / ``all_ext_participants_data`` frames.

    Parameters
    ----------
    participantID : str
        Participant number as used in the folder/file names (e.g. ``"02"``).
    min_artifact_dur : int, optional
        Minimum outlier-run duration (in samples) that gets interpolated.
    max_artifact_dur : int, optional
        A trial with an outlier run longer than this is rejected.
    median_kernel : int, optional
        Odd window length of the temporal median filter.

    Notes
    -----
    Two defects of the earlier implementation are fixed here:

    * The median filter used a scalar kernel on the 2-D trial x time array,
      which scipy interprets as a 5x5 window, so every trial was smoothed
      together with its neighbouring trials (and zero padding at the first and
      last trials). The filter now runs along time only.
    * Rejected trials were dropped and the index was reset before joining with
      the trial metadata, which shifted every later trial onto the wrong
      condition row. Row labels are now preserved, so each trace is joined to
      its own metadata row; rejected trials contribute NaN and are skipped by
      the averages.
      NOTE(review): this assumes the CSV has one row per trial in the same
      order as the pickled list -- confirm.
    """
    global all_participants_data
    global all_ext_participants_data

    file_stem = os.path.join(path, "P" + participantID, "P" + participantID)
    pkl_file_name = file_stem + "_dataframelist.pkl"
    csv_file_name = file_stem + "_Extracted_1504.csv"
    av_out_file_name = file_stem + "_GazeDirectionProcessed_noAv.csv"
    ext_av_out_file_name = file_stem + "_GazeDirectionProcessedExtAv.csv"

    trial_info = pd.read_csv(csv_file_name)
    # NOTE: unpickling can execute arbitrary code -- only load files you created.
    dataframe_list = pd.read_pickle(pkl_file_name)

    # One row per trial (in list order), one column per resampled time point.
    n_samples = baselineLength + trialLength
    gaze = pd.DataFrame(
        np.vstack([_interpolate_gaze_x(trial_df, n_samples) for trial_df in dataframe_list])
    )

    cleaned = _remove_artifacts(gaze, min_artifact_dur, max_artifact_dur)

    # Kernel (1, k): filter along time within each trial, never across trials.
    smoothed = pd.DataFrame(
        medfilt(cleaned.to_numpy(dtype=float), kernel_size=(1, median_kernel)),
        index=cleaned.index,
        columns=cleaned.columns,
    )

    # Baseline correction (subtracting each trial's mean over samples 250-750)
    # is currently switched off; the data are only rescaled by 1000.
    scaled = smoothed * 1000

    # Discard the baseline period and renumber the time points from 0.
    trial_window = scaled.iloc[:, baselineLength:].copy()
    trial_window.columns = range(trial_window.shape[1])

    # Join on the trial index so every trace sits next to its own metadata row.
    newdf = pd.concat([trial_info, trial_window], axis=1)

    # Column keys are <phase><location><run>: condorder 1/3 -> "C" run 1/2,
    # condorder 2/4 -> "E" run 1/2; locations M(iddleLow), L(eft), R(ight).
    blocks = ((1, "C", 1), (2, "E", 1), (3, "C", 2), (4, "E", 2))
    locations = (("M", "MiddleLow"), ("L", "Left"), ("R", "Right"))

    keys = []
    averages = []
    for condorder, phase, run in blocks:
        for code, label in locations:
            selected = newdf.loc[(newdf['start_new_trial_condition'] == label)
                                 & (newdf['condorder'] == condorder)]
            keys.append(phase + code + str(run))
            averages.append(calculate_column_wise_average(selected))

    # Single-subject table with all twelve condition averages.
    avscore = pd.concat(averages, axis=1, keys=keys)
    avscore.to_csv(av_out_file_name)

    # Single-subject table restricted to the six "E" (condorder 2 and 4) averages.
    ext_positions = [i for i, key in enumerate(keys) if key.startswith("E")]
    ext_avscore = pd.concat([averages[i] for i in ext_positions], axis=1,
                            keys=[keys[i] for i in ext_positions])
    ext_avscore.to_csv(ext_av_out_file_name)

    # Re-read the files just written so the time-point index becomes an ordinary
    # column, exactly as in the saved CSVs, before adding to the group tables.
    participant_data = pd.read_csv(av_out_file_name)
    participant_data["Participant"] = participantID
    all_participants_data = pd.concat([all_participants_data, participant_data],
                                      ignore_index=True)

    ext_participant_data = pd.read_csv(ext_av_out_file_name)
    ext_participant_data["Participant"] = participantID
    all_ext_participants_data = pd.concat([all_ext_participants_data, ext_participant_data],
                                          ignore_index=True)
    

In [4]:
# Participant IDs to process, written exactly as they appear in the "P<ID>"
# folder names. For a quick test run, shrink this list to one ID, e.g. ["02"].
participant_ids = ["02", "03", "04", "05", "06", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "19", "20", "21", "22", "24", "25", "26", "27", "28", "29","30"] 

# Start from empty accumulators so that re-running this cell does not append
# every participant to the combined tables a second time.
all_participants_data = pd.DataFrame()
all_ext_participants_data = pd.DataFrame()

# Process each participant; process_participant() writes the per-participant
# CSV files and appends that participant's rows to the two accumulators.
for participantID in participant_ids:
    process_participant(participantID)

# Export the combined tables to the "Exports" folder under the shared data root.
export_dir = os.path.join(path, "Exports")
all_participants_data.to_csv(os.path.join(export_dir, "Combined_GazeDirection_noav_1504.csv"), index=False)
all_ext_participants_data.to_csv(os.path.join(export_dir, "Combined_GazeDirectionExt_1504.csv"), index=False)