In [1]:
import json
import os
import pandas as pd
import numpy as np
import scipy.stats
from scipy import stats, ndimage
from scipy.signal import butter, lfilter, medfilt

#Change these for the data

trialLength = 3000
baselineLength = 1000

In [2]:
def calculate_column_wise_average(df):
    columns_to_average = df.columns[5:3305].values
    return df[columns_to_average].mean(axis=0)

In [3]:
all_participants_data = pd.DataFrame()
all_ext_participants_data = pd.DataFrame()

def process_participant(participantID):
    global all_participants_data
    global all_ext_participants_data
    path = "/Users/dhewitt/Data/pps/"
    
    PKL_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID  + "_dataframelist.pkl")
    CSV_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID  + "_Extracted_1504.csv")
    OUT_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID  + "_PupilDiameterProcessed_1904.csv")
    STT_OUT_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID  + "_PupilDiameterProcessed_ST.csv")
    AV_OUT_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID + "_PupilDiameterProcessed_noAv_1904.csv")
    EXT_AV_OUT_FILE_NAME = os.path.join(path + "P" + participantID + "/P" + participantID + "_PupilDiameterProcessed_AvExt_1904.csv")

    data2load = pd.read_csv(CSV_FILE_NAME) 
    dataframe_list = pd.read_pickle(PKL_FILE_NAME)

    #Normalising the timestamp from the first timepoint.
    #Just getting the 'in trial' bit from trial start/cue appearing to the pain stimulus onset.

    Right_extracted_pupil_sizes = list(map(lambda df: np.interp(np.arange(baselineLength+trialLength), df['timestamp'] - df['timestamp'][0], 
                    df['right_pupil_diameter_mm'].to_numpy()), dataframe_list))

    Left_extracted_pupil_sizes = list(map(lambda df: np.interp(np.arange(baselineLength+trialLength), df['timestamp'] - df['timestamp'][0], 
                    df['left_pupil_diameter_mm'].to_numpy()), dataframe_list))

    # Now getting the blinks and interpolating them - starting with the right pupil
    df1 = pd.DataFrame(Right_extracted_pupil_sizes)
    total_mean = df1.stack().mean()
    total_std = df1.stack().std()

    # Get blinks
    data_columns = df1.columns
    blink_indices_list = []
    
    def calculate_blink_indices(row):
        trial_data = row[data_columns]     # Extract the data points for the current trial
        trial_mean = trial_data.mean()
        trial_std = trial_data.std()
        blink_data_points = np.abs(trial_data - trial_mean) > 2 * trial_std     # Identify data points more than 2 standard deviations from the mean
        #blink_data_points = np.abs(trial_data - total_mean) > 3 * total_std
        column_indices = np.where(blink_data_points)[0] # Get the column indices where blink_data_points are True
        blink_indices = [{"Row Index": row.name, "Column Index": data_columns[col_idx]} for col_idx in column_indices] # Create a list of dictionaries with row and column indices
        blink_indices_list.extend(blink_indices)
    
    df1.apply(calculate_blink_indices, axis=1)
    blink_indices_df = pd.DataFrame(blink_indices_list)

    sorted_df = blink_indices_df.sort_values(by=['Row Index', 'Column Index'])
    diff = sorted_df['Column Index'].diff()
    # Identify gaps in column indices and create a new group when gaps are not equal to 1
    group = (diff != 1).cumsum()
    # Group the data by 'Row Index' and the calculated 'group'
    grouped_data = sorted_df.groupby(['Row Index', group], as_index=False)['Column Index'].agg(['first', 'last'])

    # Rename the columns for clarity
    grouped_data.columns = ['Start Column Index', 'End Column Index']
    grouped_data['Total Dur'] = grouped_data['End Column Index']-grouped_data['Start Column Index']

    threshold = 150;
    grouped_data = grouped_data[grouped_data['Total Dur']>=threshold].reset_index().drop(columns='Column Index')

    interpolated_df1 = df1.copy()
    # Iterate over the rows and matching columns in filtered_grouped_data
    for _, row_data in grouped_data.iterrows():
        row_index = row_data['Row Index']
        start_col_index = row_data['Start Column Index']
        end_col_index = row_data['End Column Index']

        # Ensure the row index is present in the DataFrame
        if row_index in interpolated_df1.index:
            # Replace matching data with NaN for interpolation
            #print(row_index, start_col_index, end_col_index)
            interpolated_df1.iloc[row_index, start_col_index:end_col_index + 1] = np.nan

    # Interpolate the missing values using linear interpolation
    interpolated_df1 = interpolated_df1.interpolate(method='linear', axis=1, limit_direction='both')

    #matching_df.to_csv('/Users/dhewitt/Data/pps/P03/P03_matching.csv')
    #interpolated_df1.to_csv('/Users/dhewitt/Data/pps/P03/P03_interpdf_nan00.csv')

    ###Getting rid of any trials with too many interpolations - 25% of all trials

    # Filter out 'Row Index' in grouped_data with 'Total Dur' greater than 825
    rows_to_remove = grouped_data[grouped_data['Total Dur'] > 1025]['Row Index']

    # Drop rows in interpolated_df2 corresponding to the selected 'Row Index'
    interpolated_df1 = interpolated_df1.drop(rows_to_remove, errors='ignore')
    interpolated_df1.reset_index(drop=True, inplace=True)

    # Now the left pupil

    df2 = pd.DataFrame(Left_extracted_pupil_sizes)
    total_mean = df2.stack().mean()
    total_std = df2.stack().std()

    # Create an empty list to store dictionaries of blink indices
    data_columns = df2.columns
    blink_indices_list = []
    
    def calculate_blink_indices(row):
        trial_data = row[data_columns]     # Extract the data points for the current trial
        trial_mean = trial_data.mean()
        trial_std = trial_data.std()
        blink_data_points = np.abs(trial_data - trial_mean) > 2 * trial_std     # Identify data points more than 2 standard deviations from the mean
        #blink_data_points = np.abs(trial_data - total_mean) > 3 * total_std
        column_indices = np.where(blink_data_points)[0] # Get the column indices where blink_data_points are True
        blink_indices = [{"Row Index": row.name, "Column Index": data_columns[col_idx]} for col_idx in column_indices] # Create a list of dictionaries with row and column indices
        blink_indices_list.extend(blink_indices)
    
    
    df2.apply(calculate_blink_indices, axis=1)
    blink_indices_df = pd.DataFrame(blink_indices_list)

    sorted_df = blink_indices_df.sort_values(by=['Row Index', 'Column Index'])
    diff = sorted_df['Column Index'].diff()
    # Identify gaps in column indices and create a new group when gaps are not equal to 1
    group = (diff != 1).cumsum()
    # Group the data by 'Row Index' and the calculated 'group'
    grouped_data = sorted_df.groupby(['Row Index', group], as_index=False)['Column Index'].agg(['first', 'last'])
    grouped_data.columns = ['Start Column Index', 'End Column Index']
    grouped_data['Total Dur'] = grouped_data['End Column Index']-grouped_data['Start Column Index']

    threshold = 150;

    grouped_data = grouped_data[grouped_data['Total Dur']>=threshold].reset_index().drop(columns='Column Index')

    interpolated_df2 = df2.copy()
    # Iterate over the rows and matching columns in grouped_data
    for _, row_data in grouped_data.iterrows():
        row_index = row_data['Row Index']
        start_col_index = row_data['Start Column Index']
        end_col_index = row_data['End Column Index']

        # Ensure the row index is present in the DataFrame
        if row_index in interpolated_df2.index:
            #print(row_index, start_col_index, end_col_index)
            # Replace matching data with NaN for interpolation
            interpolated_df2.iloc[row_index, start_col_index:end_col_index + 1] = np.nan

    # Interpolate the missing values using linear interpolation
    interpolated_df2 = interpolated_df2.interpolate(method='linear', axis=1, limit_direction='both')

    #df2.to_csv('/Users/dhewitt/Data/pps/P03/P03_matching.csv')
    #interpolated_df2.to_csv('/Users/dhewitt/Data/pps/P03/P03_interpdf2_monday.csv')

    ###Getting rid of any trials with too many interpolations - 25% of all trials

    # Filter out 'Row Index' in grouped_data with 'Total Dur' greater than 825
    rows_to_remove = grouped_data[grouped_data['Total Dur'] > 1025]['Row Index']
    interpolated_df2 = interpolated_df2.drop(rows_to_remove, errors='ignore')
    interpolated_df2.reset_index(drop=True, inplace=True)

    ##Filtering

    ## Apply the filter to interpolated_df1 and interpolated_df2
    #filtered_interpolated_df1 = medfilt(interpolated_df1, 3)
    #filtered_interpolated_df2 =  medfilt(interpolated_df2, 3)
    filtered_interpolated_df1 = interpolated_df1
    filtered_interpolated_df2 =  interpolated_df2

    #filtered_interpolated_df1 = ndimage.median_filter(interpolated_df1, size=11)
    #filtered_interpolated_df2 =  ndimage.median_filter(interpolated_df2, size=11)

    ## Create DataFrames from the filtered arrays
    interpolated_df1 = pd.DataFrame(filtered_interpolated_df1, columns=interpolated_df1.columns)
    interpolated_df2 = pd.DataFrame(filtered_interpolated_df2, columns=interpolated_df2.columns)

    # Baseline correcting
    baselineStart=250
    baselineEnd=750

    baselineAv = interpolated_df1.iloc[:,baselineStart:baselineEnd].mean(axis=1)
    interpolated_df1 = interpolated_df1.apply(lambda x: x - baselineAv)

    baselineAv = interpolated_df2.iloc[:,baselineStart:baselineEnd].mean(axis=1)
    interpolated_df2 = interpolated_df2.apply(lambda x: x - baselineAv)

    # Concatenating left and right eyes

    df1 = pd.DataFrame(interpolated_df1) #right pupil sizes after blink interpolation
    df2 = pd.DataFrame(interpolated_df2) #left pupil sizes after blink interpolation

    botheyesdf = (df1+df2)/2

    botheyesdf.drop(botheyesdf.iloc[:,0:baselineLength], inplace=True, axis=1) #getting rid of the baseline
    columns = dict(map(reversed, enumerate(botheyesdf.columns))) #resetting the index
    botheyesdf = botheyesdf.rename(columns=columns)

    df = pd.DataFrame(data2load)
    newdf = pd.concat([df,botheyesdf],axis=1)
    
    newdf.to_csv(STT_OUT_FILE_NAME, index=False)

    # Creating averages for each column (i.e., over all trials for each condition)

    condMiddle = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 1)]
    Middle_cond_average1 = calculate_column_wise_average(condMiddle)

    condLeft = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 1)]
    Left_cond_average1 = calculate_column_wise_average(condLeft)

    condRight = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 1)]
    Right_cond_average1 = calculate_column_wise_average(condRight) 

    extMiddle = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 2)]
    Middle_ext_average1 = calculate_column_wise_average(extMiddle)

    extLeft = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 2)]
    Left_ext_average1 = calculate_column_wise_average(extLeft)

    extRight = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 2)]
    Right_ext_average1 = calculate_column_wise_average(extRight)    
    
    #### second run
    condMiddle = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 3)]
    Middle_cond_average2 = calculate_column_wise_average(condMiddle)

    condLeft = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 3)]
    Left_cond_average2 = calculate_column_wise_average(condLeft)

    condRight = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 3)]
    Right_cond_average2 = calculate_column_wise_average(condRight) 

    extMiddle = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 4)]
    Middle_ext_average2 = calculate_column_wise_average(extMiddle)

    extLeft = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 4)]
    Left_ext_average2 = calculate_column_wise_average(extLeft)

    extRight = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 4)]
    Right_ext_average2 = calculate_column_wise_average(extRight)   

    # Saving single subject data for pain-cue-block comparisons

    frames = [Middle_cond_average1, Left_cond_average1, Right_cond_average1, Middle_ext_average1, Left_ext_average1, Right_ext_average1,
             Middle_cond_average2, Left_cond_average2, Right_cond_average2, Middle_ext_average2, Left_ext_average2, Right_ext_average2]
    avscore = pd.concat(frames, axis=1, keys = ["CM1", "CL1", "CR1", "EM1", "EL1", "ER1",
                                               "CM2", "CL2", "CR2", "EM2", "EL2", "ER2"])
    avscore.to_csv(AV_OUT_FILE_NAME)
    
    # Saving single subject data for extinction-tonic pain comparisons
    
    extMiddle1 = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 2)]
    Middle_ext_average1 = calculate_column_wise_average(extMiddle1)

    extLeft1 = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 2)]
    Left_ext_average1 = calculate_column_wise_average(extLeft1)

    extRight1 = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 2)]
    Right_ext_average1 = calculate_column_wise_average(extRight1)    
    
    extMiddle2 = newdf.loc[(newdf['start_new_trial_condition'] == 'MiddleLow') & (newdf['condorder'] == 4)]
    Middle_ext_average2 = calculate_column_wise_average(extMiddle2)

    extLeft2 = newdf.loc[(newdf['start_new_trial_condition'] == 'Left') & (newdf['condorder'] == 4)]
    Left_ext_average2 = calculate_column_wise_average(extLeft2)

    extRight2 = newdf.loc[(newdf['start_new_trial_condition'] == 'Right') & (newdf['condorder'] == 4)]
    Right_ext_average2 = calculate_column_wise_average(extRight2)    
    
    frames = [Middle_ext_average1, Left_ext_average1, Right_ext_average1, Middle_ext_average2, Left_ext_average2, Right_ext_average2]
    avscore2 = pd.concat(frames, axis=1, keys = ["EM1", "EL1", "ER1", "EM2", "EL2", "ER2"])
    avscore2.to_csv(EXT_AV_OUT_FILE_NAME)
    
    # Read the CSV file for the current participant
    participant_data = pd.read_csv(AV_OUT_FILE_NAME)
    participant_data["Participant"] = participantID
    all_participants_data = pd.concat([all_participants_data, participant_data], ignore_index=True)
    
    ext_participant_data = pd.read_csv(EXT_AV_OUT_FILE_NAME)
    ext_participant_data["Participant"] = participantID
    all_ext_participants_data = pd.concat([all_ext_participants_data, ext_participant_data], ignore_index=True)    
    

In [4]:
# List of participant IDs
#participant_ids = ["P02", "P03", "P04", "P05", "P06", "P08", "P09", "P10", "P11", "P12", "P13", "P14", "P15", "P16", "P17", "P19", "P20", "P21", "P22", "P24", "P25", "P26", "P27", "P28", "P29","P30"]  # Add more participant IDs as needed
#participant_ids = ["02", "03", "04", "05", "06", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "19", "20", "21", "22", "24", "25", "26", "27", "28", "29","30"] 
participant_ids = ['01']

# Loop over each participant
for participantID in participant_ids:
    # ... (The existing code for processing a single participant will go here)
    process_participant(participantID)
    
# Concatenate all participant data into a single DataFrame
all_participants_data.to_csv("/Users/dhewitt/Data/pps/Exports/Combined_PupilDiameter_noav_0912.csv", index=False)
all_ext_participants_data.to_csv("/Users/dhewitt/Data/pps/Exports/Combined_PupilDiameterExt_0912.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/dhewitt/Data/pps/P01/P01_Extracted_1504.csv'