In [None]:
# %preprocess physiological data files%
# compute values of zyg, cor, scr for each trigger in each trial
# merge the physiological values into the behavioral data file (one row per trial)
# coding=utf-8

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from constant import *
from pyphysio.pyphysio import signal, filters, segmenters, interactive
from pyphysio.pyphysio.specialized.eda import DriverEstim, PhasicEstim, preset_phasic

In [None]:
# load data
def load_data(datafile, triggers):
    """Read one tab-separated recording and attach the trigger table to it.

    Parameters
    ----------
    datafile : path or file-like
        Passed straight to ``pd.read_csv``; column names are supplied
        explicitly (``zyg``, ``scr``, ``cor``, ``trg1``..``trg8``, ``na``).
    triggers : pd.DataFrame
        Must contain a ``binary`` column (the merge key) and a ``Binary``
        column, which is dropped from the result.

    Returns
    -------
    pd.DataFrame
        The inner merge of the recording with ``triggers``, ordered by the
        ``line`` column (0-based row position in the raw file).
    """
    columns = ["zyg","scr","cor","trg1","trg2","trg3","trg4","trg5","trg6","trg7","trg8","na"]
    recording = pd.read_csv(datafile, sep='\t', names=columns, index_col=False)

    # Keep the original row position so the order can be restored after merging.
    recording['line'] = np.arange(recording.shape[0], dtype=int)

    # Join the eight trigger channels (trg1 first) into one string code per row.
    code = recording["trg1"].astype(str)
    for trg_col in columns[4:11]:
        code = code + recording[trg_col].astype(str)
    recording["binary"] = code

    merged = recording.merge(triggers, on="binary")
    merged = merged.drop(["Binary"], axis=1)
    return merged.sort_values(by='line')

In [None]:
# separate phases
def sep_phases(df, phase_names):
    """Label every sample with its experiment phase and split the frame by phase.

    The first occurrence of each trigger in ``phase_names`` gives the ``line``
    value at which a phase begins. Each phase covers the half-open interval
    ``[start trigger line, next trigger line)``.

    Side effect: a ``Phase`` column is added to (or overwritten in) ``df``
    itself. Samples outside every interval keep the empty string, e.g. those
    between ``testStart`` and ``offlineT0Start`` or from ``expEnd`` onwards.

    Parameters
    ----------
    df : pd.DataFrame
        Needs ``TriggerName`` and an integer ``line`` column.
    phase_names : iterable of str
        Trigger names to locate. It must include every trigger used as a
        boundary below, otherwise a ``KeyError`` is raised.

    Returns
    -------
    dict of str -> pd.DataFrame
        Keys ``Learning``, ``TrueChoice``, ``FalseChoice``, ``Offlines`` (all
        ``OfflineRating*`` phases), ``LearningCheck`` (all ``LearningCheck*``
        phases) and ``DemandRating``; each frame is sorted by ``line``.

    Raises
    ------
    IndexError
        If a trigger in ``phase_names`` never occurs in ``df['TriggerName']``.
    """
    # (start trigger, end trigger, phase label); applied in this order, so a
    # later entry wins if two intervals ever overlap.
    boundaries = (
        # Practice
        ('prac2BackStart', 'caliStart', "Practice2Back"),
        ('caliStart', 'prac1BackStart', "Calibration"),
        ('prac1BackStart', 'prac3BackStart', "Practice1Back"),
        ('prac3BackStart', 'testStart', "Practice3Back"),
        # Test
        ('offlineT0Start', 'learningStart', "OfflineRatingT0"),
        ('learningStart', 'offlineT1Start', "Learning"),
        ('offlineT1Start', 'trueChoiceStart', "OfflineRatingT1"),
        ('trueChoiceStart', 'offlineT2Start', "TrueChoice"),
        ('offlineT2Start', 'learningCheckT1Start', "OfflineRatingT2"),
        ('learningCheckT1Start', 'falseChoiceStart', "LearningCheckT1"),
        ('falseChoiceStart', 'offlineT3Start', "FalseChoice"),
        ('offlineT3Start', 'learningCheckT2Start', "OfflineRatingT3"),
        ('learningCheckT2Start', 'demandRatingStart', "LearningCheckT2"),
        # Demand manipulation check
        ('demandRatingStart', 'expEnd', "DemandRating"),
    )

    df['Phase'] = ''

    # 'line' value of the first row carrying each trigger.
    starts = {}
    for trigger in phase_names:
        hits = df.loc[df['TriggerName'] == trigger, 'line']
        if hits.empty:
            raise IndexError(f"trigger {trigger!r} not found in df['TriggerName']")
        starts[trigger] = hits.iloc[0]

    # Half-open comparison instead of isin(range(...)): same rows for integer
    # line numbers, without materialising every sample index of the phase.
    line = df['line']
    for first, following, label in boundaries:
        in_phase = (line >= starts[first]) & (line < starts[following])
        df.loc[in_phase, 'Phase'] = label

    phase = df['Phase']
    selections = {
        'Learning': phase == "Learning",
        'TrueChoice': phase == "TrueChoice",
        'FalseChoice': phase == "FalseChoice",
        'Offlines': phase.str.startswith("OfflineRating"),
        'LearningCheck': phase.str.startswith("LearningCheck"),
        'DemandRating': phase == "DemandRating",
    }
    # Boolean selection plus sort_values already yields frames independent of df.
    return {name: df[mask].sort_values('line') for name, mask in selections.items()}

In [None]:
# add trial number
def add_trial_n(df):
    """Return a copy of ``df`` with a running trial counter in ``Trial_N``.

    The counter starts at 0 and goes up by one on the last row of every run of
    consecutive ``"trialStart"`` rows, i.e. on a ``"trialStart"`` row whose
    successor is not ``"trialStart"``. That row and all following rows carry
    the new count. Rows before the first such row keep 0.

    A ``"trialStart"`` on the very last row has no successor; it is treated as
    the end of its run and therefore counted (this used to raise
    ``IndexError``).

    Parameters
    ----------
    df : pd.DataFrame
        Needs a ``TriggerName`` column. The input is not modified.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with an added integer ``Trial_N`` column.
    """
    is_start = np.asarray(df['TriggerName'] == "trialStart", dtype=bool)

    # next_is_start[i] is True when row i+1 is a trialStart; the final row has
    # no successor and stays False.
    next_is_start = np.zeros_like(is_start)
    next_is_start[:-1] = is_start[1:]

    closes_run = is_start & ~next_is_start

    df_trialed = df.copy()
    df_trialed['Trial_N'] = np.cumsum(closes_run)
    return df_trialed

In [None]:
def ex_timeout(df):
    """Drop every trial that contains a ``"respChoiceTimeout"`` trigger.

    A trial is identified by its ``Trial_N`` value; if any row of a trial has
    ``TriggerName == "respChoiceTimeout"``, all rows of that trial are removed.

    The earlier implementation wrapped this in a ``while`` loop whose condition
    never changed and was cut off after three identical passes; one pass gives
    the same result.

    Parameters
    ----------
    df : pd.DataFrame
        Needs ``TriggerName`` and ``Trial_N`` columns. The input is not modified.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` without the timed-out trials (a plain copy when there
        are none).
    """
    is_timeout = df['TriggerName'] == "respChoiceTimeout"
    if not is_timeout.any():
        return df.copy()

    timed_out_trials = set(df.loc[is_timeout, 'Trial_N'])
    kept_trials = set(df['Trial_N']) - timed_out_trials
    return df[df['Trial_N'].isin(kept_trials)].copy()

In [None]:
def clean_phases(dfs, phase_names=['Learning','TrueChoice','FalseChoice','Offlines']):
    """Number the trials of each requested phase and drop timed-out trials.

    For every name in ``phase_names`` the frame ``dfs[name]`` is passed through
    ``add_trial_n`` and then ``ex_timeout``.

    Parameters
    ----------
    dfs : dict of str -> pd.DataFrame
        Per-phase frames; must contain every name in ``phase_names``.
    phase_names : iterable of str
        Phases to process.

    Returns
    -------
    dict of str -> pd.DataFrame
        The processed frame for each requested phase, in the given order.
    """
    return {name: ex_timeout(add_trial_n(dfs[name])) for name in phase_names}

In [None]:
def ex_outliers_iqr(df, channel, gpb='label'):
    """Remove every trial that contains an outlying value for ``channel``.

    Despite the name, the bounds are not an inter-quartile range: within each
    group of ``gpb`` the 5th and 95th percentiles of
    ``{channel}_IIRFilter_NotchFilter_femg_mean`` are computed, and a row is an
    outlier when its value lies strictly below the lower or strictly above the
    upper bound of its own group. A trial (``trialn``) with at least one
    outlier row is dropped entirely.

    Parameters
    ----------
    df : pd.DataFrame
        Needs ``trialn``, the ``gpb`` column and the channel column named above.
    channel : str
        Prefix of the value column, e.g. ``'zyg'``.
    gpb : str, default 'label'
        Column whose groups define the percentile bounds.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` belonging to trials without any outlier.
    """
    value_col = f'{channel}_IIRFilter_NotchFilter_femg_mean'

    # One row per group, columns 0.05 and 0.95.
    bounds = df.groupby(gpb)[value_col].quantile([0.05, 0.95]).unstack(level=1)

    # Look up each row's group bounds and compare position by position; plain
    # arrays avoid aligning on the (repeated) group labels.
    groups = df[gpb].to_numpy()
    lower = bounds.loc[groups, 0.05].to_numpy()
    upper = bounds.loc[groups, 0.95].to_numpy()
    values = df[value_col].to_numpy()
    is_outlier = (values < lower) | (values > upper)

    all_trials = set(df['trialn'].values)
    outlier_trials = set(df.loc[is_outlier, 'trialn'].values)
    kept_trials = all_trials - outlier_trials
    return df[df['trialn'].isin(kept_trials)]

In [None]:
def diff_values(df, channel, n_trials=10):
    """Compute per-trial value differences and their z-scores for ``channel``.

    For each trial number ``0 .. n_trials-1`` the rows of ``df`` with that
    ``trialn`` are taken in their existing order, and the difference
    ``second row - first row`` of ``{channel}_IIRFilter_NotchFilter_femg_mean``
    is stored.
    NOTE(review): this presumes the first row is the fixation sample and the
    second the target sample, as the original variable names suggest - confirm
    the row order upstream.

    Trials that are absent from ``df`` or have fewer than two rows get NaN.
    The z-scores are computed once over all trials, ignoring NaNs.

    The function now reads its ``df`` argument; it previously ignored it and
    used the notebook-level variable ``df_res_clean``.

    Parameters
    ----------
    df : pd.DataFrame
        Needs ``trialn`` and the channel column named above.
    channel : str
        Prefix of the value column, e.g. ``'zyg'``.
    n_trials : int, default 10
        Number of trials to report (trial numbers ``0 .. n_trials-1``).

    Returns
    -------
    pd.DataFrame
        Columns ``trials``, ``{channel}_values`` and ``{channel}_zvalues``,
        one row per trial number.
    """
    # Imported here so the function does not depend on a name that may or may
    # not be provided by the notebook's star import.
    from scipy.stats import zscore

    value_col = f'{channel}_IIRFilter_NotchFilter_femg_mean'
    trials = range(n_trials)

    values = []
    for trial in trials:
        trial_values = df.loc[df['trialn'] == trial, value_col]
        if len(trial_values) >= 2:
            values.append(trial_values.iloc[1] - trial_values.iloc[0])
        else:
            values.append(np.nan)
    values = np.asarray(values, dtype=float)

    # One z-score pass over the complete vector (not once per loop iteration).
    if values.size:
        zvalues = np.asarray(zscore(values, nan_policy='omit'), dtype=float)
    else:
        zvalues = values.copy()

    diffs = pd.DataFrame({'trials': trials, f'{channel}_values': values, f'{channel}_zvalues': zvalues})
    return diffs