# Python notebook for extracting accuracy and reaction-time data from the E-Prime computer task
### created by Kat Marton 4/4/2024
### last edited by Kat 5/6/2024

In [1]:
import pandas as pd
import numpy as np
import os
import time
from datetime import datetime, timedelta

In [2]:
# Folder holding one sub-folder per participant; clean_epr() builds its file
# paths from the current working directory, so this cell must run first.
EPRIME_DIR = 'C:\\Users\\katgm\\Rutgers University\\Michelle Chen - Rutgers_Neuropsych_Lab\\COVID_Fatigue\\RC_award\\Data\\E-Prime'
os.chdir(EPRIME_DIR)

In [3]:
def clean_epr(SubjID):
    """Load one participant's E-Prime export and return a tidy per-trial table.

    The file ``<cwd>/<SubjID>/<SubjID>_epr.txt`` (tab separated, UTF-16 LE) is
    read, fatigue ratings are forward-filled onto the rows that follow them,
    only even-numbered ``Block`` rows are kept (renumbered 1, 2, 3, ...), and
    three derived columns are added:

    * ``response``  - 1 if ``stimulus.RESP`` is ``{SPACE}``, else 0.
    * ``expected``  - 1 if the letter equals the letter two letter-trials
      earlier *within the same block*, else 0; ``<NA>`` on rows without a letter.
    * ``timestamp`` - session start plus the trial onset in milliseconds.

    Parameters
    ----------
    SubjID : str
        Participant folder / file prefix, e.g. ``"Cov5"``.

    Returns
    -------
    pandas.DataFrame
        One row per kept trial with a fresh 0..n-1 index and the columns
        Subject, timestamp, Block, Trial, Letter, response, expected,
        stimulus.RT, FatigueRating, ITI.OnsetTime, Fatigue.OnsetTime,
        nullTrl.OnsetTime, stimulus.OnsetTime, trlDuration, Order, SessionTime.
    """
    # os.path.join instead of hand-written "\\" separators so the path also resolves off Windows.
    epr_path = os.path.join(os.getcwd(), SubjID, SubjID + '_epr.txt')
    epr = pd.read_csv(epr_path, sep="\t", encoding="UTF-16 LE")
    epr = epr.replace('?', np.nan)  # np.nan: the np.NaN alias no longer exists in NumPy 2.x

    # Carry the most recent fatigue rating (and its onset) down onto the rows that follow it.
    for fatigue_col in ("Fatigue.OnsetTime", "FatigueRating"):
        epr[fatigue_col] = epr[fatigue_col].ffill()

    # Keep even-numbered blocks only, then renumber them 1, 2, 3, ...
    # (presumably the odd-numbered rows are the fatigue-rating screens - confirm against the export).
    epr = epr.query('Block%2==0').reset_index(drop=True)
    epr["Block"] = (epr["Block"]/2).astype(int)

    epr["response"] = np.where(epr["stimulus.RESP"]=="{SPACE}", 1, 0) #did they press spacebar or not coded to 0,1
    epr = epr.drop(['stimulus.ACC', 'stimulus.RESP'], axis=1) #drop inaccurate accuracy column and old response column

    # 2-back target detection over letter trials only (rows without a letter are skipped).
    # The shift is done per block so the first two letters of a block are never
    # compared against the last letters of the previous block.
    letter_rows = epr[epr["Letter"].notna()]
    two_back = letter_rows.groupby("Block")["Letter"].shift(2)
    is_target = (letter_rows["Letter"] == two_back).astype(int)
    epr["expected"] = is_target.reindex(epr.index).astype("Int64")  # <NA> where there is no letter

    epr['stimulus.RT'] = pd.to_numeric(epr['stimulus.RT'], errors='coerce').astype('Int64')
    epr = epr.fillna(pd.NA)

    # .iloc[0] reads the first row by position rather than relying on an index label of 0.
    session_start = epr["SessionStartDateTimeUtc"].iloc[0]
    if SubjID == "Cov4": #cov4 is an exception: doesn't include AM/PM for some reason
        init_time = pd.to_datetime(session_start, format='%m/%d/%Y %H:%M')
    else:
        init_time = pd.to_datetime(session_start, format='%m/%d/%Y %I:%M:%S %p') #this is in UTC

    # Onset in ms: use the stimulus onset, falling back to the null-trial onset when it is missing.
    onset_ms = np.where(pd.isna(epr["stimulus.OnsetTime"]), epr["nullTrl.OnsetTime"], epr["stimulus.OnsetTime"])
    epr["timestamp"] = init_time + pd.to_timedelta(pd.Series(onset_ms, index=epr.index), 'ms')

    cols = ["Subject", "timestamp", "Block","Trial","Letter","response","expected","stimulus.RT","FatigueRating","ITI.OnsetTime","Fatigue.OnsetTime","nullTrl.OnsetTime","stimulus.OnsetTime","trlDuration","Order","SessionTime"]
    return epr[cols]

In [4]:
# Participants with an E-Prime export, in the order they are loaded.
SUBJECT_IDS = [
    "Cov4", "Cov5", "Cov7", "Cov8", "Cov13", "Cov14", "Cov19", "Cov20",
    "Cov22", "Cov23", "Cov24", "Cov30", "Cov31", "Cov35", "Cov36", "Cov37",
]

# One cleaned table per participant, keyed by subject ID.
epr_by_subject = {subj: clean_epr(subj) for subj in SUBJECT_IDS}

# Keep the individual names that later cells refer to.
(cov4, cov5, cov7, cov8, cov13, cov14, cov19, cov20,
 cov22, cov23, cov24, cov30, cov31, cov35, cov36, cov37) = (
    epr_by_subject[subj] for subj in SUBJECT_IDS
)

In [11]:
# Preview the cleaned table for one participant (a bare name as the last line renders the frame).
cov5

Unnamed: 0,Subject,timestamp,Block,Trial,Letter,response,expected,stimulus.RT,FatigueRating,ITI.OnsetTime,Fatigue.OnsetTime,nullTrl.OnsetTime,stimulus.OnsetTime,trlDuration,Order,SessionTime
0,5,2023-03-03 17:07:20.516,1,1.0,H,0,0,0,55,69999.0,56465.0,,68516.0,2.0,1 2 3 4 5 6,12:06:12
1,5,2023-03-03 17:07:22.516,1,2.0,V,0,0,0,55,71999.0,56465.0,,70516.0,2.0,1 2 3 4 5 6,12:06:12
2,5,2023-03-03 17:07:24.516,1,3.0,,0,,,55,,56465.0,72516.0,,6000.0,1 2 3 4 5 6,12:06:12
3,5,2023-03-03 17:07:30.533,1,4.0,F,0,0,0,55,80017.0,56465.0,,78533.0,2.0,1 2 3 4 5 6,12:06:12
4,5,2023-03-03 17:07:32.533,1,5.0,H,0,0,0,55,82017.0,56465.0,,80533.0,2.0,1 2 3 4 5 6,12:06:12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
603,5,2023-03-03 17:34:39.987,6,97.0,,0,,,67,,1479353.0,1707987.0,,2.0,1 2 3 4 5 6,12:06:12
604,5,2023-03-03 17:34:40.003,6,98.0,M,1,1,919,67,1709487.0,1479353.0,,1708003.0,2.0,1 2 3 4 5 6,12:06:12
605,5,2023-03-03 17:34:42.004,6,99.0,Z,0,0,0,67,1711487.0,1479353.0,,1710004.0,2.0,1 2 3 4 5 6,12:06:12
606,5,2023-03-03 17:34:44.004,6,100.0,,0,,,67,,1479353.0,1712004.0,,2.0,1 2 3 4 5 6,12:06:12


# Getting Accuracy
### Overall accuracy: how many correct responses in total
### Accuracy for omission errors: Not pressing spacebar when response is expected
### Accuracy for commission errors: Pressing spacebar when no response is expected

Accuracy is computed PER BLOCK, and can also be computed within fixed time windows. Which window lengths?
- 30 seconds
- 60 seconds
- Overlapping windows? Open question: what do I need to know to compute those? Essentially, I need the timestamps of the window edges.

In [6]:
# Work on a copy so experiments below do not alter the cleaned cov5 table.
df = cov5.copy()

In [81]:
# Show the first 50 rows of the working copy.
# NOTE(review): the saved output lists `block_index` and `omission` columns, which a plain
# `cov5.copy()` would not have - presumably left over from out-of-order execution; re-run
# from a fresh kernel to confirm.
df[0:50]

Unnamed: 0,Subject,timestamp,Block,Trial,Letter,response,expected,stimulus.RT,FatigueRating,ITI.OnsetTime,Fatigue.OnsetTime,nullTrl.OnsetTime,stimulus.OnsetTime,trlDuration,Order,SessionTime,block_index,omission
0,5,2023-03-03 17:07:20.516,1,1.0,H,0,0.0,0.0,55,69999.0,56465.0,,68516.0,2.0,1 2 3 4 5 6,12:06:12,1,False
1,5,2023-03-03 17:07:22.516,1,2.0,V,0,0.0,0.0,55,71999.0,56465.0,,70516.0,2.0,1 2 3 4 5 6,12:06:12,2,False
2,5,2023-03-03 17:07:24.516,1,3.0,,0,,,55,,56465.0,72516.0,,6000.0,1 2 3 4 5 6,12:06:12,3,
3,5,2023-03-03 17:07:30.533,1,4.0,F,0,0.0,0.0,55,80017.0,56465.0,,78533.0,2.0,1 2 3 4 5 6,12:06:12,4,False
4,5,2023-03-03 17:07:32.533,1,5.0,H,0,0.0,0.0,55,82017.0,56465.0,,80533.0,2.0,1 2 3 4 5 6,12:06:12,5,False
5,5,2023-03-03 17:07:34.533,1,6.0,,0,,,55,,56465.0,82533.0,,2000.0,1 2 3 4 5 6,12:06:12,6,
6,5,2023-03-03 17:07:36.550,1,7.0,F,1,1.0,859.0,55,86034.0,56465.0,,84550.0,2.0,1 2 3 4 5 6,12:06:12,7,False
7,5,2023-03-03 17:07:38.550,1,8.0,Q,0,0.0,0.0,55,88034.0,56465.0,,86550.0,2.0,1 2 3 4 5 6,12:06:12,8,False
8,5,2023-03-03 17:07:40.550,1,9.0,J,0,0.0,0.0,55,90034.0,56465.0,,88550.0,2.0,1 2 3 4 5 6,12:06:12,9,False
9,5,2023-03-03 17:07:42.551,1,10.0,,0,,,55,,56465.0,90551.0,,4000.0,1 2 3 4 5 6,12:06:12,10,


In [107]:
#overall accuracy
def get_accuracy(df):
    """Proportion of letter trials where ``response`` equals ``expected``.

    Rows whose ``Letter`` is null are ignored. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Letter``, ``Block``, ``timestamp`` (datetime),
        ``expected`` and ``response``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(by_block, by_60window, by_30window)`` - the same positional order
        as before (index 1 = 1-minute windows, index 2 = 30-second windows).
        ``by_block`` has columns ``Block, accuracy``; the window tables have
        ``Block, timestamp, accuracy, window`` where ``timestamp`` is the
        window's left edge and ``window`` counts windows from 0 inside each block.
    """
    df_trials = df[df["Letter"].notnull()].copy()

    def _accuracy_by(keys):
        # Mean of (expected == response) per group, returned as a flat table.
        scores = df_trials.groupby(keys)[['expected', 'response']].apply(
            lambda grp: (grp['expected'] == grp['response']).mean()
        )
        return scores.rename("accuracy").reset_index()

    def _windowed(freq):
        # NOTE(review): a single Grouper with origin="start" is applied to the whole frame,
        # so window edges appear to be anchored to the first timestamp overall rather than
        # to each block's first trial - confirm this is the intended windowing.
        table = _accuracy_by(["Block", pd.Grouper(key="timestamp", freq=freq, origin="start")])
        table["window"] = table.groupby("Block").cumcount()
        return table

    by_block = _accuracy_by("Block")
    # The original locals were labelled the wrong way round (the "30" table used 1min and
    # the "60" table used .5min); the names now match the frequencies, return order unchanged.
    by_60window = _windowed("1min")
    by_30window = _windowed("30s")

    return by_block, by_60window, by_30window

#omission errors (when expected==1 but response==0)
def get_omission(df):
    """Count omission errors (a response was expected but none was given).

    The flag is computed on a copy, so the caller's frame no longer gains an
    ``omission`` column as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Block``, ``timestamp`` (datetime), ``expected``
        and ``response``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(by_block, by_60window, by_30window)``; each has an ``omission``
        count column, and the window tables also carry ``timestamp`` (the
        window's left edge).
    """
    flagged = df.assign(omission=(df["expected"] == 1) & (df["response"] == 0))

    def _count(keys):
        # Sum of the boolean flag per group = number of omission errors.
        return flagged.groupby(keys)["omission"].sum().reset_index()

    def _window(freq):
        # NOTE(review): origin="start" on one Grouper over the whole frame appears to anchor
        # windows to the first timestamp overall, not to each block's start - confirm intent.
        return ["Block", pd.Grouper(key="timestamp", freq=freq, origin="start")]

    by_block = _count("Block")
    by_60window = _count(_window("1min"))
    by_30window = _count(_window("30s"))

    return by_block, by_60window, by_30window
    
#commission errors (when expected==0 but response==1)
def get_commission(df):
    """Count commission errors (a response was given when none was expected).

    The flag is computed on a copy, so the caller's frame no longer gains a
    ``commission`` column as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Block``, ``timestamp`` (datetime), ``expected``
        and ``response``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(by_block, by_60window, by_30window)``; each has a ``commission``
        count column, and the window tables also carry ``timestamp`` (the
        window's left edge).
    """
    flagged = df.assign(commission=(df["expected"] == 0) & (df["response"] == 1))

    def _count(keys):
        # Sum of the boolean flag per group = number of commission errors.
        return flagged.groupby(keys)["commission"].sum().reset_index()

    def _window(freq):
        # NOTE(review): origin="start" on one Grouper over the whole frame appears to anchor
        # windows to the first timestamp overall, not to each block's start - confirm intent.
        return ["Block", pd.Grouper(key="timestamp", freq=freq, origin="start")]

    by_block = _count("Block")
    by_60window = _count(_window("1min"))
    by_30window = _count(_window("30s"))

    return by_block, by_60window, by_30window


In [104]:
# Index 2 of the returned tuple is the table built with 30-second windows.
get_accuracy(cov5)[2]

Unnamed: 0,Block,timestamp,accuracy,window
0,1,2023-03-03 17:07:20.516,1.0,0
1,1,2023-03-03 17:07:50.516,0.8,1
2,1,2023-03-03 17:08:20.516,1.0,2
3,1,2023-03-03 17:08:50.516,0.5,3
4,1,2023-03-03 17:09:20.516,0.833333,4
5,1,2023-03-03 17:09:50.516,1.0,5
6,1,2023-03-03 17:10:20.516,0.714286,6
7,1,2023-03-03 17:10:50.516,0.888889,7
8,1,2023-03-03 17:11:20.516,1.0,8
9,2,2023-03-03 17:12:20.516,1.0,0


In [109]:
# Index 2 of the returned tuple is the omission count per 30-second window.
get_omission(cov5)[2]

Unnamed: 0,Block,timestamp,omission
0,1,2023-03-03 17:07:20.516,0
1,1,2023-03-03 17:07:50.516,2
2,1,2023-03-03 17:08:20.516,0
3,1,2023-03-03 17:08:50.516,3
4,1,2023-03-03 17:09:20.516,1
5,1,2023-03-03 17:09:50.516,0
6,1,2023-03-03 17:10:20.516,2
7,1,2023-03-03 17:10:50.516,1
8,1,2023-03-03 17:11:20.516,0
9,2,2023-03-03 17:12:20.516,0


In [113]:
# Index 2 of the returned tuple is the commission count per 30-second window (participant Cov7 here).
get_commission(cov7)[2]

Unnamed: 0,Block,timestamp,commission
0,1,2023-04-06 18:06:22.871,0
1,1,2023-04-06 18:06:52.871,0
2,1,2023-04-06 18:07:22.871,0
3,1,2023-04-06 18:07:52.871,0
4,1,2023-04-06 18:08:22.871,0
5,1,2023-04-06 18:08:52.871,1
6,1,2023-04-06 18:09:22.871,0
7,1,2023-04-06 18:09:52.871,0
8,1,2023-04-06 18:10:22.871,0
9,2,2023-04-06 18:10:52.871,0


# Getting Reaction Times
### Again, can get by block and by windows

In [125]:
def get_rt(df):
    """Mean reaction time per block and per time window.

    A ``stimulus.RT`` of 0 is treated as missing and excluded from the means.
    The replacement is done on a copy, so the caller's ``stimulus.RT`` column
    keeps its zeros.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Block``, ``timestamp`` (datetime) and
        ``stimulus.RT``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(by_block, by_60window, by_30window)``; each has a ``stimulus.RT``
        column holding the mean, and the window tables also carry
        ``timestamp`` (the window's left edge). Groups with no non-zero
        reaction time have a missing mean.
    """
    rt = df.assign(**{"stimulus.RT": df["stimulus.RT"].replace(0, pd.NA)})

    def _mean_rt(keys):
        return rt.groupby(keys)["stimulus.RT"].mean().reset_index()

    def _window(freq):
        # NOTE(review): origin="start" on one Grouper over the whole frame appears to anchor
        # windows to the first timestamp overall, not to each block's start - confirm intent.
        return ["Block", pd.Grouper(key="timestamp", freq=freq, origin="start")]

    #reaction time by block
    by_block = _mean_rt("Block")

    #reaction time 60 seconds
    by_60window = _mean_rt(_window("1min"))

    #reaction time 30 seconds
    by_30window = _mean_rt(_window("30s"))

    return by_block, by_60window, by_30window

In [126]:
# Index 0 of the returned tuple is the mean reaction time per block.
get_rt(cov5)[0]

Unnamed: 0,Block,stimulus.RT
0,1,1017.666667
1,2,882.888889
2,3,978.222222
3,4,899.1
4,5,995.777778
5,6,1072.25


## Code for reading tags (not useful for accuracy, just for checking timestamps and blocks)

In [98]:
#this code only works for e4 users
# NOTE: this cell moves the working directory to the EmpaticaE4 folder; clean_epr()
# resolves its files from the working directory, so change back before calling it again.
SubjID = "Cov22"
os.chdir('C:\\Users\\katgm\\Rutgers University\\Michelle Chen - Rutgers_Neuropsych_Lab\\COVID_Fatigue\\RC_award\\Data\\EmpaticaE4')
tags_file = os.getcwd() + "\\" + SubjID + "\\" + SubjID + "_baseline\\tags.csv"
# tags.csv is read without a header row; every cell is then parsed as a timestamp in seconds.
tags = pd.read_csv(tags_file, header=None)
tags = tags.map(lambda stamp: pd.to_datetime(stamp, unit="s"))