In [35]:
import numpy 
#import numpy as np
import pandas 


In [36]:
#function computes condition-wise stats for a given participant 
#input: 
    #filename: name of logfile 
#output: 
    #3 lists for condition-wise motion_rt, accuracy and std of color error 

def SubjectStats(filename):
    df = pandas.read_csv(filename, header = None)
    df.columns = ['', 'arrow_direction', 'arrow_onset', '', 
              'dots_pos', 'dots_moving_direction', 'color',
             'dots_onset', '', 'motion_responses', 'motion_rt',
             '', 'color_response']
    idx = df.loc[:, 'arrow_direction'] == 0 #rows we want to change values 
    df.loc[idx, 'arrow_direction'] = -1 
    
    #calculate congruency between arrow_direction and dots_pos, store in column arrow_pos_c
    c = df.loc[:, 'arrow_direction'] * df.loc[:, 'dots_pos']
    df['arrow_pos_c'] = c / 3
    
    c = df.loc[:, 'dots_pos'] * df.loc[:, 'dots_moving_direction']
    df['motion_dots_c'] = c / 3
    
    #calculate motion correct
    c = numpy.abs(df.loc[:, 'dots_moving_direction'] - df.loc[:, 'motion_responses'])
    c[c == 0] = 1
    c[c == 2] = 0
    df['motion_correct'] = c
    
    #calculate color error
    c = df.loc[:, 'color_response'] - df.loc[:, 'color']
    c[c > 180] = c[c > 180] - 360
    c[c < -180] = c[c < -180] + 360
    df['color_error'] = c
    
    #calculate final numbers for each condition
    
    c1_idx = [1, 1, -1, -1]
    c2_idx = [1, -1, 1, -1]
    
    motion_rt_mean = []
    acc = []
    color_error_std = []
    
    for i in range(4): 
        idx1 = df.loc[:, 'arrow_pos_c'] == c1_idx[i]
        idx2 = df.loc[:, 'motion_dots_c'] == c2_idx[i]
        df1 = df.loc[idx1 & idx2, :]

        #choose correct trials
        idx = df1.loc[:, 'motion_correct'] == 1
        motion_rt_mean.append(numpy.mean(df1.loc[idx, 'motion_rt']))
        acc.append(numpy.mean(df1.loc[:, 'motion_correct']))
        color_error_std.append(numpy.std(df1.loc[:, 'color_error']))
    
    return motion_rt_mean, acc, color_error_std

In [37]:
[motion_rt_mean, acc, color_error_std] = SubjectStats('logfiles/subj_01.csv')
print(motion_rt_mean)
print(acc)
print(color_error_std)

[956.8078323102308, 1307.778203880546, 1110.5231025953942, 1521.4665827619042]
[0.946875, 0.915625, 0.95, 0.853125]
[100.92294011706191, 106.34820031476772, 97.92039307577608, 106.29636102542764]


In [38]:
#create a new data frame with empty columns 
data = {'subj':[], 'arrow_pos_c':[], 'motion_pos_c':[], 'motion_rt_mean':[], 'acc':[], 'color_error_std':[]}

df = pandas.DataFrame(data = data) #give value of data to data in the function 
print(df)

#for loop to run all subjects through 
for i in range(1, 21): #end with 21 because last number in range doesn't count, so this is 1-20 
    #format i to fit subject filename 
    filename = 'logfiles/subj_' + f"{i:02d}" + '.csv'#if not 2 digits it will add leading 0, d means integer 
    [motion_rt_mean, acc, color_error_std] = SubjectStats(filename)
    #each iteration of loop, add to data frame 
    data = {'subj':[i, i, i, i], 'arrow_pos_c':[1, 1, -1, -1], 'motion_pos_c':[1, -1, 1, -1], 'motion_rt_mean':motion_rt_mean, 'acc':acc, 'color_error_std':color_error_std}
    #create a dataframe for this participant
    df1 = pandas.DataFrame(data = data)
    #append df1 to group-level data frame 
    df = df.append(df1, ignore_index = True) #ignore_index, creates unique index number (appended to the first set of indicies)

print(df)
df.to_csv('GroupData.csv', index = False, index_label = False)#documentation pandas.dataframetocsv 
    

Empty DataFrame
Columns: [subj, arrow_pos_c, motion_pos_c, motion_rt_mean, acc, color_error_std]
Index: []
    subj  arrow_pos_c  motion_pos_c  motion_rt_mean       acc  color_error_std
0    1.0          1.0           1.0      956.807832  0.946875       100.922940
1    1.0          1.0          -1.0     1307.778204  0.915625       106.348200
2    1.0         -1.0           1.0     1110.523103  0.950000        97.920393
3    1.0         -1.0          -1.0     1521.466583  0.853125       106.296361
4    2.0          1.0           1.0     1128.792822  0.962500       106.049811
..   ...          ...           ...             ...       ...              ...
75  19.0         -1.0          -1.0      755.927673  0.940625       102.829576
76  20.0          1.0           1.0     1006.510359  0.940625       108.525261
77  20.0          1.0          -1.0     1107.163805  0.956250       103.572542
78  20.0         -1.0           1.0     1015.424368  0.915625       108.122125
79  20.0         -1.0   

In [39]:
df = pandas.read_csv('GroupData.csv') #headers, so don't need to set header to false 
df 

Unnamed: 0,subj,arrow_pos_c,motion_pos_c,motion_rt_mean,acc,color_error_std
0,1.0,1.0,1.0,956.807832,0.946875,100.922940
1,1.0,1.0,-1.0,1307.778204,0.915625,106.348200
2,1.0,-1.0,1.0,1110.523103,0.950000,97.920393
3,1.0,-1.0,-1.0,1521.466583,0.853125,106.296361
4,2.0,1.0,1.0,1128.792822,0.962500,106.049811
...,...,...,...,...,...,...
75,19.0,-1.0,-1.0,755.927673,0.940625,102.829576
76,20.0,1.0,1.0,1006.510359,0.940625,108.525261
77,20.0,1.0,-1.0,1107.163805,0.956250,103.572542
78,20.0,-1.0,1.0,1015.424368,0.915625,108.122125


In [40]:
#group conditions 
idx1 = df.loc[:, 'arrow_pos_c'] == 1 #for congruent 
idx2 = df.loc[:, 'motion_pos_c'] == 1 
df_cc = df.loc[idx1 & idx2, :]

idx1 = df.loc[:, 'arrow_pos_c'] == 1  
idx2 = df.loc[:, 'motion_pos_c'] == -1 
df_ci = df.loc[idx1 & idx2, :]

idx1 = df.loc[:, 'arrow_pos_c'] == -1 
idx2 = df.loc[:, 'motion_pos_c'] == 1 
df_ic = df.loc[idx1 & idx2, :]

idx1 = df.loc[:, 'arrow_pos_c'] == -1
idx2 = df.loc[:, 'motion_pos_c'] == -1 
df_ii = df.loc[idx1 & idx2, :]

Two important statistical analysis modules: 
- scipy.stats
- statsmodels 

see 3/9/22 powerpoint for further notes 

In [52]:
#t tests 
import scipy.stats as st

#test difference in motion_rt_mean between cc and ci, within subject design 
#convert dataframe to a numpy list(array)
cc = df_cc['motion_rt_mean'].to_numpy()
ci = df_ci['motion_rt_mean'].to_numpy()
dif = ci - cc
#one sample t-test/paired sample t-test
t, p = st.ttest_1samp(dif, 0)
print("T statistic = " + str(t))
print("p = " + str(p))

#two sample t-test, doesn't assume ci and cc are paired 
t, p = st.ttest_ind(ci, cc)
print("T statistic = " + str(t))
print("p = " + str(p))


T statistic = 2.716347777417675
p = 0.013697197974130748
T statistic = 1.9143708500035261
p = 0.06312495788304728


In [54]:
#anovas 
from statsmodels.stats.anova import AnovaRM 

#repeated meausures, used for within subject design 

ans = AnovaRM(data = df, depvar = 'motion_rt_mean', subject = 'subj', within = ['arrow_pos_c'], aggregate_func = 'mean').fit()

print(ans)

                  Anova
            F Value Num DF  Den DF Pr > F
-----------------------------------------
arrow_pos_c  5.4871 1.0000 19.0000 0.0302

