### Note: I received four participant CSV files and determined that the second and third files came from the same participant, so I removed participant_3.csv from the analysis (participant_4.csv is therefore labelled "Participant 3" below)

### Import necessary packages

In [153]:
import pandas as pd
import numpy as np

## Read each participant's CSV file to their own dataframe

In [154]:
# Load the three retained recordings. participant_3.csv is not read, so the
# contents of participant_4.csv end up in p3.
csv_paths = ['participant_1.csv', 'participant_2.csv', 'participant_4.csv']
p1, p2, p3 = [pd.read_csv(csv_path) for csv_path in csv_paths]

## Merge the participant dataframes into a single dataframe

In [186]:
# Per-participant frames, in the order the participant labels are assigned
# when they are concatenated.
frames = [
    p1,
    p2,
    p3,
]

In [192]:
# Stack the per-participant frames; the labels become the outer level of a
# MultiIndex so each participant can later be selected with .loc[label].
participant_labels = ['Participant 1', 'Participant 2', 'Participant 3']
participants = pd.concat(frames, keys=participant_labels)

### Check shape of dataframe to see the number of datapoints before cleaning

In [193]:
# (rows, columns) of the combined frame before any cleaning is applied.
participants.shape

(94373, 13)

## Remove null values and missing data

In [194]:
# Treat 0 as "missing" only in the eye-measurement columns that are used for
# dropping rows. Replacing 0 across the whole frame would also turn legitimate
# zeros in other columns (e.g. a timestamp of 0 seconds) into NaN.
eye_columns = ['r_size', 'r_conf', 'r_x_pos', 'r_y_pos',
               'l_size', 'l_conf', 'l_x_pos', 'l_y_pos']
ptemp = participants.copy()
ptemp[eye_columns] = ptemp[eye_columns].replace(0, np.nan)

In [195]:
# Drop every sample in which any eye measurement (size, confidence, x/y position
# for either eye) is missing.
eye_columns = [
    'r_size', 'r_conf', 'r_x_pos', 'r_y_pos',
    'l_size', 'l_conf', 'l_x_pos', 'l_y_pos',
]
p_clean = ptemp.dropna(subset=eye_columns)

### Fix timestamp at 0 seconds

In [197]:
# Set the timestamp of Participant 1's first sample (original row label 0) to 0.
# The row is addressed with a single .loc call on the (participant, row)
# MultiIndex: the chained form `p_clean.loc[...].at[...] = 0` assigns to an
# intermediate object and is not guaranteed to change p_clean (under pandas
# Copy-on-Write it never does).
# The membership check avoids creating a new, mostly-NaN row when row 0 was
# dropped during cleaning.
if ('Participant 1', 0) in p_clean.index:
    p_clean.loc[('Participant 1', 0), 'timestamp'] = 0

In [198]:
# Set the timestamp of Participant 2's first sample (original row label 0) to 0.
# The row is addressed with a single .loc call on the (participant, row)
# MultiIndex: the chained form `p_clean.loc[...].at[...] = 0` assigns to an
# intermediate object and is not guaranteed to change p_clean (under pandas
# Copy-on-Write it never does).
# The membership check avoids creating a new, mostly-NaN row when row 0 was
# dropped during cleaning.
if ('Participant 2', 0) in p_clean.index:
    p_clean.loc[('Participant 2', 0), 'timestamp'] = 0

In [199]:
# Set the timestamp of Participant 3's first sample (original row label 0) to 0.
# The row is addressed with a single .loc call on the (participant, row)
# MultiIndex: the chained form `p_clean.loc[...].at[...] = 0` assigns to an
# intermediate object and is not guaranteed to change p_clean (under pandas
# Copy-on-Write it never does).
# The membership check avoids creating a new, mostly-NaN row when row 0 was
# dropped during cleaning.
if ('Participant 3', 0) in p_clean.index:
    p_clean.loc[('Participant 3', 0), 'timestamp'] = 0

### Check shape of files post-cleaning to make sure there are still enough datapoints

In [200]:
# (rows, columns) remaining after rows with missing eye measurements were dropped.
p_clean.shape

(88507, 13)

Only ~6000 datapoints (~6%) were lost from cleaning; it is probably worth seeing how many more datapoints are lost when a threshold is applied to left/right eye confidence.

## Insert a confidence threshold

In [313]:
# Keep only samples whose right-eye confidence is strictly above 0.25.
right_conf_ok = p_clean['r_conf'].gt(0.25)
p_temp = p_clean.loc[right_conf_ok]

In [314]:
# Of those, keep only samples whose left-eye confidence is also strictly above 0.25.
left_conf_ok = p_temp['l_conf'].gt(0.25)
p_corr = p_temp.loc[left_conf_ok]

In [315]:
# (rows, columns) remaining after both confidence filters.
p_corr.shape

(83810, 13)

Requiring each eye to have a minimum confidence value of 0.25 removes a mere ~4,700 additional datapoints (88,507 → 83,810, out of the original ~94,000), yet likely confers a benefit from removing outliers.

I also noticed some of the pupil sizes were odd (a few were the value of Pi, for example), so I will create another threshold.

## Insert a pupil size threshold

In [316]:
# Keep only samples whose right pupil size is strictly above 5.
right_size_ok = p_corr['r_size'].gt(5)
p_temp_2 = p_corr.loc[right_size_ok]

In [317]:
# Of those, keep only samples whose left pupil size is also strictly above 5.
left_size_ok = p_temp_2['l_size'].gt(5)
p_final = p_temp_2.loc[left_size_ok]

In [318]:
# (rows, columns) remaining after the right pupil-size filter.
p_temp_2.shape

(83810, 13)

In [319]:
# (rows, columns) remaining after both pupil-size filters.
p_final.shape

(83810, 13)

It seems the previous threshold for confidence was enough to take care of pupil size outliers.

## Perform basic data analysis

### Group by block number and type

In [322]:
# Select Participant 1's rows, then group them by (block_number, block_type).
p1_rows = p_final.loc['Participant 1']
p1_blocknum = p1_rows.groupby(['block_number', 'block_type'])

In [323]:
# Summary statistics of Participant 1's l_conf values within each
# (block_number, block_type) group.
p1_blocknum['l_conf'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
block_number,block_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,TRIAL_START,200.0,0.931108,0.076176,0.342857,0.914286,0.942857,0.971429,1.000000
2.0,BEGIN_SPAN,432.0,0.913083,0.060334,0.294118,0.901457,0.914286,0.941176,1.000000
3.0,CUE_ANSWER,625.0,0.904783,0.088940,0.257143,0.904320,0.916667,0.942857,1.000000
4.0,RESPONSE,84.0,0.706652,0.208448,0.257143,0.558824,0.710084,0.911765,1.000000
5.0,BEGIN_SPAN,188.0,0.711440,0.218821,0.257143,0.555482,0.742857,0.914222,1.000000
6.0,CUE_ANSWER,357.0,0.851200,0.164670,0.257143,0.885714,0.914286,0.942857,1.000000
7.0,RESPONSE,233.0,0.921373,0.080690,0.272727,0.914286,0.937500,0.944444,1.000000
8.0,BEGIN_SPAN,346.0,0.884500,0.114298,0.277778,0.888889,0.914286,0.942857,1.000000
9.0,CUE_ANSWER,57.0,0.712022,0.195067,0.264706,0.600000,0.696970,0.916022,0.957000
10.0,RESPONSE,129.0,0.825799,0.196027,0.264706,0.800000,0.923714,0.944444,1.000000


In [324]:
# Mean l_conf for Participant 1 within each (block_number, block_type) group.
p1_blocknum['l_conf'].mean()

block_number  block_type 
1.0           TRIAL_START    0.931108
2.0           BEGIN_SPAN     0.913083
3.0           CUE_ANSWER     0.904783
4.0           RESPONSE       0.706652
5.0           BEGIN_SPAN     0.711440
6.0           CUE_ANSWER     0.851200
7.0           RESPONSE       0.921373
8.0           BEGIN_SPAN     0.884500
9.0           CUE_ANSWER     0.712022
10.0          RESPONSE       0.825799
11.0          BEGIN_SPAN     0.759037
12.0          CUE_ANSWER     0.863183
13.0          RESPONSE       0.948947
14.0          BEGIN_SPAN     0.900418
15.0          CUE_ANSWER     0.789531
16.0          RESPONSE       0.905016
17.0          BEGIN_SPAN     0.936370
18.0          CUE_ANSWER     0.900095
19.0          RESPONSE       0.885677
20.0          BEGIN_SPAN     0.883542
21.0          CUE_ANSWER     0.863928
22.0          RESPONSE       0.922537
23.0          BEGIN_SPAN     0.922848
24.0          CUE_ANSWER     0.907850
25.0          RESPONSE       0.940643
26.0          BEGIN_SPAN

In [None]:
test = 