# Signal Detection Theory for Emotional Faces

## Import modules

In [1]:
import pandas as pd
import numpy as np

## Explore the data

Description of variables:
Stimulus Level (ground truth): 1: 100% fearful, 2: 70% fearful, 3: 60% fearful, 4: 50% fearful, 5: 40% fearful, 6: 30% fearful, 7: 0% fearful.
* 1 = fearful (signal, 100% fearful)
* 0 = NOT fearful (noise, 0% fearful)
    * change 7s to 0s

Response (decision): 1: judging face as fear (signal), 2: judging face as happy (noise, aka NOT fear), NaN: missing button press.
* 1 = judge fearful (signal)
* 0 = judge NOT fearful (noise)
    * change 2s to 0s

Confidence scale: 3: very sure, 2: sure, 1: unsure

Clean data:
* Remove RT and Confidence columns
* Remove rows with missing values (NaN) and rows for the intermediate 30-70% fearful stimuli
* Split patient groups into Epilepsy and Lesion

In [2]:
# Load the CSV data set into a DataFrame
df = pd.read_csv(filepath_or_buffer='data_Wang_2017_NatComm.csv')
# Preview the first five rows
df.head(n=5)

Unnamed: 0,Subj_idx,Stimulus,Response,Confidence,RT_dec,RT_conf,Group
0,p26CS_121211,5,2.0,3.0,0.70448,0.39629,Epilepsy
1,p26CS_121211,3,1.0,3.0,0.25075,0.068314,Epilepsy
2,p26CS_121211,4,1.0,3.0,0.12543,0.000375,Epilepsy
3,p26CS_121211,3,1.0,3.0,0.22883,0.00038,Epilepsy
4,p26CS_121211,1,1.0,3.0,0.001323,2.0,Epilepsy


In [3]:
# info() prints the number of rows, plus each column's dtype and non-null
# count, so a separate len(df) is unnecessary (and, not being the last
# expression in the cell, its value would never be displayed anyway).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4880 entries, 0 to 4879
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Subj_idx    4880 non-null   object 
 1   Stimulus    4880 non-null   int64  
 2   Response    4650 non-null   float64
 3   Confidence  4583 non-null   float64
 4   RT_dec      4650 non-null   float64
 5   RT_conf     4583 non-null   float64
 6   Group       4880 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 267.0+ KB


## Data cleaning

In [4]:
# Count rows that exactly repeat an earlier row across every column
df.duplicated(subset=None, keep='first').sum()

160

In [5]:
# Discard every row that has a missing value in any column.
# NOTE(review): this also removes trials that have a Response but no
# Confidence rating, even though Confidence is dropped later - confirm
# that excluding those trials is intended.
df = df.dropna(axis=0, how='any')
# Per-column count of remaining missing values (expected to be all zeros)
df.isna().sum()

Subj_idx      0
Stimulus      0
Response      0
Confidence    0
RT_dec        0
RT_conf       0
Group         0
dtype: int64

In [6]:
# Cast Response from float to int64 with the vectorised astype() instead of a
# per-element apply(). assign() returns a new frame, so nothing is written
# into the frame produced by dropna().
df = df.assign(Response=df['Response'].astype(np.int64))
len(df)

4583

In [7]:
# Keep only the columns needed for the SDT analysis
df = df.drop(columns=['RT_dec', 'RT_conf', 'Confidence'])

# Stimulus levels 2-6 are the 30-70% fearful faces; remove them in a single
# filter so only levels 1 (100% fearful) and 7 (0% fearful) remain
intermediate_levels = [2, 3, 4, 5, 6]
df = df[~df['Stimulus'].isin(intermediate_levels)]

# Recode to 1 = signal (fearful) and 0 = noise (not fearful):
#   Stimulus 7 -> 0, Response 2 -> 0
df = df.assign(
    Stimulus=df['Stimulus'].replace({7: 0}),
    Response=df['Response'].replace({2: 0}),
)

# Every remaining trial must now be coded 0/1, otherwise it would be silently
# left out of the hit / FA / miss / CR counts
assert df['Stimulus'].isin([0, 1]).all(), 'unexpected Stimulus code after recoding'
assert df['Response'].isin([0, 1]).all(), 'unexpected Response code after recoding'

# make sure that all hits, FAs, misses, and CRs add up to this value in the new df
len(df)

668

## SDT

### Calculate SDT measures for both groups

* hit when Response==1 & Stimulus==1
* fa when Response==1 & Stimulus==0
* miss when Response==0 & Stimulus==1
* cr when Response==0 & Stimulus==0

* Group 1: Epilepsy
* Group 2: Lesion

In [8]:
def count_sdt_outcomes(trials):
    """Count the four signal-detection outcomes in a frame of trials.

    Parameters
    ----------
    trials : pd.DataFrame
        Frame with 'Stimulus' and 'Response' columns, each coded
        1 (signal) or 0 (noise).

    Returns
    -------
    tuple
        (hit, fa, miss, cr) counts, where
        hit  = Response 1 and Stimulus 1,
        fa   = Response 1 and Stimulus 0,
        miss = Response 0 and Stimulus 1,
        cr   = Response 0 and Stimulus 0.
    """
    said_signal = trials['Response'] == 1
    said_noise = trials['Response'] == 0
    was_signal = trials['Stimulus'] == 1
    was_noise = trials['Stimulus'] == 0
    return (
        np.sum(said_signal & was_signal),
        np.sum(said_signal & was_noise),
        np.sum(said_noise & was_signal),
        np.sum(said_noise & was_noise),
    )


# split groups into 2 dataframes
df_epilepsy = df[df['Group'] == 'Epilepsy']
df_lesion = df[df['Group'] == 'Lesion']

print(len(df_epilepsy), len(df_lesion))
print((len(df_epilepsy) + len(df_lesion)) == len(df)) #make sure it matches

# hits, false alarms, misses and correct rejections per group
hit_epilepsy, fa_epilepsy, miss_epilepsy, cr_epilepsy = count_sdt_outcomes(df_epilepsy)
hit_lesion, fa_lesion, miss_lesion, cr_lesion = count_sdt_outcomes(df_lesion)

576 92
True


### Make SDT dataframe

In [9]:
# Column order of the summary table
sdt_columns = ['hit', 'fa', 'miss', 'cr']

# One list per outcome, ordered [Epilepsy, Lesion]
outcomes = dict(
    hit=[hit_epilepsy, hit_lesion],
    fa=[fa_epilepsy, fa_lesion],
    miss=[miss_epilepsy, miss_lesion],
    cr=[cr_epilepsy, cr_lesion],
)

# Assemble the summary table: row 0 is Epilepsy, row 1 is Lesion
df_sdt = pd.DataFrame(data=outcomes, columns=sdt_columns)
df_sdt

Unnamed: 0,hit,fa,miss,cr
0,283,6,6,281
1,46,0,0,46


## Save file

In [10]:
# Export the SDT summary table as a CSV file, omitting the row index
filename = 'COGS107_newWang.csv'
df_sdt.to_csv(path_or_buf=filename, index=False)
print('Data saved successfully to', filename)

Data saved successfully to COGS107_newWang.csv
