## Simulate data for stroop experiment

# Simulates binary responses.  
- Numpy stands for "number Python" and deals well with the complex math we use in science
- We are taking random choice from the list `["incorrect", "correct"]`
- There is a sample size of 50, meaning we take a random choice 50 times
- We give it a probability `p` which is specified using Python's fraction notation.  Note that the correct and incorrect probabilities are out of 1, but could have been specified differently
- We'll have more corrects than incorrects in the congruent condition, so we swap the fractions in congrent and incongruent in order to simulate that

In [77]:
import numpy as np
congruent_responses = np.random.choice(["incorrect", "correct"], size=(50,), p=[1./3, 2./3])
incongruent_responses = np.random.choice(["incorrect", "correct"], size=(50,), p=[2./3, 1./3])

In [78]:
print(congruent_responses)
print(incongruent_responses)

['incorrect' 'incorrect' 'correct' 'incorrect' 'correct' 'correct'
 'correct' 'incorrect' 'correct' 'correct' 'incorrect' 'correct' 'correct'
 'correct' 'correct' 'incorrect' 'correct' 'correct' 'incorrect'
 'incorrect' 'correct' 'correct' 'incorrect' 'correct' 'incorrect'
 'incorrect' 'correct' 'incorrect' 'incorrect' 'correct' 'incorrect'
 'correct' 'correct' 'incorrect' 'incorrect' 'correct' 'correct' 'correct'
 'correct' 'incorrect' 'correct' 'incorrect' 'correct' 'correct' 'correct'
 'incorrect' 'incorrect' 'correct' 'correct' 'incorrect']
['correct' 'incorrect' 'incorrect' 'correct' 'correct' 'incorrect'
 'incorrect' 'correct' 'incorrect' 'incorrect' 'incorrect' 'correct'
 'correct' 'correct' 'correct' 'incorrect' 'correct' 'incorrect'
 'incorrect' 'correct' 'incorrect' 'incorrect' 'correct' 'incorrect'
 'correct' 'correct' 'correct' 'correct' 'incorrect' 'correct' 'incorrect'
 'correct' 'incorrect' 'incorrect' 'incorrect' 'incorrect' 'incorrect'
 'incorrect' 'incorrect' 'correct

In [79]:
proportion_correct = np.count_nonzero(congruent_responses == "correct")
number_correct

17

## Simulate reaction time data

In [80]:
import random

a = 0.005 # lowest possible reaction time
b = 6 # highest possible reaction time
x = 2.5 # mode of reaction time
reaction_time = random.triangular(a, b, 3*x - a - b)
reaction_time

1.6968456909774687

### Add that to our program

In [81]:
import numpy as np
import pandas as pd
import random


number_of_participants = 1000
a = 0.005 # lowest possible reaction time
b = 6 # highest possible reaction time
x = 2.5 # mode of reaction time


data = []

for i in range(number_of_participants):
    condition = "congruent"
    congruent_responses = np.random.choice(["incorrect", "correct"], size=(50,), p=[1./3, 2./3])  # generate responses to 50 congruent stimuli for each person
    x = 1 # mode of reaction time is lower here than in incongruent to simulate faster reactions
    reaction_time = random.triangular(a, b, 3*x - a - b)
    participant = i + 1000
    data_dictionary = {
        "participant": participant,
        "condition": condition,
        "response_correct": congruent_responses,
        "reaction_time": reaction_time
    }
    data.append(data_dictionary)
    condition = "incongruent"
    incongruent_responses = np.random.choice(["incorrect", "correct"], size=(50,), p=[2./3, 1./3]) # generate responses to 50 incongruent stimuli for each person
    x = 3 # mode of reaction time is higher here than in incongruent to simulate slower reactions
    reaction_time = random.triangular(a, b, 3*x - a - b)
    data_dictionary = {
        "participant": participant,
        "condition": condition,
        "response_correct": incongruent_responses,
        "reaction_time": reaction_time
    }
    data.append(data_dictionary)

In [82]:
data

[{'participant': 1000,
  'condition': 'congruent',
  'response_correct': array(['incorrect', 'correct', 'correct', 'correct', 'correct', 'correct',
         'correct', 'incorrect', 'correct', 'correct', 'correct', 'correct',
         'correct', 'correct', 'correct', 'correct', 'correct', 'correct',
         'incorrect', 'correct', 'incorrect', 'incorrect', 'correct',
         'incorrect', 'correct', 'incorrect', 'incorrect', 'correct',
         'correct', 'correct', 'correct', 'correct', 'incorrect', 'correct',
         'correct', 'incorrect', 'correct', 'correct', 'correct', 'correct',
         'incorrect', 'correct', 'correct', 'correct', 'incorrect',
         'correct', 'correct', 'correct', 'correct', 'incorrect'],
        dtype='<U9'),
  'reaction_time': 2.1312073446991056},
 {'participant': 1000,
  'condition': 'incongruent',
  'response_correct': array(['incorrect', 'incorrect', 'correct', 'incorrect', 'incorrect',
         'incorrect', 'correct', 'correct', 'incorrect', 'incorr

# Pandas DataFrame
A Pandas DataFrame is a Python Object that holds a set of data like a spreadsheet.

In [83]:
import pandas as pd  # it is custom to shorten pandas to pd because we'll be typing it a log
df = pd.DataFrame(data=data)  # called the dataframe df, but you can use any name you want... it's a Python object.

You can view the whole DataFrame by typing `df` as usual.  But, perhaps your dataset is large and you just want to get a sense of how it's looking.  For that we can use the `head()` method call on our dataframe.  By default it shows the first 5 rows, or you can specify the number of rows you want to see.

In [84]:
df.head()

Unnamed: 0,condition,participant,reaction_time,response_correct
0,congruent,1000,2.131207,"[incorrect, correct, correct, correct, correct..."
1,incongruent,1000,2.188675,"[incorrect, incorrect, correct, incorrect, inc..."
2,congruent,1001,0.074151,"[correct, correct, correct, correct, correct, ..."
3,incongruent,1001,2.134743,"[correct, incorrect, incorrect, incorrect, cor..."
4,congruent,1002,0.847994,"[correct, correct, correct, incorrect, correct..."


In [85]:
df.head(2)

Unnamed: 0,condition,participant,reaction_time,response_correct
0,congruent,1000,2.131207,"[incorrect, correct, correct, correct, correct..."
1,incongruent,1000,2.188675,"[incorrect, incorrect, correct, incorrect, inc..."


In [86]:
for col in df.columns: 
    print(col)

condition
participant
reaction_time
response_correct


In [87]:
df.head(10)

Unnamed: 0,condition,participant,reaction_time,response_correct
0,congruent,1000,2.131207,"[incorrect, correct, correct, correct, correct..."
1,incongruent,1000,2.188675,"[incorrect, incorrect, correct, incorrect, inc..."
2,congruent,1001,0.074151,"[correct, correct, correct, correct, correct, ..."
3,incongruent,1001,2.134743,"[correct, incorrect, incorrect, incorrect, cor..."
4,congruent,1002,0.847994,"[correct, correct, correct, incorrect, correct..."
5,incongruent,1002,1.362036,"[incorrect, incorrect, incorrect, correct, inc..."
6,congruent,1003,0.385385,"[incorrect, correct, incorrect, incorrect, cor..."
7,incongruent,1003,4.465601,"[correct, incorrect, correct, incorrect, incor..."
8,congruent,1004,-0.409676,"[correct, correct, correct, correct, correct, ..."
9,incongruent,1004,0.53384,"[incorrect, correct, correct, correct, incorre..."


Ack! our `response_correct` column has a Python list in it.  We wanted each response to be in it's own column.  That's okay.   We'll use the `apply` method on the `response_correct` column and turn the list into a series.

In [88]:
responses = df['response_correct'].apply(pd.Series)

Then we concatenate, or put the two dataframes together

In [89]:
df = pd.concat([df, responses], axis=1)
df.head()

Unnamed: 0,condition,participant,reaction_time,response_correct,0,1,2,3,4,5,...,40,41,42,43,44,45,46,47,48,49
0,congruent,1000,2.131207,"[incorrect, correct, correct, correct, correct...",incorrect,correct,correct,correct,correct,correct,...,incorrect,correct,correct,correct,incorrect,correct,correct,correct,correct,incorrect
1,incongruent,1000,2.188675,"[incorrect, incorrect, correct, incorrect, inc...",incorrect,incorrect,correct,incorrect,incorrect,incorrect,...,correct,incorrect,correct,correct,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect
2,congruent,1001,0.074151,"[correct, correct, correct, correct, correct, ...",correct,correct,correct,correct,correct,incorrect,...,correct,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect,correct
3,incongruent,1001,2.134743,"[correct, incorrect, incorrect, incorrect, cor...",correct,incorrect,incorrect,incorrect,correct,correct,...,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,correct,correct,incorrect
4,congruent,1002,0.847994,"[correct, correct, correct, incorrect, correct...",correct,correct,correct,incorrect,correct,correct,...,correct,correct,correct,correct,correct,correct,correct,correct,correct,correct


Clean up by removing numpy array

In [90]:
df = df.drop('response_correct', axis=1)

In [91]:
df.head()

Unnamed: 0,condition,participant,reaction_time,0,1,2,3,4,5,6,...,40,41,42,43,44,45,46,47,48,49
0,congruent,1000,2.131207,incorrect,correct,correct,correct,correct,correct,correct,...,incorrect,correct,correct,correct,incorrect,correct,correct,correct,correct,incorrect
1,incongruent,1000,2.188675,incorrect,incorrect,correct,incorrect,incorrect,incorrect,correct,...,correct,incorrect,correct,correct,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect
2,congruent,1001,0.074151,correct,correct,correct,correct,correct,incorrect,incorrect,...,correct,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect,correct
3,incongruent,1001,2.134743,correct,incorrect,incorrect,incorrect,correct,correct,incorrect,...,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,correct,correct,incorrect
4,congruent,1002,0.847994,correct,correct,correct,incorrect,correct,correct,correct,...,correct,correct,correct,correct,correct,correct,correct,correct,correct,correct


We can access our columns with data using slice notation

In [109]:
df.iloc[:, 3:53]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,incorrect,correct,correct,correct,correct,correct,correct,incorrect,correct,correct,...,incorrect,correct,correct,correct,incorrect,correct,correct,correct,correct,incorrect
1,incorrect,incorrect,correct,incorrect,incorrect,incorrect,correct,correct,incorrect,incorrect,...,correct,incorrect,correct,correct,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect
2,correct,correct,correct,correct,correct,incorrect,incorrect,incorrect,correct,correct,...,correct,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect,correct
3,correct,incorrect,incorrect,incorrect,correct,correct,incorrect,incorrect,correct,incorrect,...,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,correct,correct,incorrect
4,correct,correct,correct,incorrect,correct,correct,correct,correct,correct,incorrect,...,correct,correct,correct,correct,correct,correct,correct,correct,correct,correct
5,incorrect,incorrect,incorrect,correct,incorrect,correct,incorrect,incorrect,correct,incorrect,...,incorrect,incorrect,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect
6,incorrect,correct,incorrect,incorrect,correct,correct,correct,correct,correct,incorrect,...,correct,correct,incorrect,correct,correct,incorrect,incorrect,correct,correct,correct
7,correct,incorrect,correct,incorrect,incorrect,incorrect,correct,correct,correct,incorrect,...,incorrect,correct,incorrect,correct,correct,incorrect,incorrect,correct,incorrect,incorrect
8,correct,correct,correct,correct,correct,correct,correct,incorrect,incorrect,incorrect,...,correct,correct,incorrect,correct,correct,correct,correct,incorrect,incorrect,correct
9,incorrect,correct,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect,...,incorrect,correct,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect


Then we can calculate proportion correct by applying a conditional count method.

In [112]:
df['proportion_correct'] = df[df.iloc[:, 3:53] == "correct"].count(axis=1) / 50
df.head()

Unnamed: 0,condition,participant,reaction_time,0,1,2,3,4,5,6,...,41,42,43,44,45,46,47,48,49,proportion_correct
0,congruent,1000,2.131207,incorrect,correct,correct,correct,correct,correct,correct,...,correct,correct,correct,incorrect,correct,correct,correct,correct,incorrect,0.74
1,incongruent,1000,2.188675,incorrect,incorrect,correct,incorrect,incorrect,incorrect,correct,...,incorrect,correct,correct,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,0.28
2,congruent,1001,0.074151,correct,correct,correct,correct,correct,incorrect,incorrect,...,correct,correct,incorrect,correct,incorrect,incorrect,incorrect,incorrect,correct,0.66
3,incongruent,1001,2.134743,correct,incorrect,incorrect,incorrect,correct,correct,incorrect,...,incorrect,incorrect,incorrect,incorrect,incorrect,incorrect,correct,correct,incorrect,0.26
4,congruent,1002,0.847994,correct,correct,correct,incorrect,correct,correct,correct,...,correct,correct,correct,correct,correct,correct,correct,correct,correct,0.7
