In [1]:
import pandas as pd

In [2]:
# load the raw lexical-decision data for semiotics and linguistics separately
# (both files live in the same folder of the ExPsyLing repository, so the
# shared part of the URL is kept in one place)

DATA_URL = "https://raw.githubusercontent.com/ethanweed/ExPsyLing/master/datasets/Lexical-decision/2021"

df_sem_raw = pd.read_csv(f"{DATA_URL}/semiotics_2021_raw.csv")
df_ling_raw = pd.read_csv(f"{DATA_URL}/linguistics_2021_raw.csv")

In [3]:
# add a "group" column to index semiotics and linguistics data
# (a scalar passed to assign() is broadcast to every row, so there is no
# need to build a list with one entry per row)

df_sem_raw = df_sem_raw.assign(group='semiotics')
df_ling_raw = df_ling_raw.assign(group='linguistics')

# combine semiotics and linguistics data into one frame with a fresh 0..n-1 index
df = pd.concat([df_sem_raw, df_ling_raw], ignore_index=True)

In [4]:
# define a function to clean up the data:
# 1. remove unnecessary columns
# 2. add a column indexing whether the answer was correct or incorrect (and make this a string, not a bool)
# 3. remove practice runs

def clean_data(df):
    """Return a tidy copy of the raw lexical-decision data.

    The input frame is not modified. The result:

    1. keeps only the columns needed for analysis, under shorter names
       (``jatosStudyResultId`` -> ``participantID``, ``stim`` -> ``stimulus``,
       ``response_time`` -> ``rt``);
    2. lower-cases ``correct_response`` and adds a ``correct`` column holding
       the string ``"correct"`` or ``"incorrect"`` (a string, not a bool),
       placed directly after ``correct_response``;
    3. drops the rows whose ``block`` is ``'practice'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing the columns ``jatosStudyResultId``, ``group``,
        ``block``, ``condition``, ``stim``, ``correct_response``,
        ``response`` and ``response_time``.

    Returns
    -------
    pandas.DataFrame
        The cleaned data. Rows keep their original index labels.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    # .str.lower() passes missing values through instead of raising, so a
    # trial with no recorded correct_response is simply scored "incorrect"
    expected = df['correct_response'].str.lower()
    is_correct = expected == df['response']

    df_clean = pd.DataFrame(
        {'participantID': df['jatosStudyResultId'],
         'group': df['group'],
         'block': df['block'],
         'condition': df['condition'],
         'stimulus': df['stim'],
         'correct_response': expected,
         # label the outcome with strings rather than booleans
         'correct': is_correct.map({True: 'correct', False: 'incorrect'}),
         'response': df['response'],
         'rt': df['response_time'],
         })

    # remove practice trials; copy so later edits to the result do not
    # trigger SettingWithCopy warnings
    return df_clean[df_clean['block'] != 'practice'].copy()
    




In [5]:
# clean the combined semiotics + linguistics frame (practice trials are dropped)
data = clean_data(df)
# preview the first few rows of the cleaned data
data.head()

Unnamed: 0,participantID,group,block,condition,stimulus,correct_response,correct,response,rt
6,239,semiotics,C,Unrelated,peg-yolk,left,incorrect,right,1543
7,239,semiotics,C,Filler,afraid-scobet,right,correct,right,2268
8,239,semiotics,C,Nonword,villane-towp,right,correct,right,1830
9,239,semiotics,C,Nonword,chorch-steeble,right,incorrect,left,1960
10,239,semiotics,C,Nonword,eas-jood,right,correct,right,1263
