# Pre-processing the raw data for import into JASP (to get nice plots)

## Load data

In [1]:
import pandas as pd

# Read the raw export from disk; the following cells filter and reshape `data`.
data = pd.read_csv(filepath_or_buffer="data_raw.csv")
# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,age,gender,country,english,attention_check,d0_condition,d1_condition,d2_condition,d0_q1,d0_q2,...,d2_q5,d2_q6,d2_q7,d2_q8,d2_q9,d2_q10,d2_q11,d2_q12,d2_q13,d2_q14
0,42,male,GR,med,"[0, 1, 2]",model0_int,model0_ext,model0_ext,4,5,...,3,3,6,4,5,3,5,5,3,0
1,27,male,GB,high,"[0, 1, 5]",model2_int,model2_ext,model1_int,3,3,...,5,1,0,1,1,1,5,4,5,0
2,26,male,PL,med,"[0, 1, 5]",model2_ext,model2_int,model0_int,1,1,...,3,3,4,4,5,4,6,6,6,0
3,32,male,AT,med,"[0, 1, 5]",model1_int,model1_ext,model2_ext,3,1,...,0,2,2,4,3,3,6,3,4,0
4,37,male,IT,med,[0],model0_ext,model1_int,model1_ext,3,2,...,2,3,2,3,3,3,4,3,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,27,male,MX,med,"[0, 1, 2]",model2_int,model1_ext,model2_int,1,1,...,1,4,5,4,2,4,2,2,2,0
172,22,male,PL,med,"[0, 1, 5]",model1_ext,model1_int,model1_ext,1,3,...,3,3,4,4,3,4,4,3,3,0
173,30,female,PT,high,"[0, 1, 5]",model2_ext,model0_int,model0_ext,1,1,...,1,3,5,4,4,4,6,5,6,0
174,22,female,ES,med,"[0, 1, 2]",model1_int,model2_int,model2_ext,2,2,...,4,2,2,1,1,2,3,1,3,0


## Remove failed attention checks

In [2]:
# Keep only the participants who passed every attention check.
#
# Two rules are applied, in this order:
#   1. Question 14 of each dialogue (d0, d1, d2) must equal 0.
#   2. The `attention_check` column, a stringified list of ints such as
#      "[0, 1, 5]", must contain both 0 and 1.


def _parse_attention_check(raw):
    """Convert a stringified int list such as "[0, 1, 5]" into a set of ints.

    Blank tokens are skipped, so an empty list ("[]") yields an empty set
    instead of raising ValueError from int('').
    """
    return {int(token) for token in raw.strip().strip('[]').split(',') if token.strip()}


# Rule 1: a row survives only if all three q14 answers are 0.
passed_q14 = (data[['d0_q14', 'd1_q14', 'd2_q14']] == 0).all(axis=1)
data = data[passed_q14]

# Rule 2: both 0 and 1 must be present in the parsed list.
# `.astype(bool)` keeps the mask boolean even when no rows are left.
passed_list = data['attention_check'].map(
    lambda raw: {0, 1} <= _parse_attention_check(raw)
).astype(bool)
failed_list = ~passed_list
to_drop = failed_list[failed_list].index.tolist()

# The count covers rule 2 only; rows already removed by rule 1 are not
# included (presumably why the message says "more than").
print(f'Dropped more than {len(to_drop)} rows')
data = data.drop(to_drop, axis=0)
data

Dropped more than 28 rows


Unnamed: 0,age,gender,country,english,attention_check,d0_condition,d1_condition,d2_condition,d0_q1,d0_q2,...,d2_q5,d2_q6,d2_q7,d2_q8,d2_q9,d2_q10,d2_q11,d2_q12,d2_q13,d2_q14
0,42,male,GR,med,"[0, 1, 2]",model0_int,model0_ext,model0_ext,4,5,...,3,3,6,4,5,3,5,5,3,0
1,27,male,GB,high,"[0, 1, 5]",model2_int,model2_ext,model1_int,3,3,...,5,1,0,1,1,1,5,4,5,0
2,26,male,PL,med,"[0, 1, 5]",model2_ext,model2_int,model0_int,1,1,...,3,3,4,4,5,4,6,6,6,0
3,32,male,AT,med,"[0, 1, 5]",model1_int,model1_ext,model2_ext,3,1,...,0,2,2,4,3,3,6,3,4,0
5,24,male,PT,med,"[0, 1, 5]",model0_int,model0_ext,model0_ext,2,2,...,2,4,5,5,5,6,5,4,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,21,female,MX,med,"[0, 1, 5]",model0_ext,model0_ext,model0_int,0,2,...,1,4,6,3,3,6,4,6,4,0
171,27,male,MX,med,"[0, 1, 2]",model2_int,model1_ext,model2_int,1,1,...,1,4,5,4,2,4,2,2,2,0
172,22,male,PL,med,"[0, 1, 5]",model1_ext,model1_int,model1_ext,1,3,...,3,3,4,4,3,4,4,3,3,0
173,30,female,PT,high,"[0, 1, 5]",model2_ext,model0_int,model0_ext,1,1,...,1,3,5,4,4,4,6,5,6,0


## Convert the data to JASP format

In [3]:
# Reshape the wide table (one row per participant) into a long table with one
# row per (participant, dialogue), then write it to "data4jasp.csv".
model2name = {'model0': 'Expert', 'model1': 'GPT-3', 'model2': 'STRAP'}
condition2personality = {'ext': 'Ext', 'int': 'Int'}
questions = [f'q{i+1}' for i in range(14)]

# Item groups used by the score formulas below.
reversed_items = questions[0:5]    # q1-q5: their mean is subtracted from scale_max
direct_items = questions[5:10]     # q6-q10: their mean enters the score as-is
fluency_items = questions[10:13]   # q11-q13: averaged into Fluency
scale_max = 6  # NOTE(review): assumes answers lie on a 0-6 scale -- confirm

# Collect plain dicts and build the frame once at the end; concatenating onto a
# growing DataFrame inside the loop re-copies all previous rows every iteration.
records = []
for _, row in data.iterrows():
    participant = {'Gender': row['gender'], 'Age': row['age'],
                   'Country': row['country'], 'English': row['english']}
    for dialogue_idx in range(3):
        dialogue = f'd{dialogue_idx}'
        condition = row[f'{dialogue}_condition']  # e.g. 'model0_int'

        direct_mean = row[[f'{dialogue}_{q}' for q in direct_items]].mean()
        reversed_mean = row[[f'{dialogue}_{q}' for q in reversed_items]].mean()

        # A fresh dict per dialogue, so no record aliases another one.
        records.append({
            **participant,
            'Dialogue': dialogue_idx + 1,  # 1-based dialogue number
            'Condition': dialogue + "_" + condition,
            # First six characters name the model, last three the personality.
            'Model': model2name[condition[:6]],
            'Personality condition': condition2personality[condition[-3:]],
            # Average of the direct mean and the reverse-scored mean.
            'Extraversion': (direct_mean + scale_max - reversed_mean) / 2,
            'Fluency': row[[f'{dialogue}_{q}' for q in fluency_items]].mean(),
        })

data4jasp = pd.DataFrame(records)
# NOTE(review): `overall` is not used in the visible cells; kept in case a
# later cell relies on it.
overall = []
data4jasp.to_csv("data4jasp.csv", index=False)
data4jasp

Unnamed: 0,Gender,Age,Country,English,Dialogue,Condition,Model,Personality condition,Extraversion,Fluency
0,male,42,GR,med,1,d0_model0_int,Expert,Int,2.4,3.666667
1,male,42,GR,med,2,d1_model0_ext,Expert,Ext,5.7,4.666667
2,male,42,GR,med,3,d2_model0_ext,Expert,Ext,4.2,4.333333
3,male,27,GB,high,1,d0_model2_int,STRAP,Int,3.2,1.000000
4,male,27,GB,high,2,d1_model2_ext,STRAP,Ext,4.6,1.000000
...,...,...,...,...,...,...,...,...,...,...
418,female,30,PT,high,2,d1_model0_int,Expert,Int,2.2,3.666667
419,female,30,PT,high,3,d2_model0_ext,Expert,Ext,4.2,5.666667
420,female,22,ES,med,1,d0_model1_int,GPT-3,Int,4.2,6.000000
421,female,22,ES,med,2,d1_model2_int,STRAP,Int,4.1,3.333333
