# Data pre-processing before inputting to JASP to get nice plots

## Load data

In [1]:
import pandas as pd

data = pd.read_csv("data.csv")
data = data.drop('Unnamed: 0', axis=1)
data

Unnamed: 0,age,gender,country,english,attention_check,d0_condition,d1_condition,d2_condition,d0_q1,d0_q2,...,d2_q5,d2_q6,d2_q7,d2_q8,d2_q9,d2_q10,d2_q11,d2_q12,d2_q13,d2_q14
0,25,male,IT,med,"[0, 1, 5]",model1_int,model2_int,model2_ext,1,1,...,5,2,1,2,3,4,2,1,3,0
1,24,female,GR,med,"[0, 1, 5]",model0_ext,model0_ext,model0_int,0,0,...,3,2,3,3,4,2,6,6,6,0
2,31,female,IT,med,"[0, 1, 5]",model0_int,model2_ext,model1_int,2,2,...,1,4,5,4,3,4,6,5,5,0
3,18,female,IT,med,"[0, 1, 5]",model2_int,model1_ext,model2_int,4,4,...,4,1,2,1,1,1,5,5,4,0
4,30,male,FI,med,"[0, 1, 5]",model1_ext,model1_int,model1_ext,0,5,...,0,1,4,1,3,4,5,5,5,0
5,23,female,ZA,high,"[0, 1, 5]",model2_ext,model0_int,model0_ext,0,2,...,0,6,6,6,6,6,6,6,6,0
6,63,female,GR,med,"[0, 5]",model1_int,model2_int,model2_ext,0,3,...,3,4,3,3,2,5,5,5,3,0
7,26,male,GR,med,"[0, 1, 5]",model0_ext,model0_ext,model0_int,3,4,...,4,3,3,2,2,3,5,4,4,0
8,33,female,PL,med,"[0, 1]",model0_int,model2_ext,model1_int,1,3,...,1,4,4,2,3,3,5,4,5,0
9,36,female,ZA,high,[5],model2_int,model1_ext,model2_int,4,5,...,3,4,2,2,2,2,6,0,4,5


## Remove failed attention checks

In [2]:
data = data[data['d0_q14'] == 0]
data = data[data['d1_q14'] == 0]
data = data[data['d2_q14'] == 0]
data['attention_check'][0]
to_drop = []
list(map(int, data['attention_check'][0][1:-1].split(',')))
for index, row in data.iterrows():
    a_c = list(map(int, row['attention_check'][1:-1].split(',')))
    if 0 in a_c and 1 in a_c:
        continue
    to_drop.append(index)
print(f'Dropped {len(to_drop)} rows')
data = data.drop(to_drop, axis=0)

Dropped 5 rows


## Data 2 JASP

In [3]:
model2name = {'model0': 'Expert', 'model1': 'GPT-3', 'model2': 'STRAP'}
condition2personality = {'ext': 'Ext', 'int': 'Int'}
questions = [f'q{i+1}' for i in range(14)]
data2jasp = pd.DataFrame()
for _, row in data.iterrows():
    essential = {'Gender': row['gender'], 'Age': row['age'], 'Country': row['country'], 'English': row['english']}
    for dialogue in [f'd{i}' for i in range(3)]:
        essential['Dialogue'] = int(dialogue[-1]) + 1
        essential['Condition'] = dialogue + "_" + row[f'{dialogue}_condition']
        essential['Model'] = model2name[row[f'{dialogue}_condition'][:6]]
        essential['Personality'] = condition2personality[row[f'{dialogue}_condition'][-3:]]
        essential['Introversion rating'] = row[[f'{dialogue}_q{i+1}' for i in range(5)]].mean()
        essential['Extraversion rating'] = row[[f'{dialogue}_q{i+6}' for i in range(5)]].mean()
        essential['Extraversion'] = (essential['Extraversion rating'] + 6 - essential['Introversion rating']) / 2
        essential['Fluency rating'] = row[[f'{dialogue}_q{i+11}' for i in range(3)]].mean()
        data2jasp = data2jasp.append(essential, ignore_index=True)
overall = []
for i, row in data2jasp.iterrows():
    score = row['Extraversion rating'] - row['Introversion rating']
    if row['Personality'] == 'Int':
        score *= -1
    score += 6
    score /= 2
    overall.append(score)
data2jasp['Personality overall'] = overall
data2jasp.to_csv("data2jasp.csv", index=False)
data2jasp

Unnamed: 0,Gender,Age,Country,English,Dialogue,Condition,Model,Personality,Introversion rating,Extraversion rating,Extraversion,Fluency rating,Personality overall
0,male,25.0,IT,med,1.0,d0_model1_int,GPT-3,Int,1.0,4.8,4.9,4.333333,1.1
1,male,25.0,IT,med,2.0,d1_model2_int,STRAP,Int,1.8,4.6,4.4,3.333333,1.6
2,male,25.0,IT,med,3.0,d2_model2_ext,STRAP,Ext,4.2,2.4,2.1,2.000000,2.1
3,female,24.0,GR,med,1.0,d0_model0_ext,Expert,Ext,0.4,5.8,5.7,6.000000,5.7
4,female,24.0,GR,med,2.0,d1_model0_ext,Expert,Ext,0.4,6.0,5.8,6.000000,5.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,female,30.0,PT,high,2.0,d1_model0_int,Expert,Int,3.4,1.8,2.2,3.666667,3.8
65,female,30.0,PT,high,3.0,d2_model0_ext,Expert,Ext,1.6,4.0,4.2,5.666667,4.2
66,female,22.0,ES,med,1.0,d0_model1_int,GPT-3,Int,2.2,4.6,4.2,6.000000,1.8
67,female,22.0,ES,med,2.0,d1_model2_int,STRAP,Int,2.0,4.2,4.1,3.333333,1.9
