# Preprocessing of data before feeding it to JASP

In [1]:
# Load data: read the raw CSV (path relative to the notebook's working directory)
import pandas as pd
raw_csv_path = 'data_raw.csv'
data = pd.read_csv(raw_csv_path)

In [2]:
# Remove failed attention checks
# A check counts as passed when the column equals its expected value:
#   pair:   attention_check_name == 4, attention_check_topic == 1
#   triple: own_personality_q5 == 6, rosas_q7 == 0, personality_q14 == 6
# A row is dropped when it fails EVERY check of the pair, or EVERY check of the triple.
# NOTE(review): a participant who fails only some checks of a group is kept —
# confirm this matches the study's exclusion criteria (vs. dropping on any failure).
fails_check_pair = (data['attention_check_name'] != 4) & (data['attention_check_topic'] != 1)
fails_check_triple = (
    (data['own_personality_q5'] != 6)
    & (data['rosas_q7'] != 0)
    & (data['personality_q14'] != 6)
)
# Index labels of the rows to exclude, in their original order
to_drop = data.index[fails_check_pair | fails_check_triple].tolist()

print(f'Dropped {len(to_drop)} rows')
data = data.drop(to_drop, axis=0)
data

Dropped 11 rows


Unnamed: 0,age,gender,country,english,condition,attention_check_name,attention_check_topic,own_personality_q1,own_personality_q2,own_personality_q3,...,rosas_q4,rosas_q5,rosas_q6,rosas_q7,rosas_q8,rosas_q9,rosas_q10,rosas_q11,rosas_q12,rosas_q13
0,19,female,ZA,high,strap_ext_yes,2,1,4,2,4,...,0,1,0,0,3,3,3,2,3,3
1,32,female,ZA,high,strap_int_yes,4,1,4,4,5,...,1,1,1,0,2,3,3,2,2,2
2,22,female,EE,med,gpt_int_yes,4,1,5,4,4,...,2,4,2,0,0,1,0,0,0,0
3,23,female,PT,med,gpt_ext_yes,2,1,5,3,4,...,0,3,1,0,1,2,1,2,2,1
4,39,male,CA,high,strap_ext_yes,2,1,4,4,5,...,2,2,1,0,2,4,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,20,male,PL,med,expert_ext_yes,4,5,4,3,2,...,2,3,1,0,0,2,0,1,1,0
174,33,female,IT,none,expert_ext_yes,4,5,4,1,1,...,1,0,0,0,1,2,2,0,0,0
175,26,female,ZA,high,expert_ext_yes,4,1,4,5,4,...,0,1,0,0,0,1,0,0,0,0
176,25,male,PL,med,expert_ext_yes,4,0,4,2,3,...,0,1,0,0,1,2,1,0,0,0


In [3]:
# Group measures
# Builds the per-participant table exported for JASP: demographics, condition
# labels, the participant's own personality type, and the mean score of each
# questionnaire scale. Reads `data` (filtered in the cell above) and writes
# 'data4jasp.csv'.

def _scale_items(prefix, first, count):
    """Return `count` consecutive column names '<prefix>_q<first>', '<prefix>_q<first+1>', ..."""
    return [f'{prefix}_q{number}' for number in range(first, first + count)]

data4jasp = pd.DataFrame()
data4jasp[['Age', 'Gender', 'English']] = data[['age', 'gender', 'english']]

# `condition` looks like "<model>_<ext|int>_..."; an unknown model prefix raises KeyError.
model_names = {"gpt": "GPT-3", "strap": "STRAP", "expert": "Expert"}
data4jasp['Model'] = data['condition'].apply(lambda x: model_names[x.split("_")[0]])
data4jasp['Personality condition'] = data['condition'].apply(lambda x: "Ext" if x.split("_")[1] == "ext" else "Int")
data4jasp['Condition'] = data4jasp['Model'] + " " + data4jasp['Personality condition']

# Own personality: compare the mean of items q1-q4 with the mean of items q6-q9
# (own_personality_q5 is not part of either scale; it is one of the attention-check items).
# Ties, and rows where a mean is NaN, stay 'Ext'.
own_introversion = data[_scale_items('own_personality', 1, 4)].mean(axis=1)
own_extraversion = data[_scale_items('own_personality', 6, 4)].mean(axis=1)
data4jasp['Own personality'] = 'Ext'
# Single .loc assignment instead of chained indexing (df[col][mask] = ...), which
# raises SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
data4jasp.loc[own_extraversion < own_introversion, 'Own personality'] = 'Int'

data4jasp['Condition and own personality'] = data4jasp['Condition'] + " " + data4jasp['Own personality']
data4jasp['Personality matching'] = data4jasp['Own personality'] == data4jasp['Personality condition']

# Perceived extraversion: average of the q6-q10 mean and the reversed (6 - x) q1-q5 mean.
# NOTE(review): the reversal constant 6 presumably reflects a 0-6 response scale — confirm.
data4jasp['Extraversion'] = (data[_scale_items('personality', 6, 5)].mean(axis=1) + 6
                             - data[_scale_items('personality', 1, 5)].mean(axis=1)) / 2
data4jasp['Fluency'] = data[_scale_items('personality', 11, 3)].mean(axis=1)

data4jasp['Godspeed anthropomorphism'] = data[_scale_items('godspeed', 1, 5)].mean(axis=1)
data4jasp['Godspeed likeability'] = data[_scale_items('godspeed', 6, 5)].mean(axis=1)
data4jasp['Godspeed safety'] = data[_scale_items('godspeed', 11, 3)].mean(axis=1)

# rosas_q7 is skipped: it is one of the attention-check items, not a scale item.
data4jasp['RoSAS warmth'] = data[_scale_items('rosas', 1, 6)].mean(axis=1)
data4jasp['RoSAS discomfort'] = data[_scale_items('rosas', 8, 6)].mean(axis=1)

data4jasp.to_csv('data4jasp.csv', index=False)
data4jasp

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data4jasp['Own personality'][data4jasp['Own extraversion'] < data4jasp['Own introversion']] = 'Int'


Unnamed: 0,Age,Gender,English,Model,Personality condition,Condition,Own personality,Condition and own personality,Personality matching,Extraversion,Fluency,Godspeed anthropomorphism,Godspeed likeability,Godspeed safety,RoSAS warmth,RoSAS discomfort
0,19,female,high,STRAP,Ext,STRAP Ext,Ext,STRAP Ext Ext,True,5.6,6.000000,1.8,2.8,3.000000,1.333333,2.833333
1,32,female,high,STRAP,Int,STRAP Int,Int,STRAP Int Int,True,3.6,2.666667,2.0,3.0,3.333333,1.333333,2.333333
2,22,female,med,GPT-3,Int,GPT-3 Int,Int,GPT-3 Int Int,True,3.1,5.666667,4.0,5.0,2.333333,2.833333,0.166667
3,23,female,med,GPT-3,Ext,GPT-3 Ext,Ext,GPT-3 Ext Ext,True,4.4,4.333333,3.0,3.4,2.333333,1.666667,1.500000
4,39,male,high,STRAP,Ext,STRAP Ext,Int,STRAP Ext Int,False,4.5,5.000000,2.6,3.4,2.666667,2.166667,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,20,male,med,Expert,Ext,Expert Ext,Ext,Expert Ext Ext,True,4.9,5.333333,2.0,3.2,2.666667,2.333333,0.666667
174,33,female,none,Expert,Ext,Expert Ext,Ext,Expert Ext Ext,True,2.2,2.000000,2.0,3.2,4.000000,0.666667,0.833333
175,26,female,high,Expert,Ext,Expert Ext,Ext,Expert Ext Ext,True,3.9,6.000000,2.0,4.4,2.000000,1.666667,0.166667
176,25,male,med,Expert,Ext,Expert Ext,Ext,Expert Ext Ext,True,4.4,5.666667,1.8,3.8,2.333333,1.166667,0.666667
