In [1]:
from pathlib import Path

import pandas as pd
# Location of the raw survey export, relative to the notebook's working directory
RAW_DATA_CSV = 'Raw_Data_StudentPop_DataPhil.csv'

# Load the raw export into the working DataFrame used by every later cell
data_phil = pd.read_csv(RAW_DATA_CSV)

In [2]:
############## Cleaning up the columns #####################

# Rows labelled 0 and 1 are column descriptions, not responses
description_rows = [0, 1]

# Contiguous runs of unnecessary columns that Qualtrics adds, as (first label, last label)
qualtrics_column_ranges = [
    ('StartDate', 'Duration (in seconds)'),
    ('RecordedDate', 'Intro'),
]

# Individual unnecessary columns that are dropped by name
unneeded_questions = [
    'Q11', 'Q10_3.1', 'Q10_2.1', 'Q9', 'Q8',
    'Q7', 'Q7_7_TEXT', 'Q5', 'Q5_4_TEXT',
]

data_phil = data_phil.drop(index=description_rows)

# Label slices are inclusive on both ends; each range is resolved against the
# frame as it stands after the previous drop.
for first_label, last_label in qualtrics_column_ranges:
    columns_in_range = data_phil.loc[:, first_label:last_label].columns
    data_phil = data_phil.drop(columns=columns_in_range)

data_phil = data_phil.drop(columns=unneeded_questions)

In [3]:
############## Renaming the columns and making sure the data type is the correct one #################

#Renaming columns depending on scale from where it was taken.
#The list is positional: it must contain exactly one name per remaining column,
#in the order the columns appear in the frame.
column_names = [
    'Finished',
    'T_P_1', 'T_P_2', 'T_P_3', 'T_P_4',
    'FI_1', 'FI_2', 'FI_3',
    'FA_1', 'FA_2', 'FA_3', 'Atten', 'FA_4',
    'PC_1', 'PC_2', 'PC_3', 'PC_4',
    'WOM_1', 'WOM_2', 'WOM_3',
    'DU_1', 'DU_2', 'DU_3', 'DU_4',
    'EV_1', 'EV_2', 'EV_3', 'EV_4',
    'C_1', 'C_2', 'C_3', 'C_4',
    'Mani',
    'T_N_1', 'T_N_2', 'T_N_3', 'T_N_4', 'T_N_5',
    'Fam', 'CTM', 'Age', 'Condition',
]

#Assign the labels directly instead of calling set_axis(..., inplace=True):
#the `inplace` keyword of set_axis was deprecated in pandas 1.5 and removed in
#pandas 2.0, where the old call raises a TypeError. Direct assignment works on
#every pandas version and still raises ValueError if the number of names does
#not match the number of columns.
data_phil.columns = column_names

#Converting the data from Object to Float to do calculations.
#Every column is cast to float except 'Finished' and 'Condition' (left as read)
#and 'CTM' (cast to str).
columns_not_cast_to_float = ('Finished', 'CTM', 'Condition')
dtype_by_column = {
    name: 'float'
    for name in column_names
    if name not in columns_not_cast_to_float
}
#NOTE(review): on pandas versions where astype('str') stringifies missing
#values, a NaN in CTM becomes the text 'nan' and a later dropna on CTM will not
#treat it as missing - confirm whether that matters for this data.
dtype_by_column['CTM'] = 'str'

data_phil = data_phil.astype(dtype_by_column)

In [4]:
################### Recoding variables ########################

# Qualtrics downloads the experiment condition as one field ('Condition').
# It is split here into two indicator columns, "Trust" and "Control".
# Each entry maps a condition label to its (Trust, Control) codes.
condition_codes = {
    'Control/Trust': (1, 1),
    'Control/NoTrust': (0, 1),
    'NoControl/Trust': (1, 0),
    'NoControl/NoTrust': (0, 0),
}

# Rows whose 'Condition' matches none of the labels are left as NaN in both columns.
for condition_label, (trust_code, control_code) in condition_codes.items():
    in_condition = data_phil['Condition'] == condition_label
    data_phil.loc[in_condition, "Trust"] = trust_code
    data_phil.loc[in_condition, "Control"] = control_code

#Creating a variable (Mani_Fail) that has a value of 0 if the subject passed the manipulation check (Mani equals 2), and 1 if the subject failed it (any other value, including a missing answer)

# Mani_Fail is 0 where 'Mani' equals 2 and 1 everywhere else; a missing 'Mani'
# is not equal to 2, so it is also coded 1.
mani_equals_two = data_phil['Mani'] == 2
data_phil.loc[mani_equals_two, "Mani_Fail"] = 0
data_phil.loc[~mani_equals_two, "Mani_Fail"] = 1

In [5]:
################## Cleaning the data and removing the unfinished records and the subjects who failed the attention check ###########

# Subjects who did not finish the survey: 'Finished' holds the text '0'
is_unfinished = data_phil['Finished'] == '0'
data_phil = data_phil.drop(index=data_phil.loc[is_unfinished].index)

# Value counts of 'Finished' show how many subjects remain after that filter
print(data_phil['Finished'].value_counts())

# Subjects who failed the attention check (i.e., they are straightlining).
# The attention check is a control item that highlights subjects who do not
# read the questions; only rows where 'Atten' equals 2 are kept, so a missing
# answer is removed as well.
failed_attention = data_phil['Atten'] != 2
data_phil = data_phil.drop(index=data_phil.loc[failed_attention].index)

# Dropping the subjects who failed the manipulation check (i.e., do not know the
# company donated data) is currently disabled, so those rows stay in the data:
#data_phil.drop (data_phil[data_phil['Mani'] != 2].index, inplace = True)

# Number of rows left after both filters
data_phil.shape[0]

1    95
Name: Finished, dtype: int64


80

In [6]:
#Deleting the rows that have a 'Finished' value of 1 but are missing answers. Note: dropna removes a row when ANY of the columns listed below is NaN, not only when all of them are.

# Columns that must all be non-missing for a row to be kept
columns = [
    'T_P_1', 'T_P_2', 'T_P_3', 'T_P_4',
    'FI_1', 'FI_2', 'FI_3',
    'FA_1', 'FA_2', 'FA_3', 'Atten', 'FA_4',
    'PC_1', 'PC_2', 'PC_3', 'PC_4',
    'WOM_1', 'WOM_2', 'WOM_3',
    'DU_1', 'DU_2', 'DU_3', 'DU_4',
    'EV_1', 'EV_2', 'EV_3', 'EV_4',
    'C_1', 'C_2', 'C_3', 'C_4',
    'Mani',
    'T_N_1', 'T_N_2', 'T_N_3', 'T_N_4', 'T_N_5',
    'Fam', 'CTM',
]

# Drop rows with a NaN in any of those columns, then renumber the rows from 0;
# drop=True discards the old index instead of keeping it as a column.
data_phil = data_phil.dropna(subset=columns).reset_index(drop=True)

#Added because I forgot to include the drop argument in the reset_index function.
#data_phil.drop("index", axis=1, inplace = True)

In [7]:
# Row count after the cleaning steps above
data_phil.shape[0]

80

In [8]:
#creating the variables

# Each composite score is the row-wise mean of its item columns, rounded to two
# decimals. The new columns are added in the order listed here.
scale_items = {
    # trust in for-profits scale
    'Av_TP': ['T_P_1', 'T_P_2', 'T_P_3', 'T_P_4'],
    # false information scale
    'Av_FI': ['FI_1', 'FI_2', 'FI_3'],
    # fairness scale
    'Av_FA': ['FA_1', 'FA_2', 'FA_3', 'FA_4'],
    # privacy concerns scale
    'Av_PC': ['PC_1', 'PC_2', 'PC_3', 'PC_4'],
    # negative WOM scale
    'Av_WOM': ['WOM_1', 'WOM_2', 'WOM_3'],
    # emotional violation scale
    'Av_EV': ['EV_1', 'EV_2', 'EV_3', 'EV_4'],
    # data use transparency scale
    'Av_DU': ['DU_1', 'DU_2', 'DU_3', 'DU_4'],
    # data control perceptions scale
    'Av_C': ['C_1', 'C_2', 'C_3', 'C_4'],
    # nonprofit trust scale
    # NOTE(review): a T_N_5 column exists but is not part of this average -
    # confirm that leaving it out is intentional.
    'Av_TN': ['T_N_1', 'T_N_2', 'T_N_3', 'T_N_4'],
}

for average_name, item_columns in scale_items.items():
    item_means = data_phil[item_columns].mean(axis=1)
    data_phil[average_name] = item_means.round(2)

In [9]:
#Writing the clean data- ready for analysis- into a new CSV file
clean_csv_path = Path('clean data/data_phil_clean_103122.csv')

#to_csv does not create missing folders, so make sure the output folder exists
#first; exist_ok=True makes this a no-op when the folder is already there.
clean_csv_path.parent.mkdir(parents=True, exist_ok=True)

#The row index is written as the first (unnamed) column, as before.
data_phil.to_csv(clean_csv_path)