### Prelude:

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw survey export. skiprows=[1, 2] drops the two lines directly
# below the header row (0-indexed file lines 1 and 2), so only the first line
# is used for column names.
# NOTE(review): presumably those two lines are extra descriptive header rows
# written by the survey platform — confirm against the raw file.
raw_survey_path = "./../data/SOSC 13300 Severson and Coleman Extension Survey_April 22, 2024_09.12.csv"
data = pd.read_csv(
    raw_survey_path,
    skiprows=[1, 2],
)


### Data Cleaning:

In [3]:
# Build the cleaned frame from the raw export in three steps:
#   1. keep respondents whose 'Finished' flag is True,
#   2. keep only the columns used in the analysis,
#   3. keep respondents who passed both attention checks.

# Column selection (from Tiffanie's code). Note that 'RewardConsequence ' has a
# trailing space; it is left as-is so it keeps matching the column name in the raw file.
analysis_columns = [
    'GasTax', 'CarbTax', 'Treaty', 'RegCarb',
    'political_views', 'party_id', 'party_id.1', 'party_id.2', 'QID74',
    'ScientificConfidence', 'RewardConsequence ',
    'Attention_Check_1', 'Religiosity', 'Economic_Reasoning',
    'Attention_Check_2',
    'prosociality_1', 'prosociality_2', 'prosociality_3',
    'prosociality_4', 'prosociality_5', 'prosociality_6',
    'prosociality_7', 'prosociality_8', 'prosociality_9',
    'GasTax_after', 'CarbTax_after', 'Treaty_after', 'RegCarb_after',
    'treatment_value',
]

# Drop rows where the survey was not completed. The explicit comparison with
# True is kept so that missing values in 'Finished' are treated as not finished.
finished_mask = data['Finished'].eq(True)
dataClean = data.loc[finished_mask, analysis_columns]

# Drop rows where the participant failed either attention check.
passed_attention = (
    (dataClean['Attention_Check_1'] == 'Strongly like')
    & (dataClean['Attention_Check_2'] == '1,3')
)

# .copy() makes dataClean an independent frame rather than a slice of `data`,
# so columns can be added to it afterwards without SettingWithCopyWarning.
dataClean = dataClean.loc[passed_attention].copy()

In [4]:
# Build one complete party-ID number by merging the three party-ID columns.
# NOTE(review): summing only gives a sensible merged value if each respondent
# has a non-missing answer in at most one of these columns — presumably they
# are mutually exclusive survey branches; confirm against the survey flow.
party_id_parts = ['QID74', 'party_id.1', 'party_id.2']

# Missing answers become 0 so each column can be cast to int and contributes
# nothing to the sum.
for part in party_id_parts:
    dataClean[part] = dataClean[part].fillna(0).astype(int)

# Row-wise sum of the three integer columns.
dataClean['party_id_merged'] = dataClean[party_id_parts].sum(axis=1)

In [5]:
# Average climate-policy support per respondent: the row-wise mean of the four
# policy items (GasTax, CarbTax, Treaty, RegCarb), computed separately for the
# baseline answers and for the "_after" answers, plus the change between them.
before_columns = ['GasTax', 'CarbTax', 'Treaty', 'RegCarb']
after_columns = [f'{item}_after' for item in before_columns]

# All eight policy columns, baseline first; same contents and order as before.
columns = before_columns + after_columns

# DataFrame.mean skips NaN by default, so a respondent with some items missing
# is averaged over the items they did answer.
dataClean['mean_climate_support_before'] = dataClean[before_columns].mean(axis=1)
dataClean['mean_climate_support_after'] = dataClean[after_columns].mean(axis=1)

# Positive values mean higher average support in the "_after" answers.
dataClean['mean_climate_support_change'] = (
    dataClean['mean_climate_support_after'] - dataClean['mean_climate_support_before']
)

In [6]:
# Framing column (from Tiffanie's code): translate the numeric treatment code
# into a readable label. The position in the list is the treatment code (0-6).
frame_labels = [
    "No framing",
    "Positive science",
    "Negative science",
    "Religious",
    "Equity",
    "Efficiency",
    "Secular",
]
treatments = dict(enumerate(frame_labels))

# Values of treatment_value that are not keys of `treatments` map to NaN.
dataClean["treatment_frame"] = dataClean["treatment_value"].map(treatments)

In [8]:
# Write the cleaned frame to disk as CSV; index=False leaves the row index
# out of the file.
cleaned_csv_path = './../data/cleaned_data.csv'
dataClean.to_csv(cleaned_csv_path, index=False)