In [130]:
import numpy as np
import pandas as pd
import statsmodels as sm

In [2]:
# Load the raw pilot survey export. skiprows=[1, 2] skips the two file lines
# directly below the header row (presumably survey-tool metadata rows -- TODO confirm).
pilot_data = pd.read_csv(
    "../data/pilot_data.csv",
    skiprows=[1, 2],
)

In [3]:
# Columns kept for the analysis: pre-treatment policy items, party / ideology items,
# attention checks, other survey items, post-treatment policy items and the
# assigned treatment code.
# Note: "RewardConsequence " is spelled with a trailing space on purpose here;
# the name must match the CSV header exactly.
analysis_columns = [
    "GasTax", "CarbTax", "Treaty", "RegCarb",
    "political_views", "party_id", "party_id.1", "party_id.2", "QID74",
    "ScientificConfidence", "RewardConsequence ",
    "Attention_Check_1", "Religiosity", "Economic_Reasoning", "Attention_Check_2",
    "prosociality_1", "prosociality_2", "prosociality_3",
    "prosociality_4", "prosociality_5", "prosociality_6",
    "prosociality_7", "prosociality_8", "prosociality_9",
    "GasTax_after", "CarbTax_after", "Treaty_after", "RegCarb_after",
    "treatment_value",
]
data = pilot_data[analysis_columns]

In [4]:
# Filter responses based on the 2 attention checks: keep only respondents who
# answered "Strongly like" on check 1 and '1,3' on check 2.
passed_attention_checks = (
    (data["Attention_Check_1"] == "Strongly like")
    & (data["Attention_Check_2"] == '1,3')
)
# .copy() makes `data` an independent frame. Later cells add new columns to it;
# without the copy those assignments target a slice of `pilot_data` and trigger
# pandas' SettingWithCopyWarning.
data = data.loc[passed_attention_checks, :].copy()

In [80]:
# 186 observations remain after the attention-check filter (out of 202 raw rows).
# NOTE(review): the displayed output reports 33 columns, but only 29 columns are
# selected into `data`; this cell appears to have been executed after later cells
# added derived columns. Re-run top to bottom to confirm.
data.shape, pilot_data.shape

((186, 33), (202, 52))

In [7]:
# method 1: "main_party_id" -- same as party_id, except that "Independent" and
# "No preference" are merged into one "Independent_nopref" category
# (should ask the other data group)
no_major_party = data["party_id"].isin(["Independent", "No preference"])
data["main_party_id"] = data["party_id"].mask(no_major_party, "Independent_nopref")

In [10]:
# respondents per consolidated party category (method 1)
data["main_party_id"].value_counts()

main_party_id
Democrat              86
Independent_nopref    72
Republican            28
Name: count, dtype: int64

In [22]:
# method 2: "party" -- group by Democrat/Republican-leaning, then include or exclude
# pure Independents/no preference.
# NOTE(review): assumes QID74 codes 2 / 3 / 4 mean lean-Democrat / pure Independent /
# lean-Republican -- TODO confirm against the survey codebook.
is_dem_or_lean = (data["party_id"] == "Democrat") | (data["QID74"] == 2)
is_rep_or_lean = (data["party_id"] == "Republican") | (data["QID74"] == 4)
is_pure_independent = data["QID74"] == 3

# Applied in this order so that a later rule overwrites an earlier one on overlap.
for label, members in (("D", is_dem_or_lean),
                       ("R", is_rep_or_lean),
                       ("I", is_pure_independent)):
    data.loc[members, "party"] = label

In [24]:
# respondents per party grouping (method 2)
data["party"].value_counts()

party
D    117
R     37
I     32
Name: count, dtype: int64

In [36]:
# average policy support (in [0, 3]): row-wise mean of the four post-treatment items
post_treatment_items = ["GasTax_after", "CarbTax_after", "Treaty_after", "RegCarb_after"]
data["avg_policy_support"] = data[post_treatment_items].mean(axis=1)

In [76]:
# TODO: the mapping from treatment codes to framings is unconfirmed. For now
# code 0 is assumed to be the no-treatment condition and codes 1-6 are assumed
# to follow the order in which the framings appear in the document.
framing_labels = [
    "No framing",
    "Positive science",
    "Negative science",
    "Religious",
    "Secular",
    "Equity",
    "Efficiency",
]
# position in the list == treatment code
treatments = dict(enumerate(framing_labels))
data["treatment_frame"] = data["treatment_value"].map(treatments)

In [123]:
# distribution of subjects across treatment conditions (like Table 1 from paper)
# N = 186
# absolute counts per (treatment code, framing label) pair
treatment_freq = data.value_counts(subset=["treatment_value", "treatment_frame"])
# share of respondents per framing label
treatment_rel_freq = data["treatment_frame"].value_counts(normalize=True)

# counts ordered by treatment code, with the proportions joined on as a second column
counts_by_condition = treatment_freq.to_frame().sort_index()
counts_by_condition.join(treatment_rel_freq)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,proportion
treatment_value,treatment_frame,Unnamed: 2_level_1,Unnamed: 3_level_1
0,No framing,22,0.11828
1,Positive science,22,0.11828
2,Negative science,18,0.096774
3,Religious,35,0.188172
4,Secular,30,0.16129
5,Equity,32,0.172043
6,Efficiency,27,0.145161


**1. What is the mean response under each of the different framings, on average, and separately for dems/republicans?**

Difference in means

- A simple table of the average climate policy support under each of the different framings, with standard errors.  
- Average policy support for different framing separated by political position

Regression analysis

- Basic regression analysis of framing’s impact on policy position, controlling for de-meaned covariates and for interactions between the de-meaned covariates and treatment. Use robust standard errors.
- Include pre-test response as a control, and list other controls based on the data quality group's coding. 
- We should use the Lin estimator, as we did in week 3, where we de-mean all covariates, and then control for covariates and covariate-treatment interactions.

In [124]:
# mean and standard error of average policy support under each framing
data.pivot_table(
    values=["avg_policy_support"],
    index=["treatment_value", "treatment_frame"],
    aggfunc=["mean", "sem"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,sem
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_policy_support,avg_policy_support
treatment_value,treatment_frame,Unnamed: 2_level_2,Unnamed: 3_level_2
0,No framing,1.590909,0.167378
1,Positive science,1.795455,0.186559
2,Negative science,1.972222,0.238192
3,Religious,1.857143,0.109566
4,Secular,1.916667,0.115801
5,Equity,1.953125,0.137224
6,Efficiency,1.990741,0.124809


In [125]:
# Temporarily using method 2 (Independents are D/R-leaning or pure Independent),
# with pure Independents included: mean and standard error of average policy
# support per party and framing.
data.pivot_table(
    values=["avg_policy_support"],
    index=["party", "treatment_value", "treatment_frame"],
    aggfunc=["mean", "sem"],
)

# The SE is NaN for party I under treatment 2 because that cell contains only one
# respondent.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,sem
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,avg_policy_support,avg_policy_support
party,treatment_value,treatment_frame,Unnamed: 3_level_2,Unnamed: 4_level_2
D,0,No framing,1.875,0.161019
D,1,Positive science,2.295455,0.088022
D,2,Negative science,2.461538,0.126105
D,3,Religious,1.99,0.122882
D,4,Secular,2.029412,0.142009
D,5,Equity,2.222222,0.106736
D,6,Efficiency,2.078947,0.156549
I,0,No framing,1.3,0.382426
I,1,Positive science,0.8125,0.344223
I,2,Negative science,1.75,


In [126]:
# Same table with pure Independents (party == "I") excluded.
partisans = data.loc[data["party"].ne("I")]
partisans.pivot_table(
    values=["avg_policy_support"],
    index=["party", "treatment_value", "treatment_frame"],
    aggfunc=["mean", "sem"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,sem
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,avg_policy_support,avg_policy_support
party,treatment_value,treatment_frame,Unnamed: 3_level_2,Unnamed: 4_level_2
D,0,No framing,1.875,0.161019
D,1,Positive science,2.295455,0.088022
D,2,Negative science,2.461538,0.126105
D,3,Religious,1.99,0.122882
D,4,Secular,2.029412,0.142009
D,5,Equity,2.222222,0.106736
D,6,Efficiency,2.078947,0.156549
R,0,No framing,0.75,0.520416
R,1,Positive science,1.571429,0.403535
R,2,Negative science,0.4375,0.4375


Regression analysis

- Basic regression analysis of framing’s impact on policy position, controlling for de-meaned covariates and for interactions between the de-meaned covariates and treatment. Use robust standard errors.
- Include pre-test response as a control, and list other controls based on the data quality group's coding. 
- We should use the Lin estimator, as we did in week 3, where we de-mean all covariates, and then control for covariates and covariate-treatment interactions.

In [195]:
# De-mean the covariates of interest (need to ask about this).
# Possible covariates: political party, political views, scientific confidence,
# religious, economic reasoning.

# party ID: start from "party_id.1" and fall back to "party_id.2", then "QID74",
# wherever the value is still missing.
# NOTE(review): assumes the three columns are numeric and share a comparable
# scale -- TODO confirm with the data quality group's coding.
party_id = data["party_id.1"]
party_cov = party_id.fillna(data["party_id.2"])
party_cov = party_cov.fillna(data["QID74"])
demeaned_party_cov = party_cov.sub(party_cov.mean())

In [185]:
# scientific confidence
sci_conf = data["ScientificConfidence"]

# 1 NaN value (per the original note). Series.mean() skips NaN, so the mean is taken
# over the non-missing responses; the missing row is NOT removed and stays NaN in
# the de-meaned covariate.
demeaned_sci_conf_cov = sci_conf - sci_conf.mean()

In [192]:
# religiosity
religiosity = data["Religiosity"]

# 1 NaN value (per the original note -- TODO confirm for this column). mean() skips
# NaN; the missing row is not removed and stays NaN in the de-meaned covariate.
demeaned_religiosity_cov = religiosity - religiosity.mean()

In [196]:
# political views
pol_views = data["political_views"]

# 1 NaN value (per the original note -- TODO confirm for this column). mean() skips
# NaN; the missing row is not removed and stays NaN in the de-meaned covariate.
demeaned_pol_views_cov = pol_views - pol_views.mean()

**2. Does the framing that is best for people on average statistically outperform the control?**

Procedure for estimating the effect of the framing that is best on average:

- Split the data into two random folds.
- In each fold, using separate regression adjusted estimates, determine which treatment had the largest treatment effect. 
- Create a new variable which is an indicator for being in the best condition as determined by the opposite fold (i.e., if an observation is in fold 1, is it in the condition with the highest treatment effect in fold 2?). 
- Estimate the treatment effect of being in this condition as compared to the control using the regression-adjusted estimator. 

**3. Does the best personalized assignment (i.e., where we give everyone the framing we think is best for them) outperform the framing that is best on average?**

- Use the same random folds, and the same best on average treatment from the previous part. 
- In each fold, fit a random forest under each treatment condition (so there will be 7 random forests in each fold). 
- For each observation, predict outcomes under each of the treatment conditions using the random forests from the opposite fold. 
- Create a new variable which is an indicator for being in the best personalized condition as determined by the opposite fold. 
- Estimate the treatment effect of being in this condition as compared to the control using the regression-adjusted estimator. 