## Using new demeaned outcomes to get rid of individual ozone fixed effects

In [None]:
# DD regression 1: names of the outcome variables and of the treatment
# indicators that the regression formulas below are built from.
dependent_vars = [
    "demeaned_ln_ozone_max",
    "demeaned_ln_8hr_ozone_max",
]
treatment_vars = [
    "treat_rvpI",
    "treat_rvpII",
    "treat_rfg",
    "treat_CARB",
]

In [None]:
# Single-treatment DD regressions: one OLS fit per (outcome, treatment) pair.
# results2[outcome][treatment] holds the statistics of the treatment term.
results2 = {}
for dependent_var in dependent_vars:
    print("working on dependent, " + dependent_var)
    results2[dependent_var] = {}

    for treatment_var in treatment_vars:
        print("working on treatment, " + treatment_var)

        # ozone fixed effects are taken care of by demeaning
        model = smf.ols(
            f"{dependent_var}~ 1 + {treatment_var} + C(year)*C(census_region)",
            data=summer_only,
        ).fit()

        # Keep only the treatment coefficient's statistics plus the overall fit.
        values = {
            "Point Estimate": model.params[treatment_var],
            "Standard Error": model.bse[treatment_var],
            "P-Value": model.pvalues[treatment_var],
            "R Squared": model.rsquared,
        }
        results2[dependent_var][treatment_var] = values


In [None]:
# Tabulate the single-treatment results for demeaned_ln_ozone_max:
# one row per treatment, one column per statistic, rounded to 4 decimals.
# DataFrame.round is vectorised and replaces DataFrame.applymap, which is
# deprecated since pandas 2.1.
result_df_1 = pd.DataFrame(results2["demeaned_ln_ozone_max"]).round(4)
result_df_1 = result_df_1.T
# Name the row index after the outcome so the displayed table is self-describing.
index1 = result_df_1.index
index1.name = "demeaned_ln_ozone_max"
result_df_1

In [None]:
# Tabulate the single-treatment results for demeaned_ln_8hr_ozone_max:
# one row per treatment, one column per statistic, rounded to 4 decimals.
# DataFrame.round is vectorised and replaces DataFrame.applymap, which is
# deprecated since pandas 2.1.
result_df_2 = pd.DataFrame(results2["demeaned_ln_8hr_ozone_max"]).round(4)
result_df_2 = result_df_2.T
# Name the row index after the outcome so the displayed table is self-describing.
index2 = result_df_2.index
index2.name = "demeaned_ln_8hr_ozone_max"
result_df_2

## Trying a new regression with all of the treatments included together

In [None]:
# Outcome and treatment names for the regression that includes all of the
# treatment indicators at once (same values as in DD regression 1).
dependent_vars = [
    "demeaned_ln_ozone_max",
    "demeaned_ln_8hr_ozone_max",
]
treatment_vars = [
    "treat_rvpI",
    "treat_rvpII",
    "treat_rfg",
    "treat_CARB",
]

In [None]:
# Joint DD regression: all four treatment indicators enter the same model.
# results3[outcome] is a flat dict keyed "<statistic> <treatment>" plus "R Squared".
results3 = {}
for dependent_var in dependent_vars:
    print("working on dependent, " + dependent_var)

    # ozone fixed effects are taken care of by demeaning
    model = smf.ols(
        f"{dependent_var} ~ 1 + treat_rvpI + treat_rvpII + treat_rfg + treat_CARB + C(year)*C(census_region)",
        data=summer_only,
    ).fit()

    # Group the keys by statistic (all point estimates first, then all
    # standard errors, then all p-values) so the table columns stay in
    # that order.
    values = {}
    for label, estimates in (
        ("Point Estimate", model.params),
        ("Standard Error", model.bse),
        ("P-Value", model.pvalues),
    ):
        for var in treatment_vars:
            values[f"{label} {var}"] = estimates[var]
    values["R Squared"] = model.rsquared
    results3[dependent_var] = values


In [None]:
# Tabulate the joint-regression results: one row per dependent variable,
# one column per "<statistic> <treatment>" entry, rounded to 4 decimals.
# DataFrame.round is vectorised and replaces DataFrame.applymap, which is
# deprecated since pandas 2.1.
result_df_1 = pd.DataFrame(results3).round(4)
result_df_1 = result_df_1.T
# results3 is keyed by dependent variable, so after the transpose the rows are
# outcomes, not treatments; label the index accordingly.
index1 = result_df_1.index
index1.name = "dependent_var"
result_df_1

## Redoing DD with demeaned outcomes (ozoneID fixed effects removed): robustness check

In [None]:
def convert_to_nan(x):
    """Return ``x`` if it is a real number, otherwise ``np.nan``.

    Use this specifically on the weather dataframe columns so that entries
    that are not numbers (for example strings or ``None``) become missing
    values and the columns can be used in the regressions.

    Integers (Python ``int`` and NumPy integer scalars) and NumPy floating
    scalars are kept in addition to Python floats; checking only for
    ``float`` would silently turn whole-number readings into NaN.
    Booleans are still mapped to NaN.

    Parameters
    ----------
    x : object
        A single cell value.

    Returns
    -------
    The original value when it is numeric, else ``np.nan``.
    """
    # bool is a subclass of int, so rule it out before the numeric check.
    if isinstance(x, bool):
        return np.nan
    if isinstance(x, (int, float, np.integer, np.floating)):
        return x
    return np.nan

    
    
# Run convert_to_nan over each current and lagged weather column and write
# the cleaned values back into the same column.
for weather_col in (
    "TMAX",
    "TMIN",
    "SNOW",
    "PRCP",
    "lagged_TMIN",
    "lagged_TMAX",
    "lagged_SNOW",
    "lagged_PRCP",
):
    summer_only[weather_col] = summer_only[weather_col].apply(convert_to_nan)



In [None]:
# Formula fragments shared by the four specifications for demeaned_ln_ozone_max.
# Outcome, treatment indicators and year-by-region effects (present in every model).
_dd1_base = (
    "demeaned_ln_ozone_max ~ 1 + treat_rvpI + treat_rvpII + treat_rfg + treat_CARB"
    " + C(year)*C(census_region)"
)
# Weather controls with cubic temperature terms and TMAX-by-lag interactions.
_dd1_weather_cubic = (
    "I(TMAX) + I(TMAX**2) + I(TMAX**3) + I(TMIN) + I(TMIN**2) + I(TMIN**3) + "
    "SNOW + PRCP + I(SNOW**2) + I(PRCP**2) + "
    "I(TMAX*TMIN) + I(PRCP*TMAX) + "
    "lagged_TMIN + lagged_TMAX + lagged_SNOW + lagged_PRCP + "
    "I(TMAX*lagged_TMAX) + I(TMAX*lagged_TMIN)"
)
# Weather controls without the cubic terms and without the TMAX-by-lag interactions.
_dd1_weather_quadratic = (
    "I(TMAX) + I(TMAX**2) + I(TMIN) + I(TMIN**2) + "
    "SNOW + PRCP + I(SNOW**2) + I(PRCP**2) + "
    "I(TMAX*TMIN) + I(PRCP*TMAX) + "
    "lagged_TMIN + lagged_TMAX + lagged_SNOW + lagged_PRCP"
)
# Day-of-week / day-of-year interactions and income (only in the fullest model).
_dd1_time_income = (
    "C(day_of_week)*TMAX + C(day_of_week)*TMIN + C(day_of_week)*SNOW + C(day_of_week)*PRCP + "
    "C(day_of_week)*C(census_region) + C(day_of_year)*C(census_region) + income"
)

# Most complex model: base + cubic weather + time interactions + income.
formula_dd1 = " + ".join([_dd1_base, _dd1_weather_cubic, _dd1_time_income])

# Same as above without the day-of-week/day-of-year terms and income.
formula_dd12 = " + ".join([_dd1_base, _dd1_weather_cubic])

# Quadratic-only weather controls.
formula_dd13 = " + ".join([_dd1_base, _dd1_weather_quadratic])

# last one is the simple model
formula_dd14 = _dd1_base


In [None]:
# OLS fit of the most complex specification (formula_dd1) on summer_only.
dd2_model1 = smf.ols(formula=formula_dd1, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd2_model1, keyed "<statistic> <treatment>" and grouped by statistic.
values21 = {
    **{f"Point Estimate {name}": dd2_model1.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd2_model1.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd2_model1.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of formula_dd12 (no day-of-week/day-of-year terms, no income) on summer_only.
dd2_model2 = smf.ols(formula=formula_dd12, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd2_model2, keyed "<statistic> <treatment>" and grouped by statistic.
values22 = {
    **{f"Point Estimate {name}": dd2_model2.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd2_model2.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd2_model2.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of formula_dd13 (weather controls without cubic terms) on summer_only.
dd2_model3 = smf.ols(formula=formula_dd13, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd2_model3, keyed "<statistic> <treatment>" and grouped by statistic.
values23 = {
    **{f"Point Estimate {name}": dd2_model3.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd2_model3.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd2_model3.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of the simple specification formula_dd14 (no weather controls) on summer_only.
dd2_model4 = smf.ols(formula=formula_dd14, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd2_model4, keyed "<statistic> <treatment>" and grouped by statistic.
values24 = {
    **{f"Point Estimate {name}": dd2_model4.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd2_model4.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd2_model4.pvalues[name] for name in treatment_vars},
}

In [None]:
# Map a display label for each specification to its treatment statistics.
results = {
    "Model 1 (Most Complex)": values21,
    "Model 2 (No Time Variables)": values22,
    "Model 3 (No Weather Cubics)": values23,
    "Model 4 (No Weather)": values24,
}

In [None]:
# Show the comparison table with one row per model and one column per statistic.
pd.DataFrame(results).transpose()

## Now doing it for the 8-hour max to see whether the results are the same

In [None]:
# Formula fragments shared by the four specifications for demeaned_ln_8hr_ozone_max.
# Outcome, treatment indicators and year-by-region effects (present in every model).
_dd2_base = (
    "demeaned_ln_8hr_ozone_max ~ 1 + treat_rvpI + treat_rvpII + treat_rfg + treat_CARB"
    " + C(year)*C(census_region)"
)
# Weather controls with cubic temperature terms and TMAX-by-lag interactions.
_dd2_weather_cubic = (
    "I(TMAX) + I(TMAX**2) + I(TMAX**3) + I(TMIN) + I(TMIN**2) + I(TMIN**3) + "
    "SNOW + PRCP + I(SNOW**2) + I(PRCP**2) + "
    "I(TMAX*TMIN) + I(PRCP*TMAX) + "
    "lagged_TMIN + lagged_TMAX + lagged_SNOW + lagged_PRCP + "
    "I(TMAX*lagged_TMAX) + I(TMAX*lagged_TMIN)"
)
# Weather controls without the cubic terms and without the TMAX-by-lag interactions.
_dd2_weather_quadratic = (
    "I(TMAX) + I(TMAX**2) + I(TMIN) + I(TMIN**2) + "
    "SNOW + PRCP + I(SNOW**2) + I(PRCP**2) + "
    "I(TMAX*TMIN) + I(PRCP*TMAX) + "
    "lagged_TMIN + lagged_TMAX + lagged_SNOW + lagged_PRCP"
)
# Day-of-week / day-of-year interactions and income (only in the fullest model).
_dd2_time_income = (
    "C(day_of_week)*TMAX + C(day_of_week)*TMIN + C(day_of_week)*SNOW + C(day_of_week)*PRCP + "
    "C(day_of_week)*C(census_region) + C(day_of_year)*C(census_region) + income"
)

# Most complex model: base + cubic weather + time interactions + income.
formula_dd2 = " + ".join([_dd2_base, _dd2_weather_cubic, _dd2_time_income])

# Same as above without the day-of-week/day-of-year terms and income.
formula_dd22 = " + ".join([_dd2_base, _dd2_weather_cubic])

# Quadratic-only weather controls.
formula_dd23 = " + ".join([_dd2_base, _dd2_weather_quadratic])

# last one is the simple model
formula_dd24 = _dd2_base


In [None]:
# OLS fit of the most complex 8-hour specification (formula_dd2) on summer_only.
dd3_model1 = smf.ols(formula=formula_dd2, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd3_model1, keyed "<statistic> <treatment>" and grouped by statistic.
values31 = {
    **{f"Point Estimate {name}": dd3_model1.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd3_model1.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd3_model1.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of formula_dd22 (no day-of-week/day-of-year terms, no income) on summer_only.
dd3_model2 = smf.ols(formula=formula_dd22, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd3_model2, keyed "<statistic> <treatment>" and grouped by statistic.
values32 = {
    **{f"Point Estimate {name}": dd3_model2.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd3_model2.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd3_model2.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of formula_dd23 (weather controls without cubic terms) on summer_only.
dd3_model3 = smf.ols(formula=formula_dd23, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd3_model3, keyed "<statistic> <treatment>" and grouped by statistic.
values33 = {
    **{f"Point Estimate {name}": dd3_model3.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd3_model3.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd3_model3.pvalues[name] for name in treatment_vars},
}

In [None]:
# OLS fit of the simple 8-hour specification formula_dd24 (no weather controls) on summer_only.
dd3_model4 = smf.ols(formula=formula_dd24, data=summer_only).fit()

In [None]:
# Pull the coefficient, standard error and p-value of every treatment term
# out of dd3_model4, keyed "<statistic> <treatment>" and grouped by statistic.
values34 = {
    **{f"Point Estimate {name}": dd3_model4.params[name] for name in treatment_vars},
    **{f"Standard Error {name}": dd3_model4.bse[name] for name in treatment_vars},
    **{f"P-Value {name}": dd3_model4.pvalues[name] for name in treatment_vars},
}

In [None]:
# Map a display label for each 8-hour specification to its treatment statistics.
# NOTE: this rebinds `results2`, the name the single-treatment regression cells
# also use; re-running their result_df_1 / result_df_2 cells after this one
# will fail because the outcome-name keys are no longer present.
results2 = {
    "Model 1 (Most Complex)": values31,
    "Model 2 (No Time Variables)": values32,
    "Model 3 (No Weather Cubics)": values33,
    "Model 4 (No Weather)": values34,
}

In [None]:
# Show the 8-hour comparison table with one row per model and one column per statistic.
pd.DataFrame(results2).transpose()