In [1]:
import json
import numpy as np
import pandas as pd
import pickle
import plotly.express as px
import sqlite3
import statsmodels.formula.api as smf
import warnings


from causalinference import CausalModel
# Locate pandas' SettingWithCopyWarning wherever the installed version keeps it,
# so this cell still runs on a fresh kernel after a pandas upgrade.
try:
    # pandas >= 1.5 publishes the warning class under pandas.errors
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        # older pandas releases only exposed it from the core module
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        # NOTE(review): presumably a pandas build that no longer ships this
        # warning at all — in that case there is nothing to silence.
        SettingWithCopyWarning = None


# Silence the chained-assignment warning for the rest of the notebook.
if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [2]:
# Load the notebook configuration from cfg.json in the working directory.
with open("cfg.json", "r") as cfg_in:
    cfg = json.load(cfg_in)

# Write the same content straight back so the file keeps a 4-space indented layout.
with open("cfg.json", "w") as cfg_out:
    cfg_out.write(json.dumps(cfg, indent=4))

In [3]:
# Open the SQLite database whose location is given in the config,
# and create a cursor on the same connection.
db_path = cfg["db_dir"] + cfg["install_db"]
conn_proj = sqlite3.connect(db_path)
cursor_proj = conn_proj.cursor()

query_g = """
SELECT *

FROM g_df2

"""


# Read the whole table into a dataframe and cast the electricity cost to float.
g_df = pd.read_sql_query(query_g, conn_proj)
g_df = g_df.assign(cost_electricity=g_df['cost_electricity'].astype(float))

In [4]:
# Subset of g_df: four leading columns followed by the program-type indicators.
model_cols = ['count_per_home', 'NREL_PSM_2019', 'cost_electricity', 'price']
model_cols += ['p_type_' + str(code)
               for code in (31, 32, 33, 37, 46, 50, 78, 87, 88, 92, 86)]
g_df2 = g_df[model_cols]

In [5]:
# Program-type indicator columns that are evaluated one at a time.
cols_eval = ['p_type_%d' % code for code in (31, 32, 33, 37, 46, 50, 78, 92, 86)]

# Indicator columns for the two rebate-group programs, handled separately.
cols_rebate = ['p_type_%d' % code for code in (87, 88)]

# Starts empty; later cells store one entry per evaluated column.
treatment_dict = dict()

In [6]:



# Rebind g_df2 to a full copy of g_df, recode the program indicator columns,
# then fit an OLS model of count_per_home and print it with HC1 robust errors.
g_df2 = g_df.copy()
for col in cols_eval:
    # Two replaces applied in sequence: every 0 becomes 1, then every -1 becomes 0.
    g_df2[col] = g_df2[col].replace({0:1})
    g_df2[col] = g_df2[col].replace({-1:0})
for col in cols_rebate:
    # Here 0 is first turned into -1, and the second replace then maps every -1
    # (including the ones just created) to 0, so 0 and -1 both end up as 0.
    # NOTE(review): the net effect equals a single replace({-1: 0}) — confirm this is intended.
    g_df2[col] = g_df2[col].replace({0:-1})
    g_df2[col] = g_df2[col].replace({-1:0})
# NOTE(review): p_type_33 and p_type_50 are recoded above (they are in cols_eval)
# but do not appear in the formula below — confirm the omission is deliberate.
reg = smf.ols('count_per_home ~ NREL_PSM_2019 + cost_electricity + price + p_type_31 + p_type_32 + p_type_37 + p_type_46 + p_type_78+ p_type_87+ p_type_88 + p_type_92 + p_type_86', data = g_df2).fit()

# Re-derive the results with the HC1 covariance estimator.
ols_robust2_1 = reg.get_robustcov_results(cov_type= 'HC1')

print(ols_robust2_1.summary())

                            OLS Regression Results                            
Dep. Variable:         count_per_home   R-squared:                       0.073
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                     4639.
Date:                Sun, 22 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:23:28   Log-Likelihood:             1.1017e+07
No. Observations:             1718436   AIC:                        -2.203e+07
Df Residuals:                 1718423   BIC:                        -2.203e+07
Df Model:                          12                                         
Covariance Type:                  HC1                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept        -7.238e-05   3.41e-06  

In [7]:
# Load the pickled program lookup whose file name is given in the config.
# NOTE(review): pickle.load can execute arbitrary code — only point this at a trusted file.
p_dates_path = cfg["data_dir"] + cfg["p_dates"]
with open(p_dates_path, 'rb') as pickled:
    prog_map = pickle.load(pickled)

The current dataframe is too large to process locally. The following cell flags a condensed section of the data immediately surrounding each program's initialization date, which is then used for evaluation.

In [8]:
g_df3 = g_df.copy()

# Half-width of the window around a program start, expressed in the same units
# as the `month` column that it is compared against.
EVAL_WINDOW = 6


def _window_status(starts, d, window=EVAL_WINDOW):
    """Classify a period `d` relative to a list of program start values.

    Returns
    -------
    int
        1  if `d` lies in [start, start + window] for any start;
        -1 if `d` lies in [start - window, start] for any start, or is earlier
           than the earliest start;
        0  otherwise, including when `starts` is empty.
    """
    if not starts:
        return 0
    # A hit inside an "after" window always wins over a "before" hit.
    if any(start <= d <= start + window for start in starts):
        return 1
    if d < min(starts) or any(start - window <= d <= start for start in starts):
        return -1
    return 0


for program in [31, 32, 33, 37, 46, 50, 78, 92, 86]:

    def in_effect(x, p_type=program):
        """Return the window flag for one (zipcode, month) pair `x`.

        `p_type` is bound at definition time so the function keeps referring to
        the program it was created for. Any lookup or comparison problem
        (unknown program/zipcode, malformed entry) yields 0, as before; other
        exceptions such as KeyboardInterrupt are no longer swallowed.
        """
        z, d = x[0], x[1]
        try:
            # The second element of each entry is the value compared against `d`.
            starts = [item[1] for item in prog_map[p_type][z]]
            return _window_status(starts, d)
        except (LookupError, TypeError, ValueError):
            return 0

    g_df3['p_type_' + str(program)] = [in_effect(x) for x in zip(g_df3.zipcode, g_df3.month)]
    print(str(program) + ' complete')

31 complete
32 complete
33 complete
37 complete
46 complete
50 complete
78 complete
92 complete
86 complete


In [9]:
# For every indicator column, report how many rows carry a non-zero flag.
all_cols = [*cols_eval, *cols_rebate]
for col in all_cols:
    flagged = g_df3[col].ne(0)
    in_df = g_df3.loc[flagged].reset_index()
    print(col, in_df.shape)

p_type_31 (44558, 27)
p_type_32 (23696, 27)
p_type_33 (0, 27)
p_type_37 (136900, 27)
p_type_46 (98156, 27)
p_type_50 (0, 27)
p_type_78 (168857, 27)
p_type_92 (196754, 27)
p_type_86 (118650, 27)
p_type_87 (1277969, 27)
p_type_88 (1032936, 27)


In [None]:


# Matching-based estimate for each evaluated program column; the rounded
# result (or 'unknown' on failure) is stored in treatment_dict under the column name.
for col in cols_eval:
    # Keep only rows with a non-zero flag, then recode -1 to 0 so the column
    # can be passed as the treatment indicator.
    in_df = g_df3.loc[g_df3[col].ne(0)].reset_index()
    in_df[col] = in_df[col].replace({-1: 0})

    Y_i = in_df['count_per_home'].values
    df_EVAL = in_df.drop(
        columns=['month', 'zipcode', 'home_count', 'i_count', 'count_per_home']
    )

    print(col + ' started')
    try:
        D_i = df_EVAL[col].values
        df_t = df_EVAL[['NREL_PSM_2019', 'cost_electricity', 'price']].to_numpy()

        # Y = outcome, D = treatment, X = covariates
        model = CausalModel(Y=Y_i, D=D_i, X=df_t)
        print('model complete')

        model.est_propensity()
        print('propensity complete')

        model.est_via_matching()
        print('matching complete')

        ate = round(model.estimates['matching']['ate'], 5)
        treatment_dict[col] = ate
    except Exception as e:
        # Report the problem and record a placeholder instead of stopping the loop.
        print(e)
        treatment_dict[col] = 'unknown'
    print(col + ' complete')

p_type_31 started
model complete
propensity complete
matching complete
p_type_31 complete
p_type_32 started
model complete
propensity complete
matching complete
p_type_32 complete
p_type_33 started
Too few control units: N_c < K+1
p_type_33 complete
p_type_37 started
model complete
propensity complete
matching complete
p_type_37 complete
p_type_46 started
model complete
propensity complete
matching complete
p_type_46 complete
p_type_50 started
Too few control units: N_c < K+1
p_type_50 complete
p_type_78 started
model complete
propensity complete


In [None]:



    
# Same matching procedure for the two rebate-group columns, restricted to
# rows with month >= 203.
for col in ['p_type_87', 'p_type_88']:
    keep = g_df3[col].ne(0) & g_df3['month'].ge(203)
    in_df = g_df3.loc[keep].reset_index()

    # Recode -1 to 0 so the column can be passed as the treatment indicator.
    in_df[col] = in_df[col].replace({-1: 0})

    print(in_df.shape)
    Y_i = in_df['count_per_home'].values
    df_EVAL = in_df.drop(
        columns=['month', 'zipcode', 'home_count', 'i_count', 'count_per_home']
    )

    print(col + ' started')
    try:
        D_i = df_EVAL[col].values
        # Earlier note kept from the original cell: candidate extra covariates
        # were 'med_income', 'med_home_value', 'pop_density'.
        df_t = df_EVAL[['NREL_PSM_2019', 'cost_electricity', 'price']].to_numpy()

        # Y = outcome, D = treatment, X = covariates
        model = CausalModel(Y=Y_i, D=D_i, X=df_t)
        print('model complete')

        model.est_propensity()
        print('propensity complete')

        model.est_via_matching()
        print('matching complete')

        ate = round(model.estimates['matching']['ate'], 5)
        treatment_dict[col] = ate
    except Exception as e:
        # Report the problem together with the treated / control row counts,
        # then record a placeholder instead of stopping the loop.
        print(e)
        treated_shape = df_EVAL[df_EVAL[col] == 1].shape
        control_shape = df_EVAL[df_EVAL[col] == 0].shape
        print(col, treated_shape, control_shape)
        treatment_dict[col] = 'unknown'
    print(col + ' complete')

In [None]:
# Persist the collected estimates to the pickle file named in the config.
prog_ate_path = cfg["data_dir"] + cfg["prog_ate"]
with open(prog_ate_path, 'wb') as sink:
    pickle.dump(treatment_dict, sink, protocol=pickle.HIGHEST_PROTOCOL)

Select zipcode
Select policy type
Select implementation date

Marks other zipcodes (choropleth?)
Eliminates other zipcodes with policy
Identifies closest remaining zipcode (matching)
mean before
mean after
is slope consistent?


In [None]:
# Display the estimates collected so far, keyed by indicator column name.
treatment_dict

In [None]:
#show feature importance

In [None]:
# One row per program: (key in treatment_dict, display label, grouping used for colour).
# Looking values up by key instead of relying on the dict's value order keeps
# labels and estimates aligned even if treatment_dict is only partially filled.
# NOTE(review): the key -> label pairing reproduces the positional order that the
# evaluation cells insert into treatment_dict (cols_eval, then the rebate columns)
# — confirm each code really corresponds to its label.
program_labels = [
    ('p_type_31', 'Personal Tax Credit', 'Personal Tax'),
    ('p_type_32', 'Personal Tax Deduction', 'Personal Tax'),
    ('p_type_33', 'Personal Tax Exemption', 'Personal Tax'),
    ('p_type_37', 'Net Metering', 'Long-term cost-benefit'),
    ('p_type_46', 'Public Benefits Fund', 'Community Fund'),
    ('p_type_50', 'Property Tax Assessment', 'Property Tax'),
    ('p_type_78', 'Property Tax Incentive', 'Property Tax'),
    ('p_type_92', 'Feed-in Tariff', 'Long-term cost-benefit'),
    ('p_type_86', 'Value of Solar Tariff', 'Long-term cost-benefit'),
    ('p_type_87', 'Grant Program', 'Discount'),
    ('p_type_88', 'Rebate Program', 'Discount'),
]

x = [label for _, label, _ in program_labels]
# Missing keys come back as None rather than breaking the frame construction.
y = [treatment_dict.get(key) for key, _, _ in program_labels]
types = [group for _, _, group in program_labels]

plot_frame = pd.DataFrame({
    'Program Type': x,
    # 'unknown' placeholders and missing entries become NaN so the column stays numeric.
    'Treatment Affect': pd.to_numeric(pd.Series(y, dtype=object), errors='coerce'),
    'Description': types,
})




In [None]:
import plotly.io as pio

# Render figures inline in the classic notebook front end.
pio.renderers.default = 'notebook'

# Bar chart of the estimate per program, coloured by program grouping.
# The dataframe column keeps its existing name ('Treatment Affect'); only the
# text shown to the reader is corrected to "Effect" via `labels` and `title`.
fig = px.bar(plot_frame,
             x='Program Type',
             y='Treatment Affect',
             color='Description',
             labels={'Treatment Affect': 'Average Treatment Effect'},
             title='Average Treatment Effect of Select Program Types',
             template='plotly_dark')
fig.update_layout(legend=dict(orientation="v", yanchor="auto", y=1, x=0.8))
fig.show()

In [None]:
# fig.write_image(cfg["data_dir"] + "ATE.png")