In [79]:
import pandas as pd
import numpy as np
import datetime as dt
import statsmodels.formula.api as smf
import statsmodels.nonparametric.api as nparam
from statsmodels.nonparametric.bandwidths import bw_silverman

## Import Data

In [93]:
# Read the regression data set from CSV; the relative path is resolved against
# the working directory the notebook is run from.
regression_data_csv = '../../data/processed/regression_data.csv'
reg_data_df = pd.read_csv(regression_data_csv)

## Nonlinear Control IV Regressions

\begin{align*}
Q &= P \beta_d + f(T) + u_d \\
Q &= P \beta_s + g(W) + u_s \\
P &= (\beta_d - \beta_s)^{-1} \, \left(  g(W) - f(T) + u_s - u_d \right)  \\
E(u_d \, | \, T, W) &= 0 \\
E(u_s \, | \, T, W) &= 0 \\
\end{align*}

where $f(T)$ is some nonlinear function of $T$ and $g(W)$ is some nonlinear function of $W$. 

Let $\alpha = (\beta_d - \beta_s)^{-1}$. Firstly, since $E(u_s - u_d \,|\, T, W) = 0$, note that we have:
\begin{align*}
E(P\,|\,T) &= \alpha (E(g(W)\,|\,T) - f(T)) \\
E(P\,|\,W) &= \alpha (g(W) - E(f(T)\,|\,W)) \\
E(P\,|\,T,W) &= \alpha(g(W) - f(T)) 
\end{align*}

Now, differencing $Q$ with its conditional expectation, we have:
\begin{align*}
Q - E(Q \,|\,T) &= (P - E(P\,|\,T))\beta_d + (f(T) - E(f(T) \,|\,T)) + (u_d - E(u_d\,|\,T))\\
&= \alpha\,(g(W) - E(g(W)\,|\,T))\,\beta_d + 0 + \tilde{u}_d
\end{align*}
where $\tilde{u}_d = u_d + \alpha \beta_d (u_s - u_d)$ collects the structural errors and satisfies $E(\tilde{u}_d \,|\, T, W) = 0$.

Now, note that we also have:
$$E(P\,|\,T,W) - E(P\,|\,T) = \alpha (g(W) - E(g(W)\,|\,T))$$
hence we can regress 
$$(Q - E(Q \,|\,T)) \sim (E(P\,|\,T,W) - E(P\,|\,T)) $$ 
to estimate $\beta_d$.

In [81]:
# Data
# Build the working frame as a copy of the loaded data with a constant column
# named '1' appended, then report how many rows it holds.
reg_data_sample_df = reg_data_df.assign(**{'1': 1})
print(len(reg_data_sample_df))

6817


### No controls

In [82]:
# Pull W (`ln_coal_rel`), P (`ln_price_rel`) and Q (`ln_load_rel`) out of the
# working frame as NumPy arrays. `w` is selected with a list of columns, so it
# stays two-dimensional with a single column.
w = reg_data_sample_df[['ln_coal_rel']].to_numpy()
p = reg_data_sample_df['ln_price_rel'].to_numpy()
q = reg_data_sample_df['ln_load_rel'].to_numpy()

# Estimation of CE(P | W)
# Local-constant ('lc') kernel regression of p on w, with w treated as a
# continuous ('c') regressor and a Silverman rule-of-thumb bandwidth; the fitted
# values are evaluated at the sample points themselves.
bw_w = [bw_silverman(w)]
kernel_p_w = nparam.KernelReg(endog=[p], exog=[w], reg_type='lc',
                              var_type='c' * w.shape[1], bw=bw_w)
ce_p_w, _ = kernel_p_w.fit(w)

In [83]:
# Attach the fitted CE(P | W) values to the working frame so the formula API
# can reference them by column name.
reg_data_sample_df = reg_data_sample_df.assign(ce_p_w=ce_p_w)

In [84]:
# OLS of load on the fitted CE(P | W), reported with HC1 robust standard errors.
# NOTE(review): `ce_p_w` is itself an estimate; the covariance computed here
# treats it as observed data.
ols_no_controls = smf.ols(formula='ln_load_rel ~ ce_p_w', data=reg_data_sample_df)
fit_1 = ols_no_controls.fit(cov_type='HC1')
fit_1.summary()

0,1,2,3
Dep. Variable:,ln_load_rel,R-squared:,0.043
Model:,OLS,Adj. R-squared:,0.043
Method:,Least Squares,F-statistic:,313.8
Date:,"Wed, 19 Jun 2019",Prob (F-statistic):,1.08e-68
Time:,13:31:44,Log-Likelihood:,-1052.7
No. Observations:,6817,AIC:,2109.0
Df Residuals:,6815,BIC:,2123.0
Df Model:,1,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0054,0.004,-1.210,0.226,-0.014,0.003
ce_p_w,-2.9976,0.169,-17.716,0.000,-3.329,-2.666

0,1,2,3
Omnibus:,4.783,Durbin-Watson:,0.84
Prob(Omnibus):,0.092,Jarque-Bera (JB):,4.977
Skew:,0.035,Prob(JB):,0.0831
Kurtosis:,3.112,Cond. No.,49.8


### With only degree day controls

In [85]:
# Controls T (the four degree-day columns), W, P and Q as NumPy arrays.
t = reg_data_sample_df[['CDD_1', 'CDD_2', 'HDD_1', 'HDD_2']].to_numpy()
w = reg_data_sample_df[['ln_coal_rel']].to_numpy()
p = reg_data_sample_df['ln_price_rel'].to_numpy()
q = reg_data_sample_df['ln_load_rel'].to_numpy()
# Joint conditioning set (T, W): the columns of t followed by the column of w.
tw = np.column_stack((t, w))

# Every conditional expectation below is a local-constant ('lc') kernel
# regression with all regressors treated as continuous ('c'), evaluated at the
# sample points themselves.
# NOTE(review): bw_silverman is handed multi-column arrays (t, tw) here;
# confirm that this rule of thumb is intended for more than one regressor.
continuous_t = 'c' * t.shape[1]
continuous_w = 'c' * w.shape[1]
continuous_tw = 'c' * (t.shape[1] + w.shape[1])

# Estimation of CE(Q | T)
kernel_q_t = nparam.KernelReg(endog=[q], exog=[t], reg_type='lc',
                              var_type=continuous_t, bw=bw_silverman(t))
ce_q_t, _ = kernel_q_t.fit(t)

# Estimation of CE(P | T)
kernel_p_t = nparam.KernelReg(endog=[p], exog=[t], reg_type='lc',
                              var_type=continuous_t, bw=bw_silverman(t))
ce_p_t, _ = kernel_p_t.fit(t)

# Estimation of CE(P | W)
kernel_p_w = nparam.KernelReg(endog=[p], exog=[w], reg_type='lc',
                              var_type=continuous_w, bw=[bw_silverman(w)])
ce_p_w, _ = kernel_p_w.fit(w)

# Estimation of CE(P | T, W)
kernel_p_tw = nparam.KernelReg(endog=[p], exog=[tw], reg_type='lc',
                               var_type=continuous_tw, bw=bw_silverman(tw))
ce_p_tw, _ = kernel_p_tw.fit(tw)

In [86]:
# Attach the fitted conditional expectations to the working frame so the
# formula API can reference them by column name.
reg_data_sample_df = reg_data_sample_df.assign(
    ce_q_t=ce_q_t,
    ce_p_t=ce_p_t,
    ce_p_w=ce_p_w,
    ce_p_tw=ce_p_tw,
)

In [87]:
# OLS of (Q - CE(Q | T)) on (CE(P | T, W) - CE(P | T)), reported with HC1
# robust standard errors. The differences are formed inside the formula.
# NOTE(review): both sides are built from estimated conditional expectations;
# the covariance computed here treats them as observed data.
ols_degree_day_controls = smf.ols(
    formula='np.add(ln_load_rel, -ce_q_t) ~ np.add(ce_p_tw, -ce_p_t)',
    data=reg_data_sample_df,
)
fit_2 = ols_degree_day_controls.fit(cov_type='HC1')
fit_2.summary()

0,1,2,3
Dep. Variable:,"np.add(ln_load_rel, -ce_q_t)",R-squared:,0.086
Model:,OLS,Adj. R-squared:,0.085
Method:,Least Squares,F-statistic:,548.8
Date:,"Wed, 19 Jun 2019",Prob (F-statistic):,8.49e-117
Time:,13:32:32,Log-Likelihood:,2917.9
No. Observations:,6817,AIC:,-5832.0
Df Residuals:,6815,BIC:,-5818.0
Df Model:,1,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0001,0.002,-0.064,0.949,-0.004,0.004
"np.add(ce_p_tw, -ce_p_t)",-1.2123,0.052,-23.427,0.000,-1.314,-1.111

0,1,2,3
Omnibus:,91.609,Durbin-Watson:,1.051
Prob(Omnibus):,0.0,Jarque-Bera (JB):,160.113
Skew:,-0.071,Prob(JB):,1.71e-35
Kurtosis:,3.737,Cond. No.,25.1


### With all controls

In [88]:
# Controls T (`time_diff` plus the four degree-day columns), W, P and Q as
# NumPy arrays.
t = reg_data_sample_df[['time_diff', 'CDD_1', 'CDD_2', 'HDD_1', 'HDD_2']].to_numpy()
w = reg_data_sample_df[['ln_coal_rel']].to_numpy()
p = reg_data_sample_df['ln_price_rel'].to_numpy()
q = reg_data_sample_df['ln_load_rel'].to_numpy()
# Joint conditioning set (T, W): the columns of t followed by the column of w.
tw = np.column_stack((t, w))

# Every conditional expectation below is a local-constant ('lc') kernel
# regression with all regressors treated as continuous ('c'), evaluated at the
# sample points themselves.
# NOTE(review): bw_silverman is handed multi-column arrays (t, tw) here;
# confirm that this rule of thumb is intended for more than one regressor.
continuous_t = 'c' * t.shape[1]
continuous_w = 'c' * w.shape[1]
continuous_tw = 'c' * (t.shape[1] + w.shape[1])

# Estimation of CE(Q | T)
kernel_q_t = nparam.KernelReg(endog=[q], exog=[t], reg_type='lc',
                              var_type=continuous_t, bw=bw_silverman(t))
ce_q_t, _ = kernel_q_t.fit(t)

# Estimation of CE(P | T)
kernel_p_t = nparam.KernelReg(endog=[p], exog=[t], reg_type='lc',
                              var_type=continuous_t, bw=bw_silverman(t))
ce_p_t, _ = kernel_p_t.fit(t)

# Estimation of CE(P | W)
kernel_p_w = nparam.KernelReg(endog=[p], exog=[w], reg_type='lc',
                              var_type=continuous_w, bw=[bw_silverman(w)])
ce_p_w, _ = kernel_p_w.fit(w)

# Estimation of CE(P | T, W)
kernel_p_tw = nparam.KernelReg(endog=[p], exog=[tw], reg_type='lc',
                               var_type=continuous_tw, bw=bw_silverman(tw))
ce_p_tw, _ = kernel_p_tw.fit(tw)

In [89]:
# Overwrite the conditional-expectation columns in the working frame with the
# values fitted in the preceding cell.
reg_data_sample_df = reg_data_sample_df.assign(
    ce_q_t=ce_q_t,
    ce_p_t=ce_p_t,
    ce_p_w=ce_p_w,
    ce_p_tw=ce_p_tw,
)

In [90]:
# OLS of (Q - CE(Q | T)) on (CE(P | T, W) - CE(P | T)), reported with HC1
# robust standard errors. The differences are formed inside the formula.
# NOTE(review): both sides are built from estimated conditional expectations;
# the covariance computed here treats them as observed data.
ols_all_controls = smf.ols(
    formula='np.add(ln_load_rel, -ce_q_t) ~ np.add(ce_p_tw, -ce_p_t)',
    data=reg_data_sample_df,
)
fit_3 = ols_all_controls.fit(cov_type='HC1')
fit_3.summary()

0,1,2,3
Dep. Variable:,"np.add(ln_load_rel, -ce_q_t)",R-squared:,0.094
Model:,OLS,Adj. R-squared:,0.093
Method:,Least Squares,F-statistic:,403.4
Date:,"Wed, 19 Jun 2019",Prob (F-statistic):,3.19e-87
Time:,13:33:32,Log-Likelihood:,6513.7
No. Observations:,6817,AIC:,-13020.0
Df Residuals:,6815,BIC:,-13010.0
Df Model:,1,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0001,0.001,-0.126,0.899,-0.002,0.002
"np.add(ce_p_tw, -ce_p_t)",-0.8847,0.044,-20.085,0.000,-0.971,-0.798

0,1,2,3
Omnibus:,408.105,Durbin-Watson:,1.232
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1754.108
Skew:,-0.077,Prob(JB):,0.0
Kurtosis:,5.48,Cond. No.,29.6


## Regression Summary

In [92]:
# Collect regression results
# Every fit has two parameters (Intercept, then the price term), so position 1
# is the price slope. Positional access goes through `.iloc`: `params` is a
# label-indexed Series, and integer `[]` lookup on such a Series is deprecated
# in pandas.
fits = (fit_1, fit_2, fit_3)
reg_results = {
    # sigma is reported with the opposite sign of the estimated slope
    'sigma coeff': [-fit.params.iloc[1] for fit in fits],
    # HC1 robust standard error of the same coefficient
    'sigma stder': [
        np.sqrt(np.diag(fit.get_robustcov_results(cov_type='HC1').cov_params()))[1]
        for fit in fits
    ],
}

# Format as dataframe: one row per specification, transposed for display
reg_results_df = pd.DataFrame(reg_results)
reg_results_df.index = ['(1)', '(2)', '(3)']
reg_results_df['sigma t-stat'] = reg_results_df['sigma coeff']/reg_results_df['sigma stder']
reg_results_df.T.round(decimals = 4)

Unnamed: 0,(1),(2),(3)
sigma coeff,2.9976,1.2123,0.8847
sigma stder,0.1692,0.0518,0.044
sigma t-stat,17.7157,23.4269,20.0852
