In [147]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from helpers.helpers import *
import statsmodels.api as sm
import seaborn as sns
%matplotlib inline

In [148]:
emission_data = pd.read_excel("../data/Emission/verified_emissions_2018_en.xlsx", sheet_name ="verified_emissions_2018", header = 13)
emission_data = emission_data.reset_index()

In [149]:
emissions = emission_data[[c for c in emission_data.columns if "VERIFIED_EMISSIONS" in c or c == "index"]]
emissions.columns = [c.split("_")[2] if "VERIFIED_EMISSIONS" in c else c for c in emissions.columns ]

In [150]:
panel = pd.melt(emissions, id_vars = "index",value_vars=[c for c in emissions.columns if c!="index"], var_name ="year", value_name = "verified_emission")
panel = pd.merge(panel, emission_data[["index","REGISTRY_CODE","MAIN_ACTIVITY_TYPE_CODE"]], how = "left", left_on = "index",right_on="index")
panel = panel.fillna("NA")

bpanel = panel.copy()

panel = pd.get_dummies(panel, prefix=['dreg', 'dmac'],columns = ["REGISTRY_CODE","MAIN_ACTIVITY_TYPE_CODE"], drop_first=True,dummy_na=True)
country_control = [c for c in panel.columns if "dreg" in c]
activity_type_control = [c for c in panel.columns if "dmac" in c]

In [185]:
start_year = 2000
cutoff_year = 2017


panel = panel.loc[~(panel.verified_emission == "Excluded")]
panel = panel.loc[~(panel.verified_emission == -1)]

panel = panel.loc[panel.year > start_year]
panel = panel.astype(int)

In [186]:
panel["post_cutoff"] = (panel["year"]>cutoff_year).astype(int)
panel["running_variable"] = panel["year"] - start_year
panel["intercept"] = 1

In [187]:
model = sm.OLS(panel[["verified_emission"]], panel[["intercept","post_cutoff","running_variable"]]).fit()
model.summary()

0,1,2,3
Dep. Variable:,verified_emission,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,20.3
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,1.53e-09
Time:,21:44:12,Log-Likelihood:,-1785700.0
No. Observations:,118040,AIC:,3571000.0
Df Residuals:,118037,BIC:,3571000.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,2.501e+05,1.27e+04,19.754,0.000,2.25e+05,2.75e+05
post_cutoff,1.176e+04,1.05e+04,1.121,0.262,-8801.949,3.23e+04
running_variable,-5851.7461,972.935,-6.015,0.000,-7758.683,-3944.809

0,1,2,3
Omnibus:,246440.827,Durbin-Watson:,1.539
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1225754281.319
Skew:,17.983,Prob(JB):,0.0
Kurtosis:,500.924,Cond. No.,72.9


In [162]:
model = sm.OLS(panel[["verified_emission"]], panel[["intercept","post_cutoff","running_variable"]+country_control]).fit()
model.summary()

0,1,2,3
Dep. Variable:,verified_emission,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,32.79
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,6.07e-199
Time:,21:36:07,Log-Likelihood:,-1785200.0
No. Observations:,118040,AIC:,3570000.0
Df Residuals:,118007,BIC:,3571000.0
Df Model:,32,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,2.422e+05,2.5e+04,9.702,0.000,1.93e+05,2.91e+05
post_cutoff,2.249e+04,9063.777,2.482,0.013,4727.877,4.03e+04
running_variable,-8062.0391,1320.856,-6.104,0.000,-1.07e+04,-5473.183
dreg_BE,2869.4783,2.5e+04,0.115,0.909,-4.62e+04,5.19e+04
dreg_BG,1.31e+05,3.17e+04,4.129,0.000,6.88e+04,1.93e+05
dreg_CY,1.877e+05,7.43e+04,2.527,0.011,4.21e+04,3.33e+05
dreg_CZ,7.353e+04,2.46e+04,2.991,0.003,2.53e+04,1.22e+05
dreg_DE,1.209e+05,2.06e+04,5.880,0.000,8.06e+04,1.61e+05
dreg_DK,-8.46e+04,2.43e+04,-3.484,0.000,-1.32e+05,-3.7e+04

0,1,2,3
Omnibus:,246450.627,Durbin-Watson:,1.552
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1234024552.5
Skew:,17.981,Prob(JB):,0.0
Kurtosis:,502.61,Cond. No.,1.03e+16


In [163]:
model = sm.OLS(panel[["verified_emission"]], panel[["intercept","post_cutoff","running_variable"]+activity_type_control]).fit()
model.summary()

0,1,2,3
Dep. Variable:,verified_emission,R-squared:,0.023
Model:,OLS,Adj. R-squared:,0.022
Method:,Least Squares,F-statistic:,71.55
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.0
Time:,21:36:15,Log-Likelihood:,-1784400.0
No. Observations:,118040,AIC:,3569000.0
Df Residuals:,118001,BIC:,3569000.0
Df Model:,38,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,2.214e+05,1.79e+04,12.348,0.000,1.86e+05,2.57e+05
post_cutoff,1.799e+04,9015.650,1.996,0.046,321.215,3.57e+04
running_variable,-6778.6380,1321.306,-5.130,0.000,-9368.376,-4188.900
dmac_2.0,1.154e+06,9.92e+04,11.628,0.000,9.59e+05,1.35e+06
dmac_4.0,-1.412e+05,2.68e+05,-0.526,0.599,-6.67e+05,3.85e+05
dmac_5.0,1.915e+05,6.92e+04,2.765,0.006,5.58e+04,3.27e+05
dmac_6.0,6.271e+04,4.69e+04,1.336,0.182,-2.93e+04,1.55e+05
dmac_7.0,-9.66e+04,4.92e+04,-1.963,0.050,-1.93e+05,-134.385
dmac_8.0,-1.302e+05,3.01e+04,-4.324,0.000,-1.89e+05,-7.12e+04

0,1,2,3
Omnibus:,249070.59,Durbin-Watson:,1.551
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1327840099.317
Skew:,18.418,Prob(JB):,0.0
Kurtosis:,521.286,Cond. No.,1.03e+16


In [130]:
model = sm.OLS(panel[["verified_emission"]], panel[["intercept","post_cutoff","running_variable"]+activity_type_control+country_control]).fit()
model.summary()

0,1,2,3
Dep. Variable:,verified_emission,R-squared:,0.031
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,55.54
Date:,"Wed, 08 Apr 2020",Prob (F-statistic):,0.0
Time:,21:10:30,Log-Likelihood:,-1783800.0
No. Observations:,118040,AIC:,3568000.0
Df Residuals:,117971,BIC:,3568000.0
Df Model:,68,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,1.974e+05,2.6e+04,7.579,0.000,1.46e+05,2.48e+05
post_cutoff,1.717e+04,8897.025,1.930,0.054,-269.771,3.46e+04
running_variable,-6135.6908,1130.020,-5.430,0.000,-8350.511,-3920.871
dmac_2,1.142e+06,9.89e+04,11.547,0.000,9.48e+05,1.34e+06
dmac_4,-1.199e+05,2.67e+05,-0.448,0.654,-6.44e+05,4.04e+05
dmac_5,2.169e+05,6.91e+04,3.138,0.002,8.14e+04,3.52e+05
dmac_6,4.371e+04,4.69e+04,0.931,0.352,-4.83e+04,1.36e+05
dmac_7,-1.089e+05,4.91e+04,-2.217,0.027,-2.05e+05,-1.26e+04
dmac_8,-1.877e+05,3.09e+04,-6.066,0.000,-2.48e+05,-1.27e+05

0,1,2,3
Omnibus:,249043.855,Durbin-Watson:,1.565
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1336398378.047
Skew:,18.41,Prob(JB):,0.0
Kurtosis:,522.964,Cond. No.,4670.0
