In [9]:
import os
import pandas as pd
import openpyxl
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from statsmodels.iolib.table import SimpleTable
from statsmodels.tsa.stattools import adfuller

# Resolve the parent of the working directory; the input workbook is read from there
dir_path = os.path.abspath('../')
dat = os.path.join(dir_path, 'control_variables.xlsx')

# Folder named "outputs" under that same parent directory
output_path = os.path.abspath('../outputs')

In [10]:
# Load the "data" sheet of the input workbook, using its first column as the
# row index. read_excel already returns a DataFrame when a single sheet name
# is given, so the result is used directly without re-wrapping it.
df = pd.read_excel(dat, index_col=0, sheet_name="data")

# Disabled step, kept for reference: forward-fill missing index labels.
# df.index = pd.Series(df.index).fillna(method='ffill')

In [11]:
# Pull out the four cumulative series once so each aggregate below reads clearly.
dist_cum = df['distribution cumulative']
nrgen_cum = df['nrgen cumulative']
policy_cum = df['policy cumulative']
rgen_cum = df['rgen cumulative']

# Aggregated columns used as regressors in the models below.
# NOTE: the key 'energy infrastrature' (sic) is read back with this exact
# spelling by a later cell, so it must not be corrected here in isolation.
df['energy aid'] = dist_cum + nrgen_cum + policy_cum + rgen_cum
df['energy infrastrature'] = dist_cum + nrgen_cum + rgen_cum
df['energy generation'] = nrgen_cum + rgen_cum

# Disabled experiment: summing columns selected by position instead of by name.
# df['test'] = df.iloc[:,[16,20]].sum().values
# https://stackoverflow.com/questions/68119362/sum-values-from-dataframe-with-iloc

In [12]:
# Accumulators for the three model variants (suffix 0, 1, 2). The regression
# loops below append one entry per country to each of these lists.

# Fitted coefficients
coeffients0, coeffients1, coeffients2 = [], [], []

# Column headers used when the coefficients are tabulated
headers0 = ["Constant", "Energy aid"]
headers1 = ["Constant", "Energy infrastructure", "Energy Policy"]
headers2 = ["Constant", "Energy Distribution", "Energy generation", "Energy Policy"]

# Country labels, in the order the regressions were run
countries0, countries1, countries2 = [], [], []

# Coefficient p-values
significance0, significance1, significance2 = [], [], []

# R-squared and adjusted R-squared
rsq0, rsq1, rsq2 = [], [], []
rsq_adj0, rsq_adj1, rsq_adj2 = [], [], []

Model 3 modification - 0 - elec ~ total energy aid

In [13]:
# Empty this model's accumulators in place first, so that re-running the cell
# does not append a second copy of every country's results.
for _results in (coeffients0, rsq0, rsq_adj0, significance0, countries0):
    _results.clear()

# Fit one OLS regression per country: dElec on a constant and 'energy aid'.
for name, group in df.groupby('country'):

    y = group['dElec']

    # Single regressor plus an intercept column
    X0 = sm.add_constant(group[['energy aid']])

    reg = sm.OLS(y, X0).fit()

    # Record this country's results; the lists stay aligned by position
    countries0.append(name)
    coeffients0.append(reg.params)
    significance0.append(reg.pvalues)
    rsq0.append(reg.rsquared)
    rsq_adj0.append(reg.rsquared_adj)

Model 3 modification - 1 - elec ~ energy policy + energy infrastructure

In [14]:
# Empty this model's accumulators in place first, so that re-running the cell
# does not append a second copy of every country's results.
for _results in (coeffients1, rsq1, rsq_adj1, significance1, countries1):
    _results.clear()

# Fit one OLS regression per country: dElec on a constant, the aggregated
# 'energy infrastrature' column and 'policy cumulative'.
for name, group in df.groupby('country'):

    y = group['dElec']

    # Two regressors plus an intercept column
    X1 = sm.add_constant(group[['energy infrastrature',
                                'policy cumulative']])

    reg1 = sm.OLS(y, X1).fit()

    # Record this country's results; the lists stay aligned by position
    countries1.append(name)
    coeffients1.append(reg1.params)
    significance1.append(reg1.pvalues)
    rsq1.append(reg1.rsquared)
    rsq_adj1.append(reg1.rsquared_adj)

Model 3 modification - 2 - elec ~ energy policy + generation + energy distribution

In [15]:
# Empty this model's accumulators in place first, so that re-running the cell
# does not append a second copy of every country's results.
for _results in (coeffients2, rsq2, rsq_adj2, significance2, countries2):
    _results.clear()

# Fit one OLS regression per country: dElec on a constant,
# 'distribution cumulative', 'energy generation' and 'policy cumulative'.
for name, group in df.groupby('country'):

    y = group['dElec']

    # Three regressors plus an intercept column
    X2 = sm.add_constant(group[['distribution cumulative',
                                'energy generation',
                                'policy cumulative']])

    reg2 = sm.OLS(y, X2).fit()

    # Record this country's results; the lists stay aligned by position
    countries2.append(name)
    coeffients2.append(reg2.params)
    significance2.append(reg2.pvalues)
    rsq2.append(reg2.rsquared)
    rsq_adj2.append(reg2.rsquared_adj)

In [16]:
output = os.path.join(output_path, 'model3_modi_output.xlsx')

# One row per entry collected by the regression loops (i.e. per country),
# built from the p-value Series of each fit.
p0_df = pd.DataFrame(significance0)
p1_df = pd.DataFrame(significance1)
p2_df = pd.DataFrame(significance2)

# Append to the workbook when it already exists (a sheet whose name is taken
# is written under a new name); otherwise create the workbook from scratch.
# `if_sheet_exists` is only accepted in append mode, hence the two variants.
os.makedirs(output_path, exist_ok=True)
if os.path.exists(output):
    writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'new'}
else:
    writer_kwargs = {'mode': 'w'}

# In append mode ExcelWriter opens the existing workbook itself, so the book
# is not loaded and assigned by hand: assigning `writer.book` is redundant and
# is rejected by newer pandas releases.
with pd.ExcelWriter(output, engine='openpyxl', **writer_kwargs) as writer:
    p0_df.to_excel(writer, sheet_name='pvalue-0')
    p1_df.to_excel(writer, sheet_name='pvalue-1')
    p2_df.to_excel(writer, sheet_name='pvalue-2')

In [17]:
# Arrange the coefficients of each model in a table: one row per country
# (row stubs) and one column per header.
# NOTE(review): only two data formats are supplied and the second is "%d";
# confirm this is what is wanted for the coefficient columns.
tbl0 = SimpleTable(coeffients0, headers0, countries0,
                  data_fmts=["%3.2f","%d"])
tbl1 = SimpleTable(coeffients1, headers1, countries1,
                  data_fmts=["%3.2f","%d"])
tbl2 = SimpleTable(coeffients2, headers2, countries2,
                  data_fmts=["%3.2f","%d"])

# Turn each table into a dataframe; the first column holds the country stub
tbl0_df = pd.DataFrame(tbl0, columns=["Country", "Constant", "Energy aid"])
tbl1_df = pd.DataFrame(tbl1, columns=["Country", "Constant", "Energy infrastructure", "energy policy"])
tbl2_df = pd.DataFrame(tbl2, columns=["Country", "Constant", "Energy distribution", "energy generation", "energy policy"])

# Turn the r-squared and adjusted r-squared lists into single-column dataframes
rsq0_df = pd.DataFrame(rsq0, columns=['r-squared-0'])
rsq0_adj_df = pd.DataFrame(rsq_adj0, columns=['r-squared adjusted-0'])
rsq1_df = pd.DataFrame(rsq1, columns=['r-squared-1'])
rsq1_adj_df = pd.DataFrame(rsq_adj1, columns=['r-squared adjusted-1'])
rsq2_df = pd.DataFrame(rsq2, columns=['r-squared-2'])
rsq2_adj_df = pd.DataFrame(rsq_adj2, columns=['r-squared adjusted-2'])

# Place r-squared and adjusted r-squared side by side for each model; both
# frames carry the same default positional index, so rows line up one to one
rsq0_dat = pd.concat([rsq0_df, rsq0_adj_df], axis=1)
rsq1_dat = pd.concat([rsq1_df, rsq1_adj_df], axis=1)
rsq2_dat = pd.concat([rsq2_df, rsq2_adj_df], axis=1)

# Append to the workbook when it already exists (a sheet whose name is taken
# is written under a new name); otherwise create the workbook from scratch.
# `if_sheet_exists` is only accepted in append mode, hence the two variants.
os.makedirs(os.path.dirname(output), exist_ok=True)
if os.path.exists(output):
    writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'new'}
else:
    writer_kwargs = {'mode': 'w'}

# In append mode ExcelWriter opens the existing workbook itself, so the book
# is not loaded and assigned by hand: assigning `writer.book` is redundant and
# is rejected by newer pandas releases.
with pd.ExcelWriter(output, engine='openpyxl', **writer_kwargs) as writer:
    tbl0_df.to_excel(writer, sheet_name='coef-0')
    tbl1_df.to_excel(writer, sheet_name='coef-1')
    tbl2_df.to_excel(writer, sheet_name='coef-2')
    rsq0_dat.to_excel(writer, sheet_name='rsqua-0')
    rsq1_dat.to_excel(writer, sheet_name='rsqua-1')
    rsq2_dat.to_excel(writer, sheet_name='rsqua-2')


Attributes and methods available on the fitted regression results object `reg`:

['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']