In [35]:
import os
import pandas as pd
import openpyxl
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from statsmodels.iolib.table import SimpleTable

# Set directory path to current directory
dir_path = os.path.abspath('')
gdp_path = os.path.abspath('../control-variables')

dat = os.path.join(dir_path, 'results_v1.xlsx')
gdp = os.path.join(gdp_path, 'inde-variable-data.xlsx')

In [36]:
df = pd.read_excel(dat, index_col=0, sheet_name="cum_gdp_elec")
df = pd.DataFrame(df)

# df.index = pd.Series(df.index).fillna(method='ffill')

In [37]:
coeffients = []

headers = ["Constant", "Energy Distribution", "Energy generation, nonrenewable", "Energy Policy", "Energy generation, renewable"]

countries = []

significance = []

rsq = []
rsq_adj = []

In [38]:
for name, group in df.groupby('country'):
    # print(name)
    
    y=group['dElec']
    
    print(y)

    # Model 1: The multiple regression model describes the response as a weighted sum of the predictors:(Elec = beta_0 + beta_1*ED + beta_2*EG,nr ~)
    X0 = group[['distribution cumulative',
                'nrgen cumulative',
                'policy cumulative',
                'rgen cumulative'               
                ]]
    
    X0 = sm.add_constant(X0)
    
    reg = sm.OLS(y, X0).fit()
    
    # print(reg.summary())
    
    coeffients.append(reg.params)
    rsq.append(reg.rsquared)
    rsq_adj.append(reg.rsquared_adj)
    # # # coeffients.extend([reg.params.values.flatten(), (100*reg.rsquared).astype(int), (100*reg.rsquared_adj).astype(int)])
    
    significance.append(reg.pvalues)

    countries.append(name)

country
Angola    1.000000
Angola    1.041001
Angola    1.081599
Angola    1.121754
Angola    1.161564
Angola    1.421323
Angola    1.458846
Angola    1.281244
Angola    1.322010
Angola    1.311407
Angola    1.406201
Angola    1.449711
Angola    1.212862
Angola    1.591882
Angola    1.584202
Angola    1.629845
Angola    1.716579
Angola    1.730994
Name: dElec, dtype: float64
country
Benin    1.000000
Benin    1.040599
Benin    1.080745
Benin    1.120394
Benin    1.188595
Benin    1.198784
Benin    1.238049
Benin    1.277716
Benin    1.456988
Benin    1.572013
Benin    1.635916
Benin    1.476425
Benin    1.452728
Benin    1.261871
Benin    1.579839
Benin    1.469769
Benin    1.671642
Benin    1.717658
Name: dElec, dtype: float64
country
Botswana    1.000000
Botswana    1.069255
Botswana    1.138175
Botswana    1.206727
Botswana    1.274990
Botswana    1.343157
Botswana    1.404288
Botswana    1.368313
Botswana    1.634057
Botswana    1.680097
Botswana    1.763228
Botswana    1.826859
Bo

In [39]:
output = os.path.join(dir_path, 'model3_output.xlsx')
# print(significance)

p_df = pd.DataFrame(significance)

# Write the data into an excel sheet
with pd.ExcelWriter(output,w
                    engine='openpyxl',
                    mode='a',
                    if_sheeet_exists="overlay",
) as writer:
    writer.book = openpyxl.load_workbook(output)
    p_df.to_excel(writer, sheet_name='pvalue-eleccum')

  # Remove the CWD from sys.path while we load stuff.


In [40]:
# Putting the coefficients into a table
tbl = SimpleTable(coeffients, headers, countries,
                  data_fmts=["%3.2f","%d"])

# Turning the table into a dataframe
tbl_df = pd.DataFrame(tbl, columns=["Country", "Constant", "Energy Distribution", "Energy generation, nonrenewable", "Energy Policy", "Energy generation, renewable"])

# Write grouped aid data to excel file

# Write the data into an excel sheet
with pd.ExcelWriter(output,
                    engine='openpyxl',
                    mode='a',
                    if_sheeet_exists="overlay",
) as writer:
    writer.book = openpyxl.load_workbook(output)
    tbl_df.to_excel(writer, sheet_name='coef-eleccum')


  


In [41]:
# Turning array with r-squared and r-sqaured adjusted values into dataframes
rsq_df = pd.DataFrame(rsq, columns=['r-squared-eleccum'])
rsq_adj_df = pd.DataFrame(rsq_adj, columns=['r-squared adjusted-eleccum'])

# Turning the data into columns
rsq_df = rsq_df.transpose()
rsq_adj_df = rsq_adj_df.transpose()

# Concating two data together (Got overlapped because didn't reset index)
rsq_dat = pd.concat([rsq_df, rsq_adj_df], axis=1)

# Write the data into an excel sheet
with pd.ExcelWriter(output,
                    engine='openpyxl',
                    mode='a',
                    if_sheeet_exists="overlay",
) as writer:
    writer.book = openpyxl.load_workbook(output)
    rsq_dat.to_excel(writer, sheet_name='rsqua-eleccum')

  app.launch_new_instance()


reg operation:

['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']