In [1]:
import os
import pandas as pd
import openpyxl
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from statsmodels.iolib.table import SimpleTable
from statsmodels.tsa.stattools import adfuller

# Base directory is the parent of the current working directory; the
# 'outputs' folder sits directly beneath it.
dir_path = os.path.abspath(os.pardir)
output_path = os.path.join(dir_path, 'outputs')

# Full path of the control-variables workbook read by the next cell.
dat = os.path.join(dir_path, 'control_variables.xlsx')

In [2]:
# Load the "data" sheet of the control-variables workbook, using the first
# spreadsheet column as the index. read_excel already returns a DataFrame for
# a single named sheet, so no extra pd.DataFrame(...) wrapping is needed.
df = pd.read_excel(dat, index_col=0, sheet_name="data")

In [4]:
def adf_test(timeseries):
    """Run an Augmented Dickey-Fuller test, print a summary and return it.

    The observations are passed unchanged to ``adfuller`` with
    ``autolag="AIC"``.

    Parameters
    ----------
    timeseries : array-like
        Series of observations to test.

    Returns
    -------
    pandas.Series
        The summary that is printed: "Test Statistic", "p-value",
        "#Lags Used", "Number of Observations Used", followed by one
        "Critical Value (<level>)" entry per level reported by ``adfuller``.
    """
    print("Results of Dickey-Fuller Test:")
    dftest = adfuller(timeseries, autolag="AIC")
    # The first four elements of the result tuple are the scalar statistics.
    dfoutput = pd.Series(
        dftest[0:4],
        index=[
            "Test Statistic",
            "p-value",
            "#Lags Used",
            "Number of Observations Used",
        ],
    )
    # Element 4 maps each significance level to its critical value.
    for key, value in dftest[4].items():
        dfoutput[f"Critical Value ({key})"] = value
    print(dfoutput)
    # Returning the summary lets callers collect results per group instead of
    # having to read them off the printed output.
    return dfoutput

In [10]:
# Report the highest electrification rate observed for each country.
for name, group in df.groupby('country'):
    # Use max() rather than the last row: the final observation is only the
    # highest when the series never declines.
    highest_elec_rate = group['elec_rate'].max()
    print(f'For country {name}, the highest electrification rate is: \n{highest_elec_rate}')

For country Angola, the highest electrification rate is: 
45.6703147888184
For country Benin, the highest electrification rate is: 
40.3187408447266
For country Botswana, the highest electrification rate is: 
70.1831817626953
For country Burkina Faso, the highest electrification rate is: 
18.3791522979736
For country Burundi, the highest electrification rate is: 
11.0647974014282
For country Cabo Verde, the highest electrification rate is: 
95.5335388183594
For country Cameroon, the highest electrification rate is: 
63.4523124694824
For country Central African Republic, the highest electrification rate is: 
14.3
For country Chad, the highest electrification rate is: 
8.4
For country Comoros, the highest electrification rate is: 
84.0482406616211
For country Cote d'Ivoire, the highest electrification rate is: 
68.5501098632813
For country Eswatini, the highest electrification rate is: 
77.1696395874023
For country Ethiopia, the highest electrification rate is: 
48.2716217041016
For coun

In [5]:
# Augmented Dickey-Fuller test on the electrification rate, one country at a
# time. The tested column is the level 'elec_rate', so the printed label names
# the rate itself rather than its changes.
for name, group in df.groupby('country'):
    print(f'This is the ADf test results for {name}\'s Electrification rate')
    adf_test(group['elec_rate'])

This is the ADf test results for Angola's changes in Electrifiction date
Results of Dickey-Fuller Test:
Test Statistic                 22.288260
p-value                         1.000000
#Lags Used                      7.000000
Number of Observations Used    10.000000
Critical Value (1%)            -4.331573
Critical Value (5%)            -3.232950
Critical Value (10%)           -2.748700
dtype: float64
This is the ADf test results for Benin's changes in Electrifiction date
Results of Dickey-Fuller Test:
Test Statistic                -5.697638e+00
p-value                        7.807249e-07
#Lags Used                     7.000000e+00
Number of Observations Used    1.000000e+01
Critical Value (1%)           -4.331573e+00
Critical Value (5%)           -3.232950e+00
Critical Value (10%)          -2.748700e+00
dtype: float64
This is the ADf test results for Botswana's changes in Electrifiction date
Results of Dickey-Fuller Test:
Test Statistic                -22.856687
p-value              

In [11]:
# Augmented Dickey-Fuller test on the 'dCPI' column, one country at a time.
for country, country_rows in df.groupby('country'):
    cpi_changes = country_rows['dCPI']
    print(f"This is the ADf test results for {country}'s changes in CPI")
    adf_test(cpi_changes)

This is the ADf test results for Angola's changes in CPI
Results of Dickey-Fuller Test:
Test Statistic                  5.944708
p-value                         1.000000
#Lags Used                      7.000000
Number of Observations Used    10.000000
Critical Value (1%)            -4.331573
Critical Value (5%)            -3.232950
Critical Value (10%)           -2.748700
dtype: float64
This is the ADf test results for Benin's changes in CPI
Results of Dickey-Fuller Test:
Test Statistic                 -5.103364
p-value                         0.000014
#Lags Used                      7.000000
Number of Observations Used    10.000000
Critical Value (1%)            -4.331573
Critical Value (5%)            -3.232950
Critical Value (10%)           -2.748700
dtype: float64
This is the ADf test results for Botswana's changes in CPI
Results of Dickey-Fuller Test:
Test Statistic                 -5.429704
p-value                         0.000003
#Lags Used                      7.000000
Number o

In [12]:
# Augmented Dickey-Fuller test on the 'distribution cumulative' column, one
# country at a time.
for country, country_rows in df.groupby('country'):
    distribution_aid = country_rows['distribution cumulative']
    print(f"This is the ADf test results for {country}'s cumulative value for aid in energy distribution")
    adf_test(distribution_aid)

This is the ADf test results for Angola's cumulative value for aid in energy distribution
Results of Dickey-Fuller Test:
Test Statistic                 -0.649014
p-value                         0.859518
#Lags Used                      0.000000
Number of Observations Used    17.000000
Critical Value (1%)            -3.889266
Critical Value (5%)            -3.054358
Critical Value (10%)           -2.666984
dtype: float64
This is the ADf test results for Benin's cumulative value for aid in energy distribution
Results of Dickey-Fuller Test:
Test Statistic                 -3.199437
p-value                         0.020017
#Lags Used                      7.000000
Number of Observations Used    10.000000
Critical Value (1%)            -4.331573
Critical Value (5%)            -3.232950
Critical Value (10%)           -2.748700
dtype: float64
This is the ADf test results for Botswana's cumulative value for aid in energy distribution
Results of Dickey-Fuller Test:
Test Statistic                -3

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Test Statistic                -42.579953
p-value                         0.000000
#Lags Used                      0.000000
Number of Observations Used    17.000000
Critical Value (1%)            -3.889266
Critical Value (5%)            -3.054358
Critical Value (10%)           -2.666984
dtype: float64
This is the ADf test results for Cote d'Ivoire's cumulative value for aid in energy distribution
Results of Dickey-Fuller Test:
Test Statistic                 -1.663052
p-value                         0.450243
#Lags Used                      6.000000
Number of Observations Used    11.000000
Critical Value (1%)            -4.223238
Critical Value (5%)            -3.189369
Critical Value (10%)           -2.729839
dtype: float64
This is the ADf test results for Eswatini's cumulative value for aid in energy distribution
Results of Dickey-Fuller Test:
Test Statistic                 19.763340
p-value                         1.000000
#Lags Used                      2.000000
Number of Observations

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


In [25]:
# Column labels for the coefficient table: the intercept followed by the
# eight regressors, in the order they enter the model.
headers = [
    "Constant",
    "Energy Distribution",
    "Energy generation, nonrenewable",
    "Energy Policy",
    "Energy generation, renewable",
    "GDP",
    "CPI",
    "Governance",
    "Trade openness",
]

# Per-country accumulators; the regression loop appends one entry to each
# list for every country it fits.
countries = []
coeffients = []      # fitted parameters
significance = []    # p-values
rsq = []             # R-squared
rsq_adj = []         # adjusted R-squared

In [26]:
# Fit one OLS model per country and collect its estimates.
# NOTE: the accumulator lists are created in a separate cell; re-running this
# cell without re-running that one appends a second copy of every country.
for name, group in df.groupby('country'):
    # Dependent variable.
    y = group['dElec']

    # Multiple regression: the response is a weighted sum of the predictors,
    # dElec = beta_0 + beta_1*distribution + beta_2*nrgen + beta_3*policy
    #         + beta_4*rgen + beta_5*dGDP + beta_6*dCPI + beta_7*governance
    #         + beta_8*trade
    X0 = group[['distribution cumulative',
                'nrgen cumulative',
                'policy cumulative',
                'rgen cumulative',
                'dGDP',
                'dCPI',
                'governance',
                'trade'
                ]]

    # has_constant='add' always inserts the intercept column. The default
    # ('skip') omits it whenever a regressor is constant within the group,
    # which would leave that country with one parameter fewer and misalign
    # its row against the nine table headers.
    X0 = sm.add_constant(X0, has_constant='add')

    reg = sm.OLS(y, X0).fit()

    coeffients.append(reg.params)
    rsq.append(reg.rsquared)
    rsq_adj.append(reg.rsquared_adj)

    significance.append(reg.pvalues)

    countries.append(name)
    
    

In [27]:
output = os.path.join(output_path, 'model4_output.xlsx')

# One row per fitted model, one column per model term.
p_df = pd.DataFrame(significance)

# Append to the workbook when it already exists (a clashing sheet name gets a
# new, renamed sheet); otherwise create it. `if_sheet_exists` is only accepted
# in append mode, so it is added only on that path.
writer_options = {'engine': 'openpyxl', 'mode': 'w'}
if os.path.exists(output):
    writer_options.update(mode='a', if_sheet_exists='new')

# In append mode ExcelWriter loads the existing workbook itself, so `book` is
# not assigned by hand (it is a read-only property in recent pandas releases).
with pd.ExcelWriter(output, **writer_options) as writer:
    p_df.to_excel(writer, sheet_name='pvalue-eleccum')

In [28]:
# Model term names in the same order as `headers`: the intercept added by
# sm.add_constant ('const') followed by the eight regressors.
param_names = ['const',
               'distribution cumulative',
               'nrgen cumulative',
               'policy cumulative',
               'rgen cumulative',
               'dGDP',
               'dCPI',
               'governance',
               'trade']

# Build the coefficient table directly from the fitted parameter Series.
# Values stay numeric and are matched to columns by term name, so a model
# that lacks a term gets NaN in that column instead of shifted values.
tbl_df = (
    pd.DataFrame(coeffients)
    .reindex(columns=param_names)
    .reset_index(drop=True)
)
tbl_df.columns = headers
tbl_df.insert(0, "Country", countries)

# Append the table to the existing workbook; a clashing sheet name gets a new,
# renamed sheet. ExcelWriter loads the workbook itself in append mode, so
# `book` is not assigned by hand (read-only in recent pandas releases).
with pd.ExcelWriter(output,
                    engine='openpyxl',
                    mode='a',
                    if_sheet_exists="new",
) as writer:
    tbl_df.to_excel(writer, sheet_name='coef-eleccum')


In [29]:
# Turn the R-squared and adjusted R-squared lists into single-column frames.
rsq_df = pd.DataFrame(rsq, columns=['r-squared-eleccum'])
rsq_adj_df = pd.DataFrame(rsq_adj, columns=['r-squared adjusted-eleccum'])

# Both frames carry the default 0..n-1 index, so concatenating along the
# column axis lines the two measures up row by row.
rsq_dat = pd.concat([rsq_df, rsq_adj_df], axis=1)

# Append the table to the existing workbook; a clashing sheet name gets a new,
# renamed sheet. ExcelWriter loads the workbook itself in append mode, so
# `book` is not assigned by hand (read-only in recent pandas releases).
with pd.ExcelWriter(output,
                    engine='openpyxl',
                    mode='a',
                    if_sheet_exists="new",
) as writer:
    rsq_dat.to_excel(writer, sheet_name='rsqua-eleccum')

Attributes and methods available on the fitted regression results object `reg` (as listed by `dir(reg)`):

['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']