# Comparing GEP results
For each country, results are defined by the geographic and secnario summary files. In this script, results are downloaded, extracted, and analyzed to see how the scenarios affect results

In [1]:
import os, sys, importlib, shutil, zipfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import GEP
from GEP import gepResults

importlib.reload(GEP)

pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
importlib.reload(GEP)
GEP.get_assumptions("1_0_0_1_0_0")

['Population growth: High',
 'Electricity demand target: Top-down low',
 'Intermediate investment: No connections cap',
 'Grid generation cost: High',
 'PV cost: Estimated',
 'Rollout: Least-cost nationwide']

In [3]:
resultsFolder = "/media/gost/DATA1/GEP/Summaries/"
shapesFolder = "/media/gost/DATA1/GEP/Clusters"
outFolder = "/media/gost/DATA1/GEP/Summaries_Summary"
country_summaries_folder = "/media/gost/DATA1/GEP/Country_Scenarios"
chart_folder = "/media/gost/DATA1/GEP/CHARTS/Africa"

for x in [country_summaries_folder]:
    if not os.path.exists(country_summaries_folder):
        os.makedirs(country_summaries_folder)

In [4]:
# Functions used throughout the code to re-classify variables
def get_continent(x):
    ''' classify power pools - used in pandas apply function
    '''
    EAPP = [f'{x}-1' for x in ['bi','dj','et','ke','ly','rw','sd','ss','ug']]
    WAPP = [f'{x}-1' for x in ['bf','bj','ci','gh','gm','gn','gw','lr','ml','ng','sl','sn','tg']]
    SAPP = [f'{x}-1' for x in ['ao','bw','ls','mw','mz','na','sz','tz','za','zm','zw']]
    AFR =  [f'{x}-1' for x in ['cd','cg','cf','cm','er','ga','gq','km','mg','mr','ne','so','st','td']]
    if x in EAPP:
        return('AFR')
    if x in WAPP:
        return('AFR')
    if x in SAPP:
        return('AFR')
    if x in AFR:
        return('AFR')
    return("other")

def get_pp(x):
    ''' classify power pools - used in pandas apply function
    '''
    EAPP = [f'{x}-1' for x in ['bi','dj','et','ke','ly','rw','sd','ss','ug']]
    WAPP = [f'{x}-1' for x in ['bf','bj','ci','gh','gm','gn','gw','lr','ml','ng','sl','sn','tg']]
    SAPP = [f'{x}-1' for x in ['ao','bw','ls','mw','mz','na','sz','tz','za','zm','zw']]
    AFR = [f'{x}-1' for x in ['cd','cg','cf','cm','er','ga','gq','km','mg','mr','ne','so','st','td']]
    if x in EAPP:
        return('EAPP')
    if x in WAPP:
        return('WAPP')
    if x in SAPP:
        return('SAPP')
    if x in AFR:
        return('AFR')
    return("other")

def get_tech(x):
    ''' Convert TECH column in GRID, OFF-GRID (SA and MG)
    '''
    x_spl = x.split("_")
    if x_spl[-1] == "Grid":
        return("GRID")
    else:
        return(x_spl[-2])
    


In [6]:
# Create summary output table containing all the summary files stacked together
resultsFolder = "/media/gost/DATA1/GEP/Summaries/"
all_results = []
for root, dirs, files in os.walk(resultsFolder):
    for f in files:
        if "summary.csv" in f:
            all_results.append(os.path.join(root, f))

In [34]:
broken_res.keys()

dict_keys(['ao-1-0_0_0_0_0_0'])

In [33]:
try:
    del(final)
except:
    pass
all_res = {}
broken_res = {}
for x in all_results:
    xx = GEP.gep_summary(x)
    tempD = xx.get_data()
    if tempD.shape[1] == 5:
        all_res[x.split("/")[-1][:16]] = tempD
        try:
            final = final.append(tempD)
        except:
            final = tempD
    else:
        broken_res[x.split("/")[-1][:16]] = tempD
        
final.columns = ['Attribute','2025','2030','Country','Scenario']
final['Attr'] = final['Attribute'].apply(lambda x: x[:1])
final = final.loc[final['Attr'] != 'M',]
final['Tech'] = final['Attribute'].apply(lambda x: get_tech(x))
final['PP'] = final['Country'].apply(lambda x: get_pp(x))

In [35]:
final['2030Sum'] = final['2025'] + final['2030']
africa = final.loc[final['PP'] != "other"]

In [36]:
final['Country'].unique()

array(['gw-1', 'ao-1', 'bd-1', 'bf-1', 'bi-1', 'bj-1', 'bw-1', 'cd-1',
       'cf-1', 'cg-1', 'ci-1', 'cm-1', 'dj-1', 'er-1', 'et-1', 'fm-1',
       'ga-1', 'gh-1', 'gm-1', 'gn-1', 'gq-1', 'hn-1', 'ht-1', 'ke-1',
       'kh-1', 'km-1', 'lr-1', 'ls-1', 'mg-1', 'ml-1', 'mm-1', 'mn-1',
       'mr-1', 'mw-1', 'mz-1', 'na-1', 'ne-1', 'ng-1', 'ni-1', 'pg-1',
       'pk-1', 'rw-1', 'sb-1', 'sd-1', 'sl-1', 'sn-1', 'so-1', 'ss-1',
       'st-1', 'sz-1', 'td-1', 'tg-1', 'tl-1', 'tz-1', 'ug-1', 'vu-1',
       'za-1', 'zm-1', 'zw-1'], dtype=object)

In [42]:
africa['Country'].unique()
#africa['Scenario'].unique()

array(['gw-1', 'ao-1', 'bf-1', 'bi-1', 'bj-1', 'bw-1', 'cd-1', 'cf-1',
       'cg-1', 'ci-1', 'cm-1', 'dj-1', 'er-1', 'et-1', 'ga-1', 'gh-1',
       'gm-1', 'gn-1', 'gq-1', 'ke-1', 'km-1', 'lr-1', 'ls-1', 'mg-1',
       'ml-1', 'mr-1', 'mw-1', 'mz-1', 'na-1', 'ne-1', 'ng-1', 'rw-1',
       'sd-1', 'sl-1', 'sn-1', 'so-1', 'ss-1', 'st-1', 'sz-1', 'td-1',
       'tg-1', 'tz-1', 'ug-1', 'za-1', 'zm-1', 'zw-1'], dtype=object)

# Calculate wide-averages
Summarize scenarios across Africa and across the entire datasets. We want to calculate the following

1. Total new connections between start and 2030 for all technologies
a. Africa only
b. Global


In [43]:
res_folder = "/media/gost/DATA1/GEP/Analytic_Results"
if not os.path.exists(res_folder):
    os.makedirs(res_folder)

In [None]:
### Global summary of percentage of new connections that are MG
attr='2'
tech='SA'
### Summarize across technologies
ag = final.loc[(final['Attr'] == attr) & (final['Tech'] == tech)].groupby(['Scenario','Attr','Tech'])
summed_ag = ag.sum()
summed_ag = summed_ag.loc[summed_ag['2030Sum'] > 0]
max_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=False).iloc[0,]
min_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=True ).iloc[0,]
pd.DataFrame([max_mg_new_connections,min_mg_new_connections])

In [46]:
### africa summary of percentage of new connections that are MG
attr='2'
tech='MG'
for attr in africa['Attr'].unique():
    for tech in africa['Tech'].unique():
        ### Summarize across technologies
        ag = africa.loc[(africa['Attr'] == attr) & (africa['Tech'] == tech)].groupby(['Scenario','Attr','Tech'])
        summed_ag = ag.sum()
        summed_ag = summed_ag.loc[summed_ag['2030Sum'] > 0]
        summed_ag.to_csv(os.path.join(res_folder, f"AFRICA_{attr}_{tech}.csv"))
'''
max_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=False).iloc[0,]
min_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=True ).iloc[0,]
pd.DataFrame([max_mg_new_connections,min_mg_new_connections])
'''

"\nmax_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=False).iloc[0,]\nmin_mg_new_connections = summed_ag.sort_values(['2030Sum'], ascending=True ).iloc[0,]\npd.DataFrame([max_mg_new_connections,min_mg_new_connections])\n"

In [49]:
### africa compare all technologies 
ag = africa.groupby(['Scenario','Attr','Tech'])
summed_ag = ag.sum()
summed_ag = summed_ag.loc[summed_ag['2030Sum'] > 0]
summed_ag.to_csv(os.path.join(res_folder, f"AFRICA_compare_everything.csv"))

# Compare 2025 and 2030 new connections

In [None]:
### Global summary of percentage of new connections that are MG
attr='2'
tech='MG'
### Summarize across technologies
ag = final.loc[(final['Attr'] == attr) & (final['Tech'] == tech)].groupby(['Country','Scenario','Attr','Tech'])
for name, group in ag:
    if group['2025'].sum() > group['2030'].sum() and name[1] == "1_1_0_1_1_0":
        break

# Create plots

In [None]:
importlib.reload(GEP)
# Create plots describing all costs for all separate scenarios
attributes = joined_data.index.get_level_values(0)
scenarios = joined_data.index.get_level_values(1)
for selected_attribute in ['1','2','3','4']:
    for selected_scenario in scenarios:
        if selected_scenario[-1] == '0':
            GEP.extract_plot(joined_data, selected_attribute, selected_scenario, "Africa", chart_folder)
            GEP.extract_plot(wapp_data,   selected_attribute, selected_scenario, "WAPP", chart_folder)
            GEP.extract_plot(eapp_data,   selected_attribute, selected_scenario, "EAPP", chart_folder)
            GEP.extract_plot(sapp_data,   selected_attribute, selected_scenario, "SAPP", chart_folder)
            break

In [None]:
importlib.reload(GEP)
# Create boxplots of scenarios for selected countries
countries = ['ht-1']
for country in countries:        
    inD = final.loc[final['Country'] == country]
    inD = inD.loc[inD['Scenario'].apply(lambda x: int(x[-1]) == 0)]
    chart_folder = '/media/gost/DATA1/GEP/CHARTS/Box_Plot'
    for selected_attribute in ['1','2','3','4']:
        res = GEP.box_plot(inD, selected_attribute, f'{chart_folder}/boxPlot_{country}_{selected_attribute}.png')
        

In [None]:
wapp_data = pd.DataFrame(final[final['Country'].isin(WAPP)].groupby(['Attribute','Scenario']).sum())
wapp_data.reset_index(inplace=True)
eapp_data = pd.DataFrame(final[final['Country'].isin(EAPP)].groupby(['Attribute','Scenario']).sum())
eapp_data.reset_index(inplace=True)
sapp_data = pd.DataFrame(final[final['Country'].isin(SAPP)].groupby(['Attribute','Scenario']).sum())
sapp_data.reset_index(inplace=True)

for selected_attribute in ['1','2','3','4']:
    res = box_plot(wapp_data, selected_attribute, f'{chart_folder}/boxPlot_WAPP_{selected_attribute}.png')
    res = box_plot(eapp_data, selected_attribute, f'{chart_folder}/boxPlot_EAPP_{selected_attribute}.png')
    res = box_plot(sapp_data, selected_attribute, f'{chart_folder}/boxPlot_SAPP_{selected_attribute}.png')

# Summarize all scenarios for single country

In [50]:
country = "mz-1"

In [55]:
sel_data = final.loc[final['Country'] == country]
#remove scenarios from final lever
sel_data['GOOD'] = sel_data['Scenario'].apply(lambda x: x[-1])
sel_data = sel_data.loc[sel_data['GOOD'] == '0']
sel_grouped = sel_data.groupby(['Attr', 'Tech'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [74]:
def get_vals(x):
    return(
        x['2025'].min(),
        x['2030'].max(),
        x['2025'].min(),
        x['2030'].max()
    )

    
#results = pd.DataFrame(sel_grouped.apply(get_vals))

results = pd.DataFrame([sel_grouped['2025'].min(),
                      sel_grouped['2025'].max(),
                      sel_grouped['2030'].min(),
                      sel_grouped['2030'].max(),
                     ]).transpose()
results.reset_index(inplace=True)
results.columns = ['Attr','Tech','2025min','2025max','2030min','2030max']
results

Unnamed: 0,Attr,Tech,2025min,2025max,2030min,2030max
0,1,GRID,13458707.0,26043279.085,19099579.053,34826328.864
1,1,MG,0.0,8681982.301,0.0,2924008.864
2,1,SA,0.0,12806850.512,0.0,24122385.326
3,2,GRID,5215877.0,17800449.085,5640872.052,12433350.387
4,2,MG,0.0,8681982.301,0.0,2837062.246
5,2,SA,0.0,12806850.512,0.0,11627811.376
6,3,GRID,166035.753,1019536.085,192841.019,767450.044
7,3,MG,0.0,1084599.042,0.0,355240.869
8,3,SA,0.0,778716.558,0.0,927577.32
9,4,GRID,770824898.321,5116734078.12,477927700.683,2374228408.59


# Summarize scenarios across countries

In [None]:
### Generate national summary table - as simple as possible
scenario = "1_1_1_1_1_0"

attr = '2'
summaryData = final.loc[(final['Scenario'] == scenario) & (final['Attr'] == attr),]
#summaryData.loc[summaryData['2025'] != summaryData['2030'],]

summaryData['Attr'] = summaryData['Attribute'].apply(lambda x: x[:1])
summaryData = summaryData.loc[summaryData['Attr'] != 'M',]
summaryData['Tech'] = summaryData['Attribute'].apply(lambda x: get_tech(x))
summaryData['PP'] = summaryData['Country'].apply(lambda x: get_pp(x))
summaryData['Continent'] = summaryData['Country'].apply(lambda x: get_continent(x)) 

#summary_grouped = summaryData.groupby(['Country','Attr','Tech'])
#pd.DataFrame(summary_grouped.mean()).to_csv(f"{chart_folder}_country_breakdown_{scenario}.csv")

summary_grouped = summaryData.groupby(['Continent','Attr','Tech'])
#pd.DataFrame(summary_grouped.mean()).to_csv(f"{chart_folder}_PP_breakdown_{scenario}.csv")
summary_grouped.sum()

In [None]:
# Generate output table of Country with population (1) and investment (4) by grid and off grid
scenario = scenario #"0_0_0_0_0_0"
summaryData = final.loc[final['Scenario'] == scenario,]
summaryData['Attr'] = summaryData['Attribute'].apply(lambda x: x[:1])
summaryData = summaryData.loc[summaryData['Attr'] != 'M',]
summaryData['Tech'] = summaryData['Attribute'].apply(lambda x: get_tech(x))
summary_grouped = summaryData.groupby(['Country','Attr','Tech'])
pd.DataFrame(summary_grouped.mean()).to_csv(f"{chart_folder}_country_breakdown_GRID_OFFGRID.csv")


In [None]:
# For a single country and scenario, generate description of scenario 
# with population (1) and investment (4) by grid, MG, off grid
scenario = "0_1_0_0_0_0"
country = 'bi-1'
out_file = os.path.join(country_summaries_folder, f"{country}_{scenario}")

summaryData = final.loc[(final['Scenario'] == scenario) & (final['Country'] == country),]
summary_grouped = summaryData.groupby(['Attr','Tech'])
grouped_vals = pd.DataFrame(summary_grouped.mean())

#Print outputs to screen and or file
grid_pop = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '1') & 
                            (grouped_vals.index.get_level_values('Tech') == 'GRID'),'2030'].iloc[0]
off_grid_pop = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '1') & 
                            (grouped_vals.index.get_level_values('Tech') == 'SA'),'2030'].iloc[0]
mini_grid_pop = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '1') & 
                            (grouped_vals.index.get_level_values('Tech') == 'MG'),'2030'].iloc[0]

grid_i = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '4') & 
                            (grouped_vals.index.get_level_values('Tech') == 'GRID'),'2030Sum'].iloc[0]
off_grid_i = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '4') & 
                            (grouped_vals.index.get_level_values('Tech') == 'SA'),'2030Sum'].iloc[0]
mini_grid_i = grouped_vals.loc[(grouped_vals.index.get_level_values('Attr') == '4') & 
                            (grouped_vals.index.get_level_values('Tech') == 'MG'),'2030Sum'].iloc[0]

scenario_description = GEP.get_assumptions(scenario)

messages = []
messages.append(f"***Scenario {scenario}***")
messages.append("\n".join(scenario_description))
messages.append("\n")
messages.append(f'2030 Grid pop: \t {round(grid_pop)} \t Grid Investment {round(grid_i)}')
messages.append(f'2030 Offgrid pop: \t {round(off_grid_pop)} \t Grid Investment {round(off_grid_i)}')
messages.append(f'2030 Mini grid pop: \t {round(mini_grid_pop)} \t Grid Investment {round(mini_grid_i)}')

if out_file != '':
    with open(out_file, 'w') as output_file:
        for m in messages:
            output_file.write(m)
            output_file.write("\n")

for m in messages:
    print(m)

In [75]:
# Calculate the percentage of new connections that are GRID in each scenario
maxConnect = 0
minConnect = 1000
attr = '2' # Looking at number of grid connections
for scenario in final['Scenario'].unique():
    if scenario[-1] == '0':
        summaryData = final.loc[final['Scenario'] == scenario,]
        allConnect  = summaryData.loc[summaryData['Attr'] == attr,'2030Sum'].sum()
        gridConnect = summaryData.loc[summaryData['Attribute'] == '2.New_Connections_Grid', '2030Sum'].sum()
        gridTotal = gridConnect / allConnect
        if gridTotal > maxConnect:
            maxConnect = gridTotal
            maxScenario = scenario
        if gridTotal < minConnect:
            minConnect = gridTotal
            minScenario = scenario
        print(f'{scenario} - {gridTotal}')
print('***')
print(f'Maximum Grid: {maxScenario} - {maxConnect}')
print(f'Minimum Grid: {minScenario} - {minConnect}')

0_0_0_0_0_0 - 0.4207840565137332
0_0_0_0_1_0 - 0.44316012321334963
0_0_0_0_2_0 - 0.42054937519242763
0_0_0_1_0_0 - 0.42582272836639684
0_0_0_1_1_0 - 0.441839906940209
0_0_0_1_2_0 - 0.41715708868456985
0_0_1_0_0_0 - 0.4265105280699017
0_0_1_0_1_0 - 0.43808271590495773
0_0_1_0_2_0 - 0.42054937519242763
0_0_1_1_1_0 - 0.43718903650188073
0_0_1_1_2_0 - 0.41715708868456985
0_1_0_0_0_0 - 0.7327929260758483
0_1_0_0_1_0 - 0.7543031626452641
0_1_0_0_2_0 - 0.6929173329590144
0_1_0_1_0_0 - 0.723153873821864
0_1_0_1_1_0 - 0.7481496711617864
0_1_0_1_2_0 - 0.6782841730065392
0_1_1_0_0_0 - 0.5966016112218074
0_0_1_1_0_0 - 0.42560997244187054
0_1_1_1_2_0 - 0.5725589926555446
1_0_0_0_1_0 - 0.4583667012251782
1_1_0_1_0_0 - 0.7294235271084275
1_2_0_1_2_0 - 0.6114033744397896
0_1_1_0_1_0 - 0.6116699146007565
0_1_1_0_2_0 - 0.5836779404449048
0_1_1_1_0_0 - 0.5903790268317124
0_1_1_1_1_0 - 0.607645026190623
0_2_0_0_0_0 - 0.6481171726017038
0_2_0_0_1_0 - 0.6766621393804345
0_2_0_0_2_0 - 0.6077218976421738
0_2_

In [None]:
# Calculate the percentage of new connections that are SA in each scenario in 2030
maxConnect = 0
minConnect = 1000
attr = '2' # Looking at number of grid connections
for scenario in final['Scenario'].unique():
    if scenario[-1] == '0':
        summaryData = final.loc[final['Scenario'] == scenario,]
        allConnect  = summaryData.loc[summaryData['Attr'] == attr,'2030'].sum()
        SAConnect = summaryData.loc[summaryData['Attribute'] == '2.New_Connections_SA_PV', '2030'].sum()
        SATotal = SAConnect / allConnect
        if SATotal > maxConnect:
            maxConnect = SATotal
            maxScenario = scenario
        if SATotal < minConnect:
            minConnect = SATotal
            minScenario = scenario
        print(f'{scenario} - {gridTotal}')
print('***')
print(f'Maximum SA: {maxScenario} - {maxConnect}')
print(f'Minimum SA: {minScenario} - {minConnect}')