# Comparing GEP results
For each country, results are defined by the geographic and scenario summary files. In this script, results are downloaded, extracted, and analyzed to see how the scenarios affect the results.

In [1]:
import os, sys, importlib, shutil, zipfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import GEP
from GEP import gepResults

# Re-import GEP so the current on-disk version of the module is used.
importlib.reload(GEP)


def _three_decimals(value):
    """Format a number with exactly three digits after the decimal point."""
    return '%.3f' % value


# Display floats in pandas output with three decimals.
pd.set_option('display.float_format', _three_decimals)

# Scenario lever definitions. Each entry maps an integer key to
# [list of level labels, human-readable lever name].
# NOTE(review): the six keys presumably correspond to the six positions of the
# underscore-separated scenario codes (e.g. "0_2_0_0_0_0") - confirm against GEP.
scenarioDefs = {
    0: [["Baseline", "High"], "Population Growth"],
    1: [["Baseline", "Low", "High"], "Electricity demand target"],
    2: [["Baseline", "Best Practice"], "5-year investment plan"],
    3: [["Baseline", "High"], "Grid generation cost"],
    4: [["Baseline", "High"], "PV Cost"],
    5: [["Baseline", "High"], "Prioritization"],
}

In [2]:
# Common root of the GEP working directory; the folders below all sit under it.
_gep_root = "/media/gost/DATA1/GEP"

resultsFolder = _gep_root + "/Summaries/"      # searched for the summary csv files
shapesFolder = _gep_root + "/Clusters"
outFolder = _gep_root + "/Summaries_Summary"
chart_folder = _gep_root + "/CHARTS/Africa"    # handed to the GEP plotting helpers

In [3]:
# Functions used throughout the code to re-classify variables
def get_pp(x):
    ''' Return the power-pool label for a country code - used in pandas apply function

    x is compared against codes of the form '<two-letter code>-1'. The result
    is 'EAPP', 'WAPP', 'SAPP' or 'AFR' for a listed code and "other" for
    anything else.
    '''
    # Checked in this order; the first group containing x wins.
    pool_members = (
        ('EAPP', ('bi','dj','eg','et','ke','ly','rw','sd','ss','ug')),
        ('WAPP', ('bf','bj','ci','gh','gm','gn','gw','lr','ml','ng','sl','sn','tg')),
        ('SAPP', ('ao','bw','ls','mw','mz','na','sz','tz','za','zm','zw')),
        ('AFR',  ('cd','cg','cf','cm','er','ga','gq','km','mg','mr','ne','so','st','td')),
    )
    for pool_name, prefixes in pool_members:
        if x in [f'{prefix}-1' for prefix in prefixes]:
            return pool_name
    return "other"

def get_tech(x):
    ''' Reduce an attribute name to its technology group

    The name is split on "_". A final token of "Grid" yields "GRID"; any
    other name yields its second-to-last token (presumably SA or MG for the
    off-grid attributes). A name with a single token that is not "Grid"
    raises IndexError.
    '''
    # Only the last two tokens matter, so split from the right.
    tokens = x.rsplit("_", 2)
    return "GRID" if tokens[-1] == "Grid" else tokens[-2]
    


In [4]:
# Create summary output table containing all the summary files stacked together
resultsFolder = "/media/gost/DATA1/GEP/Summaries/"
# Walk the results folder recursively and keep the full path of every file
# whose name contains "summary.csv".
all_results = [
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk(resultsFolder)
    for file_name in file_names
    if "summary.csv" in file_name
]

In [5]:
# Stack the table returned by GEP.gep_summary(...).get_data() for every
# summary file into one long DataFrame.
#
# The frames are collected first and concatenated once. The earlier
# try/except around DataFrame.append had three problems:
#   * DataFrame.append no longer exists in pandas 2.0, and the bare except
#     swallowed the resulting AttributeError, leaving only the last file;
#   * re-running the cell appended onto the `final` of the previous run;
#   * any genuine error inside the loop was silently hidden.
summary_frames = [GEP.gep_summary(x).get_data() for x in all_results]
if not summary_frames:
    raise ValueError("all_results is empty - no summary.csv files were found")
final = pd.concat(summary_frames)
final.columns = ['Attribute','2025','2030','Country','Scenario']

# First character of the attribute name, used as the attribute group code.
final['Attr'] = final['Attribute'].apply(lambda x: x[:1])
# Drop the rows whose group code is 'M'. The .copy() makes `final` an
# independent frame so the column assignments below do not act on a slice.
final = final.loc[final['Attr'] != 'M'].copy()
final['Tech'] = final['Attribute'].apply(get_tech)
final['PP'] = final['Country'].apply(get_pp)

In [8]:
# Keep only the rows whose power-pool label is something other than "other".
in_named_pool = final['PP'] != "other"
africa = final.loc[in_named_pool]

# Calculate wide-averages
Summarize scenarios across Africa and across the entire dataset.

In [None]:
### Global-ish summaries
attr='2'
tech='MG'   # only used by the commented-out per-technology variant below
# Only the two year columns are aggregated. The frame also holds string
# columns (Attribute, Country, Tech, PP); leaving those in depends on pandas
# silently dropping them, which newer pandas versions no longer do.
year_cols = ['2025','2030']
### Summarize across technologies
#ag = final.loc[(final['Attr'] == attr) & (final['Tech'] == tech)].groupby(['Scenario','Attr','Tech'])[year_cols]
#ag.sum().sort_values(['2030'], ascending=False)

### Group all technologies to get all summaries
ag = final.loc[final['Attr'] == attr].groupby(['Scenario','Attr'])[year_cols]
#ag.sum().sort_values(['2030'], ascending=False)
# Scenario totals ranked by their 2030 value; show at most the first 100 rows.
ag.sum().sort_values(['2030'], ascending=False).iloc[0:100]

In [None]:
### Africa summaries
attr='2'
tech='MG'
# Only the two year columns are aggregated. The frame also holds string
# columns (Attribute, Country, PP); leaving those in depends on pandas
# silently dropping them, which newer pandas versions no longer do.
year_cols = ['2025','2030']
# Totals per scenario for one attribute group and one technology.
ag = africa.loc[(africa['Attr'] == attr) & (africa['Tech'] == tech)].groupby(['Scenario','Attr','Tech'])[year_cols]
# Ranked by the 2030 value; rows 50-149 of the ranking are displayed.
ag.sum().sort_values(['2030'], ascending=False).iloc[50:150]

#ag = africa.loc[(africa['Attr'] == attr)].groupby(['Scenario','Attr'])[year_cols]
#ag.sum().sort_values(['2030'], ascending=False)

# Create plots

In [None]:
importlib.reload(GEP)
# Create plots describing all costs for all separate scenarios
# NOTE(review): joined_data is not defined in the visible cells - confirm where it is built.
attributes = joined_data.index.get_level_values(0)
scenarios = joined_data.index.get_level_values(1)
for selected_attribute in ('1', '2', '3', '4'):
    for selected_scenario in scenarios:
        # Only scenario codes ending in '0' are plotted.
        if selected_scenario[-1] != '0':
            continue
        regional_frames = (
            (joined_data, "Africa"),
            (wapp_data, "WAPP"),
            (eapp_data, "EAPP"),
            (sapp_data, "SAPP"),
        )
        for frame, region_label in regional_frames:
            GEP.extract_plot(frame, selected_attribute, selected_scenario, region_label, chart_folder)
        # Stop after the first matching scenario for this attribute.
        break

In [None]:
importlib.reload(GEP)
# Create boxplots of scenarios for selected countries
countries = ['zm-1']
for country in countries:
    # Rows of the selected country ...
    country_rows = final.loc[final['Country'] == country]
    # ... restricted to scenario codes whose last character is the digit 0.
    ends_in_zero = country_rows['Scenario'].apply(lambda code: int(code[-1]) == 0)
    inD = country_rows.loc[ends_in_zero]
    # Rebinds the notebook-level chart_folder; the new value stays in effect afterwards.
    chart_folder = '/media/gost/DATA1/GEP/CHARTS/Box_Plot'
    for selected_attribute in ('1', '2', '3', '4'):
        out_png = f'{chart_folder}/boxPlot_{country}_{selected_attribute}.png'
        res = GEP.box_plot(inD, selected_attribute, out_png)
        

In [None]:
# Per power pool: total of the two year columns for every Attribute/Scenario
# pair, followed by one box plot per attribute group.
#
# Rows are picked through the 'PP' column that get_pp filled in on `final`.
# The WAPP/EAPP/SAPP country lists are local to get_pp and are not available
# here, so filtering on them raised a NameError.
year_cols = ['2025','2030']
pool_frames = {}
for pool in ('WAPP', 'EAPP', 'SAPP'):
    pool_rows = final.loc[final['PP'] == pool]
    # Aggregate the numeric year columns only; the remaining columns are text.
    pool_frames[pool] = pool_rows.groupby(['Attribute','Scenario'])[year_cols].sum().reset_index()

wapp_data = pool_frames['WAPP']
eapp_data = pool_frames['EAPP']
sapp_data = pool_frames['SAPP']

for selected_attribute in ['1','2','3','4']:
    for pool, pool_data in pool_frames.items():
        # box_plot is a function of the GEP module.
        res = GEP.box_plot(pool_data, selected_attribute, f'{chart_folder}/boxPlot_{pool}_{selected_attribute}.png')

# Summarize scenarios across countries

In [12]:
### Generate national summary table - as simple as possible
scenario = "0_2_0_0_0_0"
# .copy() gives an independent frame, so the column assignments below do not
# write to a slice of `final` (which triggered SettingWithCopyWarning).
summaryData = final.loc[final['Scenario'] == scenario].copy()
#summaryData.loc[summaryData['2025'] != summaryData['2030'],]

summaryData['Attr'] = summaryData['Attribute'].apply(lambda x: x[:1])
summaryData = summaryData.loc[summaryData['Attr'] != 'M'].copy()
summaryData['Tech'] = summaryData['Attribute'].apply(get_tech)
summaryData['PP'] = summaryData['Country'].apply(get_pp)

# Average only the numeric year columns; the remaining columns are text.
year_cols = ['2025','2030']

# chart_folder acts as a file-name prefix here: no path separator is inserted
# between it and the "_country_breakdown_..." / "_PP_breakdown_..." suffix.
summary_grouped = summaryData.groupby(['Country','Attr','Tech'])[year_cols]
summary_grouped.mean().to_csv(f"{chart_folder}_country_breakdown_{scenario}.csv")

summary_grouped = summaryData.groupby(['PP','Attr','Tech'])[year_cols]
summary_grouped.mean().to_csv(f"{chart_folder}_PP_breakdown_{scenario}.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [None]:
# Generate output table of Country with population (1) and investment (4) by grid and off grid
# NOTE(review): no filter on attribute groups 1 and 4 is applied below; every
# group except 'M' is written - confirm that this is intended.
# `scenario` is taken from an earlier cell (alternative: "0_0_0_0_0_0").
# .copy() gives an independent frame, so the column assignments below do not
# write to a slice of `final` (which triggers SettingWithCopyWarning).
summaryData = final.loc[final['Scenario'] == scenario].copy()
summaryData['Attr'] = summaryData['Attribute'].apply(lambda x: x[:1])
summaryData = summaryData.loc[summaryData['Attr'] != 'M'].copy()
summaryData['Tech'] = summaryData['Attribute'].apply(get_tech)
# Average only the numeric year columns; the remaining columns are text.
summary_grouped = summaryData.groupby(['Country','Attr','Tech'])[['2025','2030']]
# chart_folder acts as a file-name prefix: no path separator is inserted.
summary_grouped.mean().to_csv(f"{chart_folder}_country_breakdown_GRID_OFFGRID.csv")

In [11]:
# Calculate the percentage of new connections that are GRID in each scenario
# For every scenario code ending in '0', the share is the 2030 total of
# '2.New_Connections_Grid' divided by the 2030 total of attribute group '2'.
# The scenarios with the largest and smallest share are reported at the end.
maxConnect = 0
minConnect = 1000
# Bound up front so the final prints work even if no scenario qualifies.
maxScenario = None
minScenario = None
attr = '2' # attribute group whose 2030 total forms the denominator
for scenario in final['Scenario'].unique():
    if scenario[-1] != '0':
        continue
    summaryData = final.loc[final['Scenario'] == scenario]
    allConnect  = summaryData.loc[summaryData['Attr'] == attr, '2030'].sum()
    gridConnect = summaryData.loc[summaryData['Attribute'] == '2.New_Connections_Grid', '2030'].sum()
    if allConnect == 0:
        # No share can be computed without any connections; skip the scenario.
        print(f'{scenario} - no connections in 2030, skipped')
        continue
    gridTotal = gridConnect / allConnect
    # Strict comparisons: on ties the first scenario encountered is kept.
    if gridTotal > maxConnect:
        maxConnect = gridTotal
        maxScenario = scenario
    if gridTotal < minConnect:
        minConnect = gridTotal
        minScenario = scenario
    print(f'{scenario} - {gridTotal}')
print('***')
print(f'Maximum Grid: {maxScenario} - {maxConnect}')
print(f'Minimum Grid: {minScenario} - {minConnect}')

0_0_0_0_0_0 - 0.3985938972330056
0_0_0_0_1_0 - 0.42518069142867826
0_0_0_0_2_0 - 0.3717679690102625
0_0_0_1_0_0 - 0.3964323345811144
0_0_0_1_1_0 - 0.4225421074666658
0_0_0_1_2_0 - 0.36459121480167883
0_0_1_0_0_0 - 0.3984044686421864
0_0_1_0_1_0 - 0.4204845157085565
0_0_1_0_2_0 - 0.37178059772114747
0_0_1_1_1_0 - 0.4184432590235899
0_0_1_1_2_0 - 0.3646036054573854
0_1_0_0_0_0 - 0.6461752307652961
0_1_0_0_1_0 - 0.6777296967514571
0_1_0_0_2_0 - 0.598660634629906
0_1_0_1_0_0 - 0.6316465222119607
0_1_0_1_1_0 - 0.6684758878405409
0_1_0_1_2_0 - 0.5795757835693217
0_1_1_0_0_0 - 0.7122535348945389
0_0_1_1_0_0 - 0.3962548216524002
0_1_1_1_2_0 - 0.6616803543953556
1_0_0_0_1_0 - 0.43909868111745737
1_1_0_1_0_0 - 0.6484981065622344
1_2_0_1_2_0 - 0.5942697394162845
0_1_1_0_1_0 - 0.7375020550153555
0_1_1_0_2_0 - 0.6795314933347237
0_1_1_1_0_0 - 0.6989811771729301
0_1_1_1_1_0 - 0.7288379340135013
0_2_0_0_0_0 - 0.6431206399969854
0_2_0_0_1_0 - 0.6774317945981845
0_2_0_0_2_0 - 0.5951590689193798
0_2_0_1