# Comparing GEP results
For each country, results are defined by the geographic and scenario summary files. In this script, the results are downloaded, extracted, and analyzed to see how the scenarios affect the results.

In [1]:
import os, sys, importlib, shutil, zipfile
import pandas as pd

# Scenario lever definitions. Each entry of scenarioDefs maps an integer key
# to [list of level names, lever description].
# NOTE(review): the key presumably is the position of the lever's digit in the
# scenario code used in the summary file names -- confirm.
# NOTE(review): "targer" looks like a typo for "target"; it is left untouched
# because it is a runtime string that other code may match on.
_scenarioLevers = [
    (["Baseline", "High"], "Population Growth"),
    (["Baseline", "Low", "High"], "Electricity demand targer"),
    (["Baseline", "Best Practice"], "5-year investment plan"),
    (["Baseline", "High"], "Grid generation cost"),
    (["Baseline", "High"], "PV Cost"),
    (["Baseline", "High"], "Prioritization"),
]
scenarioDefs = {
    position: [levels, description]
    for position, (levels, description) in enumerate(_scenarioLevers)
}

In [80]:
class gepResults():
    """Fetch and summarise the scenario summary files of a single country.

    Attributes:
        s3Folder: root folder holding the zipped results; it is read with
            ``shutil.copy``, so it must be reachable as a file-system path.
        localFolder: root folder where results are stored locally.
        countryCode: country identifier, e.g. ``'bj-1'``.
        summaryResultsFolder: folder scanned for scenario CSV files,
            ``<localFolder>/<code>/outputs/<code>-scenarios-summaries``.
    """

    # 0-based row positions (in summary-CSV order) that are added up into the
    # extra rows '4.SA_total' and '4.MG_total'.
    # NOTE(review): presumably the stand-alone and mini-grid rows of section 4
    # of the summary file -- confirm against the CSV layout.
    SA_TOTAL_ROWS = [25, 26]
    MG_TOTAL_ROWS = [27, 28, 29, 30, 31]

    def __init__(self, s3Folder, localFolder, code):
        """Store the folders and country code and derive the summaries folder."""
        self.s3Folder = s3Folder
        self.localFolder = localFolder
        self.countryCode = code
        self.summaryResultsFolder = os.path.join(localFolder, code, "outputs", "%s-scenarios-summaries" % code)

    def extractSummaries(self):
        """Copy the country's summaries zip locally (if missing) and unzip it.

        The archive ``<s3Folder>/<code>/outputs/<code>-scenarios-summaries.zip``
        is copied to ``<localFolder>/<code>/`` unless a copy already exists
        there, and is then extracted into that same folder.
        NOTE(review): processSummaryResults reads from
        ``<localFolder>/<code>/outputs/...``, so the archive presumably
        contains the ``outputs/`` prefix itself -- confirm.
        """
        outputFolder = os.path.join(self.localFolder, self.countryCode)
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(outputFolder, exist_ok=True)
        s3ZipFolder = os.path.join(self.s3Folder, self.countryCode, 'outputs')
        summaryResultsZip = os.path.join(s3ZipFolder, "%s-scenarios-summaries.zip" % self.countryCode)
        localResultsZip = os.path.join(outputFolder, "%s-scenarios-summaries.zip" % self.countryCode)
        if not os.path.exists(localResultsZip):
            shutil.copy(summaryResultsZip, localResultsZip)
        with zipfile.ZipFile(localResultsZip, 'r') as inZip:
            inZip.extractall(outputFolder)

    def processSummaryResults(self):
        """Compare the 2030 results of all scenarios whose code ends in 0.

        Every file in ``summaryResultsFolder`` whose scenario code (characters
        5-15 of the file name) ends in ``"0"`` is read as CSV; its columns are
        suffixed with the scenario code (underscores removed) and columns 1-2
        are joined side by side. Files that do not match are skipped.
        NOTE(review): the fixed slice ``f[5:16]`` assumes a 5-character prefix
        before the scenario code (e.g. ``'bj-1-'``) -- confirm for other codes.

        Returns:
            pandas.DataFrame indexed by the row labels of the first summary
            file plus ``'4.SA_total'`` and ``'4.MG_total'``, with columns
            ``MaxScenario``, ``MaxVal``, ``MinScenario``, ``MinVal`` and
            ``Range`` computed across the ``2030`` columns.

        Raises:
            ValueError: if the folder contains no matching scenario file.
        """
        final = None
        # sorted() makes the column order, and therefore the tie-breaking of
        # idxmax/idxmin, reproducible; os.listdir order is arbitrary.
        for f in sorted(os.listdir(self.summaryResultsFolder)):
            scenarioName = f[5:16]
            # Only scenarios whose last digit is 0 are compared. Anything else,
            # including stray non-scenario files, is skipped instead of
            # crashing on int().
            if not scenarioName.endswith("0"):
                continue
            inD = pd.read_csv(os.path.join(self.summaryResultsFolder, f))
            scenarioName = scenarioName.replace("_", "")
            inD.columns = ["%s_%s" % (x, scenarioName) for x in inD.columns]
            if final is None:
                # The first file keeps its label column (column 0).
                final = inD
            else:
                # A join failure now propagates instead of silently resetting
                # the accumulated table to the current file.
                final = final.join(inD.iloc[:, 1:3])
        if final is None:
            raise ValueError("No scenario summary files found in %s" % self.summaryResultsFolder)

        # Work on a copy so that adding rows does not write into a slice of `final`.
        final2030 = final.loc[:, [x for x in final.columns if '2030' in x]].copy()
        # Extra summary rows: column-wise sums over fixed row positions.
        final2030.loc['4.SA_total'] = final2030.iloc[self.SA_TOTAL_ROWS].sum()
        final2030.loc['4.MG_total'] = final2030.iloc[self.MG_TOTAL_ROWS].sum()
        final2030.index = list(final.iloc[:, 0]) + ['4.SA_total', '4.MG_total']

        # For each row: the scenario holding the max/min value, and the spread.
        xx = pd.DataFrame({
            "MaxScenario": final2030.idxmax(axis=1),
            "MaxVal": final2030.max(axis=1),
            "MinScenario": final2030.idxmin(axis=1),
            "MinVal": final2030.min(axis=1),
        })
        xx["Range"] = xx["MaxVal"] - xx["MinVal"]
        return xx

In [81]:
# Single-country run: build the comparison table for one country and show it
# as the cell output. The same folder is passed as both the remote and the
# local root.
outFolder = "/media/gost/DATA1/GEP/Summaries_Summary"
shapesFolder = "/media/gost/DATA1/GEP/Clusters"
resultsFolder = "/media/gost/DATA1/GEP/Summaries/"
xx = gepResults(s3Folder=resultsFolder, localFolder=resultsFolder, code='bj-1')
summary = xx.processSummaryResults()
summary
#summary.to_csv(os.path.join(outFolder, "%s-summary.csv" % 'mw-1'))

Unnamed: 0,MaxScenario,MaxVal,MinScenario,MinVal,Range
1.Population_Grid,2030_111010,15233000.0,2030_000020,9904020.0,5329030.0
1.Population_SA_Diesel,2030_000000,0.0,2030_000000,0.0,0.0
1.Population_SA_PV,2030_100020,5922220.0,2030_111010,584154.0,5338070.0
1.Population_MG_Diesel,2030_000000,0.0,2030_000000,0.0,0.0
1.Population_MG_PV,2030_110110,395329.0,2030_000000,0.0,395329.0
1.Population_MG_Wind,2030_000000,0.0,2030_000000,0.0,0.0
1.Population_MG_Hydro,2030_111010,7595.21,2030_000000,0.0,7595.21
1.Population_MG_Hybrid,2030_000000,0.0,2030_000000,0.0,0.0
2.New_Connections_Grid,2030_121010,4494100.0,2030_000020,1783820.0,2710280.0
2.New_Connections_SA_Diesel,2030_000000,0.0,2030_000000,0.0,0.0


In [84]:
# Batch run: summarise every country folder found under resultsFolder, keep
# the tables in allSummaries (keyed by folder name) and write one CSV per
# country into outFolder.
resultsFolder = "/media/gost/DATA1/GEP/Summaries/"
shapesFolder = "/media/gost/DATA1/GEP/Clusters"
outFolder = "/media/gost/DATA1/GEP/Summaries_Summary"
allSummaries = {}
submittedCountries = os.listdir(resultsFolder)
#submittedCountries = ['zw-1']
# Make sure the output folder exists; otherwise every write below fails and
# is reported as a summarising error.
os.makedirs(outFolder, exist_ok=True)
for focalCountry in submittedCountries:
    # The extraction step (gepResults.extractSummaries) is intentionally not
    # run here; the summaries are expected to be unzipped already.
    try:
        xx = gepResults("", resultsFolder, focalCountry)
        summary = xx.processSummaryResults()
        allSummaries[focalCountry] = summary
        summary.to_csv(os.path.join(outFolder, "%s-summary.csv" % focalCountry))
    except Exception as err:
        # Best effort: report the failing country with the reason and carry
        # on. Catching Exception (not a bare except) keeps Ctrl-C working.
        print("Error Summarizing %s: %s" % (focalCountry, err))

In [85]:
# Build two cross-country tables, one row per country: the per-indicator
# maximum value (maxDF) and the per-indicator range (rangeDF). The country key
# is appended as the last column, "Country".
maxVals = []
rangeVals = []
# Default header so that an empty allSummaries yields empty tables instead of
# a NameError on the loop variable.
columnNames = ["Country"]
for key, values in allSummaries.items():
    maxVals.append(list(values['MaxVal']) + [key])
    rangeVals.append(list(values['Range']) + [key])
    # Header comes from the summary's row labels; all summaries are expected
    # to share the same rows, so the last one seen is used.
    columnNames = list(values.index) + ["Country"]

maxDF = pd.DataFrame(maxVals, columns = columnNames)
rangeDF = pd.DataFrame(rangeVals, columns = columnNames)

In [86]:
# Write the cross-country tables to CSV.
# NOTE(review): the suffix is appended to the outFolder path itself, so the
# files are created next to that folder (".../Summaries_Summary_maxVals.csv"),
# not inside it -- confirm this is intended.
maxValsPath = "%s_maxVals.csv" % outFolder
rangeValsPath = "%s_rangeVals.csv" % outFolder
maxDF.to_csv(maxValsPath)
rangeDF.to_csv(rangeValsPath)