# Process and download GEP data

In [2]:
import os, sys, logging, importlib, math, time, boto3, shutil
import rasterio, affine

import geopandas as gpd
import pandas as pd

from zipfile import ZipFile
from affine import Affine
from rasterio import features
from rasterio.mask import mask
from rasterio.features import rasterize

import GEP
from GEP import gep_results

def tPrint(s):
    """Print a message prefixed with the current local time.

    The emitted line has the form ``HH:MM:SS<TAB>message``.

    Args:
        s: The message to print. It is converted with ``str()``, so any
            object is accepted.
    """
    print(f"{time.strftime('%H:%M:%S')}\t{s}")

In [3]:
# Scenario code used to name the scenario sub-folder and the per-country CSVs.
scenario = "0_0_0_0_1_0"

# Root of the local GEP working tree; every other folder is derived from it.
base_folder = '/media/gost/DATA1/GEP'
summaries_folder = os.path.join(base_folder, 'Scenario_Summaries')

# All scenario data lives under <base>/Scenarios: one sub-folder per scenario
# code plus a shared 'zipfiles' folder.
scenario_base_folder = os.path.join(base_folder, 'Scenarios')
zip_folder = os.path.join(scenario_base_folder, 'zipfiles')
scenario_folder = os.path.join(scenario_base_folder, scenario)


## Download results from S3

In [None]:
# Download every object under `prefix` whose key mentions scenario summaries
# or scenario results into the matching local folder.
s3_bucket = "wbg-geography01"
prefix = 'GEP/models-2021/'

s3_client = boto3.client('s3')
s3 = boto3.resource('s3')
bucket = s3.Bucket(s3_bucket)

# (substring looked for in the S3 key, local folder the object is saved to)
download_targets = (
    ('scenarios-summaries', summaries_folder),
    ('scenarios-results', scenario_folder),
)

# download_file does not create missing parent directories, so make sure the
# destinations exist before the first transfer.
for _, target_folder in download_targets:
    os.makedirs(target_folder, exist_ok=True)

for obj in bucket.objects.filter(Prefix=prefix):
    key = obj.key
    file_name = os.path.basename(key)
    if not file_name:
        # Keys ending in "/" are folder placeholders; their basename is empty,
        # so the download target would be the directory itself.
        continue

    # The markers are checked independently: a key containing both substrings
    # is downloaded to both folders.
    for marker, target_folder in download_targets:
        if marker in key:
            print(key)
            out_file = os.path.join(target_folder, file_name)
            s3_client.download_file(s3_bucket, key, out_file)
        
    

## Extract scenario files from zipfiles

In [4]:
# Pick up any edits made to the GEP module since it was first imported.
importlib.reload(GEP)

# For every archive in zip_folder, extract the CSV for `scenario` into
# scenario_folder unless that CSV is already there.
for zFile in os.listdir(zip_folder):
    cur_zip = os.path.join(zip_folder, zFile)
    # The country code is the first two dash-separated parts of the file name.
    country = "-".join(zFile.split("-")[:2])
    out_scenario_file = os.path.join(scenario_folder, f'{country}-{scenario}.csv')
    if os.path.exists(out_scenario_file):
        continue

    # These countries are extracted with a three-part code built from parts
    # 0, 1 and 3 of the scenario code, then renamed to the standard file name.
    # NOTE(review): presumably their archives use a shorter naming scheme -
    # confirm against GEP.gep_results.
    uses_short_code = country in ('fm-2', 'sb-2', 'so-2', 'vu-2')
    if uses_short_code:
        spl = scenario.split("_")
        new_scenario = "_".join((spl[0], spl[1], spl[3]))

    cur_res = GEP.gep_results(country, scenariosFolder=scenario_base_folder, scenario=scenario)
    if uses_short_code:
        out_file = cur_res.extract_sample_scenario(new_scenario, scenario_folder)
        os.rename(out_file, out_scenario_file)
    else:
        cur_res.extract_sample_scenario(scenario, scenario_folder)
    print(country)

sb-2
so-2
vu-2


## Connect scenario data to geospatial data

In [None]:
# Hard-coded local data folders used when linking scenario results to geometry.
cluster_folder = '/media/gost/DATA1/GEP/Clusters'
vrt_folder = '/media/gost/DATA1/GEP/GEP_VRTs'

# Folder containing the files for the scenario being processed.
scenario_base = '/media/gost/DATA1/GEP/Scenarios'
scenario = '0_0_0_0_1_0'
scenario_folder = os.path.join(scenario_base, scenario)

# The first four characters of each file name are used as the country code
# (e.g. 'sb-2'); one entry is produced per file found in scenario_folder.
countries = [file_name[:4] for file_name in os.listdir(scenario_folder)]
countries

In [None]:
# Pick up any edits made to the GEP module since it was first imported.
importlib.reload(GEP)

# Columns kept from the joined results when writing each country's file.
export_columns = ['FinalElecCode2018', 'Pop', 'geometry']

# Write one electrification-status shapefile per country, skipping any
# country whose output file already exists.
for country in countries:
    tPrint(country)
    out_file = '/media/gost/DATA1/GEP/Country_Scenarios/%s_electrifrication_status.shp' % country
    if os.path.exists(out_file):
        continue
    xx_so = GEP.gep_results(country, scenario=scenario)
    # NOTE(review): join_results() presumably returns a GeoDataFrame of the
    # scenario results joined to their geometry - confirm in GEP.
    res = xx_so.join_results()
    sel_res = res.loc[:, export_columns]
    sel_res.to_file(out_file)
        