In [2]:
import batch_data_support_general as bds_gen
import importlib
import inspect
import itertools
import logging
import matplotlib.pyplot as plt
import model_attributes as ma
import model_afolu as mafl
import model_ippu as mi
import model_circular_economy as mc
import model_electricity as ml
import model_energy as me
import model_socioeconomic as se
import setup_analysis as sa
import support_classes as sc
import support_functions as sf
import numpy as np
import os, os.path
import pandas as pd
import pycountry
import re
import time
from typing import *
import warnings

warnings.filterwarnings("ignore")




In [3]:
regions = sc.Regions(sa.model_attributes)
time_periods = sc.TimePeriods(sa.model_attributes)

In [4]:

def read_inputs_from_repository(
    dir_repo_data: str,
    dict_modvars: Dict[str, Union[List[str], None]],
    model_attributes: ma.ModelAttributes,
    field_iso: str = "iso_code3",
    field_year: str = "year",
    fields_drop: Union[List[str], None] = ["nation"],
    fields_index: Union[List[str], None] = None,
    fields_to_iso: Union[List[str], None] = ["location_code"],
    regions: Union[sc.Regions, None] = None,
    time_periods: Union[sc.TimePeriods, None] = None,
) -> pd.DataFrame: 
    """
    Read inputs from the repository for use.
    
    Function Arguements
    -------------------
    - dir_repo_data: directory containing data
    - dict_modvars: dictionary with model variables as keys and a list of 
        categories to apply to (or None to read all applicable)
    - model_attributes: model attributes object to use
        
    Keyword Arguements
    ------------------
    - field_iso: field storing the 3-digit alpha ISO code
    - field_year: field containing year
    - fields_drop: fields to drop from input files (always lower case)
    - fields_index: ORDERED fields to use for sorting (always lower case)
    - fields_to_iso: fields to convert to iso if present
    - regions: optional support_classes.Regions object to pass to prevent 
        endogenous instantiation
    - time_periods: optional support_classes.TimePeriods object to pass to 
        prevent endogenous instantiation
    """
    
    regions = (
        sc.Regions(model_attributes) 
        if not isinstance(regions, sc.Regions) 
        else regions
    )
    time_periods = (
        sc.TimePeriods(model_attributes) 
        if not isinstance(time_periods, sc.TimePeriods) 
        else time_periods
    )

    # some needed dictionaries
    dict_subsec_to_subdir = {
        "AFOLU": "AFOLU",
        "Energy": "Energy",
        "IPPU": "IPPU", 
        "Socioeconomic": "SocioEconomic"
    }
    dict_subsec_abv_to_sector = model_attributes.dict_attributes.get("abbreviation_subsector").field_maps.get("abbreviation_subsector_to_sector")
    dict_subsec_to_subsec_abv = model_attributes.dict_attributes.get("abbreviation_subsector").field_maps.get("subsector_to_abbreviation_subsector")
    dict_modvars = (
        dict((x, None) for x in model_attributes.all_model_variables)
        if not isinstance(dict_modvars, dict)
        else dict_modvars
    ) 
    
    
    # some fields
    field_iso = field_iso.lower() if isinstance(field_iso, str) else "iso_code3"
    field_year = field_year.lower() if isinstance(field_year, str) else "year"
    fields_index = [field_iso, field_year] if (fields_index is None) else [x.lower() for x in fields_index if isinstance(x, str)]
    fields_to_iso = [] if not isinstance(fields_to_iso, list) else fields_to_iso
    
    # initialize output
    df_out = None
    df_index = None # used to govern merges
    dict_modvar_to_fields = {}
    dict_modvar_to_ordered_cats = {}
    
    modvars = list(dict_modvars.keys())
    
    for k, modvar in enumerate(modvars):
        
        cats_defined = model_attributes.get_variable_categories(modvar)
        cats = dict_modvars.get(modvar)
        cats = cats_defined if (cats is None) else cats
        cats = (
            [x for x in cats_defined if x in cats]
            if (cats_defined is not None)
            else [None]
        )
        
        subsec = model_attributes.get_variable_subsector(modvar)
        sector = dict_subsec_abv_to_sector.get(
            dict_subsec_to_subsec_abv.get(subsec)
        )
        sector_repo = dict_subsec_to_subdir.get(sector)
        
        
        if (sector_repo is not None) and (len(cats) > 0):

            for cat in cats:
                
                restriction = None if (cat is None) else [cat]
                
                var_name = model_attributes.build_varlist(
                    subsec,
                    modvar, 
                    restrict_to_category_values = restriction
                )[0]

                df_var = []
                
                for subdir in ["historical", "projected"]:
                    
                    fp_read = os.path.join(
                        dir_repo_data, 
                        sector_repo, 
                        var_name,
                        "input_to_sisepuede",
                        subdir, 
                        f"{var_name}.csv"
                    )

                    if os.path.exists(fp_read):
                        
                        try:
                            # read
                            df_var_cur = pd.read_csv(fp_read)
                            
                            # rename where necessary
                            dict_rnm_to_iso = dict(
                                (x, field_iso) 
                                for x in fields_to_iso
                                if x in df_var_cur.columns
                            )
                            df_var_cur.rename(
                                columns = dict_rnm_to_iso,
                                inplace = True
                            )
                            
                            # clean the fields
                            dict_rnm = dict((x, x.lower()) for x in df_var_cur.columns)
                            df_var_cur.rename(
                                columns = dict_rnm,
                                inplace = True
                            )

                            # drop any unwanted columns
                            df_var_cur = df_var_cur[fields_index + [var_name]]
                            df_var_cur.set_index(fields_index, inplace = True)

                            if subdir == "projected":
                                inds_prev = df_var[0].index
                                df_var_cur = df_var_cur[
                                    [(x not in inds_prev) for x in df_var_cur.index]
                                ]
                                df_var[0].reset_index(inplace = True)
                                df_var_cur.reset_index(inplace = True)
                            
                            df_var.append(df_var_cur)

                        except Exception as e:
                            warnings.warn(f"Error trying to read {fp_read}: {e}")
                            
                    

                # concatenate and sort
                df_var = pd.concat(df_var, axis = 0) if (len(df_var) > 0) else None
                
                if ((fields_index is not None) and (df_var is not None)):                    
                    # get dictionaries
                    fields_add = sorted([x for x in df_var.columns if x not in fields_index])
                    fields_exist = dict_modvar_to_fields.get(modvar)
                    
                    (
                        dict_modvar_to_fields.update({modvar: fields_add}) 
                        if fields_exist is None
                        else dict_modvar_to_fields[modvar].extend(fields_add)
                    )
                    
                    (
                        dict_modvar_to_ordered_cats.update({modvar: [cat]}) 
                        if fields_exist is None
                        else dict_modvar_to_ordered_cats[modvar].append(cat)
                    )
                    
                    
                if df_var is not None:

                    df_var.sort_values(by = fields_index, inplace = True)
                    #df_var.set_index(fields_index, inplace = True)
                    
                    df_var.reset_index(drop = True, inplace = True)

                    if (df_out is None):

                        df_out = [df_var]
                        df_index = df_var[fields_index].copy()

                    else:
                        #df_out.append(df_var)
                        
                        
                        fold_q = (
                            True
                            if df_var[fields_index].shape != df_index.shape
                            else not all(df_var[fields_index] == df_index)
                        )

                        # setup indexing data frame
                        if fold_q:
                            df_out = pd.concat(df_out, axis = 1)

                            df_index = (
                                pd.merge(
                                    df_index, 
                                    df_var[fields_index],
                                    how = "outer"
                                )
                                .sort_values(by = fields_index)
                                .reset_index(drop = True)
                            )


                            df_out = (
                                pd.merge(
                                    df_index, 
                                    df_out, 
                                    how = "left", 
                                    on = fields_index
                                )
                                .sort_index()
                                .reset_index(drop = True)
                            )
                            df_out = [df_out]
                            df_var = (
                                pd.merge(
                                    df_index, 
                                    df_var, 
                                    how = "left", 
                                    on = fields_index
                                )
                                .sort_index()
                                .reset_index(drop = True)
                            )

                        #""";
                        print(f"appended {var_name}")
                        print(df_index.shape)
                        print(df_out[0].shape)
                        print(df_var.shape)
                        print("\n")
                        df_out.append(df_var[[var_name]])

    print("here") 
    if (df_out is not None) and (fields_index is not None):

        """
        df_out = (
            df_out[0].join(
                df_out[1:],
                how = "outer"
            )
            if len(df_out) > 1
            else df_out[0]
        )
        """;
        df_out = pd.concat(df_out, axis = 1)
        df_out.sort_values(by = fields_index, inplace = True) 
        df_out.reset_index(drop = False, inplace = True) 
            
    return df_out, dict_modvar_to_fields, dict_modvar_to_ordered_cats

In [5]:
dir_repo_data = "/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data"
t0 = time.time()
df_complete, dict_modvar_to_fields, dict_modvar_to_ordered_cats = read_inputs_from_repository(dir_repo_data, None, sa.model_attributes)
sf.get_time_elapsed(t0)

appended frac_agrc_cereals_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_fibers_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_fruits_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_herbs_and_other_perennial_crops_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_nuts_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_other_annual_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_other_woody_perennial_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_pulses_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_rice_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_sugar_cane_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_tubers_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_vegetables_and_vines_cl2_dry
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_bevs_and_spices_cl1_temperate
(8815, 2)
(8815, 3)
(8815, 3)


appended frac_agrc_cereals_cl1_temperate
(8815, 2)
(8815, 3)
(8815

348.05

In [32]:
df_complete.drop(["index"], axis = 1, inplace = True) if ("index" in df_complete.columns) else None
df_complete["year"] = df_complete["year"].astype(int)
df_complete = df_complete[~df_complete["iso_code3"].isna()]
df_complete = df_complete[~df_complete["frac_agrc_bevs_and_spices_cl2_dry"].isna()].reset_index(drop = True)

In [42]:

#df_complete.dropna()
df2020 = df_complete[df_complete["year"].isin([2020])]
df20202 = df2020[["iso_code3", "year"] + [x for x in df2020.columns if "scoe" in x]]

check = df20202[df20202["iso_code3"].isin(["CHL"])]

for x in check.columns:
    if len(check[x].dropna()) == 0:
        print(x)

In [452]:
fields_index = ["iso_code3", "year"]
#df_var[1].set_index(fields_index, inplace = True)

In [456]:
df_var_cur = df_var[1]

In [442]:
df_var = pd.concat(df_var, axis = 0) if (len(df_var) > 0) else None

In [458]:
#df_var[["iso_code3", "year"]]

In [459]:
inds_prev = df_var[0].index

In [481]:
fp_read2 = "/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/IPPU/net_imports_cement_clinker_tonne/input_to_sisepuede/historical/net_imports_cement_clinker_tonne.csv"

fp_read == fp_read2

False

In [486]:
df = pd.read_csv(fp_read)
dict_rnm = dict((x, x.lower()) for x in df.columns)
df.rename(columns = dict_rnm, inplace = True)
df[fields_index].drop_duplicates().shape# == df[fields_index].shape


(5431, 2)

In [387]:
df = pd.read_csv(fp_read)
df[
    df["iso_code3"].isin(["EGY"]) 
    & df["Year"].isin([2019])
]

Unnamed: 0,Year,Nation,iso_code3,population_gnrl_urban
1768,2019,"Egypt, Arab Rep.",EGY,39966800.0


In [301]:
fp_read_2 = '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/SocioEconomic/population_gnrl_urban/input_to_sisepuede/historical/population_gnrl_urban.csv'
df_2 = pd.read_csv(fp_read_2)
df_2[
    df_2["iso_code3"].isin(["EGY"]) 
    & df_2["Year"].isin([2019])
]



Unnamed: 0,Year,Nation,iso_code3,population_gnrl_urban
1505,2019,"Egypt, Arab Rep.",EGY,42895820.0


KeyboardInterrupt: 

In [None]:
df2020 = df[df["year"] == 2020]
vals = []
for k in df.columns:
    tt = len(df[k].dropna())
    
    if tt < len(df):
        vals.append(k)
df[df[vals[0]].isna()]["iso_code3"].unique()

In [90]:
df[
    df["yf_agrc_rice_tonne_ha"].isna()
]

Unnamed: 0,year,iso_code3,frac_agrc_bevs_and_spices_cl2_dry,frac_agrc_cereals_cl2_dry,frac_agrc_fibers_cl2_dry,frac_agrc_fruits_cl2_dry,frac_agrc_herbs_and_other_perennial_crops_cl2_dry,frac_agrc_nuts_cl2_dry,frac_agrc_other_annual_cl2_dry,frac_agrc_other_woody_perennial_cl2_dry,...,yf_agrc_fruits_tonne_ha,yf_agrc_herbs_and_other_perennial_crops_tonne_ha,yf_agrc_nuts_tonne_ha,yf_agrc_other_annual_tonne_ha,yf_agrc_other_woody_perennial_tonne_ha,yf_agrc_pulses_tonne_ha,yf_agrc_rice_tonne_ha,yf_agrc_sugar_cane_tonne_ha,yf_agrc_tubers_tonne_ha,yf_agrc_vegetables_and_vines_tonne_ha
0,2010,ABW,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,,,,,,,,,,
1,2010,AFG,0.994548,0.994548,0.994548,0.994548,0.994548,0.994548,0.994548,0.994548,...,,,,,,,,,,
2,2010,AGO,0.615825,0.615825,0.615825,0.615825,0.615825,0.615825,0.615825,0.615825,...,,,,,,,,,,
3,2010,ALB,0.320181,0.320181,0.320181,0.320181,0.320181,0.320181,0.320181,0.320181,...,,,,,,,,,,
4,2010,AND,0.117647,0.117647,0.117647,0.117647,0.117647,0.117647,0.117647,0.117647,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9058,2050,SXM,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,,,,,,,,,,
9061,2050,TCA,0.086397,0.086397,0.086397,0.086397,0.086397,0.086397,0.086397,0.086397,...,,,,,,,,,,
9072,2050,TUR,0.887015,0.887015,0.887015,0.887015,0.887015,0.887015,0.887015,0.887015,...,,,,,,,,,,
9084,2050,VGB,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,,,,,,,,,,


In [91]:
df_clim = pd.read_csv("/Users/jsyme/Documents/Projects/git_jbus/lac_decarbonization/ref/batch_data_generation/koppen_climate_classifications/climate_fields_by_country.csv")




In [94]:
df_clim[df_clim["iso_alpha_3"].isin(["FRA"])]

Unnamed: 0,iso_alpha_3,time_period,year,frac_agrc_bevs_and_spices_cl1_temperate,frac_agrc_bevs_and_spices_cl1_tropical,frac_agrc_bevs_and_spices_cl2_dry,frac_agrc_bevs_and_spices_cl2_wet,frac_agrc_cereals_cl1_temperate,frac_agrc_cereals_cl1_tropical,frac_agrc_cereals_cl2_dry,...,frac_frst_secondary_cl1_temperate_nutrient_rich,frac_frst_secondary_cl1_tropical,frac_lndu_grasslands_cl1_temperate,frac_lndu_grasslands_cl1_tropical,frac_lndu_grasslands_cl2_dry,frac_lndu_grasslands_cl2_wet,frac_lndu_other_cl2_dry,frac_lndu_other_cl2_wet,frac_lndu_settlements_cl2_dry,frac_lndu_settlements_cl2_wet


In [95]:
df_clim["iso_alpha_3"].unique()

array(['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM',
       'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD',
       'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA',
       'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN',
       'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM',
       'DZA', 'ECU', 'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN', 'FJI',
       'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA', 'GIB', 'GIN', 'GMB',
       'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM', 'GUY', 'HKG',
       'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IRL', 'IRN',
       'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN',
       'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO', 'LBN', 'LBR',
       'LBY', 'LCA', 'LIE', 'LKA', 'LSO', 'LTU', 'LUX', 'LVA', 'MAC',
       'MAF', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEX', 'MHL', 'MKD',
       'MLI', 'MLT',