In [1]:
import importlib
import inspect
import logging
import os, os.path
import re
import time
import warnings
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#from model_attributes import *
import model_attributes as ma
from attribute_table import AttributeTable
import model_afolu as mafl
import model_ippu as mi
import model_circular_economy as mc
import model_energy as me
import model_socioeconomic as se
from model_socioeconomic import Socioeconomic
import setup_analysis as sa
import support_functions as sf
import ingestion as ing
from sisepuede_file_structure import *


#importlib.reload(ma)
importlib.reload(sa)
importlib.reload(sf)
importlib.reload(mafl)
importlib.reload(mc)
importlib.reload(mi)
importlib.reload(me)
importlib.reload(se)


import sisepuede as ssp
def _setup_logger(namespace: str, fn_out: Union[str, None] = None) -> None:
    global logger
    
    format_str = "%(asctime)s - %(levelname)s - %(message)s"
    # configure
    if fn_out is not None:
        logging.basicConfig(
            filename = fn_out,
            filemode = "w",
            format = format_str,
            level = logging.DEBUG
        )
    else:
        logging.basicConfig(
            format = format_str,
            level = logging.DEBUG
        )
        
    logger = logging.getLogger(namespace)
    # create console handler and set level to debug
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    # create formatter
    formatter = logging.Formatter(format_str)
    # add formatter to ch
    ch.setFormatter(formatter)
    # add ch to logger
    logger.addHandler(ch)

    return logger

# Set up the notebook's logger, passing a log file path (log_temp.log in the current working directory) to _setup_logger
_setup_logger(__name__, os.path.join(os.getcwd(), "log_temp.log"))



<Logger __main__ (DEBUG)>

In [24]:
##########################
#   LOAD INPUT TABLES    #
##########################

##  setup location of calibrated files by sector

df_fake_data = pd.read_csv(os.path.join(sa.dir_ref, "fake_data", "fake_data_complete.csv"))
# NOTE(review): machine-specific absolute path--point this at a configurable data directory before sharing the notebook
dir_calibs = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/calibrated_input_files_from_edmundo"
dict_calibration_file_paths = {
    "af": os.path.join(dir_calibs, "af/data_complete_future_2022_09_30_test.csv"),
    "ce": os.path.join(dir_calibs, "ce/data_complete_future_2022_06_13_cleaned.csv"),
    "en": os.path.join(dir_calibs, "en/data_complete_future_2022_12_09_test.csv"),
    "ip": os.path.join(dir_calibs, "ip/data_complete_future_2022_06_28_pivot_new_rescaled.csv"),
    # use most recent for socioeconomic
    "se": os.path.join(dir_calibs, "en/data_complete_future_2022_12_09_test.csv")
}


# initialize
all_regions = None
dict_calibration_tables = {}
field_region = "nation"
fields_drop = ["iso_code3", "year", "nation"] # only apply later
field_time_period = sa.model_attributes.dim_time_period

# load in tables and make some quick modifications
dict_sets = {}
for k in ["ce"]:#dict_calibration_file_paths.keys():
    # read and lower-case the column names (new frame rather than an in-place rename)
    df_read = pd.read_csv(dict_calibration_file_paths.get(k))
    df_read = df_read.rename(columns = str.lower)

    # filter time periods and do some field cleaning
    df_read = df_read[df_read[field_time_period] >= 0].reset_index(drop = True)
    df_read[field_region] = [x.lower().replace(" ", "_").split(",")[0] for x in list(df_read[field_region])]

    # fields missing from input file: take from fake data
    # (sets are used for membership only; iterating the column indices keeps the
    # field order reproducible between kernels instead of following set ordering)
    cols_fake = set(df_fake_data.columns)
    cols_read = set(df_read.columns)
    fields_missing = [x for x in df_fake_data.columns if x not in cols_read]
    fields_eliminate = [
        x for x in df_read.columns
        if (x not in cols_fake) and (x not in fields_drop)
    ]

    # fill the missing fields by time period, then remove fields unknown to the fake data
    df_read = pd.merge(
        df_read,
        df_fake_data[[field_time_period] + fields_missing],
        on = [field_time_period],
        how = "left"
    ).drop(columns = fields_eliminate)

    # store the table and keep a running intersection of the regions seen in every table
    dict_calibration_tables.update({k: df_read})
    set_merge = set(df_read[field_region])
    dict_sets.update({k: set_merge})
    all_regions = set_merge if (all_regions is None) else (all_regions & set_merge)


attr_region = sa.model_attributes.dict_attributes.get("region")
attr_sector = sa.model_attributes.dict_attributes.get("abbreviation_sector")
attr_strat = sa.model_attributes.dict_attributes.get(f"dim_{sa.model_attributes.dim_strategy_id}")
# keep only regions that are also keys of the region attribute table
all_regions = sorted(list(set(attr_region.key_values) & all_regions))



In [30]:
#df = dict_calibration_tables["ce"]
#df = pd.read_csv(os.path.join(dir_calibs, "ce/data_complete_future_2022_06_13_cleaned.csv"))
#df = pd.read_csv(os.path.join(dir_calibs, "ce/data_complete_future_2022_12_08_test.csv"))
#df = data_complete_future_2022_12_08_test
#set(df[[x for x in df.columns if x.startswith("frac_wali_ww_indus")]].sum(axis = 1))

In [31]:


#####################################################################
#    INITIALIZE AN EMPTY INPUT TEMPLATE OBJECT, USE FOR BUILDING    #
#####################################################################

# re-import the ingestion module and install a blanket "ignore" warnings filter
importlib.reload(ing)
warnings.filterwarnings("ignore")

# template object built without an input file
input_template = ing.InputTemplate(None, sa.model_attributes)

# template directories for the "calibrated" and "demo" data modes
file_struct = SISEPUEDEFileStructure()
dir_templates = file_struct.dict_data_mode_to_template_directory.get("calibrated")
dir_templates_demo = file_struct.dict_data_mode_to_template_directory.get("demo")

# input databases: calibrated (no region specified) and demo (first available region)
bid = ing.BaseInputDatabase(
    dir_templates,
    sa.model_attributes,
    None,
    demo_q = False
)
bid_demo = ing.BaseInputDatabase(
    dir_templates_demo,
    sa.model_attributes,
    list(all_regions)[0],
    demo_q = True
)




In [32]:
########################################################
#   LOOP OVER SECTOR/REGION TO CONVERT TO TEMPLATES    #
########################################################

dict_sector_abv_to_sector = attr_sector.field_maps.get(f"{attr_sector.key}_to_sector")

for sector_abv in ["ce"]:#dict_calibration_file_paths.keys():

    t_0 = time.time()

    # calibrated inputs for the sector and the subset of drop fields they actually contain
    df_inputs = dict_calibration_tables.get(sector_abv)
    fields_drop_cur = [x for x in fields_drop if x in df_inputs.columns]
    sector = dict_sector_abv_to_sector.get(sector_abv)
    print(f"Starting sector {sector}...")

    # read the baseline-strategy sheet of the demo template (first region); it supplies the ranges
    fp_read = bid_demo.get_template_path(list(all_regions)[0], sector)
    sheet_baseline = input_template.name_sheet_from_index(input_template.baseline_strategy)
    df_template = pd.read_excel(fp_read, sheet_name = sheet_baseline)

    # fields to extract: required base fields, then max-pattern columns, then min-pattern columns
    fields_ext = list(input_template.list_fields_required_base)
    for regex in (input_template.regex_template_max, input_template.regex_template_min):
        fields_ext.extend(x for x in df_template.columns if regex.match(str(x)) is not None)
    df_template = df_template[fields_ext].drop_duplicates()

    # build and write one template per region
    for region in all_regions:

        # isolate the region, strip the drop fields, and order by time period
        df_input = df_inputs[df_inputs[field_region] == region]
        df_input = df_input.drop(columns = fields_drop_cur)
        df_input = df_input.sort_values(by = [field_time_period]).reset_index(drop = True)

        # mark every row as belonging to the baseline strategy
        df_input[attr_strat.key] = input_template.baseline_strategy

        # dictionary to export to excel
        dict_write = input_template.template_from_inputs(df_input, df_template, sector_abv)

        # export
        fp_write = bid.get_template_path(region, sector, create_export_dir = True)
        sf.dict_to_excel(fp_write, dict_write)
        print(f"\tCompleted region {region}.")

    t_elapse = sf.get_time_elapsed(t_0)
    print(f"\nSector {sector} complete in {t_elapse} seconds.\n\n")
        
        

    

Starting sector Circular Economy...
	Completed region argentina.
	Completed region bahamas.
	Completed region barbados.
	Completed region belize.
	Completed region brazil.
	Completed region chile.
	Completed region colombia.
	Completed region costa_rica.
	Completed region dominican_republic.
	Completed region ecuador.
	Completed region el_salvador.
	Completed region guatemala.
	Completed region guyana.
	Completed region haiti.
	Completed region honduras.
	Completed region jamaica.
	Completed region mexico.
	Completed region nicaragua.
	Completed region panama.
	Completed region paraguay.
	Completed region peru.
	Completed region uruguay.

Sector Circular Economy complete in 6.06 seconds.




In [59]:
##  Scratch cell: stand-ins for the arguments of a routine that is being stepped through by hand
#   (presumably an InputTemplate method--confirm). Assumes `df_input`, `df_template`, and
#   `sector_abv` are still bound from the template-building loop.
df_variable_information = df_template
sectors = sector_abv

# Optional overrides; None means "fall back to the matching attribute on input_template".
# NOTE: these lines must not end in a comma--`x: T = None,` binds the tuple (None,), which
# is not None, so the fallbacks below would be skipped and the field names left as tuples.
field_key_strategy: Union[str, None] = None
field_req_normalize_group: Union[str, None] = None
field_req_subsector: Union[str, None] = None
field_req_trajgroup_no_vary_q: Union[str, None] = None
field_req_uniform_scaling_q: Union[str, None] = None
field_req_variable: Union[str, None] = None
field_req_variable_trajectory_group: Union[str, None] = None
field_req_variable_trajectory_group_trajectory_type: Union[str, None] = None
regex_max: Union[re.Pattern, None] = None
regex_min: Union[re.Pattern, None] = None
regex_tp: Union[re.Pattern, None] = None


# attributes
pydim_time_period = input_template.model_attributes.get_dimensional_attribute(input_template.model_attributes.dim_time_period, "pydim")
attr_tp = input_template.model_attributes.dict_attributes.get(pydim_time_period)
# fields: use the override when given, otherwise the template's own field name
field_key_strategy = input_template.attribute_strategy.key if (field_key_strategy is None) else field_key_strategy
field_req_normalize_group = input_template.field_req_normalize_group if (field_req_normalize_group is None) else field_req_normalize_group
field_req_subsector = input_template.field_req_subsector if (field_req_subsector is None) else field_req_subsector
field_req_trajgroup_no_vary_q = input_template.field_req_trajgroup_no_vary_q if (field_req_trajgroup_no_vary_q is None) else field_req_trajgroup_no_vary_q
field_req_uniform_scaling_q = input_template.field_req_uniform_scaling_q if (field_req_uniform_scaling_q is None) else field_req_uniform_scaling_q
field_req_variable = input_template.field_req_variable if (field_req_variable is None) else field_req_variable
field_req_variable_trajectory_group = input_template.field_req_variable_trajectory_group if (field_req_variable_trajectory_group is None) else field_req_variable_trajectory_group
field_req_variable_trajectory_group_trajectory_type = input_template.field_req_variable_trajectory_group_trajectory_type if (field_req_variable_trajectory_group_trajectory_type is None) else field_req_variable_trajectory_group_trajectory_type
field_time_period = input_template.model_attributes.dim_time_period
# regular expressions: anything that is not a compiled pattern falls back to the template's pattern
regex_max = input_template.regex_template_max if not isinstance(regex_max, re.Pattern) else regex_max
regex_min = input_template.regex_template_min if not isinstance(regex_min, re.Pattern) else regex_min
regex_tp = input_template.regex_template_time_period if not isinstance(regex_tp, re.Pattern) else regex_tp


In [66]:
# Display the frame currently bound to `df_input` (the commented-out suffix would select a single column instead)
df_input#["factor_lndu_soil_management_grasslands"]

Unnamed: 0,time_period,population_gnrl_urban,population_gnrl_rural,gdp_mmm_usd,area_gnrl_country_ha,avgload_trns_freight_tonne_per_vehicle_rail_freight,avgload_trns_freight_tonne_per_vehicle_road_heavy_freight,avgload_trns_freight_tonne_per_vehicle_water_borne,avgmass_lvst_animal_buffalo_kg,avgmass_lvst_animal_cattle_dairy_kg,...,pij_lndu_wetlands_to_forests_primary,pij_lndu_wetlands_to_forests_secondary,pij_lndu_wetlands_to_grasslands,pij_lndu_wetlands_to_other,pij_lndu_wetlands_to_settlements,pij_lndu_wetlands_to_wetlands,trajmax_factor_lndu_soil_management_grasslands,trajmix_factor_lndu_soil_management_grasslands,trajmin_factor_lndu_soil_management_grasslands,strategy_id
0,0,39467042.0,3664923.0,594.749285,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.996474,0.9,0.0,0.9,0
1,1,39940546.0,3649821.0,582.37655,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.996377,0.9,0.0,0.9,0
2,2,40410673.0,3634137.0,598.790851,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.99628,0.9,0.0,0.9,0
3,3,40877098.0,3617403.0,583.11812,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.996183,0.9,0.01,0.9,0
4,4,41339570.0,3599141.0,571.304531,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.996086,0.9,0.02,0.9,0
5,5,40083972.38,3485562.816,548.961232,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.995989,0.9,0.03,0.9,0
6,6,40386364.08,3457040.991,554.907685,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.995892,0.9,0.04,0.9,0
7,7,40688755.79,3428519.166,560.854139,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.995795,0.9,0.05,0.9,0
8,8,40991147.49,3399997.341,566.800592,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.995698,0.9,0.06,0.9,0
9,9,41293539.19,3371475.517,572.747045,278040000,2000,20,2000,286.404607,461.884256,...,0.0,0.000316,7.9e-05,0.000158,0.0,0.995601,0.9,0.07,0.9,0


In [67]:
# Exhaust the (index, frame) pairs held in ing.dfi; afterwards `i` and `df` hold the final pair
for i, df in ing.dfi:
    pass

# left-join that frame onto ing.dfb, then show the rows whose strategy_id is missing after the join
df_filt = pd.merge(ing.dfb, df, how = "left")#["strategy_id"]
df_filt.loc[df_filt["strategy_id"].isna()]

Unnamed: 0,subsector,variable,time_period,strategy_id,value
13680,Land Use,factor_lndu_soil_management_grasslands,0,,
13681,Land Use,factor_lndu_soil_management_grasslands,1,,
13682,Land Use,factor_lndu_soil_management_grasslands,2,,
13683,Land Use,factor_lndu_soil_management_grasslands,3,,
13684,Land Use,factor_lndu_soil_management_grasslands,4,,
13685,Land Use,factor_lndu_soil_management_grasslands,5,,
13686,Land Use,factor_lndu_soil_management_grasslands,6,,
13687,Land Use,factor_lndu_soil_management_grasslands,7,,
13688,Land Use,factor_lndu_soil_management_grasslands,8,,
13689,Land Use,factor_lndu_soil_management_grasslands,9,,


In [702]:
#df_template[
#    df_template["variable"] == "timespan_ippu_average_lifetime_of_housing"
#]
#df_input["timespan_ippu_average_lifetime_of_housing"]
#dict_write["strategy_id-0"]["timespan_ippu_average_lifetime_of_housing"]



In [51]:
# Turn a text file of "name: type ..." declarations into
# "name = self.name if (name is None) else name" boilerplate, printed in sorted order.
# NOTE(review): machine-specific absolute path--replace before sharing the notebook.
# The file is only read, so open it read-only ("r") rather than read/write ("r+").
with open("/Users/jsyme/Desktop/untitled_5.txt", "r") as fl:
    lines = fl.readlines()

# skip lines containing "#", then keep the text in front of the first ":" without surrounding whitespace
lines = [x.split(":")[0].strip() for x in lines if "#" not in x]
# blank lines leave an empty name behind; drop them instead of emitting " = self. if ( is None) else "
lines = [f"{x} = self.{x} if ({x} is None) else {x}" for x in lines if x]
#lines = [x.strip().split(":")[0] + ": Union[str, none] = None," for x in lines]

for x in sorted(lines):
    print(x)

field_req_normalize_group = self.field_req_normalize_group if (field_req_normalize_group is None) else field_req_normalize_group
field_req_subsector = self.field_req_subsector if (field_req_subsector is None) else field_req_subsector
field_req_trajgroup_no_vary_q = self.field_req_trajgroup_no_vary_q if (field_req_trajgroup_no_vary_q is None) else field_req_trajgroup_no_vary_q
field_req_uniform_scaling_q = self.field_req_uniform_scaling_q if (field_req_uniform_scaling_q is None) else field_req_uniform_scaling_q
field_req_variable = self.field_req_variable if (field_req_variable is None) else field_req_variable
field_req_variable_trajectory_group = self.field_req_variable_trajectory_group if (field_req_variable_trajectory_group is None) else field_req_variable_trajectory_group
field_req_variable_trajectory_group_trajectory_type = self.field_req_variable_trajectory_group_trajectory_type if (field_req_variable_trajectory_group_trajectory_type is None) else field_req_variable_trajectory_gro

In [708]:
# Scratch values standing in for a routine's arguments. The first one, `subsector`, is left
# commented out here even though later cells read it, so it must already exist in the kernel.
#subsector: str,
key_type = "key_varreqs_partial"
# NOTE(review): `category_ij_tuple` is not defined anywhere in the visible notebook--confirm where it comes from
category_outer_tuple = category_ij_tuple
# both the target field and the field to split on are set to the variable schema
target_field: str = "variable_schema"
field_to_split_on: str = "variable_schema"
variable = None
variable_type = "input"

In [712]:
# Resolve the varreqs attribute key for the subsector once and reuse it for both lookups
# (assumes get_subsector_attribute returns the same key for the same arguments--confirm).
# A stray `sisepude.model_attributes` expression was removed: that name is not defined in
# this notebook and the bare expression had no effect.
key_attribute = sa.model_attributes.get_subsector_attribute(subsector, key_type)
# copies of the variable -> field maps, so later edits do not touch the attribute table itself
dict_vr_vvs = sa.model_attributes.dict_varreqs[key_attribute].field_maps[f"variable_to_{field_to_split_on}"].copy()
dict_vr_vtf = sa.model_attributes.dict_varreqs[key_attribute].field_maps[f"variable_to_{target_field}"].copy()


In [736]:
# variable -> variable type map from the subsector's varreqs attribute table
dict_var_types = sa.model_attributes.dict_varreqs[
    sa.model_attributes.get_subsector_attribute(subsector, key_type)
].field_maps["variable_to_variable_type"]

# variables whose type differs from `variable_type`, compared case-insensitively
drop_vars = [
    var
    for var, var_type in dict_var_types.items()
    if var_type.lower() != variable_type.lower()
]


In [740]:
# Display the value currently bound to `category_outer_tuple`
category_outer_tuple

('$CAT-INDUSTRY-I$', '$CAT-INDUSTRY-J$')