In [26]:
import os, os.path
import numpy as np
import pandas as pd
#from model_attributes import *
import model_attributes as ma
from attribute_table import AttributeTable
import model_afolu as mafl
import model_ippu as mi
import model_circular_economy as mc
import model_energy as me
import model_socioeconomic as se
from model_socioeconomic import Socioeconomic
import setup_analysis as sa
import support_functions as sf
import importlib
import time
import warnings
import matplotlib.pyplot as plt
from typing import Union
import inspect
import ingestion as ing
import logging
from sisepuede_file_structure import *


# Re-import the project modules so that on-disk edits are picked up without
# restarting the kernel.
# NOTE(review): importlib.reload re-executes a module and rebinds its classes;
# objects created before the reload still reference the previous class objects.
# The "Can't pickle <class 'model_attributes.ModelAttributes'>: it's not the
# same object as model_attributes.ModelAttributes" error recorded further down
# in this notebook is consistent with that -- confirm, and prefer a kernel
# restart (or %autoreload) over manual reloads.
importlib.reload(ma)
importlib.reload(sa)
importlib.reload(sf)
importlib.reload(mafl)
importlib.reload(mc)
importlib.reload(mi)
importlib.reload(me)
importlib.reload(se)

# suppress every warning category for the rest of the session
warnings.filterwarnings("ignore")
import sisepuede as ssp
def _setup_logger(namespace: str, fn_out: Union[str, None] = None) -> logging.Logger:
    """
    Configure logging and return the logger registered under `namespace`.

    The root logger is configured through `logging.basicConfig` at DEBUG
    level; when `fn_out` is given, the root configuration writes to that file
    (opened with mode "w", i.e. overwritten). A console `StreamHandler` at
    DEBUG level is attached to the named logger exactly once, so calling this
    function repeatedly (e.g. re-running a notebook cell) does not multiply
    console output.

    Function Arguments
    ------------------
    - namespace: name passed to `logging.getLogger`
    - fn_out: optional path of a log file; if None, no file is configured

    Returns
    -------
    The configured `logging.Logger`. The same object is also bound to the
    module-level global `logger`.
    """
    global logger

    format_str = "%(asctime)s - %(levelname)s - %(message)s"

    # configure the root logger (basicConfig does nothing if the root logger
    # already has handlers)
    if fn_out is not None:
        logging.basicConfig(
            filename = fn_out,
            filemode = "w",
            format = format_str,
            level = logging.DEBUG
        )
    else:
        logging.basicConfig(
            format = format_str,
            level = logging.DEBUG
        )

    logger = logging.getLogger(namespace)

    # attach the console handler only if this logger does not have one yet;
    # `type(...) is` is used because FileHandler subclasses StreamHandler
    has_console_handler = any(
        type(handler) is logging.StreamHandler
        for handler in logger.handlers
    )
    if not has_console_handler:
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(logging.Formatter(format_str))
        logger.addHandler(ch)

    return logger

# configure logging for this notebook; the log file is log_temp.log in the
# current working directory and is opened with filemode "w" (overwritten)
_setup_logger(__name__, os.path.join(os.getcwd(), "log_temp.log"))

<Logger __main__ (DEBUG)>

In [35]:
##########################
#   LOAD INPUT TABLES    #
##########################

##  setup location of calibrated files by sector

df_fake_data = pd.read_csv(os.path.join(sa.dir_ref, "fake_data", "fake_data_complete.csv"))

# directory holding the calibrated input files; set the SISEPUEDE_DIR_CALIBS
# environment variable to override the machine-specific default below
dir_calibs = os.environ.get(
    "SISEPUEDE_DIR_CALIBS",
    "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/calibrated_input_files_from_edmundo"
)
dict_calibration_file_paths = {
    "af": os.path.join(dir_calibs, "af", "data_complete_future_2022_09_30_test_updated.csv"),
    "ce": os.path.join(dir_calibs, "ce", "data_complete_future_2022_12_13_test.csv"),
    # previously "data_complete_future_2022_12_09_test.csv"
    "en": os.path.join(dir_calibs, "en", "data_complete_future_2023_02_24_with_transformations.csv"),
    "ip": os.path.join(dir_calibs, "ip", "data_ippu_2023_02_07_fixed.csv"),
    # use most recent for socioeconomic
    "se": os.path.join(dir_calibs, "en", "data_complete_future_2022_12_09_test.csv")
}

# TEMPORARY APPROACH (20230220), currently disabled: point every sector key
# ("af", "ce", "en", "ip", "se") at the single combined file
#   os.path.join(dir_calibs, "all", "datos_calibrados_20230221_with_transformations.csv")

# initialize
all_regions = None
dict_isos = {}
dict_calibration_tables = {}
dict_replace_iso = sa.model_attributes.dict_attributes.get("region").field_maps.get("region_abbreviation_to_region")
field_iso = "iso_code3"
field_region = "nation"
fields_drop = ["iso_code3", "year", "nation"] # only apply later
field_time_period = sa.model_attributes.dim_time_period


# load in tables and make some quick modifications
dict_sets = {}
for k, fp_calib in dict_calibration_file_paths.items():
    # read and lower-case the column names (no in-place mutation)
    df_read = pd.read_csv(fp_calib)
    df_read = df_read.rename(
        columns = {x: x.lower() for x in df_read.columns if x != x.lower()}
    )

    # keep non-negative time periods and map the ISO code to a region name;
    # str() guards against non-string ISO values (e.g. NaN), which map to None
    df_read = df_read[df_read[field_time_period] >= 0].reset_index(drop = True)
    df_read[field_region] = [dict_replace_iso.get(str(x).lower()) for x in list(df_read[field_iso])]
    dict_isos[k] = set(df_read[field_iso])

    # fields missing from input file: take from fake data
    # (sorted so that the resulting column order is the same on every run)
    fields_missing = sorted(set(df_fake_data.columns) - set(df_read.columns))
    fields_eliminate = sorted(
        set(df_read.columns)
        - set(df_fake_data.columns)
        - set(fields_drop)
        - {"strategy_id"}
    )

    # NOTE(review): this left merge keeps one row per input row only if
    # df_fake_data has a single row per time period -- confirm
    df_read = pd.merge(
        df_read,
        df_fake_data[[field_time_period] + fields_missing],
        on = [field_time_period],
        how = "left"
    ).drop(columns = fields_eliminate)

    dict_calibration_tables[k] = df_read
    set_merge = set(df_read[field_region])
    dict_sets[k] = set_merge
    # regions must be available in every sector table
    all_regions = set_merge if (all_regions is None) else (all_regions & set_merge)


attr_region = sa.model_attributes.dict_attributes.get("region")
attr_sector = sa.model_attributes.dict_attributes.get("abbreviation_sector")
attr_strat = sa.model_attributes.dict_attributes.get(f"dim_{sa.model_attributes.dim_strategy_id}")
# restrict to regions defined in the region attribute table
all_regions = sorted(list(set(attr_region.key_values) & all_regions))


In [28]:


#####################################################################
#    INITIALIZE AN EMPTY INPUT TEMPLATE OBJECT, USE FOR BUILDING    #
#####################################################################

# look up the template directories registered for the "calibrated" and
# "demo" data modes
file_struct = SISEPUEDEFileStructure()
dir_templates = file_struct.dict_data_mode_to_template_directory.get("calibrated")
dir_templates_demo = file_struct.dict_data_mode_to_template_directory.get("demo")

# database over the calibrated templates (third argument None, demo_q off)
# NOTE(review): the third positional argument is presumably the region
# selection -- confirm against ing.BaseInputDatabase
bid = ing.BaseInputDatabase(
    dir_templates,
    sa.model_attributes,
    None,
    demo_q = False
)

# database over the demo templates, initialised with the first available region
bid_demo = ing.BaseInputDatabase(
    dir_templates_demo,
    sa.model_attributes,
    list(all_regions)[0],
    demo_q = True
)




ERROR: Can't pickle <class 'model_attributes.ModelAttributes'>: it's not the same object as model_attributes.ModelAttributes


In [36]:

warnings.filterwarnings("ignore")


########################################################
#   LOOP OVER SECTOR/REGION TO CONVERT TO TEMPLATES    #
########################################################

importlib.reload(sf)
importlib.reload(ing)
input_template = ing.InputTemplate(None, sa.model_attributes)
dict_sector_abv_to_sector = attr_sector.field_maps.get(f"{attr_sector.key}_to_sector")

for sector_abv in dict_calibration_file_paths:

    t_0 = time.time()

    # calibrated inputs for this sector and the bookkeeping fields to strip
    df_inputs = dict_calibration_tables.get(sector_abv)
    fields_drop_cur = [x for x in fields_drop if x in df_inputs.columns]
    sector = dict_sector_abv_to_sector.get(sector_abv)
    print(f"Starting sector {sector}...")

    # read the baseline-strategy sheet of the "demo" template; it supplies
    # the required base fields and the min/max range columns
    fp_read = bid_demo.get_template_path(list(all_regions)[0], sector)
    df_template = pd.read_excel(
        fp_read,
        sheet_name = input_template.name_sheet_from_index(input_template.baseline_strategy)
    )

    # required base fields first, then columns matching the max regex,
    # then columns matching the min regex
    fields_ext = list(input_template.list_fields_required_base)
    fields_ext.extend(
        x for x in df_template.columns
        if input_template.regex_template_max.match(str(x)) is not None
    )
    fields_ext.extend(
        x for x in df_template.columns
        if input_template.regex_template_min.match(str(x)) is not None
    )
    df_template = df_template[fields_ext].drop_duplicates()

    # sort order depends only on the sector table, not on the region
    if attr_strat.key in df_inputs.columns:
        fields_sort = [attr_strat.key, field_time_period]
    else:
        fields_sort = [field_time_period]

    # build one template per region
    for region in ["brazil", "chile", "ecuador", "mexico"]:#all_regions:

        # rows for this region, without bookkeeping fields, in sorted order
        df_input = df_inputs[df_inputs[field_region] == region]
        df_input = df_input.drop(columns = fields_drop_cur)
        df_input = df_input.sort_values(by = fields_sort).reset_index(drop = True)

        # mark rows as the baseline strategy when no strategy field exists
        if attr_strat.key not in df_input.columns:
            df_input[attr_strat.key] = input_template.baseline_strategy

        # dictionary to export to excel
        dict_write = input_template.template_from_inputs(df_input, df_template, sector_abv)

        # resolve the output path (creating the export directory) and write
        fp_write = bid.get_template_path(region, sector, create_export_dir = True)
        sf.dict_to_excel(fp_write, dict_write)
        print(f"\tCompleted region {region}.")

    t_elapse = sf.get_time_elapsed(t_0)
    print(f"\nSector {sector} complete in {t_elapse} seconds.\n\n")
        
        


Starting sector AFOLU...
	Completed region brazil.
	Completed region chile.
	Completed region ecuador.
	Completed region mexico.

Sector AFOLU complete in 1.92 seconds.


Starting sector Circular Economy...
	Completed region brazil.
	Completed region chile.
	Completed region ecuador.
	Completed region mexico.

Sector Circular Economy complete in 0.84 seconds.


Starting sector Energy...
	Completed region brazil.
	Completed region chile.
	Completed region ecuador.
	Completed region mexico.

Sector Energy complete in 138.4 seconds.


Starting sector IPPU...
	Completed region brazil.
	Completed region chile.
	Completed region ecuador.
	Completed region mexico.

Sector IPPU complete in 0.62 seconds.


Starting sector Socioeconomic...
	Completed region brazil.
	Completed region chile.
	Completed region ecuador.
	Completed region mexico.

Sector Socioeconomic complete in 0.3 seconds.




In [29]:
######################################################
#    AGGREGATED DATABASE FOR HERMILLO/CALIBRATION    #
######################################################

def exp_aggregate_db(
    dict_calibration_tables: dict,
    dir_calibs: str,
    tag: str
) -> pd.DataFrame:
    """
    Build the aggregate calibration database across sectors, write it to a
    CSV file, and return it.

    For every sector abbreviation in the "abbreviation_sector" attribute
    table, the sector's calibration table is subset to strategy_id 0, reduced
    to the index fields ("time_period", "iso_code3") plus that sector's
    variables, and inner-merged with the other sectors on the index fields.

    Function Arguments
    ------------------
    - dict_calibration_tables: dictionary mapping a sector abbreviation to
        its calibration DataFrame
    - dir_calibs: directory the CSV is written to
    - tag: suffix used in the output file name
        (sisepuede_aggregate_calibration_db_{tag}.csv)

    Returns
    -------
    The aggregate DataFrame (index fields followed by the sorted variable
    fields).

    Raises
    ------
    - KeyError if a sector abbreviation has no table in
        dict_calibration_tables
    - ValueError if no sector is available to aggregate
    """
    attr_sec = sa.model_attributes.dict_attributes.get("abbreviation_sector")
    attr_subsec = sa.model_attributes.dict_attributes.get("abbreviation_subsector")

    # subsector name -> subsector abbreviation -> sector name
    dict_subsec_to_abv_subsec = attr_subsec.field_maps.get(f"subsector_to_{attr_subsec.key}")
    dict_abv_subsec_to_sec = attr_subsec.field_maps.get(f"{attr_subsec.key}_to_sector")
    dict_subsec_to_sec = {
        subsec: dict_abv_subsec_to_sec.get(abv_subsec)
        for subsec, abv_subsec in dict_subsec_to_abv_subsec.items()
    }
    dict_abv_sec_to_sec = attr_sec.field_maps.get(f"{attr_sec.key}_to_sector")

    # one row per variable (time period 0 only), labelled with its sector
    df_vars = sa.model_attributes.build_variable_dataframe_by_sector(None)
    df_vars = df_vars[
        df_vars["time_period"].isin([0])
    ].drop(["time_period"], axis = 1).reset_index(drop = True)
    df_vars["sector"] = df_vars["subsector"].replace(dict_subsec_to_sec)

    fields_index = ["time_period", "iso_code3"]
    fields_data = []
    dict_subset = {"strategy_id": [0]}

    df_out = None

    for abv, sector in dict_abv_sec_to_sec.items():

        vars_ext = list(df_vars[df_vars["sector"] == sector]["variable"])
        fields_data += vars_ext

        df = dict_calibration_tables.get(abv)
        if df is None:
            raise KeyError(f"No calibration table found for sector abbreviation '{abv}'.")

        # baseline strategy only, restricted to index + sector variables
        df = sf.subset_df(df, dict_subset)[fields_index + vars_ext]
        df = df.drop(columns = ["strategy_id"], errors = "ignore")

        df_out = df if (df_out is None) else pd.merge(df_out, df, on = fields_index, how = "inner")

    if df_out is None:
        raise ValueError("No sectors available to build the aggregate calibration database.")

    fields_data.sort()
    df_out = df_out[fields_index + fields_data]

    # export
    fp_out = os.path.join(dir_calibs, f"sisepuede_aggregate_calibration_db_{tag}.csv")
    print(f"exporting calibration db to: {fp_out}")
    df_out.to_csv(
        fp_out,
        index = False,
        encoding = "UTF-8"
    )

    return df_out

# NOTE(review): the tag is embedded in the exported file name; the input files
# used in this notebook are dated 2022-09 through 2023-02, so confirm that
# "20220220" is not a typo for 2023
df_calib = exp_aggregate_db(dict_calibration_tables, dir_calibs, "20220220")



exporting calibration db to: /Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/calibrated_input_files_from_edmundo/sisepuede_aggregate_calibration_db_20220220.csv


In [30]:
# show the first row of every column whose name starts with "efficfactor"
df_calib[[x for x in df_calib.columns if (x.startswith("efficfactor"))]].iloc[0]




efficfactor_ccsq_heat_energy_direct_air_capture_geothermal                0.9500
efficfactor_ccsq_heat_energy_direct_air_capture_hydrogen                  0.8000
efficfactor_ccsq_heat_energy_direct_air_capture_natural_gas               0.8000
efficfactor_enfu_industrial_energy_fuel_biomass                           0.6000
efficfactor_enfu_industrial_energy_fuel_coal                              0.6000
efficfactor_enfu_industrial_energy_fuel_coke                              0.6000
efficfactor_enfu_industrial_energy_fuel_diesel                            0.7500
efficfactor_enfu_industrial_energy_fuel_electricity                       0.9900
efficfactor_enfu_industrial_energy_fuel_gas_furnace                       0.8000
efficfactor_enfu_industrial_energy_fuel_gas_petroleum_liquid              0.7500
efficfactor_enfu_industrial_energy_fuel_gasoline                          0.7500
efficfactor_enfu_industrial_energy_fuel_hydrogen                          0.8000
efficfactor_enfu_industrial_

In [24]:
# NOTE(review): debugging scratch cell. It reads `df_cur_out` and `dict_is`
# directly off the `ingestion` module; the recorded output of this cell is
# "AttributeError: module 'ingestion' has no attribute 'df_cur_out'", so the
# module as loaded does not define them (presumably temporary debug globals).
# Remove this cell or restore those attributes before relying on it.
importlib.reload(sf)
df_cur = ing.df_cur_out
df_base = ing.dict_is.get(0)

sf.filter_df_on_reference_df_rows(
    df_cur,
    df_base,
    ["subsector", "variable", "time_period"],#'subsector', 'variable', 'time_period'],
    ["value"],
    ["variable"]
)


AttributeError: module 'ingestion' has no attribute 'df_cur_out'

In [96]:
# empty row slice: displays only the column headers of df_base
df_base.iloc[0:0]

Unnamed: 0,subsector,variable,time_period,value


In [69]:
# NOTE(review): debugging scratch cell. `df_input`, `df_template` and
# `sector_abv` are not defined here; they are whatever the sector/region
# conversion loop left behind on its last iteration, so the result depends on
# execution order. The recorded output of this cell ends in
# "TypeError: Can only merge Series or DataFrame objects, a <class 'NoneType'>
# was passed".
#df_template[df_template["subsector"] == sa.model_attributes.subsec_name_scoe]
importlib.reload(ing)
warnings.filterwarnings("ignore")
input_template = ing.InputTemplate(
    None,
    sa.model_attributes
)
input_template.template_from_inputs(
    df_input, 
    df_template, 
    sector_abv
)

starting strat 0
starting strat 3001
starting strat 3002
starting strat 3003
starting strat 3004
starting strat 3005
starting strat 3006
starting strat 3007
starting strat 3008
starting strat 3009
starting strat 3010
starting strat 3011
starting strat 3012
starting strat 3013
starting strat 3014
starting strat 3015
starting strat 3016
starting strat 3017
starting strat 3018
starting strat 3019
starting strat 3020


TypeError: Can only merge Series or DataFrame objects, a <class 'NoneType'> was passed

In [66]:
# variable table restricted to the "Energy" sector, requested with time periods
df_base = sa.model_attributes.build_variable_dataframe_by_sector(
    ["Energy"],
    field_subsector = input_template.field_req_subsector,
    field_variable = input_template.field_req_variable,
    include_time_periods = True
)

# distinct subsectors having at least one variable whose name starts with "scalar"
set(
    df_base.loc[
        [x.startswith("scalar") for x in list(df_base["variable"])],
        "subsector"
    ]
)

{'Industrial Energy', 'Stationary Combustion and Other Energy'}

In [67]:
# NOTE(review): inspects `dfi` on the `ingestion` module; the recorded output
# is a DataFrameGroupBy object -- presumably a temporary debug global, confirm
# and remove
ing.dfi

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x13bc5f6d0>

In [51]:
# Generate "name = self.name if (name is None) else name" boilerplate from a
# text file listing one "name: type" entry per line.
# NOTE(review): machine-specific absolute path; move to a configurable location
# the file is only read, so open it read-only ("r+" also requires write access)
with open("/Users/jsyme/Desktop/untitled_5.txt", "r") as fl:
    lines = fl.readlines()

# skip lines containing "#", keep the text before the first ":" with
# surrounding whitespace removed, and ignore blank entries
lines = [x for x in lines if not ("#" in x)]
lines = [x.split(":")[0].strip() for x in lines]
lines = [x for x in lines if x]
lines = [f"{x} = self.{x} if ({x} is None) else {x}" for x in lines]
#lines = [x.strip().split(":")[0] + ": Union[str, none] = None," for x in lines]

for x in sorted(lines):
    print(x)

field_req_normalize_group = self.field_req_normalize_group if (field_req_normalize_group is None) else field_req_normalize_group
field_req_subsector = self.field_req_subsector if (field_req_subsector is None) else field_req_subsector
field_req_trajgroup_no_vary_q = self.field_req_trajgroup_no_vary_q if (field_req_trajgroup_no_vary_q is None) else field_req_trajgroup_no_vary_q
field_req_uniform_scaling_q = self.field_req_uniform_scaling_q if (field_req_uniform_scaling_q is None) else field_req_uniform_scaling_q
field_req_variable = self.field_req_variable if (field_req_variable is None) else field_req_variable
field_req_variable_trajectory_group = self.field_req_variable_trajectory_group if (field_req_variable_trajectory_group is None) else field_req_variable_trajectory_group
field_req_variable_trajectory_group_trajectory_type = self.field_req_variable_trajectory_group_trajectory_type if (field_req_variable_trajectory_group_trajectory_type is None) else field_req_variable_trajectory_gro