In [183]:
# add path to sisepuede to sys.path in python
import sys
import pathlib
import warnings
warnings.filterwarnings("ignore")


# root directory holding the local git checkouts
path_git = pathlib.Path("/Users/usuario/git")
path_pipeline = path_git.joinpath("sisepuede_data_pipeline")

# make each checkout importable, skipping any that are already on sys.path
# NOTE(review): "sisepuede_juypyter" is spelled differently from the
#   "sisepuede_jupyter" comment next to the local imports -- confirm the
#   directory name on disk before changing either one
for subdir in ("sisepuede", "sisepuede_data_pipeline", "sisepuede_juypyter"):
    _PATH_CUR = path_git / subdir
    if str(_PATH_CUR) in sys.path:
        continue
    sys.path.append(str(_PATH_CUR))


import importlib
import matplotlib.pyplot as plt
import numpy as np
import os, os.path
import pandas as pd
import sisepuede.core.attribute_table as att
import sisepuede.core.support_classes as sc
import sisepuede.legacy.data_api as api
import sisepuede.manager.sisepuede_examples as sxl
import sisepuede.manager.sisepuede_file_structure as sfs
import sisepuede.manager.sisepuede_models as sm
import sisepuede.utilities._plotting as spu
import sisepuede.utilities._toolbox as sf
import sisepuede.visualization.plots as svp


import time
from typing import *

# from sisepuede_data_pipeline
import lib.process_utilities as pu
import lib.sisepuede_data_constructs as dc
import lib._util as lutil

# from sisepuede_jupyter
import temp_update_fields_from_wv_to_main as temp 


# Setup SISEPUEDE elements

In [184]:
def get_file_structure(
    y0: int = 2015,
    y1: int = 2070,
) -> Tuple[sfs.SISEPUEDEFileStructure, att.AttributeTable]:
    """Instantiate a SISEPUEDEFileStructure and replace its time period
        attribute table with one spanning years y0 through y1 (inclusive).

    Function Arguments
    ------------------
    - y0: first year to include in the time period attribute table
    - y1: last year to include in the time period attribute table

    Returns
    -------
    Tuple of the form (file_struct, attribute_time_period), where
        attribute_time_period is the new AttributeTable that was passed to
        `model_attributes.update_dimensional_attribute_table()`.
    """
    # directories are not initialized when building the file structure
    file_struct = sfs.SISEPUEDEFileStructure(initialize_directories = False, )
    model_attributes = file_struct.model_attributes

    # field names used for the time period key and the year
    key_time_period = model_attributes.dim_time_period
    key_year = model_attributes.field_dim_year

    # one row per year; time periods are numbered 0, 1, 2, ... in year order
    years = np.arange(y0, y1 + 1).astype(int)
    df_time_periods = pd.DataFrame(
        {
            key_time_period: range(len(years)),
            key_year: years,
        }
    )
    attribute_time_period = att.AttributeTable(df_time_periods, key_time_period, )

    # push the new table into the ModelAttributes held by the file structure
    model_attributes.update_dimensional_attribute_table(attribute_time_period, )

    return file_struct, attribute_time_period

    
# working directories, built from the directory the notebook runs in
_PATH_CUR = pathlib.Path.cwd()
_PATH_DATA = _PATH_CUR / "data"
_PATH_OUTPUT = _PATH_DATA / "output"

# prefix for the per-region export file names
_PREFIX_FILENAME_DATASETBUILD_BY_REGION = "sisepuede_raw_inputs_latest_"


# examples, file structure, and the updated time period attribute table
_EXAMPLES = sxl.SISEPUEDEExamples()
_FILE_STRUCTURE, _ATTRIBUTE_TABLE_TIME_PERIOD = get_file_structure()

# model attributes and the support classes built from them
matt = _FILE_STRUCTURE.model_attributes
regions = sc.Regions(matt, )
time_periods = sc.TimePeriods(matt, )

# models, with the electricity model allowed and Julia initialized up front
models = sm.SISEPUEDEModels(
    matt,
    allow_electricity_run = True,
    fp_julia = _FILE_STRUCTURE.dir_jl,
    fp_nemomod_reference_files = _FILE_STRUCTURE.dir_ref_nemo,
    initialize_julia = True,
)

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


Precompiling NemoMod...
Info Given NemoMod was explicitly requested, output will be shown live [0K
[0KERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
   1438.6 ms  ? NemoMod
[ Info: Precompiling NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72] 
ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
┌ Info: Skipping precompilation due to precompilable error. Importing NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72].
└   exception = Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.


In [148]:
##  SOME GLOBALS

df_example_input = _EXAMPLES("input_data_frame")
tab = regions.attributes.table


##  NOTE! These regions are excluded from the build for the moment
#
# Incomplete GDP:
#   - antigua_and_barbuda
#   - cayman_islands
#
# Other issues:
#   - british_virgin_islands
#   - cuba
#   - curacao
#   - dominica
#   - grenada
#   - saint_kitts_and_nevis
#   - saint_martin
#   - sint_maarten
#   - turks_and_caicos_islands
#   - united_states_virgin_islands
#
_REGIONS_TO_CHECK = [
    "antigua_and_barbuda",
    "british_virgin_islands",
    "cayman_islands",
    "cuba",
    "curacao",
    "dominica",
    "grenada",
    "saint_kitts_and_nevis",
    "saint_martin",
    "sint_maarten",
    "turks_and_caicos_islands",
    "united_states_virgin_islands",
]

# regions to build: every "Latin America and the Caribbean" region that is not
# excluded above, plus a few additional regions; sorted by region name
_REGIONS_BUILD = sorted(
    list(
        tab.loc[
            tab["un_sub_region"].isin(["Latin America and the Caribbean"])
            & ~tab[regions.key].isin(_REGIONS_TO_CHECK),
            regions.key,
        ]
        .unique()
    )
    + ["bulgaria", "egypt", "libya", "morocco"]
)

# ISO codes in the same order as _REGIONS_BUILD
_REGIONS_ISO = [
    regions.return_region_or_iso(region, return_type = "iso", )
    for region in _REGIONS_BUILD
]


# Setup old repository and read data

In [3]:
# batch data repository read from the local sisepuede_data checkout
repo_old = api.SISEPUEDEBatchDataRepository("/Users/usuario/git/sisepuede_data", matt, )

# read everything from the old repository; progress is printed around the call
print("Getting old repository data...")
df_old = repo_old.read(None)
print("Old repository data complete.")

Getting old repository data...
Old repository data complete.


# Setup new repository

In [149]:
# reload the data constructs module before instantiating from it
importlib.reload(dc)

# NOTE(review): `_PATH_OUTPUT_database` is an unusual keyword name -- confirm
#   it matches the SISEPUEDEDataConstructs signature
construct = dc.SISEPUEDEDataConstructs(
    _PATH_OUTPUT_database = "/Users/usuario/git/sisepuede_data_pipeline/sisepuede_inputs.sqlite",
)

# repository configured with a single local path
path_repo = pathlib.Path("/Users/usuario/SISEPUEDE_DATA_REPOSITORY")
repo = pu.Repository({"local": {"path": str(path_repo)}})

# inputs built from the pipeline database for the regions being built
df_from_pipeline = construct.build_inputs_from_database(
    join = "outer",
    regions_keep = _REGIONS_BUILD,
)


In [269]:

def function_combine(
    df_repo_new: pd.DataFrame,
    df_repo_old: pd.DataFrame,
    df_example: pd.DataFrame,
    region_iso: str,
    years: Union[List[int], None] = None,
) -> pd.DataFrame:
    """Combine DataFrames for a single region, layering sources in a
        hierarchy:

        1. Rows for `region_iso` from the old repository (`df_repo_old`),
            expanded to every year in `years` and forward/back filled.
        2. Pipeline data (`df_repo_new`), matched against the base on year
            and ISO code with `sf.match_df_to_target_df()`.
        3. Input variable fields that are still missing, pulled from
            `df_example` by time period and forward/back filled.
        4. `temp.update_fields()` is applied to the result.

    NOTE: relies on the notebook globals `construct`, `matt`, `regions`,
        `repo_old`, `sf`, `temp`, and `time_periods`.

    Function Arguments
    ------------------
    - df_repo_new: DataFrame of inputs built from the new pipeline
    - df_repo_old: DataFrame read from the old repository
    - df_example: example input DataFrame used to fill fields that neither
        repository provides
    - region_iso: ISO code of the region to build
    - years: optional list of years to build. If not list-like, defaults to
        `time_periods.all_years`

    Returns
    -------
    DataFrame returned by `temp.update_fields()` for the combined inputs.
    """

    # start by setting years
    if not sf.islistlike(years):
        years = time_periods.all_years


    ##  FORMAT A BASE

    # NOTE: the year filter is applied before the rename below, so it relies
    #   on `time_periods.field_year` already being a column of df_repo_old
    rows_keep = (
        df_repo_old[repo_old.field_repo_iso].isin([region_iso])
        & (df_repo_old[time_periods.field_year] >= min(years))
    )

    df_base = (
        df_repo_old[rows_keep]
        .copy()
        .rename(
            columns = {
                repo_old.field_repo_iso: regions.field_iso,
                repo_old.field_repo_year: time_periods.field_year,
            }
        )
    )
    df_base[time_periods.field_year] = df_base[time_periods.field_year].astype(int)

    # expand to every requested year, then fill gaps forward and backward
    df_base = (
        pd.merge(
            pd.DataFrame({time_periods.field_year: years, }),
            df_base,
            how = "left",
            on = time_periods.field_year,
        )
        .ffill()
        .bfill()
    )


    ##  ADD IN PIPELINE DATA

    # use the frame passed by the caller rather than a notebook global
    df_out = sf.match_df_to_target_df(
        df_base,
        df_repo_new,
        [
            construct.time_periods.field_year,
            regions.field_iso,
        ],
        overwrite_only = False,
    )

    df_out[time_periods.field_year] = df_out[time_periods.field_year].astype(int)
    df_out = time_periods.years_to_tps(df_out, )


    ##  PULL MISSING FIELDS FROM EXAMPLE DF

    # input variable fields in the example that the combined frame still lacks
    fields_from_ex = [
        x for x in df_example.columns
        if (x not in df_out.columns)
        and (x in matt.all_variable_fields_input)
    ]

    # merge in from the example by time period; fields_from_ex cannot overlap
    # with df_out, so the time period field is the only shared key
    df_out = (
        pd.merge(
            df_out,
            df_example[fields_from_ex + [time_periods.field_time_period]],
            how = "left",
            on = time_periods.field_time_period,
        )
        .ffill()
        .bfill()
    )


    ##  TEMPORARY SCRIPT FOR MOVING FROM working_version TO latest full version

    df_out = temp.update_fields(
        df_out,
        matt,
    )

    return df_out


# single-region build
# NOTE(review): the variable is named `df_abw`, but the region passed is
#   "ARG" -- confirm which one is intended
df_abw = function_combine(
    df_repo_new = df_from_pipeline,
    df_repo_old = df_old,
    df_example = df_example_input,
    region_iso = "ARG",
)



# Run the next cell to build a composite file and/or export individual files (set `export = True` to do this)

In [274]:

# accumulator for the per-region DataFrames; it is rebound to the combined
# result at the end of this cell, so re-run the whole cell rather than only
# the loop
df_out = []
# set to True to also write one CSV per region to _PATH_OUTPUT
export = False
# years = range(2015, 2071)

def build_path_for_region_file(
    iso: str,
) -> pathlib.Path:
    """Build the output path for a region's CSV file.

    The file name is _PREFIX_FILENAME_DATASETBUILD_BY_REGION followed by
        `iso` and a ".csv" extension; the file is placed in _PATH_OUTPUT.
    """
    return _PATH_OUTPUT / f"{_PREFIX_FILENAME_DATASETBUILD_BY_REGION}{iso}.csv"



# build the combined inputs for each region, optionally exporting each one
for iso in _REGIONS_ISO:
    print(iso)
    df_cur = function_combine(
        df_from_pipeline,
        df_old,
        df_example_input,
        iso,
    )
    # set the ISO code on every row of the region's frame
    df_cur[regions.field_iso] = iso

    # write to the output location?
    if export:
        path_write = build_path_for_region_file(iso, )
        # create the output directory if needed so the write cannot fail on a
        # missing folder
        path_write.parent.mkdir(parents = True, exist_ok = True, )
        df_cur.to_csv(
            path_write,
            encoding = "UTF-8",
            index = False,
        )

    df_out.append(df_cur, )

# combine the per-region frames; df_out is a DataFrame from here on
df_out = sf._concat_df(df_out, )




ARG
ABW
BHS
BRB
BLZ
BOL
BRA
BGR
CHL
COL
CRI
DOM
ECU
EGY
SLV
GTM
GUY
HTI
HND
JAM
LBY
MEX
MAR
NIC
PAN
PRY
PER
PRI
LCA
VCT
SUR
TTO
URY
VEN


In [276]:
# region to inspect; defined here so this cell does not depend on a later
# cell having been run first
region_run = "MEX"

df_out[
    df_out[regions.field_iso].isin([region_run])
]

Unnamed: 0,year,ef_ippu_tonne_nf3_per_tonne_production_chemicals,ef_ippu_tonne_nf3_per_tonne_production_electronics,ef_ippu_tonne_sf6_per_mmm_gdp_other_product_manufacturing,ef_ippu_tonne_sf6_per_tonne_production_chemicals,ef_ippu_tonne_sf6_per_tonne_production_electronics,ef_ippu_tonne_sf6_per_tonne_production_metals,frac_agrc_bevs_and_spices_cl2_dry,frac_agrc_cereals_cl2_dry,frac_agrc_fibers_cl2_dry,...,nemomod_entc_scalar_availability_factor_pp_gas_ccs,nemomod_entc_scalar_availability_factor_pp_geothermal,nemomod_entc_scalar_availability_factor_pp_hydropower,nemomod_entc_scalar_availability_factor_pp_nuclear,nemomod_entc_scalar_availability_factor_pp_ocean,nemomod_entc_scalar_availability_factor_pp_oil,nemomod_entc_scalar_availability_factor_pp_solar,nemomod_entc_scalar_availability_factor_pp_waste_incineration,nemomod_entc_scalar_availability_factor_pp_wind,iso_alpha_3
1176,2015,0.0,0.0,0.000264,0.0,3.663322e-08,1.403871e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1177,2016,0.0,0.0,0.000265,0.0,3.982696e-08,1.57288e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1178,2017,0.0,0.0,0.000266,0.0,3.638876e-08,1.617559e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1179,2018,0.0,0.0,0.000265,0.0,3.606106e-08,1.608518e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1180,2019,0.0,0.0,0.000265,0.0,3.455479e-08,1.636647e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1181,2020,0.0,0.0,0.000263,0.0,3.888212e-08,1.862774e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1182,2021,0.0,0.0,0.000267,0.0,3.687091e-08,1.655789e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1183,2022,0.0,0.0,0.000267,0.0,3.687091e-08,1.655789e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1184,2023,0.0,0.0,0.000267,0.0,3.687091e-08,1.655789e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX
1185,2024,0.0,0.0,0.000267,0.0,3.687091e-08,1.655789e-07,0.729069,0.729069,0.729069,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,MEX


# option to test


In [None]:
# region to run through the models
region_run = "MEX"

# project using only that region's rows, re-indexed from zero
models.project(
    df_out.loc[df_out[regions.field_iso].isin([region_run])].reset_index(drop = True),
    time_periods_base = np.arange(12),
    verbose = True,
)

2025-22-Sep 17:59:49.714 Opened SQLite database at /Users/usuario/git/sisepuede_region_nbs/generic_regions/tmpp8wwch9h.sqlite.
2025-22-Sep 17:59:49.862 Added NEMO structure to SQLite database at /Users/usuario/git/sisepuede_region_nbs/generic_regions/tmpp8wwch9h.sqlite.
2025-22-Sep 18:00:06.820 Started modeling scenario. NEMO version = 2.2.0, solver = HiGHS.
2025-22-Sep 18:00:06.821 Validated run-time arguments.
2025-22-Sep 18:00:06.821 Connected to scenario database. Path = /Users/usuario/git/sisepuede_region_nbs/generic_regions/tmpp8wwch9h.sqlite.
2025-22-Sep 18:00:07.198 Dropped pre-existing result tables from database.
2025-22-Sep 18:00:07.410 Created parameter views and indices.
2025-22-Sep 18:00:07.417 Created temporary tables.
2025-22-Sep 18:00:07.426 Started optimizing following years: [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011].
2025-22-Sep 18:00:07.427 Verified that transmission modeling is not enabled.
2025-22-Sep 18:00:07.621 Defined dimensions.