In [1]:
# add the sisepuede checkouts to sys.path in python
import sys
import pathlib
import warnings
warnings.filterwarnings("ignore")


# root directory that holds the local git checkouts used by this notebook
path_git = pathlib.Path("/Users/usuario/git")
path_pipeline = path_git / "sisepuede_data_pipeline"

# make each checkout importable; a checkout that is already on sys.path is
# skipped so that re-running the cell does not add duplicates
# NOTE(review): "sisepuede_juypyter" is spelled differently from the
#   "sisepuede_jupyter" comment above the `temp` import -- confirm which one
#   matches the directory on disk
for name_checkout in (
    "sisepuede",
    "sisepuede_data_pipeline",
    "sisepuede_juypyter",
):
    str_path_checkout = str(path_git / name_checkout)
    if str_path_checkout in sys.path:
        continue

    sys.path.append(str_path_checkout)


import importlib
import matplotlib.pyplot as plt
import numpy as np
import os, os.path
import pandas as pd
import sisepuede.core.attribute_table as att
import sisepuede.core.support_classes as sc
import sisepuede.legacy.data_api as api
import sisepuede.manager.sisepuede_examples as sxl
import sisepuede.manager.sisepuede_file_structure as sfs
import sisepuede.manager.sisepuede_models as sm
import sisepuede.utilities._plotting as spu
import sisepuede.utilities._toolbox as sf
import sisepuede.visualization.plots as svp


import time
from typing import *

# from sisepuede_data_pipeline
import lib.process_utilities as pu
import lib.sisepuede_data_constructs as dc
import lib._util as lutil

# from sisepuede_jupyter
import temp_update_fields_from_wv_to_main as temp 


# Setup SISEPUEDE elements

In [2]:
def get_file_structure(
    y0: int = 2015,
    y1: int = 2070,
) -> Tuple[sfs.SISEPUEDEFileStructure, att.AttributeTable]:
    """Build a SISEPUEDEFileStructure whose time period attribute table
        spans the years y0 through y1 (inclusive).

    Arguments
    ---------
    y0 : int
        First year to include in the time period attribute table
    y1 : int
        Last year (inclusive) to include in the time period attribute table

    Returns
    -------
    Tuple of the form (file_struct, attribute_time_period), where the
        ModelAttributes held by file_struct has already been updated with
        attribute_time_period.
    """
    # directories are not initialized when building the file structure
    file_struct = sfs.SISEPUEDEFileStructure(initialize_directories = False, )
    model_attributes = file_struct.model_attributes

    # field names used for the time period key and the year
    key_time_period = model_attributes.dim_time_period
    key_year = model_attributes.field_dim_year

    # one row per year; time periods are numbered 0, 1, ..., len(years) - 1
    years = np.arange(y0, y1 + 1).astype(int)
    df_time_periods = pd.DataFrame(
        {
            key_time_period: range(len(years)),
            key_year: years,
        }
    )
    attribute_time_period = att.AttributeTable(df_time_periods, key_time_period, )

    # push the new table into the ModelAttributes inside the file structure
    model_attributes.update_dimensional_attribute_table(attribute_time_period, )

    return file_struct, attribute_time_period

    
# paths are resolved relative to the directory the notebook is run from
_PATH_CUR = pathlib.Path.cwd()
_PATH_DATA = _PATH_CUR / "data"
_PATH_OUTPUT = _PATH_DATA / "output"

# prefix for the per-region CSV exports (the region ISO code is appended)
_PREFIX_FILENAME_DATASETBUILD_BY_REGION = "sisepuede_raw_inputs_latest_"


# example data, file structure (with the extended time period table), and
# the support classes built from the shared ModelAttributes
_EXAMPLES = sxl.SISEPUEDEExamples()
_FILE_STRUCTURE, _ATTRIBUTE_TABLE_TIME_PERIOD = get_file_structure()
matt = _FILE_STRUCTURE.model_attributes
regions = sc.Regions(matt)
time_periods = sc.TimePeriods(matt)

# models, including the electricity model; Julia is initialized here, which
# is what produces the NemoMod precompilation messages in the cell output
models = sm.SISEPUEDEModels(
    matt,
    allow_electricity_run = True,
    fp_julia = _FILE_STRUCTURE.dir_jl,
    fp_nemomod_reference_files = _FILE_STRUCTURE.dir_ref_nemo,
    initialize_julia = True,
)

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


Precompiling NemoMod...
Info Given NemoMod was explicitly requested, output will be shown live [0K
[0KERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
   4270.6 ms  ? NemoMod
[ Info: Precompiling NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72] 
ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
┌ Info: Skipping precompilation due to precompilable error. Importing NemoMod [a3c327a0-d2f0-11e8-37fd-d12fd35c3c72].
└   exception = Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.


In [57]:
##  SOME GLOBALS

# example input frame and the region attribute table
df_example_input = _EXAMPLES("input_data_frame")
tab = regions.attributes.table


##  NOTE! The regions below are ignored for the moment
#
# GDP is incomplete for:
#   - antigua_and_barbuda
#   - cayman_islands
#
# The remaining regions in the list have other, unresolved issues.
#
_REGIONS_TO_CHECK = [
    "antigua_and_barbuda",
    "british_virgin_islands",
    "cayman_islands",
    "cuba",
    "curacao",
    "dominica",
    "grenada",
    "saint_kitts_and_nevis",
    "saint_martin",
    "sint_maarten",
    "turks_and_caicos_islands",
    "united_states_virgin_islands",
]

# regions outside Latin America and the Caribbean that are built as well
_REGIONS_ADDITIONAL = [
    "bulgaria",
    "egypt",
    "libya",
    "morocco",
    "united_republic_of_tanzania",
]

# LAC regions, minus the ones flagged above, plus the additional regions
_MASK_LAC = tab["un_sub_region"].isin(["Latin America and the Caribbean"])
_MASK_TO_CHECK = tab[regions.key].isin(_REGIONS_TO_CHECK)
_REGIONS_BUILD = sorted(
    list(tab.loc[_MASK_LAC & ~_MASK_TO_CHECK, regions.key].unique())
    + _REGIONS_ADDITIONAL
)

# ISO codes, in the same order as _REGIONS_BUILD
_REGIONS_ISO = [
    regions.return_region_or_iso(region, return_type = "iso", )
    for region in _REGIONS_BUILD
]


# Setup old repository and read data

In [5]:
# The legacy batch repository lives next to the other checkouts, so its
# location is derived from path_git instead of repeating the absolute path;
# the checkout root is then defined in one place only.
repo_old = api.SISEPUEDEBatchDataRepository(
    str(path_git.joinpath("sisepuede_data")),
    matt,
)

# NOTE(review): read(None) presumably reads every available field -- confirm
#   against SISEPUEDEBatchDataRepository.read
print("Getting old repository data...")
df_old = repo_old.read(None)
print("Old repository data complete.")

Getting old repository data...
Old repository data complete.


# Setup new repository

In [58]:
# pick up any edits made to the data constructs module since it was imported
importlib.reload(dc)

# The pipeline database sits inside the pipeline checkout, so its path is
# derived from path_pipeline instead of repeating the absolute path.
construct = dc.SISEPUEDEDataConstructs(
    path_output_database = str(path_pipeline.joinpath("sisepuede_inputs.sqlite")),
)

# local data repository used by the pipeline utilities
path_repo = pathlib.Path("/Users/usuario/SISEPUEDE_DATA_REPOSITORY")
repo = pu.Repository(
    {
        "local": {
            "path": str(path_repo)
        }
    }
)

# build the inputs available from the pipeline database for the regions
# selected in _REGIONS_BUILD
df_from_pipeline = construct.build_inputs_from_database(
    regions_keep = _REGIONS_BUILD,
    join = "outer"
)


In [69]:

def function_combine(
    df_repo_new: pd.DataFrame,
    df_repo_old: pd.DataFrame,
    df_example: pd.DataFrame,
    region_iso: str,
    years: Union[List[int], None] = None,
) -> pd.DataFrame:
    """Combine DataFrames for one region, combining in a hierarchy:

        1. data from the old repository form the base;
        2. data from the new (pipeline) repository are matched into the
            base on year and ISO code;
        3. input variable fields that are still missing are pulled from
            the example DataFrame.

    The result is then passed through temp.update_fields().

    Arguments
    ---------
    df_repo_new : pd.DataFrame
        Data built from the new pipeline database
    df_repo_old : pd.DataFrame
        Data read from the old batch repository
    df_example : pd.DataFrame
        Example input DataFrame used to fill fields found in neither
        repository
    region_iso : str
        ISO code of the region to build
    years : Union[List[int], None]
        Years to keep. If not list-like, defaults to
        time_periods.all_years

    Returns
    -------
    pd.DataFrame with one row per year/time period for the region
    """

    ##  FORMAT A BASE

    # start by setting years
    if not sf.islistlike(years):
        years = time_periods.all_years

    # NOTE(review): the year filter uses time_periods.field_year while the
    #   rename below maps repo_old.field_repo_year to it; this only works if
    #   the two names are the same -- confirm
    df_base = (
        df_repo_old[
            df_repo_old[repo_old.field_repo_iso].isin([region_iso])
            & (df_repo_old[time_periods.field_year] >= min(years))
        ]
        .copy()
        .rename(
            columns = {
                repo_old.field_repo_iso: regions.field_iso,
                repo_old.field_repo_year: time_periods.field_year,
            }
        )
    )

    # QUICK FIX!
    # NOTE(review): presumably the old repository stores this field in
    #   thousands of head -- confirm before relying on the scaling
    if "pop_lvst_initial_chickens" in df_base.columns:
        df_base["pop_lvst_initial_chickens"] *= 1000
        print("NOTE! Adjusting poultry population")

    # expand to every requested year, then fill gaps forward and backward
    df_base[time_periods.field_year] = df_base[time_periods.field_year].astype(int)
    df_base = (
        pd.merge(
            pd.DataFrame({time_periods.field_year: years, }),
            df_base,
            how = "left",
            on = time_periods.field_year,
        )
        .ffill()
        .bfill()
    )


    ##  ADD IN PIPELINE DATA

    # use the frame passed by the caller rather than the notebook-level
    # global, so the function honors its df_repo_new argument
    df_out = sf.match_df_to_target_df(
        df_base,
        df_repo_new,
        [
            construct.time_periods.field_year,
            regions.field_iso,
        ],
        overwrite_only = False,
    )

    df_out[time_periods.field_year] = df_out[time_periods.field_year].astype(int)
    df_out = time_periods.years_to_tps(df_out, )


    ##  PULL MISSING FIELDS FROM EXAMPLE DF

    # input variable fields present in the example but not yet in the output
    # (to exclude specific fields from the example, filter them out here)
    fields_input = set(matt.all_variable_fields_input)
    fields_from_ex = [
        x for x in df_example.columns
        if (x not in df_out.columns) and (x in fields_input)
    ]

    # merge in from the example on time period and fill any remaining gaps
    df_out = (
        pd.merge(
            df_out,
            df_example[fields_from_ex + [time_periods.field_time_period]],
            how = "left",
            on = time_periods.field_time_period,
        )
        .ffill()
        .bfill()
    )


    ##  TEMPORARY SCRIPT FOR MOVING FROM working_version TO latest full version

    df_out = temp.update_fields(
        df_out,
        matt,
    )

    return df_out


# quick check of the combination for a single region (Mexico)
df_mex = function_combine(
    df_repo_new = df_from_pipeline,
    df_repo_old = df_old,
    df_example = df_example_input,
    region_iso = "MEX",
)



NOTE! Adjusting poultry population


# Run the next cell to build a composite file and/or export individual files (set `export = True` to do this)

In [71]:

# collects one combined DataFrame per region; the loop at the end of this
# cell appends to it and then rebinds df_out to the concatenated frame
df_out = []
# when True, each region's DataFrame is also written to its own CSV file
export = True
# years = range(2015, 2071)

def build_path_for_region_file(
    iso: str,
) -> pathlib.Path:
    """Return the path of the CSV export for a region: the file name is
        the dataset prefix followed by the ISO code, and the file is placed
        in the output directory.
    """
    return _PATH_OUTPUT / f"{_PREFIX_FILENAME_DATASETBUILD_BY_REGION}{iso}.csv"



# to_csv does not create missing parent directories, so make sure the export
# directory exists before the first write
if export:
    _PATH_OUTPUT.mkdir(parents = True, exist_ok = True)

for iso in _REGIONS_ISO:

    # combine old repository, pipeline, and example data for the region
    df_cur = function_combine(
        df_from_pipeline,
        df_old,
        df_example_input,
        iso,
    )
    df_cur[regions.field_iso] = iso

    # write to the output location?
    if export:
        path_write = build_path_for_region_file(iso, )
        df_cur.to_csv(
            path_write,
            encoding = "UTF-8",
            index = False,
        )

    df_out.append(df_cur, )
    print(f"Region {iso} complete")

# stack the per-region frames into the composite frame
df_out = sf._concat_df(df_out, )




NOTE! Adjusting poultry population
Region ARG complete
NOTE! Adjusting poultry population
Region ABW complete
NOTE! Adjusting poultry population
Region BHS complete
NOTE! Adjusting poultry population
Region BRB complete
NOTE! Adjusting poultry population
Region BLZ complete
NOTE! Adjusting poultry population
Region BOL complete
NOTE! Adjusting poultry population
Region BRA complete
NOTE! Adjusting poultry population
Region BGR complete
NOTE! Adjusting poultry population
Region CHL complete
NOTE! Adjusting poultry population
Region COL complete
NOTE! Adjusting poultry population
Region CRI complete
NOTE! Adjusting poultry population
Region DOM complete
NOTE! Adjusting poultry population
Region ECU complete
NOTE! Adjusting poultry population
Region EGY complete
NOTE! Adjusting poultry population
Region SLV complete
NOTE! Adjusting poultry population
Region GTM complete
NOTE! Adjusting poultry population
Region GUY complete
NOTE! Adjusting poultry population
Region HTI complete
NOTE! Adju

# option to test


In [303]:
region_run = "MEX"

# rows of the composite frame that belong to the region being tested
df_input_run = (
    df_out
    .loc[df_out[regions.field_iso].eq(region_run)]
    .reset_index(drop = True)
)

# project over the first 12 time periods only
df_run = models.project(
    df_input_run,
    time_periods_base = np.arange(12),
    verbose = True,
)



2025-24-Sep 11:31:01.121 Started modeling scenario. NEMO version = 2.2.0, solver = HiGHS.
2025-24-Sep 11:31:01.128 Validated run-time arguments.
2025-24-Sep 11:31:01.131 Connected to scenario database. Path = /Users/usuario/git/sisepuede_region_nbs/generic_regions/tmpp8wwch9h.sqlite.
2025-24-Sep 11:31:01.275 Dropped pre-existing result tables from database.
2025-24-Sep 11:31:01.400 Created parameter views and indices.
2025-24-Sep 11:31:01.406 Created temporary tables.
2025-24-Sep 11:31:01.409 Started optimizing following years: [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011].
2025-24-Sep 11:31:01.411 Verified that transmission modeling is not enabled.
2025-24-Sep 11:31:01.429 Defined dimensions.
2025-24-Sep 11:31:04.463 Executed core database queries.
2025-24-Sep 11:31:04.468 Defined demand variables.
2025-24-Sep 11:31:04.470 Defined storage variables.
2025-24-Sep 11:31:04.470 Defined capacity variables.
2025-24-Sep 11:31:07.059 Defined activity variables.
2025

Unnamed: 0,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,area_agrc_crops_other_woody_perennial,area_agrc_crops_pulses,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,1776620.0,57775710.0,6123765.0,10809990.0,95945.934079,4174639.0,51076070.0,952640.985459,3638277.0,...,63879510.0,21702.395765,13695720.0,96591140.0,5380426.0,4249135.0,38144420.0,8314306.0,33271820.0,72792510.0
1,1,1765776.0,57423060.0,6086387.0,10744010.0,95360.306264,4149158.0,50764320.0,946826.324685,3616070.0,...,63482200.0,22642.119775,13612120.0,96072920.0,5356823.0,4030275.0,37911600.0,8500211.0,33500910.0,72686030.0
2,2,1749462.0,56892530.0,6030155.0,10644740.0,94479.273875,4110824.0,50295310.0,938078.610977,3582662.0,...,63045300.0,21006.136939,13486360.0,95231040.0,5199789.0,3988301.0,37555970.0,8537499.0,33348060.0,72436720.0
3,3,1740842.0,56612190.0,6000441.0,10592290.0,94013.721262,4090568.0,50047480.0,933456.169137,3565008.0,...,62958530.0,21120.255624,13419900.0,94785230.0,5474726.0,3999300.0,37376250.0,8458248.0,33206390.0,72467890.0
4,4,1728610.0,56214400.0,5958279.0,10517860.0,93353.130332,4061825.0,49695820.0,926897.20443,3539958.0,...,62224270.0,20714.593135,13325610.0,94093200.0,5271402.0,3999685.0,37110970.0,7998781.0,32560820.0,72087380.0
5,5,1716749.0,55828700.0,5917397.0,10445700.0,92712.607196,4033956.0,49354840.0,920537.491565,3515669.0,...,61602020.0,20163.672116,6617089.0,65294720.0,5249084.0,3976892.0,23693360.0,7975904.0,25320980.0,69937470.0
6,6,1704969.0,55445610.0,5876793.0,10374020.0,92076.427924,4006275.0,49016170.0,914220.908647,3491545.0,...,61265940.0,19974.563798,10514870.0,83492780.0,5262042.0,3958070.0,28759840.0,7882006.0,29170200.0,69907280.0
7,7,1693269.0,55065140.0,5836466.0,10302830.0,91444.591932,3978784.0,48679820.0,907947.449876,3467586.0,...,60685110.0,20008.857194,13052300.0,92087700.0,5198711.0,3939701.0,36352260.0,7786658.0,31964350.0,70017200.0
8,8,1681650.0,54687280.0,5796416.0,10232130.0,90817.096041,3951481.0,48345780.0,901717.083675,3443791.0,...,60268680.0,19871.555738,12962730.0,91455790.0,5163037.0,3912667.0,36102810.0,7733226.0,31745010.0,69536740.0
9,9,1670111.0,54312030.0,5756642.0,10161920.0,90193.934733,3924367.0,48014040.0,895529.755279,3420161.0,...,59855140.0,19735.202725,12873790.0,90828250.0,5127610.0,3885819.0,35855080.0,7680163.0,31527180.0,69059590.0


In [None]:
# NOTE(review): `d` is not defined anywhere in the visible notebook, so this
#   cell raises NameError as written. Presumably the model output from the
#   test run (`df_run`) is intended, possibly together with `matt` -- confirm
#   against the signature of svp.plot_emissions_stack before running.
svp.plot_emissions_stack(
    d
)