In [1]:
# Add the local sisepuede checkout to sys.path so the `import sisepuede`
# statements below resolve to it. This must run before those imports.
# NOTE(review): `dir_git` is an absolute, machine-specific path; other users
# have to edit it (or install the package) before running the notebook.
import sys
import warnings
dir_git = "/Users/usuario/git/sisepuede"
if dir_git not in sys.path:
    sys.path.append(dir_git)


# Silences every warning category for the rest of the kernel session.
warnings.filterwarnings("ignore")

import logging
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os, os.path
import pandas as pd
import pathlib
import sisepuede as si
import sisepuede.core.attribute_table as att
import sisepuede.core.model_attributes as ma
import sisepuede.core.model_variable as mv
import sisepuede.core.support_classes as sc
import sisepuede.manager.sisepuede_examples as sxm
import sisepuede.manager.sisepuede_file_structure as sfs
import sisepuede.manager.sisepuede_models as sm
import sisepuede.transformers as trf
import sisepuede.utilities._toolbox as sf
import time
# NOTE(review): star import pulls every public typing name into the notebook
# namespace; explicit names would make their origin clearer.
from typing import *

# hack for now: no logger exists yet. `log_job` is passed as `logger` when
# SISEPUEDE is instantiated further down and is then replaced by `ssp.logger`.
log_job = None



In [17]:
# Re-execute the file-structure module so source edits are picked up without
# restarting the kernel (development aid).
importlib.reload(sfs)

<module 'sisepuede.manager.sisepuede_file_structure' from '/Users/usuario/git/sisepuede/sisepuede/manager/sisepuede_file_structure.py'>

# Set up the file structure and read in data

In [18]:
# core SISEPUEDE objects: the file structure and the examples accessor
# (the latter is called by name further down)
struct = sfs.SISEPUEDEFileStructure()
examples = sxm.SISEPUEDEExamples()

# the model attributes feed the region and time-period helpers
matt = struct.model_attributes
regions, time_periods = sc.Regions(matt), sc.TimePeriods(matt)

# working directories, all resolved against the current working directory
path_cur = pathlib.Path(os.getcwd())
path_data = path_cur / "data"
path_transformations = path_cur / "transformations"
if not path_transformations.exists():
    path_transformations.mkdir(exist_ok=True)

# raw model input table
fp_inputs = path_data / "louisiana_inputs_raw.csv"



##  Format input data

In [14]:
# Regions requested for this run; filtered below to those present in the data.
regions_run = ["louisiana"]

# Read the raw inputs and work on a copy so the untouched frame stays available.
df_inputs_base_la = pd.read_csv(fp_inputs)
df_input = df_inputs_base_la.copy()


# Overwrite some variables: zero every field of the export variables.
for mvname in ["Agriculture Equivalent Exports", "Livestock Equivalent Exports"]:
    modvar = matt.get_variable(mvname)
    if modvar is None:
        raise RuntimeError(f"Error: model variable '{mvname}' not found.")
    df_input[modvar.fields] = 0


# Keep only requested regions that actually occur in the input table. The set
# of available regions is computed once instead of once per candidate.
field_region = regions.key
regions_available = set(df_input[field_region].unique())
regions_found = [x for x in regions_run if x in regions_available]

# Fail here with a clear message rather than handing an empty frame to the
# cells that follow.
if not regions_found:
    raise RuntimeError(
        f"Error: none of the requested regions {regions_run} were found in "
        f"field '{field_region}' of '{fp_inputs}'."
    )
regions_run = regions_found

# Reduce the inputs to the regions being run.
df_input = (
    df_input[df_input[field_region].isin(regions_run)]
    .reset_index(drop = True)
)




###  Secondary attempt - run if everything is built

In [9]:

# Re-read the raw inputs straight from disk.
# NOTE: this replaces any `df_input` prepared earlier, including the zeroed
# export fields and the region filter applied in the formatting cell.
df_input = pd.read_csv(path_data.joinpath("louisiana_inputs_raw.csv"))

# Time periods 0..55 paired with calendar years 2015..2070.
ra = np.arange(0, 56).astype(int)
df_time_period = pd.DataFrame(
    {
        time_periods.field_time_period: ra,
        time_periods.field_year: 2015 + ra,
    }
)

# Attribute table keyed on the time-period field.
attr_time_period = att.AttributeTable(
    df_time_period,
    time_periods.field_time_period,
)


###  Build a table mapping transformations to sample groups

In [4]:
# NOTE: this cell needs `strategies` and `transformations`, which are created
# in the "build transformations" cell further down the notebook. Run that cell
# first; on a fresh kernel this cell raises the NameError recorded below.

# get space of transformation codes to restrict to
strat = strategies.get_strategy(6002)            # all actions
strat_transformations = [
    x.code
    for x in strat.get_transformation_list(
        transformations.all_transformation_codes,
        transformations,
    )
]


# Baseline frame that every transformation's output is compared against.
tr_base = transformations.get_transformation(transformations.code_baseline, )
df_base = tr_base()

# field -> list of codes of the transformations that change that field
dict_codes_by_field = {}

for tf_code in strat_transformations:
    tr_cur = transformations.get_transformation(tf_code, )
    df_cur = tr_cur()

    for field in df_cur.columns:
        col_cur, col_base = df_cur[field], df_base[field]

        # A field is unchanged when every entry matches the baseline. Entries
        # missing in both frames count as matching; a bare `==` reports
        # NaN != NaN and would attribute the field to every transformation.
        unchanged = (
            (col_cur == col_base) | (col_cur.isna() & col_base.isna())
        ).all()
        if unchanged:
            continue

        dict_codes_by_field.setdefault(field, []).append(tf_code)

# Collapse each list of codes into a single "|"-delimited string.
dict_transformation_codes_by_field = {
    k: "|".join(v) for k, v in dict_codes_by_field.items()
}


##  MERGE CODES AND SAMPLE GROUPS TOGETHER

df_code_by_field = pd.DataFrame(
    list(dict_transformation_codes_by_field.items()),
    columns = ["variable_field", "transformation_code"],
)

# NOTE(review): absolute, machine-specific path.
fp_varspec_to_sg = "/Users/usuario/Documents/Projects/Louisiana/merges/20250521/variable_specification_to_sample_group.xlsx"
df_varspec_to_sg = pd.read_excel(fp_varspec_to_sg)

# Attach transformation codes to each variable specification. The join key is
# named explicitly instead of relying on whichever columns happen to be shared.
df_map = pd.merge(
    df_varspec_to_sg,
    df_code_by_field.rename(
        columns = {"variable_field": "variable_specification", }
    ),
    how = "left",
    on = "variable_specification",
)

# Attach transformation names. Rows holding several "|"-joined codes match no
# single code in the attribute table, so their name stays missing.
df_map = pd.merge(
    df_map,
    transformations.attribute_transformation.table
    .get(
        ["transformation_code", "transformation_name"]
    ),
    how = "left",
    on = "transformation_code",
)



NameError: name 'strategies' is not defined

In [76]:
# Rows whose field is changed by more than one transformation, i.e. whose
# code entry is a "|"-joined string (missing entries are excluded).
has_multiple_codes = [
    isinstance(code, str) and ("|" in code)
    for code in df_map["transformation_code"].to_numpy()
]
df_map[has_multiple_codes]


Unnamed: 0,variable_specification,sample_group,transformation_code,transformation_name
1755,nemomod_entc_frac_min_share_production_pp_biomass,34,TX:ENTC:LEAST_COST_SOLUTION|TX:ENTC:TARGET_REN...,
1756,nemomod_entc_frac_min_share_production_pp_coal,34,TX:ENTC:LEAST_COST_SOLUTION|TX:ENTC:TARGET_REN...,
1758,nemomod_entc_frac_min_share_production_pp_gas,34,TX:ENTC:LEAST_COST_SOLUTION|TX:ENTC:TARGET_REN...,
1762,nemomod_entc_frac_min_share_production_pp_nuclear,34,TX:ENTC:LEAST_COST_SOLUTION|TX:ENTC:TARGET_REN...,
1764,nemomod_entc_frac_min_share_production_pp_oil,34,TX:ENTC:LEAST_COST_SOLUTION|TX:ENTC:TARGET_REN...,
1986,pij_lndu_croplands_to_croplands,36,TX:LNDU:DEC_DEFORESTATION|TX:LNDU:INC_REFOREST...,
1987,pij_lndu_croplands_to_forests_mangroves,36,TX:LNDU:DEC_DEFORESTATION|TX:LNDU:INC_REFOREST...,
1989,pij_lndu_croplands_to_forests_secondary,36,TX:LNDU:DEC_DEFORESTATION|TX:LNDU:INC_REFOREST...,
1990,pij_lndu_croplands_to_grasslands,36,TX:LNDU:DEC_DEFORESTATION|TX:LNDU:INC_REFOREST...,
1993,pij_lndu_croplands_to_wetlands,36,TX:LNDU:DEC_DEFORESTATION|TX:LNDU:INC_REFOREST...,


In [69]:
# Write the variable-specification / sample-group / transformation map to CSV
# without the row index.
fp_map_out = "/Users/usuario/Documents/Projects/Louisiana/merges/20250521/variable_specification_to_sample_group_and_transformation_code.csv"
df_map.to_csv(fp_map_out, index=None, encoding="UTF-8")

In [54]:
# Distinct (transformation code, sample group) pairs, dropping rows where
# either value is missing.
(
    df_map
    .loc[:, ["transformation_code", "sample_group"]]
    .drop_duplicates()
    .dropna()
)

Unnamed: 0,transformation_code,sample_group
66,TX:IPPU:DEC_DEMAND,4
86,TX:SOIL:DEC_N_APPLIED,3
87,TX:SOIL:DEC_LIME_APPLIED,3
88,TX:TRDE:DEC_DEMAND,114
89,TX:TRDE:DEC_DEMAND,115
...,...,...
2176,TX:SCOE:INC_EFFICIENCY_APPLIANCE,46
2177,TX:SCOE:INC_EFFICIENCY_APPLIANCE,1480
2179,TX:SCOE:DEC_DEMAND_HEAT,47
2180,TX:SCOE:DEC_DEMAND_HEAT,1481


In [117]:
# Debugging check: class of the `ATTRCUR` object reached through `sfs.ma`.
type(sfs.ma.ATTRCUR)

sisepuede.core.attribute_table.AttributeTable

In [110]:
# Debugging check: ask the `att` module whether that same object passes its
# attribute-table test.
# NOTE(review): the recorded result is False although the recorded type is
# AttributeTable -- presumably a stale class identity left behind by an
# importlib.reload; confirm.
att.is_attribute_table(sfs.ma.ATTRCUR)

False

In [2]:
# Reload innermost-first: `mv`, then `ma`, then `sfs`.
# NOTE(review): presumably ordered so each module re-imports the refreshed
# version of the module it depends on; confirm.
importlib.reload(sfs.ma.mv)
importlib.reload(sfs.ma)
importlib.reload(sfs)
# Build a new file structure from the reloaded code.
file_struct = sfs.SISEPUEDEFileStructure()

In [5]:
# Same attribute-table check, applied to the "time_period" table taken from
# the freshly built file structure.
att.is_attribute_table(file_struct.model_attributes.get_dimensional_attribute_table("time_period"))


True

In [None]:
# Re-execute the file-structure module so source edits are picked up without
# restarting the kernel (development aid).
importlib.reload(sfs)

# build transformations

In [10]:
# Transformers built on the prepared input frame, with an empty configuration
# dictionary.
transformers = trf.Transformers(
    {},
    df_input = df_input,
)

# Build the default transformation definitions when the directory is missing
# OR exists but holds no non-hidden entries. An existence-only test is not
# enough: the setup cell earlier in the notebook already creates
# `path_transformations`, so on a fresh checkout the directory exists but is
# empty and the defaults would never be written.
# NOTE(review): assumes `instantiate_default_strategy_directory` accepts an
# existing, empty target directory -- confirm.
needs_default_build = (
    not path_transformations.exists()
    or not any(
        not entry.name.startswith(".")
        for entry in path_transformations.iterdir()
    )
)

if needs_default_build:

    print("BUILDING DEFAULT TRANSFORMATIONS")

    trf.instantiate_default_strategy_directory(
        transformers,
        path_transformations,
    )


# then, you can load this back in after modifying (play around with it)
transformations = trf.Transformations(
    path_transformations,
    transformers = transformers,
)
tab = transformations.attribute_transformation.table


# build strategies
#  build the strategies -- will export to path
#  ("transformations" is relative to the working directory, i.e. the same
#  directory as `path_transformations`)
t0 = time.time()
strategies = trf.Strategies(
    transformations,
    export_path = "transformations",
    prebuild = True,
)

t_elapse = sf.get_time_elapsed(t0)
print(f"Strategies defined at {strategies.transformations.dir_init} initialized in {t_elapse} seconds")

# build whirlpool with existing data
# tss = strategies.build_whirlpool_strategies(6004, ids = list(range(7101, 7160)))





Strategies defined at /Users/usuario/git/sisepuede_region_nbs/louisiana/transformations initialized in 1.68 seconds


## Build the strategies to templates (include default variable trajectory groups)

In [19]:
# Variable trajectory group specification taken from the examples accessor.
df_vargroups = examples("variable_trajectory_group_specification")

# Write templates for strategies 0 and 6002 only (6002 is the "all actions"
# strategy), using the trajectory groups above plus simplex groups.
strategies.build_strategies_to_templates(
    strategies=[0, 6002],
    df_trajgroup=df_vargroups,
    include_simplex_group_as_trajgroup=True,
)




0

# Run the model


In [78]:

# Instantiate SISEPUEDE for Louisiana with the custom time-period table and
# the strategies built above.
ssp = si.SISEPUEDE(
    "calibrated",
    attribute_time_period=attr_time_period,
    # must run as a CSV for now
    db_type="csv",
    # no connection to Julia is initialized if set to True
    initialize_as_dummy=True,
    logger=log_job,
    regions=["louisiana"],
    strategies=strategies,
    try_exogenous_xl_types_in_variable_specification=True,
)

# Keep the instance's logger so that re-running this cell passes it back in.
log_job = ssp.logger


2025-05-30 16:55:05,817 - INFO - Successfully initialized SISEPUEDEFileStructure.
2025-05-30 16:55:05,818 - INFO - 	Setting export engine to 'csv'.
2025-05-30 16:55:05,819 - INFO - Successfully instantiated table ANALYSIS_METADATA
2025-05-30 16:55:05,819 - INFO - Successfully instantiated table ATTRIBUTE_DESIGN
2025-05-30 16:55:05,820 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_EXOGENOUS_UNCERTAINTIES
2025-05-30 16:55:05,820 - INFO - Successfully instantiated table ATTRIBUTE_LHC_SAMPLES_LEVER_EFFECTS
2025-05-30 16:55:05,821 - INFO - Successfully instantiated table ATTRIBUTE_PRIMARY
2025-05-30 16:55:05,821 - INFO - Successfully instantiated table ATTRIBUTE_STRATEGY
2025-05-30 16:55:05,821 - INFO - Successfully instantiated table MODEL_BASE_INPUT_DATABASE
2025-05-30 16:55:05,822 - INFO - Successfully instantiated table MODEL_INPUT
2025-05-30 16:55:05,822 - INFO - Successfully instantiated table MODEL_OUTPUT
2025-05-30 16:55:05,823 - INFO - SISEPUEDEOutputDatabase succe

# Check the design id--here's where we'll leverage it
- `design_id` controls what variables vary where; we'll vary the effects only
- as of the latest run (20250114), `design_id = 3` varies only the lever effects (LEs), scaling them between 10% and 100% of implementation

In [9]:
# Display the attribute table for the design dimension (one row per design_id).
matt.get_dimensional_attribute_table(matt.dim_design_id)

Unnamed: 0,design_id,vary_l,vary_x,linear_transform_l_m,linear_transform_l_b,linear_transform_l_inf,linear_transform_l_sup,design_name,include
0,0,0,1,1.0,0.0,1.0,1,Vary Xs (design 0),1
1,1,1,1,0.75,0.25,0.25,1,Vary Xs and Les; Cap LE at 1 (design 1),1
2,2,1,1,1.25,0.0,0.25,1,"Vary Xs and LEs; Cap LE at 1.1 (design 2, 20% ...",1
3,3,1,0,0.9,0.1,0.1,1,Vary LEs; (design 3),1


In [98]:
# Number of entries in the LHS design's `fields_factors_l`.
# NOTE: `lhs_design` is assigned in a cell further down the notebook; run that
# cell first.
len(lhs_design.fields_factors_l)

84

In [107]:
# Export the LHS lever-effect sample array, one column per entry of
# `fields_factors_l`, with rows numbered as futures.
# NOTE: `lhs_design` is assigned in a cell further down the notebook; run that
# cell first.
df_out = pd.DataFrame(
    lhs_design.arr_lhs_l,
    columns = lhs_design.fields_factors_l,
)

# future ids run 1..n in row order
df_out["future_id"] = range(1, df_out.shape[0] + 1)

# NOTE(review): absolute, machine-specific output path.
df_out.to_csv(
    "/Users/usuario/Desktop/lhs_sample_group_experiments.csv",
    index = False,
    encoding = "UTF-8",
)


In [113]:
df_out.columns[18]

20

###   Since we didn't specify it, we'll run the config default # of samples

In [91]:
# NOTE: the value of this dir(...) call is discarded; it is not the last
# expression of the cell, so nothing is displayed for it.
dir(ssp.experimental_manager.dict_lhs_design.get("louisiana"))

# LHS design object stored under the "louisiana" key of the experimental
# manager's design dictionary.
lhs_design = ssp.experimental_manager.dict_lhs_design.get("louisiana")
# IPython help magic (not plain Python): shows the method's signature.
?lhs_design.retrieve_lhs_tables_by_design



Signature:
lhs_design.retrieve_lhs_tables_by_design(
    design_id: Optional[int],
    arr_lhs_l: Optional[numpy.ndarray] = None,
    arr_lhs_x: Optional[numpy.ndarray] = None,
    attr_design_id: Optional[sisepuede.core.attribute_table.AttributeTable] = None,
    ignore_trial_flag: Optional[float] = None,

##  Run the model here
- will specify `strategy_id` associated with All Actions and `design_id = 3`
- do not specify `future_id` because we are running across all available futures

In [None]:
# Scenario dimensions to run: design 3 and strategy 6002. The future key is
# left out on purpose so that every available future is run.
dict_scens = dict(
    [
        (ssp.key_design, [3]),
        (ssp.key_strategy, [6002]),
    ]
)

# Run the scenarios, saving inputs and skipping the results check.
ssp.project_scenarios(dict_scens, check_results=False, save_inputs=True)


# Finally, we can export a summary file

In [None]:
# Read model outputs and inputs back from the SISEPUEDE database.
df_out = ssp.read_output(None)
if df_out is None:
    # fail with a clear message instead of a TypeError on the indexing below
    raise RuntimeError("No model output could be read; run the scenarios before exporting.")

df_in = ssp.read_input(None)

# primary keys present in the output; reused for the primary attribute table
all_primaries = sorted(df_out[ssp.key_primary].unique())

# build if unable to simply read the data frame
if df_in is None:
    # Generate the input database once per primary key. Each result is
    # queried by region below, so it does not need to be regenerated for
    # every region.
    inputs_by_primary = [
        ssp.generate_scenario_database_from_primary_key(primary)
        for primary in all_primaries
    ]

    # region-major order, matching a region-by-primary nested loop
    df_in = pd.concat(
        [
            dict_by_region.get(region)
            for region in ssp.regions
            for dict_by_region in inputs_by_primary
        ],
        axis = 0,
    ).reset_index(drop = True)


# Wide table of outputs with their inputs attached. No key is given, so the
# join uses every column the two frames share.
df_export = pd.merge(
    df_out,
    df_in,
    how = "left",
)


# check output directory, creating it if needed
dir_pkg = os.path.join(
    ssp.file_struct.dir_out,
    f"sisepuede_summary_results_run_{ssp.id_fs_safe}"
)
os.makedirs(dir_pkg, exist_ok = True)


# Copy attribute tables from the database into the package directory. The loop
# variable is not called `tab`, which an earlier cell assigns to a DataFrame.
for table_name in ["ATTRIBUTE_STRATEGY"]:
    ssp.database.db.read_table(table_name).to_csv(
        os.path.join(dir_pkg, f"{table_name}.csv"),
        index = False,
        encoding = "UTF-8"
    )


# Primary-key index table restricted to the primaries that were actually run.
df_primary = (
    ssp
    .odpt_primary
    .get_indexing_dataframe(all_primaries)
)

df_primary.to_csv(
    os.path.join(dir_pkg, "ATTRIBUTE_PRIMARY.csv"),
    index = False,
    encoding = "UTF-8"
)

df_export.to_csv(
    os.path.join(dir_pkg, f"sisepuede_results_{ssp.id_fs_safe}_WIDE_INPUTS_OUTPUTS.csv"),
    index = False,
    encoding = "UTF-8"
)