# Normative Uncertainty in IAMs

#### Testing the hypervolumes for convergence

- First generate reference set from all seeds (or islands of MM Borg MOEA)
- Then generate hypervolumes for each seed (or island) against reference set
- Plot the hypervolumes for each seed (or island) against number of function evaluations

In [None]:
# This code creates a reference set from the different seeds

# NOTE: For MMBorg archives, run the script to convert it to the format recognized by older code with ema-workbench.
# Example 
# python borg_archive_processor.py     --archive /Volumes/justicedrive/NU_data_20_Oct/PRIORITARIAN_200000_ref5_42/mm_intermediate.zip     --base-name PRIORITARIAN_200000_ref5_42     --step 10000

from solvers.convergence.hypervolume import get_global_reference_set, calculate_hypervolume_from_archives
import multiprocessing
# Suppress warnings
import warnings

from justice.util.enumerations import WelfareFunction, SSP
from justice.util.visualizer import plot_hypervolume

warnings.filterwarnings("ignore")

# --- Run configuration -------------------------------------------------------
# Root folder that holds the optimisation archives; change this to your path.
base_path = "data/temporary/NU_DATA/mmBorg/"

swf = WelfareFunction.PRIORITARIAN
ssp = SSP.SSP4
ssp_ref = 5  # appears as "ref<N>" in the archive file names used further down
nfe = 100_000  # appears in the archive file names and is passed to the loader

# The run folder is named "<swf.value[1]>_<SSP member name>".
swf_label = swf.value[1]
ssp_label = str(ssp).split(".")[1]
path = f"{base_path}/{swf_label}_{ssp_label}"
data_path = path  # archives are read from the folder the results are written to

# Objectives to extract from the archives, with one direction per objective.
list_of_objectives = ["welfare", "fraction_above_threshold"]
direction_of_optimization = ["min", "min"]

print(f"Loading data from {path}...")

# Merge the archives of all seeds into one global reference set and save it.
# Kept as the last expression of the cell so the notebook displays the result.
get_global_reference_set(
    data_path=data_path,
    output_data_path=path,
    # file_name=None,
    swf=[swf_label],
    nfe=str(nfe),  # handed over as a string
    list_of_objectives=list_of_objectives,
    direction_of_optimization=direction_of_optimization,
    # Same epsilon values as the optimization process (see analysis/analyzer.py).
    epsilons=[1e-5, 1e-3],
    saving=True,
)




Loading data from data/temporary/NU_DATA/mmBorg/PRIORITARIAN_SSP4/200k...
Loading list of files
Loading archives for:  PRIORITARIAN
Filename:  PRIORITARIAN_200000_ref5_42_1.tar.gz
Matching file: PRIORITARIAN_200000_ref5_42_1.tar.gz
Loading archives from: PRIORITARIAN_200000_ref5_42_1.tar.gz
Max key: 200000
Number of rows in archive: 2
Archives loaded for: PRIORITARIAN_200000_ref5_42_1.tar.gz
Filename:  PRIORITARIAN_200000_ref5_42_3.tar.gz
Matching file: PRIORITARIAN_200000_ref5_42_3.tar.gz
Loading archives from: PRIORITARIAN_200000_ref5_42_3.tar.gz
Max key: 200000
Number of rows in archive: 3
Archives loaded for: PRIORITARIAN_200000_ref5_42_3.tar.gz
Filename:  PRIORITARIAN_200000_ref5_42_0.tar.gz
Matching file: PRIORITARIAN_200000_ref5_42_0.tar.gz
Loading archives from: PRIORITARIAN_200000_ref5_42_0.tar.gz
Max key: 200000
Number of rows in archive: 2
Archives loaded for: PRIORITARIAN_200000_ref5_42_0.tar.gz
Filename:  PRIORITARIAN_200000_ref5_42_2.tar.gz
Matching file: PRIORITARIAN_200

{'PRIORITARIAN':     center 0  center 1  center 2  center 3  center 4  center 5  center 6  \
 12  0.136858 -0.950275  0.042237 -0.932331  0.047157 -0.129507  0.013975   
 13  0.136858 -0.951947  0.053598  0.100470  0.231806 -0.129114  0.053326   
 
     center 7   radii 0   radii 1  ...  weights 220  weights 221  weights 222  \
 12 -0.045703  0.133822  0.999859  ...     0.938101     0.999547     0.939041   
 13 -0.016753  0.133822  0.999863  ...     0.938100     0.999543     0.939041   
 
     weights 223  weights 224  weights 225  weights 226  weights 227  \
 12     0.567953     0.980678     0.979616     0.994322     0.936356   
 13     0.567955     0.980678     0.979616     0.994050     0.936992   
 
        welfare  fraction_above_threshold  
 12  498.445120                      0.58  
 13  498.139221                      0.60  
 
 [2 rows x 246 columns]}

Computing the Hypervolume for the reference set

In [2]:
## Compute the hypervolume of each seed archive against the global reference set

# One Borg archive per seed/island; only the trailing index (0-4) varies.
filenames = [
    f"{swf.value[1]}_{nfe}_ref{ssp_ref}_42_{seed_index}.tar.gz"
    for seed_index in range(5)
]

# Reference set file name, looked up in `path` by the call below.
reference_set = f"{swf.value[1]}_reference_set.csv"
# reference_set =  "final_archive/100000.csv"

# A single worker pool is created once and handed to every archive's calculation.
with multiprocessing.Pool() as pool:
    for filename in filenames:
        scores = calculate_hypervolume_from_archives(
            file_name=filename,
            input_data_path=data_path,
            output_data_path=path,
            list_of_objectives=list_of_objectives,
            direction_of_optimization=direction_of_optimization,
            # NOTE: Change the reference-set arguments according to the PF refset.
            global_reference_set=True,
            global_reference_set_path=path,
            global_reference_set_file=reference_set,
            pool=pool,
            saving=True,
        )
        



Loading archives for PRIORITARIAN_200000_ref5_42_0.tar.gz
Archives loaded
list_of_archives:  (45, 2)
reference_set (2, 2)
type of reference_set <class 'numpy.ndarray'>
nfes: 
 [100, 10000, 100000, 110000, 120000, 130000, 140000, 150000, 160000, 170000, 180000, 190000, 20000, 200000, 30000, 40000, 50000, 60000, 70000, 80000, 90000]
Computing hypervolume for  PRIORITARIAN_200000_ref5_42_0.tar.gz
Time taken for Hypervolume Calculation: 2.921 seconds
data/temporary/NU_DATA/mmBorg/PRIORITARIAN_SSP4/200k/PRIORITARIAN_200000_ref5_42_0_hv.csv
Loading archives for PRIORITARIAN_200000_ref5_42_1.tar.gz
Archives loaded
list_of_archives:  (56, 2)
reference_set (2, 2)
type of reference_set <class 'numpy.ndarray'>
nfes: 
 [100, 10000, 100000, 110000, 120000, 130000, 140000, 150000, 160000, 170000, 180000, 190000, 20000, 200000, 30000, 40000, 50000, 60000, 70000, 80000, 90000]
Computing hypervolume for  PRIORITARIAN_200000_ref5_42_1.tar.gz
Time taken for Hypervolume Calculation: 0.003 seconds
data/tem

Plotting the Hypervolumes for each seed (or island) against number of function evaluations

In [3]:
# Hypervolume CSV files (one per seed, index 0-4), keyed by swf.value[1].
input_data_path_list = {
    swf.value[1]: [
        f"{swf.value[1]}_{nfe}_ref{ssp_ref}_42_{seed_index}_hv.csv"
        for seed_index in range(5)
    ],
}

# Plot hypervolume against number of function evaluations; the figure is also
# saved because `saving=True`.
fig = plot_hypervolume(
    input_data=input_data_path_list,
    path_to_data=path,
    path_to_output=path,
    saving=True,
    yaxis_upper_limit=1.0,
    width=1000,
    height=800,
    fontsize=20,
)

fig.show()

## Launch the Mapping Script in Util
```
python justice/util/postprocessing_for_regret_calculations.py data/temporary/NU_DATA/mmBorg/ UTILITARIAN SSP2
```
- This reevaluates all the Pareto-optimal policy candidates to compute the 90th percentile regret values for welfare (utilitarian/prioritarian) and temperature rise in degrees Celsius.
- NOTE: This script takes a long time (around 30 minutes to several hours depending on the number of policy candidates, scenarios and computational resources available).
- Call the script separately for each combination of social welfare function and reference scenario (the scenario under which the policies are optimized).

# Automated Regret Calculation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from justice.util.model_time import TimeHorizon
from justice.util.data_loader import DataLoader
import json

from justice.util.enumerations import WelfareFunction, SSP

from pathlib import Path
import numpy as np
import pandas as pd
from justice.util.output_data_processor import compute_p90_regret_dataframe, minimax_regret_policy
from justice.util.enumerations import WelfareFunction, SSP


# For every (optimisation SSP, ethical framing, regret type) combination:
#   1. compute the p90 regret dataframe,
#   2. pick the minimax-regret policy index from it,
#   3. record that index in a nested dictionary that is saved as JSON at the end.

# Scenarios handed to the regret computation.
scenario_list = ["SSP126", "SSP245", "SSP370", "SSP460", "SSP534"]

# Baseline scenario used for each SSP under which policies were optimised.
baseline_scenario_by_ssp = {
    "SSP1": "SSP126",
    "SSP2": "SSP245",
    "SSP3": "SSP370",
    "SSP4": "SSP460",
    "SSP5": "SSP534",
}

# Welfare variable evaluated for "Welfare_Regret" under each ethical framing.
welfare_variable_by_framing = {
    "UTILITARIAN": "utilitarian_welfare",
    "PRIORITARIAN": "prioritarian_welfare",
}

# A tuple rather than a set: set iteration order for strings can differ between
# interpreter sessions, which made the log order and the key order of the saved
# JSON non-reproducible.
regret_types_in_order = ("Temperature_Regret", "Welfare_Regret")

ethical_framing_and_regret = {
    ssp_name: {
        "UTILITARIAN": regret_types_in_order,
        "PRIORITARIAN": regret_types_in_order,
    }
    for ssp_name in baseline_scenario_by_ssp
}

# Policy index with minimum regret, nested as [SSP][ethical framing][regret type].
min_regret_policy_indices = {}

base_path = "data/temporary/NU_DATA/mmBorg/"
save_regret_dfs = True

for key, value in ethical_framing_and_regret.items():
    print(f"Scenario: {key}")

    # An SSP label without a known baseline yields None, as the if/elif chain did.
    baseline_scenario = baseline_scenario_by_ssp.get(key)

    for ethical_framing, regret_types in value.items():
        print(f"  Ethical Framing: {ethical_framing}")

        for regret_type in regret_types:
            # Resolve which variable is compared, in which direction, and the
            # progress message for this combination.
            if regret_type == "Temperature_Regret":
                variable_of_interest = "global_temperature"
                direction_of_interest = "min"  # Use min for global temperature
                progress_message = f"Processing {ethical_framing} with {regret_type} for {key}  and baseline scenario {baseline_scenario}"
            elif (
                regret_type == "Welfare_Regret"
                and ethical_framing in welfare_variable_by_framing
            ):
                variable_of_interest = welfare_variable_by_framing[ethical_framing]
                direction_of_interest = "max"  # Use max for welfare variables
                progress_message = f"Processing {ethical_framing} with {regret_type} with variable of interest {variable_of_interest} for {key} and baseline scenario {baseline_scenario}"
            else:
                # Unknown regret type or ethical framing: nothing to compute.
                continue

            p90_delta_df = compute_p90_regret_dataframe(
                base_path=base_path + f"{ethical_framing}_{key}/",
                welfare_function_name=ethical_framing,
                baseline_scenario=baseline_scenario,
                scenario_list=scenario_list,
                variable_of_interest=variable_of_interest,
                direction_of_interest=direction_of_interest,
                mapping_subdir="mapping",
                hdf5_filename_template="mapping_{}.h5",
                save_df=save_regret_dfs,  # Save CSV file
                # None -> default location; the recorded run wrote
                # '<base_path>/p90_regret_<welfare_function_name>_<variable_of_interest>.csv'
                df_output_path=None,
            )
            temp_idx = minimax_regret_policy(p90_delta_df)
            print(progress_message)
            print("Policy index with minimum regret:", temp_idx)

            # Create the nested levels on first use, then store the index.
            min_regret_policy_indices.setdefault(key, {}).setdefault(
                ethical_framing, {}
            )[regret_type] = temp_idx

# Save this dictionary at the base path
with open(base_path + "min_regret_policy_indices.json", "w") as f:
    json.dump(min_regret_policy_indices, f, indent=4)



  from .autonotebook import tqdm as notebook_tqdm


Scenario: SSP1
  Ethical Framing: UTILITARIAN
Saved p90 delta data to data/temporary/NU_DATA/mmBorg/UTILITARIAN_SSP1/p90_regret_UTILITARIAN_global_temperature.csv
Processing UTILITARIAN with Temperature_Regret for SSP1  and baseline scenario SSP126
Policy index with minimum regret: 6
Saved p90 delta data to data/temporary/NU_DATA/mmBorg/UTILITARIAN_SSP1/p90_regret_UTILITARIAN_utilitarian_welfare.csv
Processing UTILITARIAN with Welfare_Regret with variable of interest utilitarian_welfare for SSP1 and baseline scenario SSP126
Policy index with minimum regret: 4
  Ethical Framing: PRIORITARIAN
Saved p90 delta data to data/temporary/NU_DATA/mmBorg/PRIORITARIAN_SSP1/p90_regret_PRIORITARIAN_global_temperature.csv
Processing PRIORITARIAN with Temperature_Regret for SSP1  and baseline scenario SSP126
Policy index with minimum regret: 0
Saved p90 delta data to data/temporary/NU_DATA/mmBorg/PRIORITARIAN_SSP1/p90_regret_PRIORITARIAN_prioritarian_welfare.csv
Processing PRIORITARIAN with Welfare_Re

## Run the reevaluation script
```
python justice/util/reevaluate_optimal_policy.py
```
- Reevaluates the policy candidates selected in the previous step across all scenarios.
- Extracts relevant variables - emissions, temperature, emission control rates and saves them in npy files for further analysis.
- NOTE: This script generates big files (several GBs). Ensure at least 100 GB of free space on the drive, and select the appropriate output path in the script before running.

# Visualize the Pathways

In [None]:
from justice.util.visualizer import plot_comparison_with_boxplots, plot_choropleth_2D_data
from justice.util.enumerations import WelfareFunction, SSP
import json
import numpy as np
import plotly.express as px
import pandas as pd

variable_name = "emissions"
base_path = "data/temporary/NU_DATA/mmBorg/"

# Load the minimum-regret policy indices written by the regret-calculation cell.
with open(base_path + "min_regret_policy_indices.json", "r") as f:
    loaded_min_regret_policy_indices = json.load(f)

# One data file, legend label and colour per scenario, in matching order.
evaluation_scenarios = ("SSP126", "SSP245", "SSP370", "SSP460", "SSP534")
scenario_labels = ("SSP1", "SSP2", "SSP3", "SSP4", "SSP5")
rgb_triplets = (
    "141,211,199",
    "254,217,166",
    "190,186,218",
    "128,177,211",
    "251,128,114",
)

# Print each stored policy index and draw its comparison plot.
print("\nMinimum Regret Policy Indices:")
for scenario, ethical_data in loaded_min_regret_policy_indices.items():
    print(f"Scenario: {scenario}")
    for ethical_framing, regret_data in ethical_data.items():
        print(f"  Ethical Framing: {ethical_framing}")
        for regret_type, policy_index in regret_data.items():
            print(f"    Regret Type: {regret_type}, Policy Index: {policy_index}")

            # Folder and file-name prefix shared by all files of this policy.
            run_tag = f"ref_{scenario}_{regret_type}_idx{policy_index}"
            run_dir = base_path + f"{ethical_framing}_{scenario}/{run_tag}/"
            file_prefix = (
                f"{ethical_framing}_{run_tag}_{variable_name}_idx{policy_index}"
            )

            plot_comparison_with_boxplots(
                data_paths=[
                    run_dir + f"{file_prefix}_{ssp_code}_{variable_name}.npy"
                    for ssp_code in evaluation_scenarios
                ],
                labels=list(scenario_labels),
                # Opaque line colours and matching semi-transparent fill colours.
                linecolors=[f"rgba({rgb}, 1)" for rgb in rgb_triplets],
                colors=[f"rgba({rgb}, 0.4)" for rgb in rgb_triplets],
                # Time axis of the data and the window that is drawn.
                start_year=2015,
                end_year=2300,
                data_timestep=5,
                timestep=1,
                visualization_start_year=2015,
                visualization_end_year=2100,
                # Axes and layout.
                yaxis_range=[0, 80],
                dtick=10,
                plot_title=' ',
                xaxis_title='Year',
                yaxis_title='Global Emissions (GtCO2)',
                template='plotly_white',
                width=1000,
                height=700,
                first_plot_proportion=[0, 0.75],
                second_plot_proportion=[0.85, 1],
                # Display toggles.
                show_red_dashed_line=False,
                show_interquartile_range=True,
                show_min_max=False,
                transpose_data=True,
                # Output.
                output_path=f"{base_path}/plots",
                output_name_suffix=regret_type,
                saving=True,
            )


Minimum Regret Policy Indices:
Scenario: SSP1
  Ethical Framing: UTILITARIAN
    Regret Type: Temperature_Regret, Policy Index: 6
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
    Regret Type: Welfare_Regret, Policy Index: 4
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of data after summing:  (286, 1001)
Data is 3D
Shape of data:  (57, 286, 1001)
Shape of dat

# Visualize the Distribution of Emission Control Rates across SSPs

In [None]:
from justice.util.visualizer import plot_comparison_with_boxplots, plot_choropleth_2D_data
from justice.util.enumerations import WelfareFunction, SSP
import json
import numpy as np
import plotly.express as px
import pandas as pd

variable_name = "constrained_emission_control_rate"
base_path = "data/temporary/NU_DATA/mmBorg/"

# To plot every stored policy, read the saved dictionary back instead:
# with open(base_path + "min_regret_policy_indices.json", "r") as f:
#     loaded_min_regret_policy_indices = json.load(f)

# Hard-coded subset of policies to plot.
loaded_min_regret_policy_indices = {
    "SSP2": {
        "UTILITARIAN": {
            # "Temperature_Regret": 25,
            "Welfare_Regret": 9,
        },
        "PRIORITARIAN": {
            # "Temperature_Regret": 0,
            "Welfare_Regret": 4,
        },
    },
}

# One input file per scenario, in this order.
evaluation_scenarios = ("SSP126", "SSP245", "SSP370", "SSP460", "SSP534")

# Print each selected policy index and draw its choropleth figure.
print("\nMinimum Regret Policy Indices:")
for scenario, ethical_data in loaded_min_regret_policy_indices.items():
    print(f"Scenario: {scenario}")
    for ethical_framing, regret_data in ethical_data.items():
        print(f"  Ethical Framing: {ethical_framing}")
        for regret_type, policy_index in regret_data.items():
            print(f"    Regret Type: {regret_type}, Policy Index: {policy_index}")

            # Folder and file-name prefix shared by all files of this policy.
            run_tag = f"ref_{scenario}_{regret_type}_idx{policy_index}"
            file_prefix = (
                f"{ethical_framing}_{run_tag}_{variable_name}_idx{policy_index}"
            )

            fig, prior_data = plot_choropleth_2D_data(
                path_to_data=base_path + f"{ethical_framing}_{scenario}/{run_tag}/",
                input_data_path_list=[
                    f"{file_prefix}_{ssp_code}_{variable_name}.npy"
                    for ssp_code in evaluation_scenarios
                ],
                path_to_output=f"{base_path}/plots",
                year_to_visualize=2050,
                projection="natural earth1",
                colourmap=px.colors.sequential.Reds,
                data_label="Emission Control Rate",
                legend_label="",
                data_normalization=True,
                normalized_colorbar=True,
                show_colorbar=False,
                saving=True,
                plot_saving_format="svg",
            )

            fig.show()



Minimum Regret Policy Indices:
Scenario: SSP2
  Ethical Framing: UTILITARIAN
    Regret Type: Welfare_Regret, Policy Index: 9
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
0
1
2
3
4


  Ethical Framing: PRIORITARIAN
    Regret Type: Welfare_Regret, Policy Index: 4
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
Taking average over the last dimension.
0
1
2
3
4


# Feature Importance Analysis

In [1]:
from justice.util.feature_importance import build_long_dataframe, run_all_ml_importance


# Years analysed; the same tuple is used to build the dataframe and to run the models.
analysis_years = (2030, 2050, 2070, 2100)

# 1) Build the long-format dataframe from the data under the base path.
long_df = build_long_dataframe(
    base_path="data/temporary/NU_DATA/mmBorg/",
    region_mapping_path="data/input/12_regions.json",
    rice_region_dict_path="data/input/rice50_regions_dict.json",
    years_of_interest=analysis_years,
)

print("Long DF shape:", long_df.shape)

# 2) Run CatBoost + SHAP for the selected statistics, both global and per-region.
#    Plots are saved in ml_importance_plots/<scope>/<stat>/...
catboost_params = {
    "depth": 6,
    "learning_rate": 0.05,
    "n_estimators": 800,  # upper bound; early stopping finds best < this in CV
    "l2_leaf_reg": 3.0,
    "loss_function": "RMSE",  # overridden per statistic internally
    "random_seed": 42,
    "od_type": "Iter",
    "od_wait": 50,
    "use_best_model": True,
    "verbose": False,
    "allow_writing_files": False,
}

results = run_all_ml_importance(
    long_df=long_df,
    years=analysis_years,
    target_stats=("median", "p90"),  # ("mean", "median", "p90")
    output_dir="ml_importance_plots",
    cv_folds=5,
    random_state=42,
    model_params=catboost_params,
    normalized_plots=True,  # set False to see raw mean |SHAP|
    model_type="final",  # "final" or "cv-mean"
)




Long DF shape: (5205200, 9)
Saving plots to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/global/median
Saving feature importance data to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/global/median/global_2030_shap_full.csv
Saving feature importance data to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/global/median/global_2050_shap_full.csv
Saving feature importance data to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/global/median/global_2070_shap_full.csv
Saving feature importance data to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/global/median/global_2100_shap_full.csv
Saving plots to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/regional/median
Saving feature importance data to: /Users/palokbiswas/Desktop/pollockdevis_git/JUSTICE/ml_importance_plots/regional/median/Brazil_2030_shap_full.csv
Saving feature importance d

In [2]:
# Preview the first rows of the long-format dataframe (displayed as the cell result).
long_df.head()

Unnamed: 0,Optimization,Regret,Scenario,Welfare,Region,Year,Sample,AbatedEmission,Scope
0,SSP1,Temperature_Regret,SSP126,UTILITARIAN,Rest of the World,2030,0,0.447325,Regional
1,SSP1,Temperature_Regret,SSP126,UTILITARIAN,Rest of the World,2030,1,0.447762,Regional
2,SSP1,Temperature_Regret,SSP126,UTILITARIAN,Rest of the World,2030,2,0.449517,Regional
3,SSP1,Temperature_Regret,SSP126,UTILITARIAN,Rest of the World,2030,3,0.447558,Regional
4,SSP1,Temperature_Regret,SSP126,UTILITARIAN,Rest of the World,2030,4,0.448138,Regional


In [4]:
# Print the distinct values of every column except the float-valued
# "AbatedEmission" column. The name is singular in long_df; the previous check
# against "AbatedEmissions" never matched, so that column was not skipped.
for col in long_df.columns:
    if col == "AbatedEmission":
        continue
    unique_vars = long_df[col].unique()
    print(f"Column: {col}, Unique Variables: {unique_vars}")

Column: Optimization, Unique Variables: ['SSP1', 'SSP2', 'SSP3', 'SSP4', 'SSP5']
Categories (5, object): ['SSP1', 'SSP2', 'SSP3', 'SSP4', 'SSP5']
Column: Regret, Unique Variables: ['Temperature_Regret', 'Welfare_Regret']
Categories (2, object): ['Temperature_Regret', 'Welfare_Regret']
Column: Scenario, Unique Variables: ['SSP126', 'SSP245', 'SSP370', 'SSP460', 'SSP534']
Categories (5, object): ['SSP126', 'SSP245', 'SSP370', 'SSP460', 'SSP534']
Column: Welfare, Unique Variables: ['UTILITARIAN', 'PRIORITARIAN']
Categories (2, object): ['PRIORITARIAN', 'UTILITARIAN']
Column: Region, Unique Variables: ['Rest of the World', 'Europe', 'Sub-Saharan Africa', 'Gulf Countries', 'Other High Income', ..., 'South Asia', 'Brazil', 'United States', 'Russia', 'Global']
Length: 13
Categories (13, object): ['Brazil', 'China', 'Europe', 'Global', ..., 'South Asia', 'Southeast Asia', 'Sub-Saharan Africa', 'United States']
Column: Year, Unique Variables: [2030 2050 2070 2100]
Column: Sample, Unique Variabl

In [3]:
import os
import re
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Stacking order of the importance components (bottom to top); also the legend order.
_IMPORTANCE_FEATURES = ["Scenario", "Regret", "Welfare", "Optimization"]

# Fixed bar colour for each importance component.
_IMPORTANCE_COLORS = {
    "Scenario": "#fd8d3c",
    "Regret": "#b2e2e2",
    "Welfare": "#66c2a4",
    "Optimization": "#238b45",
}


def _read_importance_csv(path, feature_order):
    """Read one importance CSV into a Series ordered like *feature_order*.

    The CSV must have ``Feature`` and ``Importance`` columns. Features that are
    missing from the file get an importance of 0.0. Returns None when *path*
    does not exist.
    """
    if not path.exists():
        return None
    df = pd.read_csv(path)
    importance = pd.Series(df["Importance"].values, index=df["Feature"].values)
    return importance.reindex(feature_order, fill_value=0.0)


def _load_yearly_importances(root, file_prefix, kind, year_order, feature_order, region=None):
    """Build the plotting frame for one chart: one row per year whose CSV exists.

    Files are looked up as ``<root>/<file_prefix>_<year>_<kind>.csv``. When
    *region* is given, a leading ``Region`` column is added. Returns None when
    no CSV was found for any of the requested years.
    """
    rows = []
    for yr in year_order:
        importance = _read_importance_csv(root / f"{file_prefix}_{yr}_{kind}.csv", feature_order)
        if importance is None:
            continue
        row = {} if region is None else {"Region": region}
        row["Year"] = yr
        for feat in feature_order:
            row[feat] = float(importance[feat])
        rows.append(row)

    if not rows:
        return None

    df_plot = pd.DataFrame(rows)
    df_plot["Year"] = pd.Categorical(df_plot["Year"], categories=year_order, ordered=True)
    return df_plot


def _draw_stacked_importance_bars(
    df_plot, feature_order, colors, figsize, bar_width, legend_fontsize, normalized, title=None
):
    """Draw one stacked bar per row of *df_plot* and return the new Figure."""
    fig, ax = plt.subplots(figsize=figsize)
    positions = list(range(len(df_plot)))
    bottoms = [0.0] * len(df_plot)

    # Stack the components in the fixed order; each layer starts where the
    # previous layers ended.
    for feat in feature_order:
        ax.bar(
            positions,
            df_plot[feat],
            width=bar_width,
            bottom=bottoms,
            color=colors[feat],
            label=feat,
        )
        bottoms = [b + v for b, v in zip(bottoms, df_plot[feat])]

    # X-axis: one tick per year that had data.
    ax.set_xticks(positions)
    ax.set_xticklabels([str(y) for y in df_plot["Year"]])

    # Keep only the left and bottom spines.
    sns.despine(ax=ax, top=True, right=True, left=False, bottom=False)
    ax.set_xlabel("")
    ax.set_ylabel("Importance" + (" (normalized)" if normalized else ""))
    if title is not None:
        ax.set_title(title)

    # Legend: single column, anchored outside the axes on the right.
    handles, labels = ax.get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    shown = [feat for feat in feature_order if feat in by_label]
    ax.legend(
        [by_label[feat] for feat in shown],
        shown,
        title="",
        frameon=False,
        fontsize=legend_fontsize,
        ncol=1,
        loc="upper left",
        bbox_to_anchor=(1.02, 1.0),  # push legend outside to the right
        borderaxespad=0.0,
    )
    return fig


def _save_or_show_figure(fig, output_path):
    """Save *fig* to *output_path* (300 dpi) and close it, or show it if no path is given."""
    if output_path:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=300, bbox_inches="tight")
        plt.close(fig)
    else:
        plt.show()


def plot_grouped_stacked_feature_importance_from_csvs(
    base_dir,
    scope="global",            # "global" or "regional"
    stat="mean",               # "mean", "median", "p90"
    model_type="final",        # "final" -> shap_full.csv, "cv-mean" -> shap_cv.csv
    years=(2030, 2050, 2070, 2100),
    region=None,               # for regional plots: a specific region; if None, plot all regions found
    output_file=None,          # path to save figure; for regional plots the region name is appended
    normalized=True,           # assume CSVs are normalized if True (affects y-axis label text only)
    figsize=(9, 4),
    bar_width=0.6,             # width of the single stacked bar per year
    legend_fontsize=9          # smaller legend
):
    """
    Build a stacked bar chart with one bar per year:
      - For each year: a single stacked bar for Scenario + Regret + Welfare + Optimization.

    Styling:
      - White background, no top/right spines (keep left and bottom only)

    Expected directory structure from previous pipeline:
      base_dir/<scope>/<stat>/*.csv   (scope and stat are lower-cased)
        - Global filenames:  global_<year>_shap_full.csv or global_<year>_shap_cv.csv
        - Regional filenames: <region>_<year>_shap_full.csv or <region>_<year>_shap_cv.csv
          where <region> uses underscores instead of spaces.

    Parameters:
      base_dir        root directory holding the <scope>/<stat> sub-directories.
      scope           "global" (case-insensitive) draws one figure; any other
                      value takes the regional path (one figure per region).
      stat            name of the statistic sub-directory.
      model_type      "final" reads *_shap_full.csv; any other value reads *_shap_cv.csv.
      years           years to plot, in bar order; years without a CSV are skipped.
      region          regional only: a single region to plot. Spaces are accepted
                      and mapped to underscores unless a file with the exact
                      name exists. If None, every region found on disk is plotted.
      output_file     where to save. Regional figures are saved next to this path
                      as <stem>_<region><suffix>. If falsy, figures are shown instead.
      normalized      only toggles the " (normalized)" suffix of the y-axis label.
      figsize, bar_width, legend_fontsize
                      passed through to the figure, bars and legend.

    Returns a dict:
      {
        'data': DataFrame used for plotting (global or all regions stacked),
        'figure': Figure (global) or dict(region -> Figure) for regional
      }
      For an explicitly requested region with no CSVs, 'data' is an empty
      DataFrame and 'figure' an empty dict.

    Raises:
      FileNotFoundError if base_dir/<scope>/<stat> does not exist, if no global
      CSV exists for any requested year, or if no region was requested and none
      could be discovered.
    """
    feature_order = _IMPORTANCE_FEATURES
    colors = _IMPORTANCE_COLORS
    year_order = list(years)

    kind = "shap_full" if model_type == "final" else "shap_cv"
    root = Path(base_dir) / scope.lower() / stat.lower()
    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {root}")

    # Seaborn style (applies globally to subsequent plots as well)
    sns.set_theme(style="white")

    # GLOBAL case
    if scope.lower() == "global":
        df_plot = _load_yearly_importances(root, "global", kind, year_order, feature_order)
        if df_plot is None:
            raise FileNotFoundError(f"No CSVs found for scope=global, stat={stat}, kind={kind} in {root}")

        fig = _draw_stacked_importance_bars(
            df_plot, feature_order, colors, figsize, bar_width, legend_fontsize, normalized
        )
        _save_or_show_figure(fig, output_file)
        return {"data": df_plot, "figure": fig}

    # REGIONAL case
    # Discover regions by scanning files that match one of the requested years.
    pattern = re.compile(rf"^(?P<region>.+)_(?P<year>\d{{4}})_{kind}\.csv$")
    files = [p for p in root.glob("*.csv") if p.is_file()]
    region_set = set()
    for p in files:
        m = pattern.match(p.name)
        if not m:
            continue
        if int(m.group("year")) in year_order:
            region_set.add(m.group("region"))

    if region:
        # Filenames use underscores in place of spaces, so also accept the
        # display name (e.g. "United States"); an exact on-disk match wins.
        region_list = [region if region in region_set else region.replace(" ", "_")]
    else:
        region_list = sorted(region_set)

    if not region_list:
        raise FileNotFoundError(f"No regional CSVs found for stat={stat}, kind={kind} in {root}")

    figs = {}
    all_rows = []

    for rgn in region_list:
        df_plot = _load_yearly_importances(root, rgn, kind, year_order, feature_order, region=rgn)
        if df_plot is None:
            continue

        fig = _draw_stacked_importance_bars(
            df_plot, feature_order, colors, figsize, bar_width, legend_fontsize, normalized,
            title=rgn.replace("_", " "),
        )

        if output_file:
            # One file per region: <stem>_<region><suffix> next to output_file.
            outpath = Path(output_file)
            _save_or_show_figure(fig, outpath.parent / f"{outpath.stem}_{rgn}{outpath.suffix}")
        else:
            _save_or_show_figure(fig, None)

        figs[rgn] = fig
        all_rows.append(df_plot)

    df_all = pd.concat(all_rows, ignore_index=True) if all_rows else pd.DataFrame()
    return {"data": df_all, "figure": figs}


def render_all_grouped_stacked_charts(
    base_dir,
    scope="global",
    stat="mean",
    model_type="final",       # "final" or "cv-mean"
    years=(2030, 2050, 2070, 2100),
    output_dir=None,          # directory to save figures; if None, show interactively
    normalized=True,
    figsize=(9, 6),
    legend_fontsize=9,
):
    """
    Render the stacked importance charts from the saved CSVs in one call.

    The output directory (when given) is created first. A scope of "global"
    (case-insensitive) produces a single figure saved as
    global_<stat>_<model_type>_stacked.png; any other scope is rendered as
    "regional", one figure per region, using
    regional_<stat>_<model_type>_stacked.png as the base file name.
    Returns whatever plot_grouped_stacked_feature_importance_from_csvs returns.
    """
    if output_dir is not None:
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Anything that is not "global" is treated as a regional request.
    resolved_scope = "global" if scope.lower() == "global" else "regional"

    outfile = None
    if output_dir is not None:
        if resolved_scope == "global":
            file_name = f"global_{stat}_{model_type}_stacked.png"
        else:
            file_name = f"regional_{stat}_{model_type}_stacked.png"
        outfile = str(Path(output_dir) / file_name)

    return plot_grouped_stacked_feature_importance_from_csvs(
        base_dir=base_dir,
        scope=resolved_scope,
        stat=stat,
        model_type=model_type,
        years=years,
        region=None,
        output_file=outfile,
        normalized=normalized,
        figsize=figsize,
        legend_fontsize=legend_fontsize,
    )

# Example usage: render the global chart from the "median" final-model CSVs and
# save it into ./figs (created if missing).
# NOTE(review): the recorded run of this cell failed with
# "FileNotFoundError: Directory not found: <base_dir>/global/median", while the
# earlier cell output shows the importance CSVs being written under
# <repo>/ml_importance_plots/regional/median/. Confirm that base_dir points at
# the directory the CSVs were actually saved to, and that the global CSVs exist.
base_dir = "data/temporary/NU_DATA/mmBorg/ml_importance_plots"
render_all_grouped_stacked_charts(base_dir, scope="global", stat="median", model_type="final", years=(2030,2050,2070,2100), output_dir="figs")
# Regional variant (one figure per region), using the cross-validated p90 CSVs:
# render_all_grouped_stacked_charts(base_dir, scope="regional", stat="p90", model_type="cv-mean", years=(2030,2050,2070,2100), output_dir="figs")

FileNotFoundError: Directory not found: data/temporary/NU_DATA/mmBorg/ml_importance_plots/global/median