In [276]:
import pandas as pd
import numpy as np

In [277]:
elevate_scens = pd.read_csv(
    r"X:\user\dekkerm\Data\ELEVATE\ELEVATE_Data_D2.3_vetted_20250211.csv"
)
engage_scens = pd.read_csv(
    r"X:\user\dekkerm\Data\ENGAGE\PolicyScenarios\ENGAGE_internal_2610_onlyemis.csv"
)

In [278]:
# Filter for variables and scenarios
variables = ["Emissions|Kyoto Gases", "Emissions|CO2"]
# scenarios = {"CurPol": "ELV-SSP2-CP-D0", "CurPol_national": "ELV-SSP2-CP-D0", "NDC": "ELV-SSP2-NDC-D0", "NetZero": "ELV-SSP2-LTS"}
scenarios = {
    "ELV-SSP2-CP-D0": "CurPol",
    "ELV-SSP2-CP-D0-N": "CurPol",
    "Current Policies": "CurPol",
    "ELV-SSP2-NDC-D0": "NDC",
    "ELV-SSP2-LTS": "NetZero"
}
elevate_scens_filtered = elevate_scens[elevate_scens["Variable"].isin(variables)]
elevate_scens_filtered = elevate_scens_filtered[elevate_scens_filtered["Scenario"].isin(scenarios.keys())]

# Rename columns: Remove leading 'X' from year columns
elevate_scens_filtered.columns = [
    col[1:] if col.startswith("X") and col[1:].isdigit() else col
    for col in elevate_scens_filtered.columns
]

#Rename scenarios
elevate_scens_filtered["Scenario"] = elevate_scens_filtered["Scenario"].replace(scenarios)

### Expand regions

In [279]:
# Split the column by '|' and expand into new columns
split_columns = elevate_scens_filtered['Region'].str.split('|', expand=True)

# Rename the new columns (optional)
split_columns.columns = ['Model_2', 'Region_2']

# Add the new columns to the original DataFrame
elevate_scens_filtered = pd.concat([elevate_scens_filtered, split_columns], axis=1)

In [280]:
elevate_scens_filtered.columns

Index(['Model', 'Scenario', 'Region', 'Variable', 'Unit', '1990', '1995',
       '2000', '2005', '2010', '2015', '2016', '2017', '2020', '2021', '2025',
       '2030', '2035', '2040', '2045', '2050', '2055', '2060', '2065', '2070',
       '2075', '2080', '2085', '2090', '2095', '2100', 'Model_2', 'Region_2'],
      dtype='object')

In [281]:
#Change order of columns
elevate_scens_filtered = elevate_scens_filtered[['Model', 'Model_2', 'Scenario', 'Region', 'Region_2', 'Variable', 'Unit', '1990', '1995',
       '2000', '2005', '2010', '2015', '2016', '2017', '2020', '2021', '2025',
       '2030', '2035', '2040', '2045', '2050', '2055', '2060', '2065', '2070',
       '2075', '2080', '2085', '2090', '2095', '2100']]

In [282]:
# Replace values in Model_2 with NaN if they are the same as in Model to filter out regions
elevate_scens_filtered['Model_2'] = np.where(
    elevate_scens_filtered['Model_2'] == elevate_scens_filtered['Model'],
    np.nan,
    elevate_scens_filtered['Model_2']
)

In [283]:
# Merge the two columns into a new column 'Region_cleaned', drop old columns and rearrange order
elevate_scens_filtered["Region_cleaned"] = elevate_scens_filtered[
    "Model_2"
].combine_first(elevate_scens_filtered["Region_2"])



In [284]:
elevate_scens_filtered.columns

Index(['Model', 'Model_2', 'Scenario', 'Region', 'Region_2', 'Variable',
       'Unit', '1990', '1995', '2000', '2005', '2010', '2015', '2016', '2017',
       '2020', '2021', '2025', '2030', '2035', '2040', '2045', '2050', '2055',
       '2060', '2065', '2070', '2075', '2080', '2085', '2090', '2095', '2100',
       'Region_cleaned'],
      dtype='object')

In [285]:
#Rename regions
elevate_scens_filtered["Region_cleaned"] = elevate_scens_filtered[
    "Region_cleaned"
].replace("United States of America", "USA")
elevate_scens_filtered["Region_cleaned"] = elevate_scens_filtered[
    "Region_cleaned"
].replace(["Southeast Asia", "South-East Asia", "South East Asia"], "Southeast Asia")
elevate_scens_filtered["Region_cleaned"] = elevate_scens_filtered[
    "Region_cleaned"
].replace("World", "EARTH")

elevate_scens_filtered.drop(columns=['Model_2', 'Region_2'], inplace=True)
# elevate_scens_filtered = elevate_scens_filtered[['Model', 'Scenario', 'Region', 'Region_cleaned', 'Variable', 'Unit', '1990', '1995',
#        '2000', '2005', '2010', '2015', '2016', '2017', '2020', '2021', '2025',
#        '2030', '2035', '2040', '2045', '2050', '2055', '2060', '2065', '2070',
#        '2075', '2080', '2085', '2090', '2095', '2100']]


# Dynamically select the first six columns and all year columns
columns_to_keep = [
    "Model",
    "Scenario",
    "Region",
    "Region_cleaned",
    "Variable",
    "Unit",
] + [col for col in elevate_scens_filtered.columns if col.isdigit()]

# Reorder the DataFrame
elevate_scens_filtered = elevate_scens_filtered[columns_to_keep]

In [286]:
elevate_scens_filtered.columns

Index(['Model', 'Scenario', 'Region', 'Region_cleaned', 'Variable', 'Unit',
       '1990', '1995', '2000', '2005', '2010', '2015', '2016', '2017', '2020',
       '2021', '2025', '2030', '2035', '2040', '2045', '2050', '2055', '2060',
       '2065', '2070', '2075', '2080', '2085', '2090', '2095', '2100'],
      dtype='object')

In [None]:
# Sort the dataframe by 'Region_cleaned' and reset the index
elevate_scens_filtered.sort_values(by=['Region_cleaned'], inplace=True)
elevate_scens_filtered.reset_index(drop=True, inplace=True)

# Add a new column 'Is_Duplicate' to indicate subsequent duplicates
elevate_scens_filtered['Is_Duplicate'] = elevate_scens_filtered.duplicated(subset=['Model', 'Scenario', 'Variable', '2025', '2100'], keep='first')

# Remove all rows that are Is_Duplicated = True
elevate_scens_deduplicated = elevate_scens_filtered[~elevate_scens_filtered['Is_Duplicate']]

elevate_scens_deduplicated.drop(columns=['Is_Duplicate', 'Region'], inplace=True)
elevate_scens_deduplicated.rename(columns={'Region_cleaned': 'Region'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  elevate_scens_deduplicated.drop(columns=['Is_Duplicate', 'Region'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  elevate_scens_deduplicated.rename(columns={'Region_cleaned': 'Region'}, inplace=True)
