# EMISSIONS/DAMAGES VALIDATION NOTEBOOK
- This notebook performs validations on estimates from the TARE model and unit tests for the functions used to calculate emissions and damages

### HEALTH RELATED EMISSIONS: VALIDATION of Grid Emissions Intensity Projections
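- Assumes Cambium GEA regions and EPA eGRID subregions are equivalent. They are not identical (for example, the three New York eGRID subregions NYCW, NYLI and NYUP are all mapped to the single GEA region NYSTc below), so the results should be read as an approximation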
- Multiply emissions factors for the grid mix fuel sources (ef_pollutants_egrid) by the generation fraction (grid_mix_reg_full_delta)
- This creates a regional emissions factor. The delta scenario approximates long run marginal emissions rates by subtracting the MidCase generation from the High Electrification scenario generation
- The regional emissions factor (eGRID subregion/Cambium GEA Region) can then be multiplied by the EASIUR marginal social costs (Latitude/Longitude specific)

In [None]:
# Load the regional grid-mix generation fractions (delta scenario) used in this validation.
filename = "grid_mix_reg_full_delta.csv"
# Join the directory components individually so the path resolves on every OS;
# a hard-coded backslash is only treated as a separator on Windows.
relative_path = os.path.join("projections", "schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

grid_mix_raw = pd.read_csv(file_path)

# Keep only the columns used downstream and rename them to the notebook's snake_case convention.
grid_mix_columns = {
    'Year': 'year',
    'Cambium.GEA': 'cambium_gea_region',
    'Source': 'fuel_source',
    'Fraction': 'fraction_generation',
}
df_grid_mix = grid_mix_raw[list(grid_mix_columns)].rename(columns=grid_mix_columns)
df_grid_mix

Retrieved data for filename: grid_mix_reg_full_delta.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\grid_mix_reg_full_delta.csv




Unnamed: 0,year,cambium_gea_region,fuel_source,fraction_generation
0,2022,AZNMc,Coal,0.000000
1,2023,AZNMc,Coal,0.000000
2,2024,AZNMc,Coal,0.000000
3,2025,AZNMc,Coal,0.137832
4,2026,AZNMc,Coal,0.275665
...,...,...,...,...
2895,2046,SRVCc,Renewable,0.868251
2896,2047,SRVCc,Renewable,0.883341
2897,2048,SRVCc,Renewable,0.898431
2898,2049,SRVCc,Renewable,0.913521


In [None]:
# Load per-fuel pollutant emission factors reported by EPA eGRID subregion.
filename = "ef_pollutants_egrid.csv"
# Join the directory components individually so the path resolves on every OS;
# a hard-coded backslash is only treated as a separator on Windows.
relative_path = os.path.join("projections", "schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

emis_factors_raw = pd.read_csv(file_path)

# Keep only the columns used downstream and rename them to the notebook's snake_case convention.
# The region column still holds eGRID subregion codes at this point; it is translated below.
# NOTE(review): the saved output of the validation cell shows this file labels the fuel
# 'Renewables' while the grid-mix file uses 'Renewable' - confirm the downstream join
# reconciles the two labels.
emis_factor_columns = {
    'eGRID_subregion': 'cambium_gea_region',
    'Fuel': 'fuel_source',
    'Pollutant': 'pollutant',
    'Emission_rate': 'emis_rate',
    'Unit': 'unit',
}
df_grid_emis_factors = emis_factors_raw[list(emis_factor_columns)].rename(columns=emis_factor_columns)

# eGRID subregion -> Cambium GEA region. A value of None marks a subregion that is excluded.
mapping = {
    'AKGD': None,       # Alaska Grid - Not included
    'AKMS': None,       # Alaska Miscellaneous - Not included
    'AZNM': 'AZNMc',    # Arizona/New Mexico Power Area
    'CAMX': 'CAMXc',    # California Mexico
    'ERCT': 'ERCTc',    # Electric Reliability Council of Texas
    'FRCC': 'FRCCc',    # Florida Reliability Coordinating Council
    'HIMS': None,       # Hawaii Maui Subregion - Not included
    'HIOA': None,       # Hawaii Oahu Subregion - Not included
    'MROE': 'MROEc',    # Midwest Reliability Organization East
    'MROW': 'MROWc',    # Midwest Reliability Organization West
    'NEWE': 'NEWEc',    # New England
    'NWPP': 'NWPPc',    # Northwest Power Pool
    'NYCW': 'NYSTc',    # New York City/Westchester mapped to New York State
    'NYLI': 'NYSTc',    # New York Long Island mapped to New York State
    'NYUP': 'NYSTc',    # New York Upstate mapped to New York State
    'PRMS': None,       # Puerto Rico Miscellaneous - Not included
    'RFCE': 'RFCEc',    # ReliabilityFirst Corporation East
    'RFCM': 'RFCMc',    # ReliabilityFirst Corporation Midwest
    'RFCW': 'RFCWc',    # ReliabilityFirst Corporation West
    'RMPA': 'RMPAc',    # Rocky Mountain Power Area
    'SPNO': 'SPNOc',    # Southwest Power Pool North
    'SPSO': 'SPSOc',    # Southwest Power Pool South
    'SRMV': 'SRMVc',    # SERC Reliability Corporation Mississippi Valley
    'SRMW': 'SRMWc',    # SERC Reliability Corporation Midwest
    'SRSO': 'SRSOc',    # SERC Reliability Corporation South
    'SRTV': 'SRTVc',    # SERC Reliability Corporation Tennessee Valley
    'SRVC': 'SRVCc',    # SERC Reliability Corporation Virginia/Carolina
}

# Apply the mapping to the 'cambium_gea_region' column.
# Codes mapped to None (and any code missing from the dict) become NaN here.
df_grid_emis_factors['cambium_gea_region'] = df_grid_emis_factors['cambium_gea_region'].map(mapping)

# Drop rows where 'cambium_gea_region' is missing (regions not included in the mapping)
df_grid_emis_factors = df_grid_emis_factors.dropna(subset=['cambium_gea_region']).reset_index(drop=True)

# Conversion factor from pounds to metric tons
lb_to_mt = 0.00045359237
perMWh_to_perkWh = 1/1000

# Convert lb/MWh rows to mt/kWh. The mask is computed once, before the unit label changes,
# so the rate and its label are always updated for exactly the same rows.
# Rows reported in any other unit are left unchanged.
is_lb_per_mwh = df_grid_emis_factors['unit'] == 'lb/MWh'
df_grid_emis_factors.loc[is_lb_per_mwh, 'emis_rate'] *= (lb_to_mt * perMWh_to_perkWh)
df_grid_emis_factors.loc[is_lb_per_mwh, 'unit'] = 'mt/kWh'

df_grid_emis_factors

Retrieved data for filename: ef_pollutants_egrid.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\ef_pollutants_egrid.csv




Unnamed: 0,cambium_gea_region,fuel_source,pollutant,emis_rate,unit
0,AZNMc,Coal,NOx,7.171295e-07,mt/kWh
1,CAMXc,Coal,NOx,1.243750e-06,mt/kWh
2,ERCTc,Coal,NOx,5.030339e-07,mt/kWh
3,FRCCc,Coal,NOx,2.326929e-07,mt/kWh
4,MROEc,Coal,NOx,3.451838e-07,mt/kWh
...,...,...,...,...,...
545,SRMVc,Renewables,VOC,0.000000e+00,mt/kWh
546,SRMWc,Renewables,VOC,0.000000e+00,mt/kWh
547,SRSOc,Renewables,VOC,0.000000e+00,mt/kWh
548,SRTVc,Renewables,VOC,0.000000e+00,mt/kWh


In [None]:
# HEALTH RELATED EMISSIONS VALIDATION
df_emis_factors_epa_egrid = process_Schmitt_emissions_data(df_grid_mix, df_grid_emis_factors)
# df_emis_factors_epa_egrid

# Convert the emissions factors dataframe into a lookup dictionary
lookup_electricity_emissions_egrid = df_emis_factors_epa_egrid.set_index(['year', 'cambium_gea_region']).to_dict('index')

# Display the lookup dictionary
lookup_electricity_emissions_egrid

Fuel sources in df_grid_mix: {'Coal', 'Natural Gas', 'Renewable', 'Oil', 'Nuclear'}
Fuel sources in df_grid_emis_factors: {'Renewables', 'Coal', 'Natural Gas', 'Oil', 'Nuclear'}


pollutant,year,cambium_gea_region,delta_egrid_nh3,delta_egrid_nox,delta_egrid_pm25,delta_egrid_so2,delta_egrid_voc
0,2022,AZNMc,4.284055e-11,9.898048e-09,4.965068e-11,3.457965e-09,1.953264e-11
1,2022,CAMXc,7.522755e-09,2.069686e-07,9.580178e-09,3.556412e-09,2.775051e-09
2,2022,ERCTc,4.432087e-09,4.835162e-07,4.317288e-08,1.320681e-06,1.101529e-08
3,2022,FRCCc,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
4,2022,MROEc,4.849361e-09,5.447874e-07,1.078161e-08,8.474062e-08,9.076871e-09
...,...,...,...,...,...,...,...
575,2050,SRMVc,1.411854e-09,1.876759e-08,2.205976e-09,5.428756e-09,6.989808e-10
576,2050,SRMWc,6.359354e-10,1.704447e-08,9.728582e-10,4.954324e-09,2.334395e-10
577,2050,SRSOc,4.354236e-10,1.003789e-08,1.331772e-09,3.118447e-09,4.730064e-10
578,2050,SRTVc,2.546688e-09,2.709200e-08,4.472789e-09,2.509969e-08,1.490707e-09


In [None]:
# # Check unique fuel sources in both dataframes
# fuel_sources_mix = set(df_grid_mix['fuel_source'].unique())
# fuel_sources_emis = set(df_grid_emis_factors['fuel_source'].unique())

# print("Fuel sources in df_grid_mix:", fuel_sources_mix)
# print("Fuel sources in df_grid_emis_factors:", fuel_sources_emis)

# # Merge the dataframes
# df_combined = pd.merge(
#     df_grid_mix,
#     df_grid_emis_factors,
#     on=['cambium_gea_region', 'fuel_source'],
#     how='inner'
# )

# # Calculate emissions contribution
# df_combined['emis_contribution'] = df_combined['fraction_generation'] * df_combined['emis_rate']

# # Sum emissions contributions
# df_emis_factors = df_combined.groupby(
#     ['year', 'cambium_gea_region', 'pollutant']
# )['emis_contribution'].sum().reset_index()
# df_emis_factors

### Baseline Marginal Damages: WHOLE-HOME

In [None]:
import numpy as np
import pandas as pd

# Constants
# Fraction used for transmission and distribution (T&D) losses.
TD_LOSSES = 0.06
# Scale-up factor 1 / (1 - losses); applied to electricity consumption in calculate_electricity_emissions.
TD_LOSSES_MULTIPLIER = 1 / (1 - TD_LOSSES)
# Equipment category -> lifetime in years; the functions below loop over one year per lifetime year.
EQUIPMENT_SPECS = {'heating': 15, 'waterHeating': 12, 'clothesDrying': 13, 'cooking': 15}

def calculate_marginal_damages(df, menu_mp, policy_scenario, df_summary):
    """
    Compute yearly and lifetime emissions/damages columns and attach them to a copy of `df`.

    Parameters:
        df (DataFrame): Input data with consumption columns; it is copied, not modified.
        menu_mp (int): Measure package identifier.
        policy_scenario (str): 'No Inflation Reduction Act' or 'AEO2023 Reference Case'.
        df_summary (DataFrame): Receives the lifetime columns; it is modified in place
            by the helper functions called from here.

    Returns:
        DataFrame: Copy of `df` joined with the newly calculated columns. Any column of
        the copy that shares a name with a new column is replaced by the new one.
    """
    working = df.copy()

    # Column prefix, Cambium scenario name and electricity emissions lookup for this run
    prefix, grid_scenario, electricity_lookup = define_scenario_settings(menu_mp, policy_scenario)

    # One Series of HDD adjustment factors per year, aligned with the rows of the copy
    hdd_by_year = precompute_hdd_factors(working)

    computed = calculate_damages_grid_scenario(
        working, df_summary, menu_mp, TD_LOSSES_MULTIPLIER, electricity_lookup,
        grid_scenario, prefix, hdd_by_year
    )

    # Remove stale versions of the columns about to be joined so the join cannot collide
    stale = computed.columns.intersection(working.columns)
    if len(stale) > 0:
        working = working.drop(columns=stale)

    return working.join(computed, how='left')


def define_scenario_settings(menu_mp, policy_scenario):
    """
    Pick the column prefix, Cambium scenario and electricity emissions lookup for a run.

    A measure package of 0 always selects the baseline settings, whatever
    `policy_scenario` is. Otherwise the policy scenario decides.

    Parameters:
        menu_mp (int): Measure package identifier.
        policy_scenario (str): Policy scenario.

    Returns:
        Tuple: (scenario_prefix, cambium_scenario, emis_electricity_lookup)

    Raises:
        ValueError: If `menu_mp` is not 0 and `policy_scenario` is not one of the
            two supported scenario names.
    """
    if menu_mp == 0:
        prefix = "baseline_"
        lookup = emis_preIRA_co2e_cambium21_lookup
    elif policy_scenario == 'No Inflation Reduction Act':
        prefix = f"preIRA_mp{menu_mp}_"
        lookup = emis_preIRA_co2e_cambium21_lookup
    elif policy_scenario == 'AEO2023 Reference Case':
        prefix = f"iraRef_mp{menu_mp}_"
        lookup = emis_IRA_co2e_cambium22_lookup
    else:
        raise ValueError("Invalid Policy Scenario! Choose 'No Inflation Reduction Act' or 'AEO2023 Reference Case'.")

    # Every supported combination uses the same Cambium scenario name
    return prefix, "MidCase", lookup


def precompute_hdd_factors(df):
    """
    Build the heating degree day (HDD) adjustment factors for every row and year.

    Years run from 2024 through 2024 plus the longest lifetime in EQUIPMENT_SPECS
    (inclusive). For each row, the factor table of its census division is used,
    falling back to the 'National' table when the division is unknown; a year
    missing from the chosen table yields a factor of 1.0.

    Parameters:
        df (DataFrame): Input data with a 'census_division' column.

    Returns:
        dict: year -> Series of HDD factors aligned with the rows of `df`.
    """
    def factor_for(division, year):
        # Division-specific table when available, otherwise the national one
        division_table = hdd_factor_lookup.get(division, hdd_factor_lookup['National'])
        return division_table.get(year, 1.0)

    start_year = 2024
    final_year = start_year + max(EQUIPMENT_SPECS.values())

    return {
        year: df['census_division'].map(lambda division: factor_for(division, year))
        for year in range(start_year, final_year + 1)
    }


def calculate_damages_grid_scenario(df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
                                    cambium_scenario, scenario_prefix, hdd_factors):
    """
    Run the emissions/damages calculation for every equipment category and MER type.

    Parameters:
        df (DataFrame): Input DataFrame.
        df_summary (DataFrame): DataFrame that receives the lifetime results.
        menu_mp (int): Measure package identifier.
        td_losses_multiplier (float): Multiplier applied for transmission/distribution losses.
        emis_electricity_lookup (dict): Lookup for electricity emissions data.
        cambium_scenario (str): Scenario identifier used as part of the lookup key.
        scenario_prefix (str): Prefix for the generated column names.
        hdd_factors (dict): Precomputed HDD adjustment factors, keyed by year.

    Returns:
        DataFrame: The generated columns, indexed like `df`.
    """
    collected = {}

    # Category-major, MER-type-minor order; this fixes the order of the output columns
    runs = [
        (category, lifetime, mer_type)
        for category, lifetime in EQUIPMENT_SPECS.items()
        for mer_type in ('lrmer', 'srmer')
    ]

    for category, lifetime, mer_type in runs:
        process_emissions_for_category(
            df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
            cambium_scenario, scenario_prefix, hdd_factors, collected,
            category, lifetime, mer_type
        )

    return pd.DataFrame(collected, index=df.index)


def process_emissions_for_category(df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
                                   cambium_scenario, scenario_prefix, hdd_factors, new_columns_data,
                                   category, lifetime, mer_type):
    """
    Fill in yearly and lifetime emissions/damages for one category and one MER type.

    For each year of the equipment lifetime (starting in 2024) the electricity and
    fossil fuel emissions are added together and multiplied by EPA_SCC_USD2023_PER_TON
    to obtain damages. Yearly values are stored rounded to two decimals; the lifetime
    totals are accumulated from the unrounded yearly values.

    Parameters:
        df (DataFrame): Input DataFrame.
        df_summary (DataFrame): Summary DataFrame; receives the lifetime columns.
        menu_mp (int): Measure package identifier (not used in this function).
        td_losses_multiplier (float): Multiplier applied for transmission/distribution losses.
        emis_electricity_lookup (dict): Lookup for electricity emissions data.
        cambium_scenario (str): Scenario identifier used as part of the lookup key.
        scenario_prefix (str): Prefix for the generated column names.
        hdd_factors (dict): Precomputed HDD factors, keyed by year.
        new_columns_data (dict): Column name -> values; updated in place.
        category (str): Equipment category (e.g., heating, cooking).
        lifetime (int): Equipment lifetime in years.
        mer_type (str): Marginal emissions type ('lrmer' or 'srmer').
    """
    row_count = len(df)
    cumulative_emissions = np.zeros(row_count)
    cumulative_damages = np.zeros(row_count)

    first_year = 2024
    for year_label in range(first_year, first_year + lifetime):
        emis_col, damage_col = generate_column_names(scenario_prefix, year_label, category, mer_type)

        # Emissions from electricity use in this year
        electricity_part = calculate_electricity_emissions(
            df, category, hdd_factors[year_label], td_losses_multiplier, emis_electricity_lookup,
            cambium_scenario, year_label, mer_type
        )

        # Emissions from on-site fossil fuel use in this year
        fossil_part = calculate_fossil_fuel_emissions(
            df, category, hdd_factors[year_label], emission_factors=emis_factors_fossil_fuels
        )

        yearly_emissions = fossil_part + electricity_part
        yearly_damages = yearly_emissions * EPA_SCC_USD2023_PER_TON

        new_columns_data[emis_col] = np.round(yearly_emissions, 2)
        new_columns_data[damage_col] = np.round(yearly_damages, 2)

        # In-place addition keeps the running totals as plain numpy arrays
        cumulative_emissions += yearly_emissions
        cumulative_damages += yearly_damages

    # Record the lifetime totals in both the new columns and the summary frame
    store_lifetime_and_avoided_emissions(df, df_summary, scenario_prefix, category, mer_type,
                                         cumulative_emissions, cumulative_damages, new_columns_data)


def generate_column_names(scenario_prefix, year_label, category, mer_type):
    """
    Build the yearly emissions and climate-damages column names.

    Parameters:
        scenario_prefix (str): Scenario prefix placed at the start of both names.
        year_label (int): Year the values refer to.
        category (str): Equipment category.
        mer_type (str): Marginal emissions type, used as the name suffix.

    Returns:
        tuple: (emissions column name, damages column name).
    """
    return (
        f'{scenario_prefix}{year_label}_{category}_tons_co2e_{mer_type}',
        f'{scenario_prefix}{year_label}_{category}_damages_climate_{mer_type}',
    )


def calculate_electricity_emissions(df, category, hdd_factor, td_losses_multiplier,
                                    emis_electricity_lookup, cambium_scenario, year_label, mer_type):
    """
    Calculate electricity emissions for one category in one year.

    The emission factor of each row is looked up by (cambium_scenario, gea_region),
    then by year, then by '<mer_type>_co2e'. A missing entry at any of those levels
    gives a factor of 0, so such rows contribute no emissions.

    Parameters:
        df (DataFrame): Needs 'gea_region' and 'base_electricity_<category>_consumption'.
        category (str): Equipment category.
        hdd_factor (float or Series): HDD adjustment factor.
        td_losses_multiplier (float): Multiplier applied for transmission/distribution losses.
        emis_electricity_lookup (dict): Nested lookup of electricity emission factors.
        cambium_scenario (str): Scenario identifier used as part of the lookup key.
        year_label (int): Year of calculation.
        mer_type (str): Marginal emissions type ('lrmer' or 'srmer').

    Returns:
        Series: Calculated emissions.
    """
    factor_key = f'{mer_type}_co2e'

    def factor_for_region(gea_region):
        yearly_table = emis_electricity_lookup.get((cambium_scenario, gea_region), {})
        return yearly_table.get(year_label, {}).get(factor_key, 0)

    region_factors = df['gea_region'].map(factor_for_region).fillna(0)

    # Multiply in the same left-to-right order: consumption, HDD, T&D losses, factor
    adjusted = df[f'base_electricity_{category}_consumption'] * hdd_factor
    adjusted = adjusted * td_losses_multiplier
    return adjusted * region_factors


def calculate_fossil_fuel_emissions(df, category, hdd_factor, emission_factors=None):
    """
    Calculate fossil fuel emissions for a category using the provided emission factors lookup dictionary.

    Natural gas and propane are always included. Fuel oil is included for every
    category except 'cooking' and 'clothesDrying'. Missing (NaN) contributions are
    treated as 0 before the fuels are summed.

    Parameters:
        df (DataFrame): Input DataFrame containing 'base_<fuel>_<category>_consumption' columns.
        category (str): Equipment category (e.g., 'heating', 'cooking').
        hdd_factor (float or Series): Heating Degree Day adjustment factor.
        emission_factors (dict, optional): Lookup of the form {fuel: {'co2e': factor}} with
            entries for 'naturalGas', 'propane' and 'fuelOil'. When omitted, the module-level
            `emis_factors_fossil_fuels` is used.

    Returns:
        Series: Combined fossil fuel emissions (in tons CO2e).

    Raises:
        KeyError: If a required fuel or 'co2e' entry is missing from `emission_factors`,
            or a required consumption column is missing from `df`.
    """
    # Resolve the default at call time. Binding the global in the signature would freeze
    # whatever object existed when this function was defined (and fail outright if the
    # lookup had not been loaded yet), so a later reassignment would be silently ignored.
    if emission_factors is None:
        emission_factors = emis_factors_fossil_fuels

    # Access emission factors from the lookup dictionary
    natural_gas_factor = emission_factors['naturalGas']['co2e']
    propane_factor = emission_factors['propane']['co2e']
    fuel_oil_factor = emission_factors['fuelOil']['co2e']

    # Calculate emissions for each fuel type
    emis_naturalGas = (
        df[f'base_naturalGas_{category}_consumption'] * hdd_factor * natural_gas_factor
    )
    emis_propane = (
        df[f'base_propane_{category}_consumption'] * hdd_factor * propane_factor
    )

    # Fuel oil is not used for cooking or clothes drying
    if category not in ['cooking', 'clothesDrying']:
        emis_fuelOil = (
            df[f'base_fuelOil_{category}_consumption'] * hdd_factor * fuel_oil_factor
        )
    else:
        emis_fuelOil = pd.Series(0, index=df.index)

    # Sum the emissions from all applicable fuel types
    total_emissions = (
        emis_naturalGas.fillna(0) +
        emis_propane.fillna(0) +
        emis_fuelOil.fillna(0)
    )

    return total_emissions


def store_lifetime_and_avoided_emissions(df, df_summary, scenario_prefix, category, mer_type,
                                         lifetime_emissions, lifetime_damages, new_columns_data):
    """
    Record the lifetime emissions and damages totals, rounded to two decimals.

    Each total is written twice: into `new_columns_data` and as a column of
    `df_summary`. Both containers are modified in place; nothing is returned.
    `df` is accepted but not used here.
    """
    totals_by_column = (
        (f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}', lifetime_emissions),
        (f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}', lifetime_damages),
    )

    for column_name, totals in totals_by_column:
        # Round separately for each target so the two containers never share an array
        new_columns_data[column_name] = np.round(totals, 2)
        df_summary[column_name] = np.round(totals, 2)


In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")
# Copy the baseline dataframe to serve as the summary frame; calculate_marginal_damages
# writes the lifetime emissions/damages columns into it in place.
print("\n", "Creating dataframe to store marginal damages calculations ...")
df_baseline_scenario_damages = df_euss_am_baseline_home.copy()

# Signature: calculate_marginal_damages(df, menu_mp, policy_scenario, df_summary)
# The result replaces df_euss_am_baseline_home, so the yearly and lifetime columns are
# added to the full baseline dataframe.
# NOTE(review): menu_mp is a notebook-level variable set outside this section; the
# 'baseline_' column prefix in the saved output implies it was 0 when this cell ran - confirm.
df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      menu_mp=menu_mp,
                                                      policy_scenario='No Inflation Reduction Act',
                                                      df_summary=df_baseline_scenario_damages
                                                     )
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------


 Creating dataframe to store marginal damages calculations ...


Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,reeds_balancing_area,gea_region,state,city,...,baseline_2035_cooking_tons_co2e_srmer,baseline_2035_cooking_damages_climate_srmer,baseline_2036_cooking_tons_co2e_srmer,baseline_2036_cooking_damages_climate_srmer,baseline_2037_cooking_tons_co2e_srmer,baseline_2037_cooking_damages_climate_srmer,baseline_2038_cooking_tons_co2e_srmer,baseline_2038_cooking_damages_climate_srmer,baseline_cooking_lifetime_tons_co2e_srmer,baseline_cooking_lifetime_damages_climate_srmer
2,239,1690.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.03,5.75,0.03,5.66,0.02,5.57,0.02,5.48,0.40,89.27
3,273,1690.0,South,East South Central,East South Central,Mixed-Humid,90,SRSOc,AL,In another census Place,...,0.43,95.47,0.42,94.00,0.41,92.47,0.41,90.96,6.62,1481.71
4,307,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.43,95.47,0.42,94.00,0.41,92.47,0.41,90.96,6.62,1481.71
5,409,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.03,5.96,0.03,5.87,0.03,5.77,0.03,5.68,0.41,92.51
7,517,1220.0,South,East South Central,East South Central,Mixed-Humid,89,SRSOc,AL,In another census Place,...,0.30,67.03,0.30,66.00,0.29,64.93,0.29,63.87,4.65,1040.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548905,548109,1690.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548907,548226,2176.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548908,548228,1690.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Not in a census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548910,548417,885.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Casper,...,0.37,83.10,0.36,80.45,0.36,80.13,0.36,79.81,5.89,1317.96


In [None]:
import unittest
import pandas as pd
import numpy as np

# Mock data and constants for testing
# WARNING: these assignments rebind notebook-level names that the functions under test
# read as globals at call time (the lookups, EPA_SCC_USD2023_PER_TON, hdd_factor_lookup,
# EQUIPMENT_SPECS). After running this cell, re-run the cells that load the real values
# before doing any non-test calculation.
EQUIPMENT_SPECS = {'heating': 15, 'waterHeating': 12, 'clothesDrying': 13, 'cooking': 15}

emis_preIRA_co2e_cambium21_lookup = {
    ('MidCase', 'Region1'): {
        2024: {'lrmer_co2e': 0.5, 'srmer_co2e': 0.6},
        2025: {'lrmer_co2e': 0.4, 'srmer_co2e': 0.5},
    },
    ('MidCase', 'Region2'): {
        2024: {'lrmer_co2e': 0.3, 'srmer_co2e': 0.35},
        2025: {'lrmer_co2e': 0.25, 'srmer_co2e': 0.3},
    },
}

# Same keys as the pre-IRA mock but different values. If the two names pointed at the
# same dict, a test could not tell which lookup a function had actually returned.
emis_IRA_co2e_cambium22_lookup = {
    ('MidCase', 'Region1'): {
        2024: {'lrmer_co2e': 0.45, 'srmer_co2e': 0.55},
        2025: {'lrmer_co2e': 0.35, 'srmer_co2e': 0.45},
    },
    ('MidCase', 'Region2'): {
        2024: {'lrmer_co2e': 0.28, 'srmer_co2e': 0.32},
        2025: {'lrmer_co2e': 0.22, 'srmer_co2e': 0.27},
    },
}

EPA_SCC_USD2023_PER_TON = 51  # Example value

emis_factors_fossil_fuels = {
    'naturalGas': {'co2e': 0.2},
    'propane': {'co2e': 0.25},
    'fuelOil': {'co2e': 0.3},
}

# Only 2024 and 2025 are mocked; precompute_hdd_factors falls back to 1.0 for other years.
hdd_factor_lookup = {
    'Division1': {2024: 1.1, 2025: 1.0},
    'Division2': {2024: 0.9, 2025: 0.95},
    'National': {2024: 1.0, 2025: 1.0},
}

class TestMarginalDamagesCalculations(unittest.TestCase):
    """
    Unit tests for the marginal-damages helper functions.

    The tests use the mock lookups and constants defined at notebook level and expect
    the functions under test to be defined in the running kernel already.
    """

    # First calendar year produced by the functions under test (they use year + 2023, year >= 1)
    FIRST_YEAR = 2024
    MER_TYPES = ('lrmer', 'srmer')
    # Categories for which the functions under test do not read a fuel oil column
    NO_FUEL_OIL = ('cooking', 'clothesDrying')

    def setUp(self):
        """Build a two-row input frame with consumption columns for every category."""
        data = {
            'census_division': ['Division1', 'Division2'],
            'gea_region': ['Region1', 'Region2'],
        }

        # Add consumption data for all equipment categories
        for category in EQUIPMENT_SPECS.keys():
            data[f'base_electricity_{category}_consumption'] = [1000, 2000]
            data[f'base_naturalGas_{category}_consumption'] = [500, 600]
            data[f'base_propane_{category}_consumption'] = [300, 400]
            if category not in self.NO_FUEL_OIL:
                data[f'base_fuelOil_{category}_consumption'] = [200, 300]

        self.df = pd.DataFrame(data)
        # Sample df_summary DataFrame
        self.df_summary = pd.DataFrame(index=self.df.index)

    # ------------------------------------------------------------------
    # Helpers for building the expected column names
    # ------------------------------------------------------------------
    @staticmethod
    def _lifetime_columns(scenario_prefix, category, mer_type):
        """Return the two lifetime column names for one category / MER type."""
        return [
            f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}',
            f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}',
        ]

    def _expected_columns(self, scenario_prefix, category, lifetime, mer_type):
        """Return yearly plus lifetime column names for one category / MER type."""
        columns = []
        for year_label in range(self.FIRST_YEAR, self.FIRST_YEAR + lifetime):
            columns.extend(generate_column_names(scenario_prefix, year_label, category, mer_type))
        columns.extend(self._lifetime_columns(scenario_prefix, category, mer_type))
        return columns

    def _expected_columns_all(self, scenario_prefix):
        """Return yearly plus lifetime column names for every category and MER type."""
        columns = []
        for category, lifetime in EQUIPMENT_SPECS.items():
            for mer_type in self.MER_TYPES:
                columns.extend(self._expected_columns(scenario_prefix, category, lifetime, mer_type))
        return columns

    def _expected_lifetime_columns_all(self, scenario_prefix):
        """Return only the lifetime column names for every category and MER type."""
        columns = []
        for category in EQUIPMENT_SPECS:
            for mer_type in self.MER_TYPES:
                columns.extend(self._lifetime_columns(scenario_prefix, category, mer_type))
        return columns

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_define_scenario_settings(self):
        # menu_mp = 0 selects the baseline settings regardless of the policy scenario
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(0, 'AEO2023 Reference Case')
        self.assertEqual(scenario_prefix, 'baseline_')
        self.assertEqual(cambium_scenario, 'MidCase')
        self.assertIs(emis_electricity_lookup, emis_preIRA_co2e_cambium21_lookup)

        # Test with 'No Inflation Reduction Act' policy
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(1, 'No Inflation Reduction Act')
        self.assertEqual(scenario_prefix, 'preIRA_mp1_')
        self.assertEqual(cambium_scenario, 'MidCase')
        self.assertIs(emis_electricity_lookup, emis_preIRA_co2e_cambium21_lookup)

        # Test with 'AEO2023 Reference Case' policy
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(2, 'AEO2023 Reference Case')
        self.assertEqual(scenario_prefix, 'iraRef_mp2_')
        self.assertEqual(cambium_scenario, 'MidCase')
        self.assertIs(emis_electricity_lookup, emis_IRA_co2e_cambium22_lookup)

        # Test with an invalid policy scenario
        with self.assertRaises(ValueError):
            define_scenario_settings(1, 'Invalid Policy')

    def test_precompute_hdd_factors(self):
        hdd_factors = precompute_hdd_factors(self.df)

        # Every year needed by the longest-lived equipment must be available
        longest_lifetime = max(EQUIPMENT_SPECS.values())
        for year_label in range(self.FIRST_YEAR, self.FIRST_YEAR + longest_lifetime):
            self.assertIn(year_label, hdd_factors)

        np.testing.assert_array_almost_equal(
            hdd_factors[2024],
            pd.Series([1.1, 0.9], index=self.df.index)
        )
        np.testing.assert_array_almost_equal(
            hdd_factors[2025],
            pd.Series([1.0, 0.95], index=self.df.index)
        )
        # 2026 is absent from the mock lookup, so the factor falls back to 1.0
        np.testing.assert_array_almost_equal(
            hdd_factors[2026],
            pd.Series([1.0, 1.0], index=self.df.index)
        )

    def test_calculate_electricity_emissions(self):
        hdd_factor = pd.Series([1.1, 0.9], index=self.df.index)
        td_losses_multiplier = 1 / (1 - 0.06)
        year_label = 2024
        mer_type = 'lrmer'
        cambium_scenario = 'MidCase'

        emis_electricity = calculate_electricity_emissions(
            self.df, 'heating', hdd_factor, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario, year_label, mer_type
        )

        expected_emissions = pd.Series([
            1000 * 1.1 * td_losses_multiplier * 0.5,
            2000 * 0.9 * td_losses_multiplier * 0.3
        ], index=self.df.index)

        np.testing.assert_array_almost_equal(emis_electricity, expected_emissions)

    def test_calculate_fossil_fuel_emissions(self):
        hdd_factor = pd.Series([1.1, 0.9], index=self.df.index)

        emissions = calculate_fossil_fuel_emissions(
            self.df, 'heating', hdd_factor, emission_factors=emis_factors_fossil_fuels
        )

        expected_emissions = pd.Series([
            500 * 1.1 * 0.2 + 300 * 1.1 * 0.25 + 200 * 1.1 * 0.3,
            600 * 0.9 * 0.2 + 400 * 0.9 * 0.25 + 300 * 0.9 * 0.3
        ], index=self.df.index)

        np.testing.assert_array_almost_equal(emissions, expected_emissions)

        # Cooking has no fuel oil column; only natural gas and propane contribute
        cooking_emissions = calculate_fossil_fuel_emissions(
            self.df, 'cooking', hdd_factor, emission_factors=emis_factors_fossil_fuels
        )
        expected_cooking = pd.Series([
            500 * 1.1 * 0.2 + 300 * 1.1 * 0.25,
            600 * 0.9 * 0.2 + 400 * 0.9 * 0.25
        ], index=self.df.index)

        np.testing.assert_array_almost_equal(cooking_emissions, expected_cooking)

    def test_process_emissions_for_category(self):
        new_columns_data = {}
        category = 'heating'
        lifetime = EQUIPMENT_SPECS[category]
        mer_type = 'lrmer'
        scenario_prefix = 'test_'
        cambium_scenario = 'MidCase'
        td_losses_multiplier = TD_LOSSES_MULTIPLIER

        hdd_factors = precompute_hdd_factors(self.df)

        process_emissions_for_category(
            self.df, self.df_summary, 1, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario,
            scenario_prefix, hdd_factors, new_columns_data,
            category, lifetime, mer_type
        )

        # Check that new_columns_data has expected keys
        expected_columns = self._expected_columns(scenario_prefix, category, lifetime, mer_type)
        self.assertEqual(set(new_columns_data.keys()), set(expected_columns))

        # The lifetime totals must also be written to the summary frame
        for column in self._lifetime_columns(scenario_prefix, category, mer_type):
            self.assertIn(column, self.df_summary.columns)

        # Check the first-year values against the individually tested building blocks
        first_year = self.FIRST_YEAR
        expected_emissions = (
            calculate_fossil_fuel_emissions(
                self.df, category, hdd_factors[first_year], emission_factors=emis_factors_fossil_fuels
            )
            + calculate_electricity_emissions(
                self.df, category, hdd_factors[first_year], td_losses_multiplier,
                emis_preIRA_co2e_cambium21_lookup, cambium_scenario, first_year, mer_type
            )
        )
        emis_col, damage_col = generate_column_names(scenario_prefix, first_year, category, mer_type)
        np.testing.assert_array_almost_equal(
            new_columns_data[emis_col], np.round(expected_emissions, 2)
        )
        np.testing.assert_array_almost_equal(
            new_columns_data[damage_col], np.round(expected_emissions * EPA_SCC_USD2023_PER_TON, 2)
        )

    def test_calculate_damages_grid_scenario(self):
        scenario_prefix = 'test_'
        cambium_scenario = 'MidCase'
        td_losses_multiplier = TD_LOSSES_MULTIPLIER
        hdd_factors = precompute_hdd_factors(self.df)

        df_new_columns = calculate_damages_grid_scenario(
            self.df, self.df_summary, 1, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario,
            scenario_prefix, hdd_factors
        )

        # Check that df_new_columns has expected columns
        self.assertEqual(set(df_new_columns.columns), set(self._expected_columns_all(scenario_prefix)))
        # The new columns must line up row-for-row with the input
        self.assertTrue(df_new_columns.index.equals(self.df.index))

    def test_calculate_marginal_damages(self):
        # Run the full calculation and check outputs
        menu_mp = 1
        policy_scenario = 'No Inflation Reduction Act'
        scenario_prefix = 'preIRA_mp1_'
        input_columns = list(self.df.columns)

        df_result = calculate_marginal_damages(self.df, menu_mp, policy_scenario, self.df_summary)

        # The result holds the input columns plus every generated column
        expected_columns = set(input_columns) | set(self._expected_columns_all(scenario_prefix))
        self.assertEqual(set(df_result.columns), expected_columns)

        # The input frame is copied inside the function and must be left untouched
        self.assertEqual(list(self.df.columns), input_columns)

        # Check that df_summary has the lifetime columns
        self.assertEqual(
            set(self.df_summary.columns),
            set(self._expected_lifetime_columns_all(scenario_prefix))
        )

# Run the tests inside the notebook kernel.
# - argv is replaced with a single placeholder so unittest does not try to parse the
#   kernel's own command-line arguments.
# - exit=False stops unittest from calling sys.exit() once the run is finished.
# - verbosity=2 prints one result line per test.
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False, verbosity=2)



test_calculate_damages_grid_scenario (__main__.TestMarginalDamagesCalculations.test_calculate_damages_grid_scenario) ... ok
test_calculate_electricity_emissions (__main__.TestMarginalDamagesCalculations.test_calculate_electricity_emissions) ... ok
test_calculate_fossil_fuel_emissions (__main__.TestMarginalDamagesCalculations.test_calculate_fossil_fuel_emissions) ... ok
test_calculate_marginal_damages (__main__.TestMarginalDamagesCalculations.test_calculate_marginal_damages) ... ok
test_define_scenario_settings (__main__.TestMarginalDamagesCalculations.test_define_scenario_settings) ... ok
test_precompute_hdd_factors (__main__.TestMarginalDamagesCalculations.test_precompute_hdd_factors) ... ok
test_process_emissions_for_category (__main__.TestMarginalDamagesCalculations.test_process_emissions_for_category) ... ok

----------------------------------------------------------------------
Ran 7 tests in 0.231s

OK
