In [1]:
# Set columns in display
# pd.set_option('display.max_columns', None)
# pd.reset_option('display.max_columns') # Reset options to default

# Set rows in display
# pd.set_option('display.max_rows', None)
# pd.reset_option('display.max_rows') # Reset options to default

# Load Util File with TARE Model Functions

In [2]:
import os

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

# Get the current working directory of the project
project_root = os.path.abspath(os.getcwd())
print(f"Project root directory: {project_root}")

# Relative path to the file from the project root
relative_path = r"tare_model_functions_v1.5.ipynb"

# Construct the absolute path to the file
file_path = os.path.join(project_root, relative_path)
print(f"File path: {file_path}")

# Run the notebook and import variables
if os.path.exists(relative_path):
    get_ipython().run_line_magic('run', f'-i "{relative_path}"')
    print("Loaded All TARE Model Functions")
else:
    print(f"File not found: {relative_path}")

Project root directory: c:\Users\14128\Research\cmu-tare-model
File path: c:\Users\14128\Research\cmu-tare-model\tare_model_functions_v1.5.ipynb
Loaded All TARE Model Functions


In [3]:
# Storing Result Outputs in output_results folder
relative_path = r"output_results"
output_folder_path = os.path.join(project_root, relative_path)
print(f"Result outputs will be exported here: {output_folder_path}")

Result outputs will be exported here: c:\Users\14128\Research\cmu-tare-model\output_results


# Simulate Residential Energy Consumption using NREL End-Use Savings Shapes
- Filter EUSS Data: Only occupied units and Single Family Homes



In [4]:
# The ``inline`` flag will use the appropriate backend to make figures appear inline in the notebook.  
%matplotlib inline

import pandas as pd
import numpy as np

# `plt` is an alias for the `matplotlib.pyplot` module
import matplotlib.pyplot as plt

# import seaborn library (wrapper of matplotlib)
import seaborn as sns
sns.set(style="darkgrid")

# For regex, import re
import re

from datetime import datetime

# Get the current datetime
# Start the timer
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

In [5]:
print("""
-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------
""")

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

filename = "baseline_metadata_and_annual_results.csv"
relative_path = os.path.join(r"euss_data\resstock_amy2018_release_1.1\state", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

print("""
-------------------------------------------------------------------------------------------------------
Data Filters: Only occupied units and Single Family Homes
-------------------------------------------------------------------------------------------------------
""")

# Fix DtypeWarning error in columns:
# 'in.neighbors', 'in.geometry_stories_low_rise', 'in.iso_rto_region', 'in.pv_orientation', 'in.pv_system_size'
columns_to_string = {11: str, 61: str, 121: str, 103: str, 128: str, 129: str}
df_euss_am_baseline = pd.read_csv(file_path, dtype=columns_to_string)
occupancy_filter = df_euss_am_baseline['in.vacancy_status'] == 'Occupied'
df_euss_am_baseline = df_euss_am_baseline.loc[occupancy_filter]

# Filter for single family home building type
house_type_list = ['Single-Family Attached', 'Single-Family Detached']
house_type_filter = df_euss_am_baseline['in.geometry_building_type_recs'].isin(house_type_list)
df_euss_am_baseline = df_euss_am_baseline.loc[house_type_filter]
# df_euss_am_baseline


-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------

Retrieved data for filename: baseline_metadata_and_annual_results.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\euss_data\resstock_amy2018_release_1.1\state\baseline_metadata_and_annual_results.csv

------------------------

In [6]:
# # Create a location ID for the name of the batch conversion file
# while True:
#     if menu_state == 'N':
#         location_id = 'National'
#         print("You chose to analyze all of the United States.")
#         break
#     elif menu_state == 'Y':
#         if menu_city == 'N':
#             try:
#                 location_id = str(input_state)
#                 print(f"Location ID is: {location_id}")
#                 break
#             except ValueError:
#                 print("Invalid input for state!")
#         elif menu_city == 'Y':
#             try:
#                 location_id = input_cityFilter.replace(', ', '_').strip()
#                 print(f"Location ID is: {location_id}")
#                 break
#             except AttributeError:
#                 print("Invalid input for city filter!")
#         else:
#             print("Incorrect state or city filter assignment!")
#     else:
#         print("Invalid data location. Check your inputs at the beginning of this notebook!")

In [7]:
# # Make a copy of the dataframe
# df_euss_am_baseline = df_euss_am_baseline.copy()

# Choose between national or sub-national level analysis
menu_state = get_menu_choice(menu_prompt, {'N', 'Y'})   # This code is only run in baseline

# National Level 
if menu_state == 'N':
    print("You chose to analyze all of the United States.")
    input_state = 'National'
    location_id = 'National'

# Filter down to state or city
else:
    input_state = get_state_choice(df_euss_am_baseline)    
    print(f"You chose to filter for: {input_state}")

    location_id = str(input_state)
    print(f"Location ID is: {location_id}")


    state_filter = df_euss_am_baseline['in.state'].eq(input_state)
    df_euss_am_baseline = df_euss_am_baseline.loc[state_filter]

    print(city_prompt)
    print(df_euss_am_baseline['in.city'].value_counts())

    menu_city = get_menu_choice(city_menu_prompt, {'N', 'Y'})

    # Filter for the entire selected state
    if menu_city == 'N':
        print(f"You chose to analyze all of state: {input_state}")
        
        location_id = str(input_state)
        print(f"Location ID is: {location_id}")
        
    # Filter to a city within the selected state
    else:
        input_cityFilter = get_city_choice(df_euss_am_baseline, input_state)
        print(f"You chose to filter for: {input_state}, {input_cityFilter}")

        location_id = input_cityFilter.replace(', ', '_').strip()
        print(f"Location ID is: {location_id}")

        city_filter = df_euss_am_baseline['in.city'].eq(f"{input_state}, {input_cityFilter}")
        df_euss_am_baseline = df_euss_am_baseline.loc[city_filter]

# Display the filtered dataframe
df_euss_am_baseline

You chose to analyze all of the United States.


Unnamed: 0,bldg_id,upgrade,weight,applicability,in.sqft,in.ahs_region,in.ashrae_iecc_climate_zone_2004,in.ashrae_iecc_climate_zone_2004_2_a_split,in.bathroom_spot_vent_hour,in.bedrooms,...,out.emissions.natural_gas.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.propane.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.electricity.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.fuel_oil.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.natural_gas.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.propane.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_95_decarb_by_2035_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_mid_case_15_2025_start.co2e_kg
2,239,0,242.131013,True,1690.0,Non-CBSA East South Central,3A,3A,Hour20,3,...,215.943534,0.000000,8773.384074,0.0,215.943534,0.000000,3565.038262,6416.193347,5755.373221,8989.327608
3,273,0,242.131013,True,1690.0,Non-CBSA East South Central,3A,3A,Hour12,3,...,0.000000,0.000000,11296.731129,0.0,0.000000,0.000000,4339.903757,8193.678510,7254.665194,11296.731129
4,307,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour0,4,...,0.000000,0.000000,8750.011820,0.0,0.000000,0.000000,3345.187937,6249.625611,5587.946834,8750.011820
5,409,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour20,2,...,1642.477930,0.000000,5725.103641,0.0,1642.477930,0.000000,3784.993820,5489.549041,5164.458936,7367.581571
7,517,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour1,3,...,0.000000,0.000000,8932.439414,0.0,0.000000,0.000000,3441.414837,6415.303853,5721.073473,8932.439414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548907,548226,0,242.131013,True,2176.0,Non-CBSA Mountain,6B,6B,Hour3,4,...,15324.356044,0.000000,2313.298407,0.0,15324.356044,0.000000,16802.023908,16545.095582,16472.920871,17637.654451
548908,548228,0,242.131013,True,1690.0,Non-CBSA Mountain,6B,6B,Hour6,4,...,8192.601682,0.000000,1889.439924,0.0,8192.601682,0.000000,9394.122057,9195.903552,9129.578822,10082.041606
548910,548417,0,242.131013,True,885.0,Non-CBSA Mountain,6B,6B,Hour18,2,...,5212.758359,0.000000,2112.907195,0.0,5212.758359,0.000000,6546.826589,6318.763521,6253.986448,7325.665554
548914,549740,0,242.131013,True,1220.0,Non-CBSA Mountain,7B,7B,Hour4,2,...,0.000000,268.627834,11423.104685,0.0,0.000000,268.627834,7173.561517,5931.433285,4609.821155,11691.732519


## Project Future Energy Consumption Using EIA Heating Degree Day (HDD) Forecasted Data (Factors)

In [8]:
# Factors for 2022 to 2050
filename = 'aeo_projections_2022_2050.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_hdd_projection_factors = pd.read_excel(io=file_path, sheet_name='hdd_factors_2022_2050')

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Convert the factors dataframe into a lookup dictionary
hdd_factor_lookup = df_hdd_projection_factors.set_index(['census_division']).to_dict('index')
hdd_factor_lookup

Retrieved data for filename: aeo_projections_2022_2050.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\aeo_projections_2022_2050.xlsx


{'National': {2022: 1,
  2023: 1.0028349414260749,
  2024: 0.9389536266963965,
  2025: 0.9344844368179533,
  2026: 0.9300828169743566,
  2027: 0.9257070259326153,
  2028: 0.9212779053519207,
  2029: 0.9168538581973203,
  2030: 0.9124351151864318,
  2031: 0.9080377039911245,
  2032: 0.9036541297129915,
  2033: 0.8992492812396443,
  2034: 0.894875507855348,
  2035: 0.8904758482849783,
  2036: 0.8860390020882589,
  2037: 0.8817285900905196,
  2038: 0.877365886428882,
  2039: 0.8729314040841085,
  2040: 0.8685839209369028,
  2041: 0.8642702226890459,
  2042: 0.8599120736340495,
  2043: 0.8555441810694344,
  2044: 0.8511753084862802,
  2045: 0.8468232704962843,
  2046: 0.8425090534289743,
  2047: 0.8382247585710751,
  2048: 0.8339389072548168,
  2049: 0.8297055204635582,
  2050: 0.8255002687057338},
 'East North Central': {2022: 1,
  2023: 0.9811731756651626,
  2024: 0.9307608526528707,
  2025: 0.928426948809709,
  2026: 0.9262486385560915,
  2027: 0.9239147347129298,
  2028: 0.921580830869

In [9]:
print("""
-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------
""")

# df_baseline_enduse(df_baseline, df_enduse, category, fuel_filter='Yes', tech_filter='Yes')
df_euss_am_baseline_home = df_enduse_refactored(df_baseline = df_euss_am_baseline,
                                                fuel_filter = 'Yes',
                                                tech_filter = 'Yes')

# Project Future Energy Consumption
df_euss_am_baseline_home = project_future_consumption(df=df_euss_am_baseline_home, hdd_factor_lookup=hdd_factor_lookup, menu_mp=menu_mp)
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------

Processing column: in.clothes_dryer
Initial data types: object
Data types after processing: object
Processing column: in.cooking_range
Initial data types: object
Data types after processing: object
331531 rows remain after applying total heating consumption calculation
Filtered for the following fuels: ['Natural Gas', 'Electricity', 'Propane', 'Fuel Oil']
321357 rows remain after applying heating fuel filter
Filtered for the following Heating technologies: ['Electricity ASHP', 'Electricity Baseboard', 'Electricity Electric Boiler', 'Electricity Electric Furnace', 'Fuel Oil Fuel Boiler', 'Fuel Oil Fuel Furnace', 'Natural Gas Fuel Boiler', 'Natural Gas Fuel Furnace', 'Propane Fuel Boiler', 'Propane Fuel Furnace']
291558 rows remain after applying heating techn

Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,reeds_balancing_area,gea_region,state,city,...,baseline_2029_cooking_consumption,baseline_2030_cooking_consumption,baseline_2031_cooking_consumption,baseline_2032_cooking_consumption,baseline_2033_cooking_consumption,baseline_2034_cooking_consumption,baseline_2035_cooking_consumption,baseline_2036_cooking_consumption,baseline_2037_cooking_consumption,baseline_2038_cooking_consumption
2,239,1690.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44
3,273,1690.0,South,East South Central,East South Central,Mixed-Humid,90,SRSOc,AL,In another census Place,...,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20
4,307,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20
5,409,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63
7,517,1220.0,South,East South Central,East South Central,Mixed-Humid,89,SRSOc,AL,In another census Place,...,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548905,548109,1690.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548907,548226,2176.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548908,548228,1690.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Not in a census Place,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548910,548417,885.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Casper,...,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50


# Public Perspective: Monetized Marginal Damages from Emissions

## Fossil Fuels: Climate and Health-Related Pollutants

In [None]:
# Calculate emissions factors for fossil fuels
# This is before adjusting for natural gas leakage
# Note: We use electricity marginal damages directly instead of multiplying
# CEDM emissions factors by the EASIUR marginal damages. 
def calculate_fossilFuel_emission_factor(fuel_type, so2_factor, nox_factor, pm25_factor, fuelConversion_factor1, fuelConversion_factor2):
    """
    Calculate Emissions Factors: FOSSIL FUELS
    Fossil Fuels (Natural Gas, Fuel Oil, Propane):
    - NOx, SO2, CO2: 
        - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
        - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
        - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
        - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
    - PM2.5: 
        - A National Methodology and Emission Inventory for Residential Fuel Combustion
        - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
    """
    
    # Create an empty dictionary called margEmis_factors to store the values
    margEmis_factors = {}

    # SO2, NOx, CO2: (_ lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
    # PM2.5 - FUEL OIL: 0.83 lb/thousand gallons * (1 thousand gallons / 1000 gallons) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
    # PM2.5 - NATURAL GAS: 1.9 lb/million cf * (million cf/1000000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
    # PM2.5 - PROPANE: 0.17 lb/thousand gallons * (1 thousand gallons / 1000 gallons) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
    margEmis_factors[f"{fuel_type}_so2"] = so2_factor * (1 / 1000000) * (3412 / 1)
    margEmis_factors[f"{fuel_type}_nox"] = nox_factor * (1 / 1000000) * (3412 / 1)
    margEmis_factors[f"{fuel_type}_pm25"] = pm25_factor * (1 / fuelConversion_factor1) * (1 / fuelConversion_factor2) * (3412 / 1)

    # NATURAL GAS LEAKAGE: NATURAL GAS INFRASTRUCTURE
    # leakage rate for natural gas infrastructure
    # 1 Therm = 29.30 kWh --> 1.27 kg CO2e/therm * (1 therm/29.30 kWh) = 0.043 kg CO2e/kWh = 0.095 lb CO2e/kWh
    naturalGas_leakage_mtCO2e_perkWh = 0.043 * (1/1000)

    # CO2e include pre- and post-combustion emissions
    margEmis_factors[f"naturalGas_co2e"] = (228.5 * (1/1000) * (1/1000)) + naturalGas_leakage_mtCO2e_perkWh
    margEmis_factors[f"propane_co2e"]  = 275.8 * (1/1000) * (1/1000)
    margEmis_factors[f"fuelOil_co2e"]  = 303.9 * (1/1000) * (1/1000)

    return margEmis_factors

print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------
""")

fuelOil_factors = calculate_fossilFuel_emission_factor(fuel_type="fuelOil", so2_factor=0.0015, nox_factor=0.1300, pm25_factor=0.83, fuelConversion_factor1=1000, fuelConversion_factor2=138500)
naturalGas_factors = calculate_fossilFuel_emission_factor(fuel_type="naturalGas", so2_factor=0.0006, nox_factor=0.0922, pm25_factor=1.9, fuelConversion_factor1=1000000, fuelConversion_factor2=1039)
propane_factors = calculate_fossilFuel_emission_factor(fuel_type="propane", so2_factor=0.0002, nox_factor=0.1421, pm25_factor=0.17, fuelConversion_factor1=1000, fuelConversion_factor2=91452)

all_factors = {**fuelOil_factors, **naturalGas_factors, **propane_factors}

df_margEmis_factors = pd.DataFrame.from_dict(all_factors, orient="index", columns=["value"])
df_margEmis_factors.reset_index(inplace=True)
df_margEmis_factors.columns = ["pollutant", "value"]
df_margEmis_factors[["fuel_type", "pollutant"]] = df_margEmis_factors["pollutant"].str.split("_", expand=True)
# df_margEmis_factors["unit"] = "[lb/kWh]"

# Update the units to metric tons per kWh
df_margEmis_factors["unit"] = "[mt/kWh]"

# Convert the values from lb/kWh to mt/kWh
lb_to_mt = 0.00045359237
df_margEmis_factors["value"] = df_margEmis_factors["value"] * lb_to_mt

# Add the 'state' column and assign 'National' to every row
df_margEmis_factors = df_margEmis_factors.assign(state='National')

df_margEmis_factors = df_margEmis_factors[["state", "fuel_type", "pollutant", "value", "unit"]]
df_margEmis_factors


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------



Unnamed: 0,state,fuel_type,pollutant,value,unit
0,National,fuelOil,so2,2.321486e-09,[mt/kWh]
1,National,fuelOil,nox,2.011954e-07,[mt/kWh]
2,National,fuelOil,pm25,9.274769e-09,[mt/kWh]
3,National,naturalGas,co2e,1.231503e-07,[mt/kWh]
4,National,propane,co2e,1.251008e-07,[mt/kWh]
5,National,fuelOil,co2e,1.378467e-07,[mt/kWh]
6,National,naturalGas,so2,9.285943e-10,[mt/kWh]
7,National,naturalGas,nox,1.42694e-07,[mt/kWh]
8,National,naturalGas,pm25,2.830172e-09,[mt/kWh]
9,National,propane,so2,3.095314e-10,[mt/kWh]


In [None]:
# Extract fossil fuel emissions factors from the dataframe
emis_factors_fossil_fuels = {
    'naturalGas': {
        'co2e': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'naturalGas') & (df_margEmis_factors['pollutant'] == 'co2e')]['value'].values[0],
        'so2': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'naturalGas') & (df_margEmis_factors['pollutant'] == 'so2')]['value'].values[0],
        'nox': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'naturalGas') & (df_margEmis_factors['pollutant'] == 'nox')]['value'].values[0],
        'pm25': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'naturalGas') & (df_margEmis_factors['pollutant'] == 'pm25')]['value'].values[0]
        },
    'propane': {
        'co2e': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'propane') & (df_margEmis_factors['pollutant'] == 'co2e')]['value'].values[0],
        'so2': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'propane') & (df_margEmis_factors['pollutant'] == 'so2')]['value'].values[0],
        'nox': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'propane') & (df_margEmis_factors['pollutant'] == 'nox')]['value'].values[0],
        'pm25': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'propane') & (df_margEmis_factors['pollutant'] == 'pm25')]['value'].values[0]
        },
    'fuelOil': {
        'co2e': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'fuelOil') & (df_margEmis_factors['pollutant'] == 'co2e')]['value'].values[0],
        'so2': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'fuelOil') & (df_margEmis_factors['pollutant'] == 'so2')]['value'].values[0],
        'nox': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'fuelOil') & (df_margEmis_factors['pollutant'] == 'nox')]['value'].values[0],
        'pm25': df_margEmis_factors[(df_margEmis_factors['fuel_type'] == 'fuelOil') & (df_margEmis_factors['pollutant'] == 'pm25')]['value'].values[0]
        }
    }

emis_factors_fossil_fuels

## Emissions from Electricity Generation

### Climate-Related Emissions from CAMBIUM LRMER/SRMER 
### Includes pre-combustion (fugitive) and combustion

In [12]:
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd

def calculate_electricity_co2e_cambium(df_cambium_import):
    """
    Interpolates Cambium electricity emission factors and converts units.

    This function takes a dataframe containing Cambium electricity emission factors and performs the following:
    - Interpolates the Long Run Marginal Emissions Rates (LRMER) and Short Run Marginal Emissions Rates (SRMER)
      values for each scenario and GEA region on an annual basis.
    - Converts the LRMER and SRMER values from kg per MWh to tons per MWh and tons per kWh.

    Parameters
    ----------
    df_cambium_import : pandas.DataFrame
        DataFrame containing Cambium electricity emission factors with the following columns:
        - 'scenario': Scenario name or identifier.
        - 'gea_region': GEA region identifier.
        - 'year': Year of the data.
        - 'lrmer_co2e_kg_per_MWh': Long Run Marginal Emissions Rate in kg CO2e per MWh.
        - 'srmer_co2e_kg_per_MWh': Short Run Marginal Emissions Rate in kg CO2e per MWh.

    Returns
    -------
    df_cambium_import_copy : pandas.DataFrame
        DataFrame with interpolated LRMER and SRMER values for each year and additional columns for emission factors
        converted to tons per MWh and tons per kWh.

    Notes
    -----
    - The interpolation is performed linearly between the available years for each unique combination of scenario and GEA region.
    - The converted emission factors are added as new columns:
        - 'lrmer_co2e_ton_per_MWh'
        - 'lrmer_co2e_ton_per_kWh'
        - 'srmer_co2e_ton_per_MWh'
        - 'srmer_co2e_ton_per_kWh'
    - The conversion from kg to tons is done by dividing by 1,000 (1 ton = 1,000 kg).
    - The conversion from MWh to kWh is done by dividing by 1,000 (1 MWh = 1,000 kWh).

    """
    # Create a copy of the dataframe
    df_cambium_import_copy = df_cambium_import.copy()

    # Create a new DataFrame to store interpolated results
    interpolated_data = []

    # Group by 'scenario', 'state', and 'gea_region'
    grouped = df_cambium_import_copy.groupby(['scenario', 'gea_region'])

    for (scenario, gea_region), group in grouped:
        years = group['year'].values

        # Interpolate for LRMER (Long Run Marginal Emissions Rates)
        lrmer_values = group['lrmer_co2e_kg_per_MWh'].values
        lrmer_interp_func = interp1d(years, lrmer_values, kind='linear')

        # Interpolate for SRMER (Short Run Marginal Emissions Rates)
        srmer_values = group['srmer_co2e_kg_per_MWh'].values
        srmer_interp_func = interp1d(years, srmer_values, kind='linear')

        # Generate new years in 1-year increments
        new_years = np.arange(years.min(), years.max() + 1)

        # Interpolate the LRMER and SRMER values for these new years
        new_lrmer_values = lrmer_interp_func(new_years)
        new_srmer_values = srmer_interp_func(new_years)

        # Store the results in a DataFrame
        interpolated_group = pd.DataFrame({
            'scenario': scenario,
            'gea_region': gea_region,
            'year': new_years,
            'lrmer_co2e_kg_per_MWh': new_lrmer_values,
            'srmer_co2e_kg_per_MWh': new_srmer_values
        })

        interpolated_data.append(interpolated_group)

    # Concatenate all the interpolated data into a single DataFrame
    df_cambium_import_copy = pd.concat(interpolated_data).reset_index(drop=True)

    # Convert both LRMER and SRMER values to tons per MWh and tons per kWh
    df_cambium_import_copy['lrmer_co2e_ton_per_MWh'] = df_cambium_import_copy['lrmer_co2e_kg_per_MWh'] / 1000
    df_cambium_import_copy['lrmer_co2e_ton_per_kWh'] = df_cambium_import_copy['lrmer_co2e_kg_per_MWh'] / 1_000_000

    df_cambium_import_copy['srmer_co2e_ton_per_MWh'] = df_cambium_import_copy['srmer_co2e_kg_per_MWh'] / 1000
    df_cambium_import_copy['srmer_co2e_ton_per_kWh'] = df_cambium_import_copy['srmer_co2e_kg_per_MWh'] / 1_000_000

    return df_cambium_import_copy

def create_cambium_co2e_lookup(df_cambium_processed):
    """
    Creates a nested lookup dictionary for Cambium emission factors.

    This function takes a processed dataframe containing Cambium emission factors and constructs a nested dictionary
    that allows quick lookup of LRMER and SRMER emission factors based on scenario, GEA region, and year.

    Parameters
    ----------
    df_cambium_processed : pandas.DataFrame
        DataFrame containing processed Cambium emission factors with the following columns:
        - 'scenario': Scenario name or identifier.
        - 'gea_region': GEA region identifier.
        - 'year': Year of the data.
        - 'lrmer_co2e_ton_per_kWh': Long Run Marginal Emissions Rate in tons CO2e per kWh.
        - 'srmer_co2e_ton_per_kWh': Short Run Marginal Emissions Rate in tons CO2e per kWh.

    Returns
    -------
    emis_scenario_cambium_lookup : dict
        Nested dictionary structured as:
        {
            (scenario, gea_region): {
                year: {
                    'lrmer_co2e': lrmer_value,
                    'srmer_co2e': srmer_value
                },
                ...
            },
            ...
        }

    Notes
    -----
    - The outer keys of the dictionary are tuples containing (scenario, gea_region).
    - The inner dictionary maps years to a dictionary containing both LRMER and SRMER values.
    - This structure allows efficient retrieval of emission factors based on scenario, location, and year.

    """

    # Create a copy of the dataframe
    df_cambium_processed_copy = df_cambium_processed.copy()

    # Create the nested lookup dictionary for both LRMER and SRMER in tons CO2e per kWh
    emis_scenario_cambium_lookup = {}

    # Populate the dictionary
    for _, row in df_cambium_processed_copy.iterrows():
        outer_key = (row['scenario'], row['gea_region'])
        year = row['year']

        # Extract both LRMER and SRMER values in tons per kWh
        lrmer_value = row['lrmer_co2e_ton_per_kWh']
        srmer_value = row['srmer_co2e_ton_per_kWh']

        # Initialize the outer key if not already present
        if outer_key not in emis_scenario_cambium_lookup:
            emis_scenario_cambium_lookup[outer_key] = {}

        # Assign both LRMER and SRMER values in the inner dictionary for each year
        emis_scenario_cambium_lookup[outer_key][year] = {
            'lrmer_co2e': lrmer_value,
            'srmer_co2e': srmer_value
        }

    return emis_scenario_cambium_lookup


In [13]:
# INTERPOLATE ANNUAL DATA BETWEEN 5-YEAR TIME STEPS
print("""
-------------------------------------------------------------------------------------------------------
CLIMATE DAMAGES FROM CAMBIUM
-------------------------------------------------------------------------------------------------------
- Load CSV
- Convert MWh --> kWh and kg --> metric tons (mt)
- Inflate updated Social Cost of Carbon from $190 USD2020 to $USD2023
- Convert SCC to $USD2023/lb
- Calculate damage factors for CO2e: LRMER[lb/kWh] * SCC[$USD2023/lb] = $USD2023/kWh
- Map state, county pairs to Cambium 2023 GEA region

Possibly for other emissions projections:
- Calculate projection factors for each group of scenario and GEA region for 2025 to 2050 (normalize all annual data in group by 2025 value) 
-------------------------------------------------------------------------------------------------------
""")


-------------------------------------------------------------------------------------------------------
CLIMATE DAMAGES FROM CAMBIUM
-------------------------------------------------------------------------------------------------------
- Load CSV
- Convert MWh --> kWh and kg --> metric tons (mt)
- Inflate updated Social Cost of Carbon from $190 USD2020 to $USD2023
- Convert SCC to $USD2023/lb
- Calculate damage factors for CO2e: LRMER[lb/kWh] * SCC[$USD2023/lb] = $USD2023/kWh
- Map state, county pairs to Cambium 2023 GEA region

Possibly for other emissions projections:
- Calculate projection factors for each group of scenario and GEA region for 2025 to 2050 (normalize all annual data in group by 2025 value) 
-------------------------------------------------------------------------------------------------------



In [14]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

print("""
-------------------------------------------------------------------------------------------------------
PRE-IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2021 FOR PRE-IRA SCENARIO
filename = 'cambium21_midCase_annual_gea.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium21_margEmis_electricity = pd.read_excel(io=file_path, sheet_name='proc_Cambium21_MidCase_gea')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}

Loading dataframe ...
Creating lookup dictionary for LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------
""")

# Calculate electricity emission factors for Cambium 2021
# Process the data using the provided function to interpolate and convert units
df_cambium21_processed = calculate_electricity_co2e_cambium(df_cambium21_margEmis_electricity)

# Display the processed DataFrame
df_cambium21_processed


-------------------------------------------------------------------------------------------------------
PRE-IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium21_midCase_annual_gea.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium21_midCase_annual_gea.xlsx

Loading dataframe ...
Creating lookup dictionary for LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------



Unnamed: 0,scenario,gea_region,year,lrmer_co2e_kg_per_MWh,srmer_co2e_kg_per_MWh,lrmer_co2e_ton_per_MWh,lrmer_co2e_ton_per_kWh,srmer_co2e_ton_per_MWh,srmer_co2e_ton_per_kWh
0,MidCase,AZNMc,2022,525.60,811.8,0.52560,0.000526,0.8118,0.000812
1,MidCase,AZNMc,2023,507.00,810.1,0.50700,0.000507,0.8101,0.000810
2,MidCase,AZNMc,2024,488.40,808.4,0.48840,0.000488,0.8084,0.000808
3,MidCase,AZNMc,2025,455.75,812.2,0.45575,0.000456,0.8122,0.000812
4,MidCase,AZNMc,2026,423.10,816.0,0.42310,0.000423,0.8160,0.000816
...,...,...,...,...,...,...,...,...,...
575,MidCase,SRVCc,2046,320.60,754.9,0.32060,0.000321,0.7549,0.000755
576,MidCase,SRVCc,2047,319.25,753.1,0.31925,0.000319,0.7531,0.000753
577,MidCase,SRVCc,2048,317.90,751.3,0.31790,0.000318,0.7513,0.000751
578,MidCase,SRVCc,2049,312.30,734.8,0.31230,0.000312,0.7348,0.000735


In [15]:
# Create the lookup dictionary using the create_cambium_emission_factor_lookup function
emis_preIRA_co2e_cambium21_lookup = create_cambium_co2e_lookup(df_cambium21_processed)

# Display the lookup dictionary
emis_preIRA_co2e_cambium21_lookup

{('MidCase',
  'AZNMc'): {2022: {'lrmer_co2e': 0.0005256,
   'srmer_co2e': 0.0008118}, 2023: {'lrmer_co2e': 0.000507, 'srmer_co2e': 0.0008100999999999999}, 2024: {'lrmer_co2e': 0.0004883999999999999,
   'srmer_co2e': 0.0008084}, 2025: {'lrmer_co2e': 0.00045575,
   'srmer_co2e': 0.0008122}, 2026: {'lrmer_co2e': 0.00042310000000000004,
   'srmer_co2e': 0.000816}, 2027: {'lrmer_co2e': 0.00036945000000000006,
   'srmer_co2e': 0.00080035}, 2028: {'lrmer_co2e': 0.00031580000000000003,
   'srmer_co2e': 0.0007847}, 2029: {'lrmer_co2e': 0.00031470000000000006,
   'srmer_co2e': 0.00076725}, 2030: {'lrmer_co2e': 0.00031360000000000003,
   'srmer_co2e': 0.0007497999999999999}, 2031: {'lrmer_co2e': 0.00029605,
   'srmer_co2e': 0.00073815}, 2032: {'lrmer_co2e': 0.0002785,
   'srmer_co2e': 0.0007265}, 2033: {'lrmer_co2e': 0.00029995,
   'srmer_co2e': 0.00073055}, 2034: {'lrmer_co2e': 0.00032139999999999995,
   'srmer_co2e': 0.0007346000000000001}, 2035: {'lrmer_co2e': 0.00029375,
   'srmer_co2e': 0.0

In [16]:
import os
import pandas as pd

print("""
-------------------------------------------------------------------------------------------------------
IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2022 FOR IRA SCENARIO
filename = 'cambium22_allScenarios_annual_gea.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium22_margEmis_electricity = pd.read_excel(io=file_path, sheet_name='proc_Cambium22_MidCase_gea')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}

Loading dataframe ...
Creating lookup dictionary for 2024 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------
""")

# Calculate electricity emission factors for Cambium 2021
# Process the data using the provided function to interpolate and convert units
df_cambium22_processed = calculate_electricity_co2e_cambium(df_cambium22_margEmis_electricity)

# Display the processed DataFrame
df_cambium22_processed


-------------------------------------------------------------------------------------------------------
IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium22_allScenarios_annual_gea.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium22_allScenarios_annual_gea.xlsx

Loading dataframe ...
Creating lookup dictionary for 2024 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------



Unnamed: 0,scenario,gea_region,year,lrmer_co2e_kg_per_MWh,srmer_co2e_kg_per_MWh,lrmer_co2e_ton_per_MWh,lrmer_co2e_ton_per_kWh,srmer_co2e_ton_per_MWh,srmer_co2e_ton_per_kWh
0,MidCase,AZNMc,2024,326.9,777.10,0.3269,0.000327,0.77710,0.000777
1,MidCase,AZNMc,2025,260.4,743.40,0.2604,0.000260,0.74340,0.000743
2,MidCase,AZNMc,2026,193.9,709.70,0.1939,0.000194,0.70970,0.000710
3,MidCase,AZNMc,2027,162.2,643.05,0.1622,0.000162,0.64305,0.000643
4,MidCase,AZNMc,2028,130.5,576.40,0.1305,0.000131,0.57640,0.000576
...,...,...,...,...,...,...,...,...,...
535,MidCase,SRVCc,2046,136.4,500.16,0.1364,0.000136,0.50016,0.000500
536,MidCase,SRVCc,2047,130.6,502.92,0.1306,0.000131,0.50292,0.000503
537,MidCase,SRVCc,2048,124.8,505.68,0.1248,0.000125,0.50568,0.000506
538,MidCase,SRVCc,2049,119.0,508.44,0.1190,0.000119,0.50844,0.000508


In [17]:
# Create the lookup dictionary using the create_cambium_co2e_lookup function
emis_IRA_co2e_cambium22_lookup = create_cambium_co2e_lookup(df_cambium22_processed)

# Display the lookup dictionary
emis_IRA_co2e_cambium22_lookup

{('MidCase',
  'AZNMc'): {2024: {'lrmer_co2e': 0.0003269,
   'srmer_co2e': 0.0007771}, 2025: {'lrmer_co2e': 0.0002604, 'srmer_co2e': 0.0007434000000000001}, 2026: {'lrmer_co2e': 0.0001939,
   'srmer_co2e': 0.0007097000000000001}, 2027: {'lrmer_co2e': 0.00016219999999999999,
   'srmer_co2e': 0.0006430499999999999}, 2028: {'lrmer_co2e': 0.0001305,
   'srmer_co2e': 0.0005764}, 2029: {'lrmer_co2e': 0.0001171,
   'srmer_co2e': 0.00053155}, 2030: {'lrmer_co2e': 0.0001037,
   'srmer_co2e': 0.0004867}, 2031: {'lrmer_co2e': 0.00011284,
   'srmer_co2e': 0.00046903999999999994}, 2032: {'lrmer_co2e': 0.00012198000000000001,
   'srmer_co2e': 0.00045138}, 2033: {'lrmer_co2e': 0.00013112,
   'srmer_co2e': 0.00043371999999999996}, 2034: {'lrmer_co2e': 0.00014026,
   'srmer_co2e': 0.00041605999999999994}, 2035: {'lrmer_co2e': 0.0001494,
   'srmer_co2e': 0.0003984}, 2036: {'lrmer_co2e': 0.00015762,
   'srmer_co2e': 0.00039015999999999996}, 2037: {'lrmer_co2e': 0.00016584,
   'srmer_co2e': 0.000381919999

## HEALTH-RELATED EMISSIONS

### Electricity - Method 1: Schmitt et al 2024 Study (Assumes GEA Region and EPA eGRID subregions are the same - which they aren't)

In [18]:
# Adjust for regional cost differences with RSMeans
filename = "grid_mix_reg_full_delta.csv"
relative_path = os.path.join(r"projections\schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_grid_mix = pd.read_csv(file_path)

df_grid_mix = pd.DataFrame({
    'year': df_grid_mix['Year'],
    'cambium_gea_region': df_grid_mix['Cambium.GEA'],
    'fuel_source': df_grid_mix['Source'],
    'fraction_generation': df_grid_mix['Fraction'],
})
df_grid_mix

Retrieved data for filename: grid_mix_reg_full_delta.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\grid_mix_reg_full_delta.csv




Unnamed: 0,year,cambium_gea_region,fuel_source,fraction_generation
0,2022,AZNMc,Coal,0.000000
1,2023,AZNMc,Coal,0.000000
2,2024,AZNMc,Coal,0.000000
3,2025,AZNMc,Coal,0.137832
4,2026,AZNMc,Coal,0.275665
...,...,...,...,...
2895,2046,SRVCc,Renewable,0.868251
2896,2047,SRVCc,Renewable,0.883341
2897,2048,SRVCc,Renewable,0.898431
2898,2049,SRVCc,Renewable,0.913521


In [19]:
# Adjust for regional cost differences with RSMeans
filename = "ef_pollutants_egrid.csv"
relative_path = os.path.join(r"projections\schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_grid_emis_factors = pd.read_csv(file_path)

df_grid_emis_factors = pd.DataFrame({
    'cambium_gea_region': df_grid_emis_factors['eGRID_subregion'],
    'fuel_source': df_grid_emis_factors['Fuel'],
    'pollutant': df_grid_emis_factors['Pollutant'],
    'emis_rate': df_grid_emis_factors['Emission_rate'],
    'unit': df_grid_emis_factors['Unit'],
})

mapping = {
    'AKGD': None,       # Alaska Grid - Not included
    'AKMS': None,       # Alaska Miscellaneous - Not included
    'AZNM': 'AZNMc',    # Arizona/New Mexico Power Area
    'CAMX': 'CAMXc',    # California Mexico
    'ERCT': 'ERCTc',    # Electric Reliability Council of Texas
    'FRCC': 'FRCCc',    # Florida Reliability Coordinating Council
    'HIMS': None,       # Hawaii Maui Subregion - Not included
    'HIOA': None,       # Hawaii Oahu Subregion - Not included
    'MROE': 'MROEc',    # Midwest Reliability Organization East
    'MROW': 'MROWc',    # Midwest Reliability Organization West
    'NEWE': 'NEWEc',    # New England
    'NWPP': 'NWPPc',    # Northwest Power Pool
    'NYCW': 'NYSTc',    # New York City/Westchester mapped to New York State
    'NYLI': 'NYSTc',    # New York Long Island mapped to New York State
    'NYUP': 'NYSTc',    # New York Upstate mapped to New York State
    'PRMS': None,       # Puerto Rico Miscellaneous - Not included
    'RFCE': 'RFCEc',    # ReliabilityFirst Corporation East
    'RFCM': 'RFCMc',    # ReliabilityFirst Corporation Midwest
    'RFCW': 'RFCWc',    # ReliabilityFirst Corporation West
    'RMPA': 'RMPAc',    # Rocky Mountain Power Area
    'SPNO': 'SPNOc',    # Southwest Power Pool North
    'SPSO': 'SPSOc',    # Southwest Power Pool South
    'SRMV': 'SRMVc',    # SERC Reliability Corporation Mississippi Valley
    'SRMW': 'SRMWc',    # SERC Reliability Corporation Midwest
    'SRSO': 'SRSOc',    # SERC Reliability Corporation South
    'SRTV': 'SRTVc',    # SERC Reliability Corporation Tennessee Valley
    'SRVC': 'SRVCc',    # SERC Reliability Corporation Virginia/Carolina
}

# Apply the mapping to the 'cambium_gea_region' column
df_grid_emis_factors['cambium_gea_region'] = df_grid_emis_factors['cambium_gea_region'].map(mapping)

# Drop rows where 'cambium_gea_region' is None (regions not included in the mapping)
df_grid_emis_factors = df_grid_emis_factors.dropna(subset=['cambium_gea_region']).reset_index(drop=True)

# Conversion factor from pounds to metric tons
lb_to_mt = 0.00045359237
perMWh_to_perkWh = 1/1000

# Apply the conversion where the unit is 'lb/MWh'
df_grid_emis_factors.loc[df_grid_emis_factors['unit'] == 'lb/MWh', 'emis_rate'] *= (lb_to_mt * perMWh_to_perkWh)
df_grid_emis_factors.loc[df_grid_emis_factors['unit'] == 'lb/MWh', 'unit'] = 'mt/kWh'

df_grid_emis_factors

Retrieved data for filename: ef_pollutants_egrid.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\ef_pollutants_egrid.csv




Unnamed: 0,cambium_gea_region,fuel_source,pollutant,emis_rate,unit
0,AZNMc,Coal,NOx,7.171295e-07,mt/kWh
1,CAMXc,Coal,NOx,1.243750e-06,mt/kWh
2,ERCTc,Coal,NOx,5.030339e-07,mt/kWh
3,FRCCc,Coal,NOx,2.326929e-07,mt/kWh
4,MROEc,Coal,NOx,3.451838e-07,mt/kWh
...,...,...,...,...,...
545,SRMVc,Renewables,VOC,0.000000e+00,mt/kWh
546,SRMWc,Renewables,VOC,0.000000e+00,mt/kWh
547,SRSOc,Renewables,VOC,0.000000e+00,mt/kWh
548,SRTVc,Renewables,VOC,0.000000e+00,mt/kWh


In [20]:
def process_Schmitt_emissions_data(df_grid_mix, df_grid_emis_factors):
    # Check unique fuel sources in both dataframes
    fuel_sources_mix = set(df_grid_mix['fuel_source'].unique())
    fuel_sources_emis = set(df_grid_emis_factors['fuel_source'].unique())

    print("Fuel sources in df_grid_mix:", fuel_sources_mix)
    print("Fuel sources in df_grid_emis_factors:", fuel_sources_emis)

    # Merge the dataframes
    df_combined = pd.merge(
        df_grid_mix,
        df_grid_emis_factors,
        on=['cambium_gea_region', 'fuel_source'],
        how='inner'
    )

    # Calculate emissions contribution
    df_combined['emis_contribution'] = df_combined['fraction_generation'] * df_combined['emis_rate']

    # Sum emissions contributions
    df_emis_factors = df_combined.groupby(
        ['year', 'cambium_gea_region', 'pollutant']
    )['emis_contribution'].sum().reset_index()

    # Pivot the dataframe
    df_emis_factors_pivot = df_emis_factors.pivot_table(
        index=['year', 'cambium_gea_region'],
        columns='pollutant',
        values='emis_contribution'
    ).reset_index()

    # Rename columns
    df_emis_factors_pivot.rename(columns={
        'NH3': 'delta_egrid_nh3',
        'NOx': 'delta_egrid_nox',
        'PM25': 'delta_egrid_pm25',
        'SO2': 'delta_egrid_so2',
        'VOC': 'delta_egrid_voc'
    }, inplace=True)

    return df_emis_factors_pivot

# Example usage
df_emis_factors_epa_egrid = process_Schmitt_emissions_data(df_grid_mix, df_grid_emis_factors)
df_emis_factors_epa_egrid

Fuel sources in df_grid_mix: {'Coal', 'Natural Gas', 'Renewable', 'Oil', 'Nuclear'}
Fuel sources in df_grid_emis_factors: {'Renewables', 'Coal', 'Natural Gas', 'Oil', 'Nuclear'}


pollutant,year,cambium_gea_region,delta_egrid_nh3,delta_egrid_nox,delta_egrid_pm25,delta_egrid_so2,delta_egrid_voc
0,2022,AZNMc,4.284055e-11,9.898048e-09,4.965068e-11,3.457965e-09,1.953264e-11
1,2022,CAMXc,7.522755e-09,2.069686e-07,9.580178e-09,3.556412e-09,2.775051e-09
2,2022,ERCTc,4.432087e-09,4.835162e-07,4.317288e-08,1.320681e-06,1.101529e-08
3,2022,FRCCc,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
4,2022,MROEc,4.849361e-09,5.447874e-07,1.078161e-08,8.474062e-08,9.076871e-09
...,...,...,...,...,...,...,...
575,2050,SRMVc,1.411854e-09,1.876759e-08,2.205976e-09,5.428756e-09,6.989808e-10
576,2050,SRMWc,6.359354e-10,1.704447e-08,9.728582e-10,4.954324e-09,2.334395e-10
577,2050,SRSOc,4.354236e-10,1.003789e-08,1.331772e-09,3.118447e-09,4.730064e-10
578,2050,SRTVc,2.546688e-09,2.709200e-08,4.472789e-09,2.509969e-08,1.490707e-09


In [21]:
# Convert the emissions factors dataframe into a lookup dictionary
lookup_electricity_emissions_egrid = df_emis_factors_epa_egrid.set_index(['year', 'cambium_gea_region']).to_dict('index')

# Display the lookup dictionary
lookup_electricity_emissions_egrid

{(2022, 'AZNMc'): {'delta_egrid_nh3': 4.2840552430687625e-11,
  'delta_egrid_nox': 9.898047990490191e-09,
  'delta_egrid_pm25': 4.965068338564084e-11,
  'delta_egrid_so2': 3.457964622852321e-09,
  'delta_egrid_voc': 1.9532640138985512e-11},
 (2022, 'CAMXc'): {'delta_egrid_nh3': 7.522755410773406e-09,
  'delta_egrid_nox': 2.0696856018167551e-07,
  'delta_egrid_pm25': 9.580177724431295e-09,
  'delta_egrid_so2': 3.55641151857646e-09,
  'delta_egrid_voc': 2.7750506632228643e-09},
 (2022, 'ERCTc'): {'delta_egrid_nh3': 4.432086644184407e-09,
  'delta_egrid_nox': 4.835161961260566e-07,
  'delta_egrid_pm25': 4.3172882994071317e-08,
  'delta_egrid_so2': 1.3206809363102089e-06,
  'delta_egrid_voc': 1.101529215996984e-08},
 (2022, 'FRCCc'): {'delta_egrid_nh3': 0.0,
  'delta_egrid_nox': 0.0,
  'delta_egrid_pm25': 0.0,
  'delta_egrid_so2': 0.0,
  'delta_egrid_voc': 0.0},
 (2022, 'MROEc'): {'delta_egrid_nh3': 4.849361388656854e-09,
  'delta_egrid_nox': 5.447873793436593e-07,
  'delta_egrid_pm25': 1.

In [22]:
# # Check unique fuel sources in both dataframes
# fuel_sources_mix = set(df_grid_mix['fuel_source'].unique())
# fuel_sources_emis = set(df_grid_emis_factors['fuel_source'].unique())

# print("Fuel sources in df_grid_mix:", fuel_sources_mix)
# print("Fuel sources in df_grid_emis_factors:", fuel_sources_emis)

# # Merge the dataframes
# df_combined = pd.merge(
#     df_grid_mix,
#     df_grid_emis_factors,
#     on=['cambium_gea_region', 'fuel_source'],
#     how='inner'
# )

# # Calculate emissions contribution
# df_combined['emis_contribution'] = df_combined['fraction_generation'] * df_combined['emis_rate']

# # Sum emissions contributions
# df_emis_factors = df_combined.groupby(
#     ['year', 'cambium_gea_region', 'pollutant']
# )['emis_contribution'].sum().reset_index()
# df_emis_factors

### Electricity - Method 2: CEDM Marginal Emissions Factors and EASIUR (Coal Generation Reduction)

In [84]:
import os
import pandas as pd

print("""
-------------------------------------------------------------------------------------------------------
COAL USED IN ELECTRICITY GENERATION FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2022 FOR IRA SCENARIO
filename = 'cambium21_midCase_annual_gea.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium21_COAL_processed = pd.read_excel(io=file_path, sheet_name='proc_Cambium21_coal_gea')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}
-------------------------------------------------------------------------------------------------------
""")
df_cambium21_COAL_processed 


-------------------------------------------------------------------------------------------------------
COAL USED IN ELECTRICITY GENERATION FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium21_midCase_annual_gea.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium21_midCase_annual_gea.xlsx
-------------------------------------------------------------------------------------------------------



Unnamed: 0,data_source,scenario,gea_region,year,coal_MWh
0,Cambium_2021,MidCase,AZNMc,2022,32912356.0
1,Cambium_2021,MidCase,AZNMc,2024,27460646.0
2,Cambium_2021,MidCase,AZNMc,2026,26772840.0
3,Cambium_2021,MidCase,AZNMc,2028,26693880.0
4,Cambium_2021,MidCase,AZNMc,2030,24898662.0
...,...,...,...,...,...
295,Cambium_2021,MidCase,SRVCc,2042,37576316.0
296,Cambium_2021,MidCase,SRVCc,2044,33240228.0
297,Cambium_2021,MidCase,SRVCc,2046,27152428.0
298,Cambium_2021,MidCase,SRVCc,2048,27861414.0


In [85]:
import os
import pandas as pd

print("""
-------------------------------------------------------------------------------------------------------
COAL USED IN ELECTRICITY GENERATION FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2022 FOR IRA SCENARIO
filename = 'cambium22_allScenarios_annual_gea.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium22_COAL_processed = pd.read_excel(io=file_path, sheet_name='proc_Cambium22_coal_gea')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}
-------------------------------------------------------------------------------------------------------
""")
df_cambium22_COAL_processed 


-------------------------------------------------------------------------------------------------------
COAL USED IN ELECTRICITY GENERATION FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium22_allScenarios_annual_gea.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium22_allScenarios_annual_gea.xlsx
-------------------------------------------------------------------------------------------------------



Unnamed: 0,data_source,scenario,gea_region,year,coal_MWh
0,Cambium_2022,MidCase,AZNMc,2024,22849070.0
1,Cambium_2022,MidCase,AZNMc,2026,21009922.0
2,Cambium_2022,MidCase,AZNMc,2028,14483350.0
3,Cambium_2022,MidCase,AZNMc,2030,13433882.0
4,Cambium_2022,MidCase,AZNMc,2035,8231337.5
...,...,...,...,...,...
155,Cambium_2022,MidCase,SRVCc,2030,17312904.0
156,Cambium_2022,MidCase,SRVCc,2035,8787109.0
157,Cambium_2022,MidCase,SRVCc,2040,6132569.5
158,Cambium_2022,MidCase,SRVCc,2045,4301606.5


In [86]:
# EPA eGRID Coal Generation Data for 2018 to 2022 ()
filename = 'epa_eGRID_total_coal_generation_2018_2022.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_epa_eGRID_COAL_processed = pd.read_excel(io=file_path, sheet_name='coal_generation_2018_2022')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}
-------------------------------------------------------------------------------------------------------
""") 

# Apply the mapping to the 'cambium_gea_region' column
df_epa_eGRID_COAL_processed['cambium_gea_region'] = df_epa_eGRID_COAL_processed['eGRID_subregion'].map(mapping)

# Drop rows where 'cambium_gea_region' is None (regions not included in the mapping)
df_epa_eGRID_COAL_processed = df_epa_eGRID_COAL_processed.dropna(subset=['cambium_gea_region'])

# Group by 'cambium_gea_region' and aggregate
df_epa_eGRID_COAL_processed = (
    df_epa_eGRID_COAL_processed
    .groupby('cambium_gea_region', as_index=False)
    .agg({
        'eGRID_subregion': 'first',  # Retain the first value (or use a different strategy)
        'coal_MWh_2018': 'sum',
        'coal_MWh_2019': 'sum',
        'coal_MWh_2020': 'sum',
        'coal_MWh_2021': 'sum',
        'coal_MWh_2022': 'sum'
    })
)
df_epa_eGRID_COAL_processed


Retrieved data for filename: epa_eGRID_total_coal_generation_2018_2022.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\epa_eGRID_total_coal_generation_2018_2022.xlsx
-------------------------------------------------------------------------------------------------------



Unnamed: 0,cambium_gea_region,eGRID_subregion,coal_MWh_2018,coal_MWh_2019,coal_MWh_2020,coal_MWh_2021,coal_MWh_2022
0,AZNMc,AZNM,44146150.0,37909640.0,26535437,26836890.0,26741465
1,CAMXc,CAMX,8763279.0,7780068.0,7033071,7855352.0,5747480
2,ERCTc,ERCT,93161470.0,77838830.0,68260811,74510140.0,71217304
3,FRCCc,FRCC,27123680.0,18677570.0,15879327,18374990.0,15227544
4,MROEc,MROE,15443670.0,11454900.0,10581044,13113740.0,10480887
5,MROWc,MROW,122647100.0,102670300.0,86817250,96229390.0,95599125
6,NEWEc,NEWE,1036896.0,467120.7,164881,578806.8,347898
7,NWPPc,NWPP,62858370.0,64983400.0,52353815,53734880.0,52895365
8,NYSTc,NYCW,690385.6,421951.4,143692,0.0,0
9,RFCEc,RFCE,46225710.0,36555990.0,24332566,29968360.0,25441381


In [91]:
import pandas as pd
import numpy as np

# Step 1: Melt the DataFrame to Long Format
columns_to_melt = ['coal_MWh_2018', 'coal_MWh_2019', 'coal_MWh_2020', 'coal_MWh_2021', 'coal_MWh_2022']
df_melted = pd.melt(
    df_epa_eGRID_COAL_processed,
    id_vars=['eGRID_subregion', 'cambium_gea_region'],
    value_vars=columns_to_melt,
    var_name='year',
    value_name='coal_MWh'
)

# Step 2: Extract the Year from Column Names
df_melted['year'] = df_melted['year'].str.extract('(\d{4})').astype(int)

# Step 3: Create Columns to Match the Target DataFrame
df_preIRA_transformed = pd.DataFrame({
    'data_source': 'EPA_eGRID',
    'scenario': 'MidCase',
    'eGRID_subregion': df_melted['eGRID_subregion'],
    'gea_region': df_melted['cambium_gea_region'],
    'year': df_melted['year'],
    'coal_MWh': df_melted['coal_MWh'],
})

# Step 4: Combine the DataFrames
df_preIRA_coal_generation = pd.concat([df_preIRA_transformed, df_cambium21_COAL_processed], ignore_index=True)
df_preIRA_coal_generation

Unnamed: 0,data_source,scenario,eGRID_subregion,gea_region,year,coal_MWh
0,EPA_eGRID,MidCase,AZNM,AZNMc,2018,44146154.50
1,EPA_eGRID,MidCase,CAMX,CAMXc,2018,8763279.09
2,EPA_eGRID,MidCase,ERCT,ERCTc,2018,93161470.51
3,EPA_eGRID,MidCase,FRCC,FRCCc,2018,27123675.80
4,EPA_eGRID,MidCase,MROE,MROEc,2018,15443667.62
...,...,...,...,...,...,...
395,Cambium_2021,MidCase,,SRVCc,2042,37576316.00
396,Cambium_2021,MidCase,,SRVCc,2044,33240228.00
397,Cambium_2021,MidCase,,SRVCc,2046,27152428.00
398,Cambium_2021,MidCase,,SRVCc,2048,27861414.00


In [90]:
# Step 3: Create Columns to Match the Target DataFrame
df_iraRef_transformed = pd.DataFrame({
    'data_source': 'EPA_eGRID',
    'scenario': 'MidCase',
    'eGRID_subregion': df_melted['eGRID_subregion'],
    'gea_region': df_melted['cambium_gea_region'],
    'year': df_melted['year'],
    'coal_MWh': df_melted['coal_MWh'],
})

# Step 4: Combine the DataFrames
df_iraRef_coal_generation = pd.concat([df_iraRef_transformed, df_cambium22_COAL_processed], ignore_index=True)
df_iraRef_coal_generation

Unnamed: 0,data_source,scenario,eGRID_subregion,gea_region,year,coal_MWh
0,EPA_eGRID,MidCase,AZNM,AZNMc,2018,44146154.50
1,EPA_eGRID,MidCase,CAMX,CAMXc,2018,8763279.09
2,EPA_eGRID,MidCase,ERCT,ERCTc,2018,93161470.51
3,EPA_eGRID,MidCase,FRCC,FRCCc,2018,27123675.80
4,EPA_eGRID,MidCase,MROE,MROEc,2018,15443667.62
...,...,...,...,...,...,...
255,Cambium_2022,MidCase,,SRVCc,2030,17312904.00
256,Cambium_2022,MidCase,,SRVCc,2035,8787109.00
257,Cambium_2022,MidCase,,SRVCc,2040,6132569.50
258,Cambium_2022,MidCase,,SRVCc,2045,4301606.50


In [93]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

def calculate_coal_projection_factors(df_cambium):
    """
    Interpolates coal_MWh and calculates coal projection factors for each region from 2018 to 2050,
    using 2018 coal generation as the reference point.

    Parameters
    ----------
    df_cambium : pandas.DataFrame
        DataFrame containing Cambium coal generation data with the following columns:
        - 'scenario': Scenario name or identifier.
        - 'gea_region': GEA region identifier.
        - 'year': Year of the data.
        - 'coal_MWh': Coal generation in MWh.

    Returns
    -------
    df_coal_factors : pandas.DataFrame
        DataFrame with interpolated coal_MWh values and a new column 'coal_projection_factors'.
    """
    # Create a copy of the dataframe
    df_cambium_copy = df_cambium.copy()

    # Create a new DataFrame to store interpolated results
    interpolated_data = []

    # Group by 'scenario' and 'gea_region'
    grouped = df_cambium_copy.groupby(['scenario', 'gea_region'])

    for (scenario, gea_region), group in grouped:
        # Extract existing years and coal_MWh values
        years = group['year'].values
        coal_MWh_values = group['coal_MWh'].values

        # Create interpolation function, allowing extrapolation
        coal_MWh_interp_func = interp1d(years, coal_MWh_values, kind='linear', bounds_error=False, fill_value="extrapolate")

        # Generate years from 2018 to 2050
        all_years = np.arange(2018, 2051)

        # Interpolate the coal_MWh values for these years
        interpolated_values = coal_MWh_interp_func(all_years)

        # Store the results in a DataFrame
        interpolated_group = pd.DataFrame({
            'scenario': scenario,
            'gea_region': gea_region,
            'year': all_years,
            'coal_MWh': interpolated_values
        })

        interpolated_data.append(interpolated_group)

    # Concatenate all the interpolated data into a single DataFrame
    df_interpolated = pd.concat(interpolated_data).reset_index(drop=True)

    # Get the coal_MWh value in 2018 for each scenario and gea_region
    coal_MWh_2018 = df_interpolated[df_interpolated['year'] == 2018][['scenario', 'gea_region', 'coal_MWh']]
    coal_MWh_2018 = coal_MWh_2018.set_index(['scenario', 'gea_region'])['coal_MWh']

    # Map the 2018 coal_MWh values to the DataFrame
    df_interpolated['coal_MWh_2018'] = df_interpolated.set_index(['scenario', 'gea_region']).index.map(coal_MWh_2018)

    # Avoid division by zero by replacing zero coal_MWh_2018 with NaN
    df_interpolated['coal_MWh_2018'] = df_interpolated['coal_MWh_2018'].replace(0, np.nan)

    # Conditions for regions other than CAMX
    condition_regions = (df_interpolated['gea_region'] != 'CAMX')

    # Calculate coal projection factors for regions other than CAMX
    df_interpolated.loc[condition_regions, 'coal_projection_factors'] = (
        df_interpolated.loc[condition_regions, 'coal_MWh'] / df_interpolated.loc[condition_regions, 'coal_MWh_2018']
    )

    # For CAMX region, assign coal_projection_factors as 1
    condition_CAMX = (df_interpolated['gea_region'] == 'CAMX')
    df_interpolated.loc[condition_CAMX, 'coal_projection_factors'] = 1

    # Replace any NaN or infinite values resulting from division by zero with 0
    df_interpolated['coal_projection_factors'] = df_interpolated['coal_projection_factors'].replace([np.inf, -np.inf, np.nan], 0)

    # Drop temporary columns
    df_interpolated.drop(columns=['coal_MWh_2018'], inplace=True)

    return df_interpolated


In [94]:
# Calculate Coal Generation Projection Factors
df_preIRA_coal_projection_factors = calculate_coal_projection_factors(df_preIRA_coal_generation)
df_preIRA_coal_projection_factors

Unnamed: 0,scenario,gea_region,year,coal_MWh,coal_projection_factors
0,MidCase,AZNMc,2018,44146154.50,1.000000
1,MidCase,AZNMc,2019,37909639.38,0.858730
2,MidCase,AZNMc,2020,26535437.00,0.601082
3,MidCase,AZNMc,2021,26836891.44,0.607910
4,MidCase,AZNMc,2022,26741465.00,0.605748
...,...,...,...,...,...
655,MidCase,SRVCc,2046,27152428.00,0.433227
656,MidCase,SRVCc,2047,27506921.00,0.438883
657,MidCase,SRVCc,2048,27861414.00,0.444539
658,MidCase,SRVCc,2049,21319390.00,0.340159


In [95]:
# Calculate Coal Generation Projection Factors
df_iraRef_coal_projection_factors = calculate_coal_projection_factors(df_iraRef_coal_generation)
df_iraRef_coal_projection_factors

Unnamed: 0,scenario,gea_region,year,coal_MWh,coal_projection_factors
0,MidCase,AZNMc,2018,44146154.50,1.000000
1,MidCase,AZNMc,2019,37909639.38,0.858730
2,MidCase,AZNMc,2020,26535437.00,0.601082
3,MidCase,AZNMc,2021,26836891.44,0.607910
4,MidCase,AZNMc,2022,26741465.00,0.605748
...,...,...,...,...,...
655,MidCase,SRVCc,2046,3826239.64,0.061049
656,MidCase,SRVCc,2047,3350872.78,0.053464
657,MidCase,SRVCc,2048,2875505.92,0.045880
658,MidCase,SRVCc,2049,2400139.06,0.038295


### Step 3: Obtain CPI-U Inflation Data
- Series Id:	CUUR0000SA0
- Not Seasonally Adjusted
- Series Title:	All items in U.S. city average, all urban consumers, not seasonally adjusted
- Area:	U.S. city average
- Item:	All items
- Base Period:	1982-84=100

In [96]:
# Load the BLS Inflation Data
filename = 'bls_cpiu_2005-2023.xlsx'
relative_path = os.path.join(r"inflation_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Create a pandas dataframe
df_bls_cpiu = pd.read_excel(file_path, sheet_name='bls_cpiu')

df_bls_cpiu = pd.DataFrame({
    'year': df_bls_cpiu['Year'],
    'cpiu_annual': df_bls_cpiu['Annual']
})

# Obtain the Annual CPIU values for the years of interest
bls_cpi_annual_2008 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2008)].item()
bls_cpi_annual_2010 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2010)].item()
bls_cpi_annual_2013 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2013)].item()
bls_cpi_annual_2018 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2018)].item()
bls_cpi_annual_2019 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2019)].item()
bls_cpi_annual_2020 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2020)].item()
bls_cpi_annual_2021 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2021)].item()
bls_cpi_annual_2022 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2022)].item()
bls_cpi_annual_2023 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2023)].item()

# Precompute constant values
cpi_ratio_2023_2023 = bls_cpi_annual_2023 / bls_cpi_annual_2023
cpi_ratio_2023_2022 = bls_cpi_annual_2023 / bls_cpi_annual_2022
cpi_ratio_2023_2021 = bls_cpi_annual_2023 / bls_cpi_annual_2021  # For EPA VSL (11.3M USD-2021)
cpi_ratio_2023_2020 = bls_cpi_annual_2023 / bls_cpi_annual_2020  # For SCC
cpi_ratio_2023_2019 = bls_cpi_annual_2023 / bls_cpi_annual_2019 
cpi_ratio_2023_2018 = bls_cpi_annual_2023 / bls_cpi_annual_2018 
cpi_ratio_2023_2013 = bls_cpi_annual_2023 / bls_cpi_annual_2013
cpi_ratio_2023_2010 = bls_cpi_annual_2023 / bls_cpi_annual_2010
cpi_ratio_2023_2008 = bls_cpi_annual_2023 / bls_cpi_annual_2008  # For EPA VSL and SCC

Retrieved data for filename: bls_cpiu_2005-2023.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\inflation_data\bls_cpiu_2005-2023.xlsx


### Step 4: Use the updated Social Cost of Carbon (190 USD-2020/ton co2e) and inflate to USD-2023
- EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
- 190 USD-2020 has some inconsistency with the VSL being used. An old study and 2008 VSL is noted
- 190 USD value and inflate to USD 2023 because there is a clear source and ease of replicability.

In [97]:
# For co2e adjust SCC
EPA_SCC_USD2023_PER_TON = 190 * cpi_ratio_2023_2020

print(f"""
Steps 3 and 4: Obtain BLS CPI-U Data and Inflate Current Social Cost of Carbon (SCC) to USD2023
      
EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
Inflate 190 $USD-2020 Social Cost of Carbon to $USD-2023

SCC Value used in analysis is: ${round(EPA_SCC_USD2023_PER_TON, 2)} per mt CO2e
""")


Steps 3 and 4: Obtain BLS CPI-U Data and Inflate Current Social Cost of Carbon (SCC) to USD2023
      
EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
Inflate 190 $USD-2020 Social Cost of Carbon to $USD-2023

SCC Value used in analysis is: $223.69 per mt CO2e



### Step 4: Quantify monitized HEALTH damages using EASIUR Marginal Social Cost Factors
#### THE STEPS BELOW SUMMARIZE WHAT WAS DONE TO OBTAIN ALL NATIONAL EASIUR VALUES INCLUDED ON GITHUB
- Obtain all of the dwelling unit latitude and longitude values from the metadata columns
- Make a new dataframe of just the longitude and latitude values 
    - Make sure that the order is (longitude, latitude)
    - Do not include the index or column name when exporting 
- Export the CSV
- **Upload csv to EASIUR Website:**
    - Website: https://barney.ce.cmu.edu/~jinhyok/easiur/online/
    - See inputs in respective sections
- Download the file and put it in the 'easiur_batchConversion_download' folder
- Copy and paste the name of the file EASIUR generated when prompted
- Copy and paste the name of the filepath for the 'easiur_batchConversion_download' folder when prompted
- Match up the longitude and latitudes for each dwelling unit with the selected damages

### Fossil Fuels: EASIUR Marginal Damage (Social Cost) Factors Info
- Factor Type: Marginal Social Cost
- Calculation Method: Regression
- Metric: Marginal Social Cost [USD per metric ton]
- Dollar Year: 2010
- Income Year: 2018
- Population Year: 2018
- Aggregation: Longitude, and Latitude Coordinates
- Pollutants: Primary PM2.5, Sulfur Dioxide (SO2), Nitrogen Oxides (NOx), Ammonia (NH3)
- Elevation (Ground, 150m, 300m) and Seasons (Winter, Spring, Summer, Fall)

In [114]:
# Create a dataframe containing just the longitude and Latitude
df_EASIUR_batchConversion = pd.DataFrame({
    'Longitude':df_euss_am_baseline['in.weather_file_longitude'],
    'Latitude':df_euss_am_baseline['in.weather_file_latitude'],
})

# Drop duplicate rows based on 'Longitude' and 'Latitude' columns
df_EASIUR_batchConversion.drop_duplicates(subset=['Longitude', 'Latitude'], keep='first', inplace=True)

# Create a location ID for the name of the batch conversion file
while True:
    if menu_state == 'N':
        location_id = 'National'
        print("You chose to analyze all of the United States.")
        break
    elif menu_state == 'Y':
        if menu_city == 'N':
            try:
                location_id = str(input_state)
                print(f"Location ID is: {location_id}")
                break
            except ValueError:
                print("Invalid input for state!")
        elif menu_city == 'Y':
            try:
                location_id = input_cityFilter.replace(', ', '_').strip()
                print(f"Location ID is: {location_id}")
                break
            except AttributeError:
                print("Invalid input for city filter!")
        else:
            print("Incorrect state or city filter assignment!")
    else:
        print("Invalid data location. Check your inputs at the beginning of this notebook!")

# Updated GitHub code has EASIUR file with all unique latitude, longitude coordinates in the US
filename = 'easiur_National2024-06-1421-22.csv'
relative_path = os.path.join(r"margDamages_EASIUR\easiur_batchConversion_download", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_margSocialCosts = pd.read_csv(file_path)

# Convert from kg/MWh to lb/kWh
# Obtain value from the CSV file and convert to lbs pollutant per kWh 

# Define df_margSocialCosts_EASIUR DataFrame first
df_margSocialCosts_EASIUR = pd.DataFrame({
    'Longitude': df_margSocialCosts['Longitude'],
    'Latitude': df_margSocialCosts['Latitude'],
})
df_margSocialCosts_EASIUR

You chose to analyze all of the United States.
Retrieved data for filename: easiur_National2024-06-1421-22.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\easiur_batchConversion_download\easiur_National2024-06-1421-22.csv




Unnamed: 0,Longitude,Latitude,x,y,PM25 Annual Ground,PM25 Winter Ground,PM25 Spring Ground,PM25 Summer Ground,PM25 Fall Ground,SO2 Annual Ground,...,NOX Annual 300m,NOX Winter 300m,NOX Spring 300m,NOX Summer 300m,NOX Fall 300m,NH3 Annual 300m,NH3 Winter 300m,NH3 Spring 300m,NH3 Summer 300m,NH3 Fall 300m
0,-87.04,31.42,103,33,81072.352921,97983.065682,66122.238834,67093.464543,92772.783977,19955.830745,...,2483.710419,5929.794762,2232.787468,857.316844,823.300965,32236.247484,43160.622278,22326.210380,27742.933871,35523.886882
1,-85.86,33.59,106,40,122086.659729,149040.298282,111701.789450,94277.068143,132887.342049,21749.886294,...,3590.227171,8058.611768,3515.993478,1172.232495,1497.218427,45035.383139,57571.789826,33629.721297,39705.559055,48905.820895
2,-86.39,32.30,105,36,111079.763889,130673.294214,96866.726087,102723.887457,113635.286609,20685.101097,...,2935.774496,6702.323634,2612.117692,1279.482824,1049.983462,41446.800963,48977.921967,27814.892512,46317.760766,42467.417598
3,-85.45,31.32,108,34,92900.934172,109103.501316,76080.870209,85266.429711,100637.850615,20825.206151,...,2793.030035,6718.838552,2282.425801,1099.280251,970.074774,33185.456262,45103.101045,20799.662615,28677.411776,37902.133176
4,-86.75,33.56,103,40,210586.246246,239273.221876,200288.217454,185276.680504,217158.132018,23868.371679,...,3904.341520,8116.282158,4696.203368,1496.731684,1204.250267,67219.188151,77701.008208,63595.925126,67646.084789,59851.399474
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1024,-106.72,44.38,56,73,22039.484828,31488.281032,12364.586732,12580.294751,31712.802929,10711.825984,...,1711.170222,4765.897596,354.608117,115.532932,1541.745057,9483.584564,15249.799497,3791.953612,5543.820281,13347.159040
1025,-105.54,44.34,58,73,24398.158766,36094.854622,13764.885657,12374.329591,35351.688558,12910.498172,...,2174.378290,5591.793338,464.462503,196.267315,2374.571409,11429.479302,19973.941403,4219.051097,4585.702249,16922.536072
1026,-107.95,43.97,53,72,18881.775892,27708.279306,10197.591814,8352.447782,29154.889039,9542.723432,...,1324.757563,3540.476903,252.826004,101.678277,1349.397287,8939.007581,13060.681060,4351.246899,3642.042009,14697.004788
1027,-108.08,44.52,53,74,19824.063398,29425.186031,11196.853635,8785.910661,29878.593194,12250.358578,...,1636.477039,3529.366581,462.694413,255.114128,2247.879874,9751.985832,16313.581753,3835.375269,2597.783323,16238.070755


In [115]:
print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
""")
pollutants = ['so2', 'nox', 'pm25', 'co2']

# Create a lookup dictionary of emissions factors for fossil fuels
fossilFuel_factors = df_margEmis_factors[df_margEmis_factors['state'] == 'National']
emis_fossilFuel_lookup = {(row['fuel_type'], row['pollutant']): row['margEmis_factor_adjusted'] for _, row in fossilFuel_factors.iterrows()}

# FOSSIL FUELS DAMAGES LOOKUP
# Create a damages_fossilFuel_lookup dictionary from df_margSocialCosts_EASIUR
damages_fossilFuel_lookup = df_margSocialCosts_EASIUR.groupby(['Longitude', 'Latitude']).first().to_dict()
damages_fossilFuel_lookup


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------



KeyError: 'margEmis_factor_adjusted'

### Step 4: Inflate Marginal Social Cost (Damage) Factors using BLS CPI for All Urban Consumers (CPI-U)
- Series Id:	CUUR0000SA0
- Not Seasonally Adjusted
- Series Title:	All items in U.S. city average, all urban consumers, not seasonally adjusted
- Area:	U.S. city average
- Item:	All items
- Base Period:	1982-84=100

### Use the updated Social Cost of Carbon (190 USD-2020/ton CO2) and inflate to USD-2022
- EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
- 190 USD-2020 has some inconsistency with the VSL being used. An old study and 2008 VSL is noted
- 190 USD value and inflate to USD 2022 because there is a clear source and ease of replicability.

### Adjustment for VSL
- EASIUR uses a VSL of 8.8M USD-2010 
- New EPA VSL is 11.3M USD-2021
- INFLATE TO $USD-2022

### ALL DOLLAR VALUES ARE NOW IN USD2022, PREVIOUSLY USED $USD-2021

## Electricity CEDM-EASIUR Marginal Damages: Current and Decarbonizing Grid
- Factor Type: Marginal
- Calculation Method: Regression
- Metric: Marginal Damages EASIUR [USD per MWh or kWh]
- Year: 2018
- Regional Aggregation: eGRID subregion (all regions)
- Pollutants: SO2, NOx, PM2.5 CO2

SCC Adjustment: We use the EPA suggested 190 USD-2020 value for the social cost of carbon and inflate to 2022 USD. **PREVIOUSLY USED 2021 USD**

VSL: "We use a value of a statistical life (VSL) of USD 8.8 million (in 2010 dollars) for both our AP2 and EASIUR calculations. EASIUR reports damage intensities in USD/metric ton using this VSL and dollar year."

In [101]:
# For CO2 adjust SCC
# Create an adjustment factor for the new Social Cost of Carbon (SCC)
epa_scc = 190 * cpi_ratio_2023_2020
old_scc = 40 * cpi_ratio_2023_2010
scc_adjustment_factor = epa_scc / old_scc

# For Health-Related Emissions Adjust for different Value of a Statistical Life (VSL) values
# Current VSL is $11.3 M USD2021
# INFLATE TO USD2022, PREVIOUSLY USD2021
current_VSL_USD2022 = 11.3 * cpi_ratio_2023_2021

# Easiur uses a VSL of $8.8 M USD2010
# INFLATE TO USD2022, PREVIOUSLY USD2021
easiur_VSL_USD2022 = 8.8 * (cpi_ratio_2023_2010)

# Calculate VSL adjustment factor
vsl_adjustment_factor = current_VSL_USD2022 / easiur_VSL_USD2022

### Electricity Damages from Health Related Emissions

In [99]:
filename = 'Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv'
relative_path = os.path.join(r"margDamages_EASIUR", filename)
file_path = os.path.join(project_root, relative_path)
df_margDamages_health2018 = pd.read_csv(file_path, index_col=0)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

Retrieved data for filename: Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv


In [105]:
# Marginal damages [$/kWh]
# Inflate from 2010 to 2022
# Note only 2018 data available, used in place of 2021
df_margDamages_EASIUR_health = pd.DataFrame({
    'subregion_eGRID': df_margDamages_health2018['region'],
    'pollutant': df_margDamages_health2018['pollutant'],
    'unit': '[$USD2023/kWh]',
    'margDamages_dollarPerkWh_adjustVSL_ref': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2023_2010),
    'margDamages_dollarPerkWh_adjustVSL_2018': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2023_2010)
})

# Apply the mapping to the 'cambium_gea_region' column
df_margDamages_EASIUR_health['cambium_gea_region'] = df_margDamages_EASIUR_health['subregion_eGRID'].map(mapping)
df_margDamages_EASIUR_health

Unnamed: 0,subregion_eGRID,pollutant,unit,margDamages_dollarPerkWh_adjustVSL_ref,margDamages_dollarPerkWh_adjustVSL_2018,cambium_gea_region
1,AZNM,so2,[$USD2023/kWh],0.001760,0.001760,AZNMc
2,AZNM,nox,[$USD2023/kWh],0.000575,0.000575,AZNMc
3,AZNM,pm25,[$USD2023/kWh],0.002026,0.002026,AZNMc
4,CAMX,so2,[$USD2023/kWh],0.000599,0.000599,CAMXc
5,CAMX,nox,[$USD2023/kWh],0.000714,0.000714,CAMXc
...,...,...,...,...,...,...
62,SRTV,nox,[$USD2023/kWh],0.002780,0.002780,SRTVc
63,SRTV,pm25,[$USD2023/kWh],0.007428,0.007428,SRTVc
64,SRVC,so2,[$USD2023/kWh],0.008226,0.008226,SRVCc
65,SRVC,nox,[$USD2023/kWh],0.002431,0.002431,SRVCc


#### Pre-IRA Projections

In [110]:
# Create dictionaries mapping 'gea_region' to marginal damage factors for each pollutant
# For SO2
so2_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'so2'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# For NOx
nox_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'nox'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# For PM2.5
pm25_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'pm25'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# Map the marginal damage factors to the projection factors dataframe
df_preIRA_coal_projection_factors['so2_marg_damage'] = df_preIRA_coal_projection_factors['gea_region'].map(so2_marg_damages)
df_preIRA_coal_projection_factors['nox_marg_damage'] = df_preIRA_coal_projection_factors['gea_region'].map(nox_marg_damages)
df_preIRA_coal_projection_factors['pm25_marg_damage'] = df_preIRA_coal_projection_factors['gea_region'].map(pm25_marg_damages)

# Calculate the new columns by multiplying the coal projection factors with the marginal damages
df_preIRA_coal_projection_factors['so2_dollarPerkWh_adjustVSL'] = (
    df_preIRA_coal_projection_factors['coal_projection_factors'] * df_preIRA_coal_projection_factors['so2_marg_damage']
)
df_preIRA_coal_projection_factors['nox_dollarPerkWh_adjustVSL'] = (
    df_preIRA_coal_projection_factors['coal_projection_factors'] * df_preIRA_coal_projection_factors['nox_marg_damage']
)
df_preIRA_coal_projection_factors['pm25_dollarPerkWh_adjustVSL'] = (
    df_preIRA_coal_projection_factors['coal_projection_factors'] * df_preIRA_coal_projection_factors['pm25_marg_damage']
)

# Optionally, drop the intermediate marginal damage columns if they're no longer needed
df_preIRA_coal_projection_factors.drop(
    columns=['so2_marg_damage', 'nox_marg_damage', 'pm25_marg_damage'], 
    inplace=True
)
df_preIRA_coal_projection_factors

Unnamed: 0,scenario,gea_region,year,coal_MWh,coal_projection_factors,so2_dollarPerkWh_adjustVSL,nox_dollarPerkWh_adjustVSL,pm25_dollarPerkWh_adjustVSL
0,MidCase,AZNMc,2018,44146154.50,1.000000,0.001760,0.000575,0.002026
1,MidCase,AZNMc,2019,37909639.38,0.858730,0.001512,0.000494,0.001740
2,MidCase,AZNMc,2020,26535437.00,0.601082,0.001058,0.000346,0.001218
3,MidCase,AZNMc,2021,26836891.44,0.607910,0.001070,0.000350,0.001232
4,MidCase,AZNMc,2022,26741465.00,0.605748,0.001066,0.000348,0.001227
...,...,...,...,...,...,...,...,...
655,MidCase,SRVCc,2046,27152428.00,0.433227,0.003564,0.001053,0.003264
656,MidCase,SRVCc,2047,27506921.00,0.438883,0.003610,0.001067,0.003307
657,MidCase,SRVCc,2048,27861414.00,0.444539,0.003657,0.001081,0.003349
658,MidCase,SRVCc,2049,21319390.00,0.340159,0.002798,0.000827,0.002563


In [111]:
import pandas as pd

# Assuming df_preIRA_coal_projection_factors is your dataframe

# Initialize the lookup dictionary
damages_preIRA_health_damages_lookup = {}

# Group the dataframe
grouped = df_preIRA_coal_projection_factors.groupby(['scenario', 'gea_region'])

for (scenario, gea_region), group in grouped:
    year_dict = {}
    for _, row in group.iterrows():
        year = int(row['year'])
        data_dict = {
            'so2_dollarPerkWh_adjustVSL': row['so2_dollarPerkWh_adjustVSL'],
            'nox_dollarPerkWh_adjustVSL': row['nox_dollarPerkWh_adjustVSL'],
            'pm25_dollarPerkWh_adjustVSL': row['pm25_dollarPerkWh_adjustVSL']
        }
        year_dict[year] = data_dict
    damages_preIRA_health_damages_lookup[(scenario, gea_region)] = year_dict

# Now, lookup_dict contains the desired nested dictionary
damages_preIRA_health_damages_lookup

{('MidCase',
  'AZNMc'): {2018: {'so2_dollarPerkWh_adjustVSL': 0.0017604045476692913,
   'nox_dollarPerkWh_adjustVSL': 0.000575015374879764,
   'pm25_dollarPerkWh_adjustVSL': 0.0020259847537919245}, 2019: {'so2_dollarPerkWh_adjustVSL': 0.0015117126807739248,
   'nox_dollarPerkWh_adjustVSL': 0.0004937831108176674,
   'pm25_dollarPerkWh_adjustVSL': 0.0017397744441280825}, 2020: {'so2_dollarPerkWh_adjustVSL': 0.0010581466154473767,
   'nox_dollarPerkWh_adjustVSL': 0.00034563110710250785,
   'pm25_dollarPerkWh_adjustVSL': 0.001217781965521054}, 2021: {'so2_dollarPerkWh_adjustVSL': 0.0010701676345622147,
   'nox_dollarPerkWh_adjustVSL': 0.0003495576311630751,
   'pm25_dollarPerkWh_adjustVSL': 0.0012316165136559971}, 2022: {'so2_dollarPerkWh_adjustVSL': 0.0010663623396085199,
   'nox_dollarPerkWh_adjustVSL': 0.00034831467646426797,
   'pm25_dollarPerkWh_adjustVSL': 0.0012272371398523595}, 2023: {'so2_dollarPerkWh_adjustVSL': 0.0012037391306330793,
   'nox_dollarPerkWh_adjustVSL': 0.000393187

#### IRA-Reference Projections

In [112]:
# Create dictionaries mapping 'gea_region' to marginal damage factors for each pollutant
# For SO2
so2_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'so2'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# For NOx
nox_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'nox'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# For PM2.5
pm25_marg_damages = df_margDamages_EASIUR_health[df_margDamages_EASIUR_health['pollutant'] == 'pm25'] \
    .set_index('cambium_gea_region')['margDamages_dollarPerkWh_adjustVSL_ref'].to_dict()

# Map the marginal damage factors to the projection factors dataframe
df_iraRef_coal_projection_factors['so2_marg_damage'] = df_iraRef_coal_projection_factors['gea_region'].map(so2_marg_damages)
df_iraRef_coal_projection_factors['nox_marg_damage'] = df_iraRef_coal_projection_factors['gea_region'].map(nox_marg_damages)
df_iraRef_coal_projection_factors['pm25_marg_damage'] = df_iraRef_coal_projection_factors['gea_region'].map(pm25_marg_damages)

# Calculate the new columns by multiplying the coal projection factors with the marginal damages
df_iraRef_coal_projection_factors['so2_dollarPerkWh_adjustVSL'] = (
    df_iraRef_coal_projection_factors['coal_projection_factors'] * df_iraRef_coal_projection_factors['so2_marg_damage']
)
df_iraRef_coal_projection_factors['nox_dollarPerkWh_adjustVSL'] = (
    df_iraRef_coal_projection_factors['coal_projection_factors'] * df_iraRef_coal_projection_factors['nox_marg_damage']
)
df_iraRef_coal_projection_factors['pm25_dollarPerkWh_adjustVSL'] = (
    df_iraRef_coal_projection_factors['coal_projection_factors'] * df_iraRef_coal_projection_factors['pm25_marg_damage']
)

# Optionally, drop the intermediate marginal damage columns if they're no longer needed
df_iraRef_coal_projection_factors.drop(
    columns=['so2_marg_damage', 'nox_marg_damage', 'pm25_marg_damage'], 
    inplace=True
)
df_iraRef_coal_projection_factors

Unnamed: 0,scenario,gea_region,year,coal_MWh,coal_projection_factors,so2_dollarPerkWh_adjustVSL,nox_dollarPerkWh_adjustVSL,pm25_dollarPerkWh_adjustVSL
0,MidCase,AZNMc,2018,44146154.50,1.000000,0.001760,0.000575,0.002026
1,MidCase,AZNMc,2019,37909639.38,0.858730,0.001512,0.000494,0.001740
2,MidCase,AZNMc,2020,26535437.00,0.601082,0.001058,0.000346,0.001218
3,MidCase,AZNMc,2021,26836891.44,0.607910,0.001070,0.000350,0.001232
4,MidCase,AZNMc,2022,26741465.00,0.605748,0.001066,0.000348,0.001227
...,...,...,...,...,...,...,...,...
655,MidCase,SRVCc,2046,3826239.64,0.061049,0.000502,0.000148,0.000460
656,MidCase,SRVCc,2047,3350872.78,0.053464,0.000440,0.000130,0.000403
657,MidCase,SRVCc,2048,2875505.92,0.045880,0.000377,0.000112,0.000346
658,MidCase,SRVCc,2049,2400139.06,0.038295,0.000315,0.000093,0.000289


In [113]:
import pandas as pd

# Assuming df_iraRef_coal_projection_factors is your dataframe

# Initialize the lookup dictionary
damages_iraRef_health_damages_lookup = {}

# Group the dataframe
grouped = df_iraRef_coal_projection_factors.groupby(['scenario', 'gea_region'])

for (scenario, gea_region), group in grouped:
    year_dict = {}
    for _, row in group.iterrows():
        year = int(row['year'])
        data_dict = {
            'so2_dollarPerkWh_adjustVSL': row['so2_dollarPerkWh_adjustVSL'],
            'nox_dollarPerkWh_adjustVSL': row['nox_dollarPerkWh_adjustVSL'],
            'pm25_dollarPerkWh_adjustVSL': row['pm25_dollarPerkWh_adjustVSL']
        }
        year_dict[year] = data_dict
    damages_iraRef_health_damages_lookup[(scenario, gea_region)] = year_dict

# Now, lookup_dict contains the desired nested dictionary
damages_iraRef_health_damages_lookup

{('MidCase',
  'AZNMc'): {2018: {'so2_dollarPerkWh_adjustVSL': 0.0017604045476692913,
   'nox_dollarPerkWh_adjustVSL': 0.000575015374879764,
   'pm25_dollarPerkWh_adjustVSL': 0.0020259847537919245}, 2019: {'so2_dollarPerkWh_adjustVSL': 0.0015117126807739248,
   'nox_dollarPerkWh_adjustVSL': 0.0004937831108176674,
   'pm25_dollarPerkWh_adjustVSL': 0.0017397744441280825}, 2020: {'so2_dollarPerkWh_adjustVSL': 0.0010581466154473767,
   'nox_dollarPerkWh_adjustVSL': 0.00034563110710250785,
   'pm25_dollarPerkWh_adjustVSL': 0.001217781965521054}, 2021: {'so2_dollarPerkWh_adjustVSL': 0.0010701676345622147,
   'nox_dollarPerkWh_adjustVSL': 0.0003495576311630751,
   'pm25_dollarPerkWh_adjustVSL': 0.0012316165136559971}, 2022: {'so2_dollarPerkWh_adjustVSL': 0.0010663623396085199,
   'nox_dollarPerkWh_adjustVSL': 0.00034831467646426797,
   'pm25_dollarPerkWh_adjustVSL': 0.0012272371398523595}, 2023: {'so2_dollarPerkWh_adjustVSL': 0.0009887543357298897,
   'nox_dollarPerkWh_adjustVSL': 0.000322964

In [None]:
# Combine them top to bottom
df_margDamages_EASIUR = pd.concat([df_margDamages_EASIUR_health], ignore_index=True)
df_margDamages_EASIUR

### Step 5: Calculate End-use specific marginal damages
**I used the total emissions column for each of the end uses for the following reasons:**
- Most homes only have 1 of each end-use, so it is unlikely that the homes have a significant consumption values from different fuel types. Thus, the total consumption and total emissions column (sum of each dwelling units consumption by end-use for each fuel) is fine to use to calculate marginal damages (social cost)
- We can visualize the emissions in 2 by 2 grid (CO2, PM25, SO2, NOx) with each appliance's heating fuel in a different shape or color. 

### Baseline Marginal Damages: WHOLE-HOME

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")

# calculate_marginal_damages(df, menu_mp, policy_scenario)
df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      menu_mp=menu_mp,
                                                      policy_scenario='No Inflation Reduction Act',
                                                      drop_pollutant_damage_cols=True
                                                     )
df_euss_am_baseline_home

### Electricity - Method 3: CEDM Marginal Emissions Factors and EASIUR (Use Cambium CO2e reductions as a proxy)

### Step 5: Calculate End-use specific marginal damages

### Baseline Marginal Damages: WHOLE-HOME

In [34]:
import numpy as np
import pandas as pd

# Constants
TD_LOSSES = 0.06
TD_LOSSES_MULTIPLIER = 1 / (1 - TD_LOSSES)
EQUIPMENT_SPECS = {'heating': 15, 'waterHeating': 12, 'clothesDrying': 13, 'cooking': 15}

def calculate_marginal_damages(df, menu_mp, policy_scenario, df_summary):
    """
    Calculate marginal damages of pollutants based on equipment usage, emissions, and policy scenarios.

    Parameters:
        df (DataFrame): Input data with emissions and consumption data.
        menu_mp (int): Measure package identifier.
        policy_scenario (str): Specifies the policy scenario ('No Inflation Reduction Act' or 'AEO2023 Reference Case').
        df_summary (DataFrame): Summary DataFrame to store aggregated results.

    Returns:
        DataFrame: Updated DataFrame with calculated marginal emissions and damages.
    """
    df_copy = df.copy()

    # Define policy-specific settings
    scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(menu_mp, policy_scenario)

    # Precompute HDD adjustment factors by region and year
    hdd_factors_per_year = precompute_hdd_factors(df_copy)

    # Compute marginal damages based on grid scenario
    df_new_columns = calculate_damages_grid_scenario(
        df_copy, df_summary, menu_mp, TD_LOSSES_MULTIPLIER, emis_electricity_lookup,
        cambium_scenario, scenario_prefix, hdd_factors_per_year
    )

    # Handle overlapping columns
    overlapping_columns = df_new_columns.columns.intersection(df_copy.columns)
    if not overlapping_columns.empty:
        df_copy.drop(columns=overlapping_columns, inplace=True)

    # Merge newly calculated columns
    df_copy = df_copy.join(df_new_columns, how='left')
    return df_copy


def define_scenario_settings(menu_mp, policy_scenario):
    """
    Define scenario-specific settings based on menu and policy inputs.

    Parameters:
        menu_mp (int): Measure package identifier.
        policy_scenario (str): Policy scenario.

    Returns:
        Tuple: (scenario_prefix, cambium_scenario, emis_electricity_lookup)
    """
    if menu_mp == 0:
        return "baseline_", "MidCase", emis_preIRA_co2e_cambium21_lookup

    if policy_scenario == 'No Inflation Reduction Act':
        return f"preIRA_mp{menu_mp}_", "MidCase", emis_preIRA_co2e_cambium21_lookup

    if policy_scenario == 'AEO2023 Reference Case':
        return f"iraRef_mp{menu_mp}_", "MidCase", emis_IRA_co2e_cambium22_lookup

    raise ValueError("Invalid Policy Scenario! Choose 'No Inflation Reduction Act' or 'AEO2023 Reference Case'.")


def precompute_hdd_factors(df):
    """
    Precompute heating degree day (HDD) factors for each region and year.

    Parameters:
        df (DataFrame): Input data.

    Returns:
        dict: HDD factors mapped by year and region.
    """
    max_lifetime = max(EQUIPMENT_SPECS.values())
    hdd_factors_per_year = {}
    for year_label in range(2024, 2024 + max_lifetime + 1):
        # Map census_division to HDD factors
        hdd_factors = df['census_division'].map(
            lambda x: hdd_factor_lookup.get(x, hdd_factor_lookup['National']).get(year_label, 1.0)
        )
        hdd_factors_per_year[year_label] = hdd_factors
    return hdd_factors_per_year


def calculate_damages_grid_scenario(df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
                                    cambium_scenario, scenario_prefix, hdd_factors):
    """
    Calculate damages for electricity grid emissions under different scenarios.

    Parameters:
        df (DataFrame): Input DataFrame.
        df_summary (DataFrame): DataFrame to store summary results.
        menu_mp (int): Measure package identifier.
        td_losses_multiplier (float): Adjusted factor for transmission/distribution losses.
        emis_electricity_lookup (dict): Lookup for emissions data.
        cambium_scenario (str): Scenario identifier for emissions data.
        scenario_prefix (str): Prefix for column naming.
        hdd_factors (dict): Precomputed HDD adjustment factors.

    Returns:
        DataFrame: New columns with calculated emissions and damages.
    """
    new_columns_data = {}
    for category, lifetime in EQUIPMENT_SPECS.items():
        for mer_type in ['lrmer', 'srmer']:
            process_emissions_for_category(
                df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
                cambium_scenario, scenario_prefix, hdd_factors, new_columns_data,
                category, lifetime, mer_type
            )

    return pd.DataFrame(new_columns_data, index=df.index)


def process_emissions_for_category(df, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup,
                                   cambium_scenario, scenario_prefix, hdd_factors, new_columns_data,
                                   category, lifetime, mer_type):
    """
    Process emissions and damages for a specific category and MER type.

    Parameters:
        df (DataFrame): Input DataFrame.
        df_summary (DataFrame): Summary DataFrame.
        category (str): Equipment category (e.g., heating, cooking).
        lifetime (int): Equipment lifetime in years.
        mer_type (str): Marginal emissions type ('lrmer' or 'srmer').
        hdd_factors (dict): Precomputed HDD factors.
    """
    lifetime_emissions = np.zeros(len(df))
    lifetime_damages = np.zeros(len(df))

    for year in range(1, lifetime + 1):
        year_label = year + 2023
        emis_col, damage_col = generate_column_names(scenario_prefix, year_label, category, mer_type)

        # Calculate electricity emissions
        emis_electricity = calculate_electricity_emissions(
            df, category, hdd_factors[year_label], td_losses_multiplier, emis_electricity_lookup,
            cambium_scenario, year_label, mer_type
        )

        # Calculate fossil fuel emissions
        fossil_fuel_emissions = calculate_fossil_fuel_emissions(
            df, category, hdd_factors[year_label], emission_factors=emis_factors_fossil_fuels
        )

        # Total emissions and damages
        total_emissions = fossil_fuel_emissions + emis_electricity
        total_damages = total_emissions * EPA_SCC_USD2023_PER_TON

        # Store results
        new_columns_data[emis_col] = np.round(total_emissions, 2)
        new_columns_data[damage_col] = np.round(total_damages, 2)
        lifetime_emissions += total_emissions
        lifetime_damages += total_damages

    # Lifetime and avoided emissions
    store_lifetime_and_avoided_emissions(df, df_summary, scenario_prefix, category, mer_type,
                                         lifetime_emissions, lifetime_damages, new_columns_data)


def generate_column_names(scenario_prefix, year_label, category, mer_type):
    """
    Generate column names for emissions and damages.

    Parameters:
        scenario_prefix (str): Scenario prefix.
        year_label (int): Year of calculation.
        category (str): Equipment category.
        mer_type (str): Marginal emissions type.

    Returns:
        tuple: Emission and damage column names.
    """
    emis_col = f'{scenario_prefix}{year_label}_{category}_tons_co2e_{mer_type}'
    damage_col = f'{scenario_prefix}{year_label}_{category}_damages_climate_{mer_type}'
    return emis_col, damage_col


def calculate_electricity_emissions(df, category, hdd_factor, td_losses_multiplier,
                                    emis_electricity_lookup, cambium_scenario, year_label, mer_type):
    """
    Calculate electricity emissions for a category.

    Returns:
        Series: Calculated emissions.
    """
    # Precompute emission factors for each gea_region
    emis_factors = df['gea_region'].map(
        lambda gea_region: emis_electricity_lookup.get(
            (cambium_scenario, gea_region), {}
        ).get(year_label, {}).get(f'{mer_type}_co2e', 0)
    )

    return (
        df[f'base_electricity_{category}_consumption'] *
        hdd_factor * td_losses_multiplier *
        emis_factors.fillna(0)
    )


def calculate_fossil_fuel_emissions(df, category, hdd_factor, emission_factors=emis_factors_fossil_fuels):
    """
    Calculate fossil fuel emissions for a category using the provided emission factors lookup dictionary.

    Parameters:
        df (DataFrame): Input DataFrame containing fuel consumption data.
        category (str): Equipment category (e.g., 'heating', 'cooking').
        hdd_factor (float or Series): Heating Degree Day adjustment factor.
        emission_factors (dict): Lookup dictionary for emission factors.

    Returns:
        Series: Combined fossil fuel emissions (in tons CO2e).
    """
    # Access emission factors from the lookup dictionary
    natural_gas_factor = emission_factors['naturalGas']['co2e']
    propane_factor = emission_factors['propane']['co2e']
    fuel_oil_factor = emission_factors['fuelOil']['co2e']

    # Calculate emissions for each fuel type
    emis_naturalGas = (
        df[f'base_naturalGas_{category}_consumption'] * hdd_factor * natural_gas_factor
    )
    emis_propane = (
        df[f'base_propane_{category}_consumption'] * hdd_factor * propane_factor
    )

    # Fuel oil is not used for cooking or clothes drying
    if category not in ['cooking', 'clothesDrying']:
        emis_fuelOil = (
            df[f'base_fuelOil_{category}_consumption'] * hdd_factor * fuel_oil_factor
        )
    else:
        emis_fuelOil = pd.Series(0, index=df.index)

    # Sum the emissions from all applicable fuel types
    total_emissions = (
        emis_naturalGas.fillna(0) +
        emis_propane.fillna(0) +
        emis_fuelOil.fillna(0)
    )

    return total_emissions


def store_lifetime_and_avoided_emissions(df, df_summary, scenario_prefix, category, mer_type,
                                         lifetime_emissions, lifetime_damages, new_columns_data):
    """
    Store lifetime and avoided emissions in the summary DataFrame and new columns.
    """
    lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
    lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'

    new_columns_data[lifetime_emissions_col] = np.round(lifetime_emissions, 2)
    new_columns_data[lifetime_damages_col] = np.round(lifetime_damages, 2)

    df_summary[lifetime_emissions_col] = np.round(lifetime_emissions, 2)
    df_summary[lifetime_damages_col] = np.round(lifetime_damages, 2)


In [35]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")
# Make copies from scenario consumption to keep df smaller
print("\n", "Creating dataframe to store marginal damages calculations ...")
df_baseline_scenario_damages = df_euss_am_baseline_home.copy()

# calculate_marginal_damages(df, menu_mp, policy_scenario)
df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      menu_mp=menu_mp,
                                                      policy_scenario='No Inflation Reduction Act',
                                                      df_summary=df_baseline_scenario_damages
                                                     )
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------


 Creating dataframe to store marginal damages calculations ...


Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,reeds_balancing_area,gea_region,state,city,...,baseline_2035_cooking_tons_co2e_srmer,baseline_2035_cooking_damages_climate_srmer,baseline_2036_cooking_tons_co2e_srmer,baseline_2036_cooking_damages_climate_srmer,baseline_2037_cooking_tons_co2e_srmer,baseline_2037_cooking_damages_climate_srmer,baseline_2038_cooking_tons_co2e_srmer,baseline_2038_cooking_damages_climate_srmer,baseline_cooking_lifetime_tons_co2e_srmer,baseline_cooking_lifetime_damages_climate_srmer
2,239,1690.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.03,5.75,0.03,5.66,0.02,5.57,0.02,5.48,0.40,89.27
3,273,1690.0,South,East South Central,East South Central,Mixed-Humid,90,SRSOc,AL,In another census Place,...,0.43,95.47,0.42,94.00,0.41,92.47,0.41,90.96,6.62,1481.71
4,307,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.43,95.47,0.42,94.00,0.41,92.47,0.41,90.96,6.62,1481.71
5,409,1220.0,South,East South Central,East South Central,Hot-Humid,90,SRSOc,AL,Not in a census Place,...,0.03,5.96,0.03,5.87,0.03,5.77,0.03,5.68,0.41,92.51
7,517,1220.0,South,East South Central,East South Central,Mixed-Humid,89,SRSOc,AL,In another census Place,...,0.30,67.03,0.30,66.00,0.29,64.93,0.29,63.87,4.65,1040.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548905,548109,1690.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548907,548226,2176.0,West,Mountain,Mountain North,Cold,23,RMPAc,WY,In another census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548908,548228,1690.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Not in a census Place,...,0.31,69.28,0.30,67.07,0.30,66.81,0.30,66.54,4.91,1098.83
548910,548417,885.0,West,Mountain,Mountain North,Cold,24,RMPAc,WY,Casper,...,0.37,83.10,0.36,80.45,0.36,80.13,0.36,79.81,5.89,1317.96


In [43]:
import unittest
import pandas as pd
import numpy as np

# Mock data and constants for testing
EQUIPMENT_SPECS = {'heating': 15, 'waterHeating': 12, 'clothesDrying': 13, 'cooking': 15}

emis_preIRA_co2e_cambium21_lookup = {
    ('MidCase', 'Region1'): {
        2024: {'lrmer_co2e': 0.5, 'srmer_co2e': 0.6},
        2025: {'lrmer_co2e': 0.4, 'srmer_co2e': 0.5},
    },
    ('MidCase', 'Region2'): {
        2024: {'lrmer_co2e': 0.3, 'srmer_co2e': 0.35},
        2025: {'lrmer_co2e': 0.25, 'srmer_co2e': 0.3},
    },
}

emis_IRA_co2e_cambium22_lookup = emis_preIRA_co2e_cambium21_lookup  # Using the same for simplicity

EPA_SCC_USD2023_PER_TON = 51  # Example value

emis_factors_fossil_fuels = {
    'naturalGas': {'co2e': 0.2},
    'propane': {'co2e': 0.25},
    'fuelOil': {'co2e': 0.3},
}

hdd_factor_lookup = {
    'Division1': {2024: 1.1, 2025: 1.0},
    'Division2': {2024: 0.9, 2025: 0.95},
    'National': {2024: 1.0, 2025: 1.0},
}

class TestMarginalDamagesCalculations(unittest.TestCase):
    def setUp(self):
        # Create a sample DataFrame for testing
        data = {
            'census_division': ['Division1', 'Division2'],
            'gea_region': ['Region1', 'Region2'],
        }
        
        # Add consumption data for all equipment categories
        for category in EQUIPMENT_SPECS.keys():
            data[f'base_electricity_{category}_consumption'] = [1000, 2000]
            data[f'base_naturalGas_{category}_consumption'] = [500, 600]
            data[f'base_propane_{category}_consumption'] = [300, 400]
            # Fuel oil is not used for 'cooking' and 'clothesDrying'
            if category not in ['cooking', 'clothesDrying']:
                data[f'base_fuelOil_{category}_consumption'] = [200, 300]
        
        self.df = pd.DataFrame(data)
        # Sample df_summary DataFrame
        self.df_summary = pd.DataFrame(index=self.df.index)

    # (Your test methods remain the same.)

    def test_define_scenario_settings(self):
        # Test with menu_mp = 0
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(0, 'AEO2023 Reference Case')
        self.assertEqual(scenario_prefix, 'baseline_')
        self.assertEqual(cambium_scenario, 'MidCase')
        self.assertEqual(emis_electricity_lookup, emis_preIRA_co2e_cambium21_lookup)

        # Test with 'No Inflation Reduction Act' policy
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(1, 'No Inflation Reduction Act')
        self.assertEqual(scenario_prefix, 'preIRA_mp1_')
        self.assertEqual(emis_electricity_lookup, emis_preIRA_co2e_cambium21_lookup)

        # Test with 'AEO2023 Reference Case' policy
        scenario_prefix, cambium_scenario, emis_electricity_lookup = define_scenario_settings(2, 'AEO2023 Reference Case')
        self.assertEqual(scenario_prefix, 'iraRef_mp2_')
        self.assertEqual(emis_electricity_lookup, emis_IRA_co2e_cambium22_lookup)

        # Test with an invalid policy scenario
        with self.assertRaises(ValueError):
            define_scenario_settings(1, 'Invalid Policy')

    def test_precompute_hdd_factors(self):
        hdd_factors = precompute_hdd_factors(self.df)
        # Check that the HDD factors are correctly computed
        self.assertIn(2024, hdd_factors)
        self.assertIn(2025, hdd_factors)

        np.testing.assert_array_almost_equal(
            hdd_factors[2024],
            pd.Series([1.1, 0.9], index=self.df.index)
        )
        np.testing.assert_array_almost_equal(
            hdd_factors[2025],
            pd.Series([1.0, 0.95], index=self.df.index)
        )

    def test_calculate_electricity_emissions(self):
        hdd_factor = pd.Series([1.1, 0.9], index=self.df.index)
        td_losses_multiplier = 1 / (1 - 0.06)
        year_label = 2024
        mer_type = 'lrmer'
        cambium_scenario = 'MidCase'

        emis_electricity = calculate_electricity_emissions(
            self.df, 'heating', hdd_factor, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario, year_label, mer_type
        )

        expected_emissions = pd.Series([
            1000 * 1.1 * td_losses_multiplier * 0.5,
            2000 * 0.9 * td_losses_multiplier * 0.3
        ], index=self.df.index)

        np.testing.assert_array_almost_equal(emis_electricity, expected_emissions)

    def test_calculate_fossil_fuel_emissions(self):
        hdd_factor = pd.Series([1.1, 0.9], index=self.df.index)

        emissions = calculate_fossil_fuel_emissions(
            self.df, 'heating', hdd_factor, emission_factors=emis_factors_fossil_fuels
        )

        expected_emissions = pd.Series([
            500 * 1.1 * 0.2 + 300 * 1.1 * 0.25 + 200 * 1.1 * 0.3,
            600 * 0.9 * 0.2 + 400 * 0.9 * 0.25 + 300 * 0.9 * 0.3
        ], index=self.df.index)

        np.testing.assert_array_almost_equal(emissions, expected_emissions)

    def test_process_emissions_for_category(self):
        new_columns_data = {}
        category = 'heating'
        lifetime = EQUIPMENT_SPECS[category]
        mer_type = 'lrmer'
        scenario_prefix = 'test_'
        cambium_scenario = 'MidCase'
        td_losses_multiplier = TD_LOSSES_MULTIPLIER

        hdd_factors = precompute_hdd_factors(self.df)

        process_emissions_for_category(
            self.df, self.df_summary, 1, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario,
            scenario_prefix, hdd_factors, new_columns_data,
            category, lifetime, mer_type
        )

        # Check that new_columns_data has expected keys
        expected_columns = []
        for year in range(1, lifetime + 1):
            year_label = year + 2023
            emis_col, damage_col = generate_column_names(scenario_prefix, year_label, category, mer_type)
            expected_columns.extend([emis_col, damage_col])

        # Add lifetime columns
        lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
        lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'
        expected_columns.extend([lifetime_emissions_col, lifetime_damages_col])

        self.assertEqual(set(new_columns_data.keys()), set(expected_columns))

    def test_calculate_damages_grid_scenario(self):
        scenario_prefix = 'test_'
        cambium_scenario = 'MidCase'
        td_losses_multiplier = TD_LOSSES_MULTIPLIER
        hdd_factors = precompute_hdd_factors(self.df)

        df_new_columns = calculate_damages_grid_scenario(
            self.df, self.df_summary, 1, td_losses_multiplier,
            emis_preIRA_co2e_cambium21_lookup, cambium_scenario,
            scenario_prefix, hdd_factors
        )

        # Check that df_new_columns has expected columns
        expected_columns = []
        for category, lifetime in EQUIPMENT_SPECS.items():
            for mer_type in ['lrmer', 'srmer']:
                for year in range(1, lifetime + 1):
                    year_label = year + 2023
                    emis_col, damage_col = generate_column_names(scenario_prefix, year_label, category, mer_type)
                    expected_columns.extend([emis_col, damage_col])

                # Lifetime columns
                lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
                lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'
                expected_columns.extend([lifetime_emissions_col, lifetime_damages_col])

        self.assertEqual(set(df_new_columns.columns), set(expected_columns))

    def test_calculate_marginal_damages(self):
        # Run the full calculation and check outputs
        menu_mp = 1
        policy_scenario = 'No Inflation Reduction Act'

        df_result = calculate_marginal_damages(self.df, menu_mp, policy_scenario, self.df_summary)

        # Expected columns
        expected_columns = set(self.df.columns)
        scenario_prefix = 'preIRA_mp1_'
        for category, lifetime in EQUIPMENT_SPECS.items():
            for mer_type in ['lrmer', 'srmer']:
                for year in range(1, lifetime + 1):
                    year_label = year + 2023
                    emis_col, damage_col = generate_column_names(scenario_prefix, year_label, category, mer_type)
                    expected_columns.update([emis_col, damage_col])

                # Lifetime columns
                lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
                lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'
                expected_columns.update([lifetime_emissions_col, lifetime_damages_col])

        self.assertEqual(set(df_result.columns), expected_columns)

        # Check that df_summary has the lifetime columns
        expected_summary_columns = []
        for category, lifetime in EQUIPMENT_SPECS.items():
            for mer_type in ['lrmer', 'srmer']:
                lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
                lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'
                expected_summary_columns.extend([lifetime_emissions_col, lifetime_damages_col])

        self.assertEqual(set(self.df_summary.columns), set(expected_summary_columns))

# if __name__ == '__main__':
#     unittest.main(argv=['first-arg-is-ignored'], exit=False)
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False, verbosity=2)



test_calculate_damages_grid_scenario (__main__.TestMarginalDamagesCalculations.test_calculate_damages_grid_scenario) ... ok
test_calculate_electricity_emissions (__main__.TestMarginalDamagesCalculations.test_calculate_electricity_emissions) ... ok
test_calculate_fossil_fuel_emissions (__main__.TestMarginalDamagesCalculations.test_calculate_fossil_fuel_emissions) ... ok
test_calculate_marginal_damages (__main__.TestMarginalDamagesCalculations.test_calculate_marginal_damages) ... ok
test_define_scenario_settings (__main__.TestMarginalDamagesCalculations.test_define_scenario_settings) ... ok
test_precompute_hdd_factors (__main__.TestMarginalDamagesCalculations.test_precompute_hdd_factors) ... ok
test_process_emissions_for_category (__main__.TestMarginalDamagesCalculations.test_process_emissions_for_category) ... ok

----------------------------------------------------------------------
Ran 7 tests in 0.231s

OK


## Private Perspective: Annual Energy Costs

### Step 1: Obtain Level Energy Fuel Cost Data from the EIA
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Private Perspective: Annual Energy Costs
-------------------------------------------------------------------------------------------------------
- Step 1: Obtain Level Energy Fuel Cost Data from the EIA
- Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Step 1: Obtain Level Energy Fuel Cost Data from the EIA
-------------------------------------------------------------------------------------------------------
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data
-------------------------------------------------------------------------------------------------------
""")

filename = 'fuel_prices_nominal.csv'
relative_path = os.path.join(r"fuel_prices", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_perkWh = pd.read_csv(file_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# New units for the converted and inflated prices below
# $USD-2023, PREVIOUSLY USED $USD-2021
df_fuelPrices_perkWh['units'] = 'USD2022 per kWh'

years = ['2018', '2019', '2020', '2021', '2022']

# Take dataframe with nominal prices in their base units and convert to $/kWh equivalent
# https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
for year in years:
    for index, row in df_fuelPrices_perkWh.iterrows():
        
        # Propane: (dollars per gallon) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
        if row['fuel_type'] == 'propane':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/91452) * (3412/1)
        
        # Fuel Oil: (dollars/gallon) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'fuelOil':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/138500) * (3412/1)
        
        # Natural Gas: (dollars/cf) * (thousand cf/1000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'naturalGas':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/1000) * (1/1039) * (3412/1)
        
        # Electricity: convert cents per kWh to $ per kWh
        elif row['fuel_type'] == 'electricity':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] / 100

# Convert nominal dollars to real 2022 US dollars (USD2022)
# $USD-2023, PREVIOUSLY USED $USD-2021
df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] * cpi_ratio_2023_2018
df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] * cpi_ratio_2023_2019
df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] * cpi_ratio_2023_2020
df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] * cpi_ratio_2023_2021
df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] * cpi_ratio_2023_2022

# Original dictionary mapping census divisions to states
map_states_census_divisions = {
    "New England": ["CT", "ME", "MA", "NH", "RI", "VT"],
    "Middle Atlantic": ["NJ", "NY", "PA"],
    "East North Central": ["IN", "IL", "MI", "OH", "WI"],
    "West North Central": ["IA", "KS", "MN", "MO", "NE", "ND", "SD"],
    "South Atlantic": ["DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV"],
    "East South Central": ["AL", "KY", "MS", "TN"],
    "West South Central": ["AR", "LA", "OK", "TX"],
    "Mountain": ["AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY"],
    "Pacific": ["AK", "CA", "HI", "OR", "WA"]
}

# Reverse the mapping to create a state-to-census-division map
state_to_census_division = {}
for division, states in map_states_census_divisions.items():
    for state in states:
        state_to_census_division[state] = division

# Function to map location to census division
def map_location_to_census_division(location):
    if location in state_to_census_division:
        return state_to_census_division[location]
    return location

# Apply the function to map locations using .loc
df_fuelPrices_perkWh.loc[:, 'census_division'] = df_fuelPrices_perkWh['location_map'].apply(map_location_to_census_division)
# print(df_fuelPrices_perkWh)

In [None]:
# Project Fuel Prices from 2022 to 2050
filename = 'aeo_projections_2022_2050.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_projection_factors = pd.read_excel(io=file_path, sheet_name='fuel_price_factors_2022_2050')

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
# print(df_fuelPrices_projection_factors)

# Convert the factors dataframe into a lookup dictionary including policy_scenario
factor_dict = df_fuelPrices_projection_factors.set_index(['region', 'fuel_type', 'policy_scenario']).to_dict('index')
# print(factor_dict)

In [None]:
# Pre-IRA policy_scenario: No Inflation Reduction Act
# Pass the desired policy_scenario as a parameter when applying the function
preIRA_projected_prices_df = df_fuelPrices_perkWh.apply(lambda row: project_future_prices(row, factor_dict, 'No Inflation Reduction Act'), axis=1)

# Concatenate the projected prices with the original DataFrame
df_fuelPrices_perkWh_preIRA = pd.concat([df_fuelPrices_perkWh, preIRA_projected_prices_df], axis=1)

# Create Fuel Price Lookup with the policy_scenario included
preIRA_fuel_price_lookup = create_fuel_price_lookup(df_fuelPrices_perkWh_preIRA, 'No Inflation Reduction Act')
# print(preIRA_fuel_price_lookup)

In [None]:
# IRA-Reference policy_scenario: AEO2023 Reference Case
# Pass the desired policy_scenario as a parameter when applying the function
iraRef_projected_prices_df = df_fuelPrices_perkWh.apply(lambda row: project_future_prices(row, factor_dict, 'AEO2023 Reference Case'), axis=1)

# Concatenate the projected prices with the original DataFrame
df_fuelPrices_perkWh_iraRef = pd.concat([df_fuelPrices_perkWh, iraRef_projected_prices_df], axis=1)

# Create Fuel Price Lookup with the policy_scenario included
iraRef_fuel_price_lookup = create_fuel_price_lookup(df_fuelPrices_perkWh_iraRef, 'AEO2023 Reference Case')
# print(iraRef_fuel_price_lookup)

### Step 2: Calculate Annual Operating (Fuel) Costs

### Baseline Fuel Cost: WHOLE-HOME

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
- Create a mapping dictionary for fuel types
- Create new merge columns to ensure a proper match.
- Merge df_copy with df_fuel_prices to get fuel prices for electricity, natural gas, propane, and fuel oil
- Calculate the per kWh fuel costs for each fuel type and region
- Calculate the baseline fuel cost 
-------------------------------------------------------------------------------------------------------
""")
# calculate_annual_fuelCost(df, menu_mp, policy_scenario, drop_fuel_cost_columns)
df_euss_am_baseline_home = calculate_annual_fuelCost(df=df_euss_am_baseline_home,
                                                     menu_mp=menu_mp,
                                                     policy_scenario='No Inflation Reduction Act',
                                                     drop_fuel_cost_columns=False
                                                     )
df_euss_am_baseline_home

## Area Median Income Data Used to determine LMI Designation and IRA Rebates Eligibility/Amount

### PUMA Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0003_ds261_2022_puma.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_puma_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_puma_medianIncome = df_puma_medianIncome.drop(0)
df_puma_medianIncome = df_puma_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB', 'PUMAA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_puma_medianIncome = df_puma_medianIncome[cols_interest]
df_puma_medianIncome = df_puma_medianIncome.rename(columns={"GISJOIN": "gis_joinID_puma", "STUSAB": "state_abbrev", "PUMAA": "puma_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_puma_medianIncome['median_income_USD2023'] = round((df_puma_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_puma_medianIncome

### County Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0005_ds261_2022_county.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_county_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_county_medianIncome = df_county_medianIncome.drop(0)
df_county_medianIncome = df_county_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB', 'COUNTYA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_county_medianIncome = df_county_medianIncome[cols_interest]
df_county_medianIncome = df_county_medianIncome.rename(columns={"GISJOIN": "gis_joinID_county", "STUSAB": "state_abbrev", "COUNTYA": "county_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_county_medianIncome['median_income_USD2023'] = round((df_county_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_county_medianIncome

### State Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0004_ds261_2022_state.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_state_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_state_medianIncome = df_state_medianIncome.drop(0)
df_state_medianIncome = df_state_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB','STATEA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_state_medianIncome = df_state_medianIncome[cols_interest]
df_state_medianIncome = df_state_medianIncome.rename(columns={"GISJOIN": "gis_joinID_state", "STUSAB": "state_abbrev", "STATEA": "state_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_state_medianIncome['median_income_USD2023'] = round((df_state_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_state_medianIncome

### Adjustment Factors for Construction: 
#### RSMeans City Cost Index
#### Consumer Price Index for All Urban Consumers (CPI, CPI-U)

In [None]:
# Adjust for regional cost differences with RSMeans
filename = "rsMeans_cityCostIndex.csv"
relative_path = os.path.join(r"inflation_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_rsMeans_cityCostIndex = pd.read_csv(file_path)

df_rsMeans_cityCostIndex = pd.DataFrame({
    'State': df_rsMeans_cityCostIndex['State'],
    'City': df_rsMeans_cityCostIndex['City'],
    'Material': (df_rsMeans_cityCostIndex['Material']).round(2),
    'Installation': (df_rsMeans_cityCostIndex['Installation']).round(2),
    'Average': (df_rsMeans_cityCostIndex['Average']).round(2),
})
df_rsMeans_cityCostIndex

# Model Runtime

In [None]:
# Get the current datetime again
end_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Calculate the elapsed time
elapsed_time = datetime.strptime(end_time, "%Y-%m-%d_%H-%M-%S") - datetime.strptime(start_time, "%Y-%m-%d_%H-%M-%S")

# Format the elapsed time
elapsed_seconds = elapsed_time.total_seconds()
elapsed_minutes = int(elapsed_seconds // 60)
elapsed_seconds = int(elapsed_seconds % 60)

# Print the elapsed time
print(f"The code took {elapsed_minutes} minutes and {elapsed_seconds} seconds to execute.")