In [1]:
# Set columns in display
# pd.set_option('display.max_columns', None)
# pd.reset_option('display.max_columns') # Reset options to default

# Set rows in display
# pd.set_option('display.max_rows', None)
# pd.reset_option('display.max_rows') # Reset options to default

# Load Util File with TARE Model Functions

In [2]:
import os

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

# Get the current working directory of the project
project_root = os.path.abspath(os.getcwd())
print(f"Project root directory: {project_root}")

# Relative path to the file from the project root
relative_path = r"tare_model_functions_v1.4.1.ipynb"

# Construct the absolute path to the file
file_path = os.path.join(project_root, relative_path)
print(f"File path: {file_path}")

# Run the notebook and import variables
if os.path.exists(relative_path):
    get_ipython().run_line_magic('run', f'-i "{relative_path}"')
    print("Loaded All TARE Model Functions")
else:
    print(f"File not found: {relative_path}")

Project root directory: c:\Users\14128\Research\cmu-tare-model
File path: c:\Users\14128\Research\cmu-tare-model\tare_model_functions_v1.4.1.ipynb
Loaded All TARE Model Functions


In [3]:
# Storing Result Outputs in output_results folder
relative_path = r"output_results"
output_folder_path = os.path.join(project_root, relative_path)
print(f"Result outputs will be exported here: {output_folder_path}")

Result outputs will be exported here: c:\Users\14128\Research\cmu-tare-model\output_results


# Simulate Residential Energy Consumption using NREL End-Use Savings Shapes
- Filter EUSS Data: Only occupied units and Single Family Homes



In [4]:
# The ``inline`` flag will use the appropriate backend to make figures appear inline in the notebook.  
%matplotlib inline

import pandas as pd
import numpy as np

# `plt` is an alias for the `matplotlib.pyplot` module
import matplotlib.pyplot as plt

# import seaborn library (wrapper of matplotlib)
import seaborn as sns
sns.set(style="darkgrid")

# For regex, import re
import re

from datetime import datetime

# Get the current datetime
# Start the timer
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

In [5]:
print("""
-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------
""")

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

filename = "baseline_metadata_and_annual_results.csv"
relative_path = os.path.join(r"euss_data\resstock_amy2018_release_1.1\state", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

print("""
-------------------------------------------------------------------------------------------------------
Data Filters: Only occupied units and Single Family Homes
-------------------------------------------------------------------------------------------------------
""")

# Fix DtypeWarning error in columns:
# 'in.neighbors', 'in.geometry_stories_low_rise', 'in.iso_rto_region', 'in.pv_orientation', 'in.pv_system_size'
columns_to_string = {11: str, 61: str, 121: str, 103: str, 128: str, 129: str}
df_euss_am_baseline = pd.read_csv(file_path, dtype=columns_to_string)
occupancy_filter = df_euss_am_baseline['in.vacancy_status'] == 'Occupied'
df_euss_am_baseline = df_euss_am_baseline.loc[occupancy_filter]

# Filter for single family home building type
house_type_list = ['Single-Family Attached', 'Single-Family Detached']
house_type_filter = df_euss_am_baseline['in.geometry_building_type_recs'].isin(house_type_list)
df_euss_am_baseline = df_euss_am_baseline.loc[house_type_filter]
# df_euss_am_baseline


-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------

Retrieved data for filename: baseline_metadata_and_annual_results.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\euss_data\resstock_amy2018_release_1.1\state\baseline_metadata_and_annual_results.csv

------------------------

In [6]:
# # Create a location ID for the name of the batch conversion file
# while True:
#     if menu_state == 'N':
#         location_id = 'National'
#         print("You chose to analyze all of the United States.")
#         break
#     elif menu_state == 'Y':
#         if menu_city == 'N':
#             try:
#                 location_id = str(input_state)
#                 print(f"Location ID is: {location_id}")
#                 break
#             except ValueError:
#                 print("Invalid input for state!")
#         elif menu_city == 'Y':
#             try:
#                 location_id = input_cityFilter.replace(', ', '_').strip()
#                 print(f"Location ID is: {location_id}")
#                 break
#             except AttributeError:
#                 print("Invalid input for city filter!")
#         else:
#             print("Incorrect state or city filter assignment!")
#     else:
#         print("Invalid data location. Check your inputs at the beginning of this notebook!")

In [7]:
# # Make a copy of the dataframe
# df_euss_am_baseline = df_euss_am_baseline.copy()

# Choose between national or sub-national level analysis
menu_state = get_menu_choice(menu_prompt, {'N', 'Y'})   # This code is only run in baseline

# National Level 
if menu_state == 'N':
    print("You chose to analyze all of the United States.")
    input_state = 'National'
    location_id = 'National'

# Filter down to state or city
else:
    input_state = get_state_choice(df_euss_am_baseline)    
    print(f"You chose to filter for: {input_state}")

    location_id = str(input_state)
    print(f"Location ID is: {location_id}")


    state_filter = df_euss_am_baseline['in.state'].eq(input_state)
    df_euss_am_baseline = df_euss_am_baseline.loc[state_filter]

    print(city_prompt)
    print(df_euss_am_baseline['in.city'].value_counts())

    menu_city = get_menu_choice(city_menu_prompt, {'N', 'Y'})

    # Filter for the entire selected state
    if menu_city == 'N':
        print(f"You chose to analyze all of state: {input_state}")
        
        location_id = str(input_state)
        print(f"Location ID is: {location_id}")
        
    # Filter to a city within the selected state
    else:
        input_cityFilter = get_city_choice(df_euss_am_baseline, input_state)
        print(f"You chose to filter for: {input_state}, {input_cityFilter}")

        location_id = input_cityFilter.replace(', ', '_').strip()
        print(f"Location ID is: {location_id}")

        city_filter = df_euss_am_baseline['in.city'].eq(f"{input_state}, {input_cityFilter}")
        df_euss_am_baseline = df_euss_am_baseline.loc[city_filter]

# Display the filtered dataframe
df_euss_am_baseline

You chose to analyze all of the United States.


Unnamed: 0,bldg_id,upgrade,weight,applicability,in.sqft,in.ahs_region,in.ashrae_iecc_climate_zone_2004,in.ashrae_iecc_climate_zone_2004_2_a_split,in.bathroom_spot_vent_hour,in.bedrooms,...,out.emissions.natural_gas.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.propane.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.electricity.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.fuel_oil.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.natural_gas.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.propane.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_95_decarb_by_2035_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_mid_case_15_2025_start.co2e_kg
2,239,0,242.131013,True,1690.0,Non-CBSA East South Central,3A,3A,Hour20,3,...,215.943534,0.000000,8773.384074,0.0,215.943534,0.000000,3565.038262,6416.193347,5755.373221,8989.327608
3,273,0,242.131013,True,1690.0,Non-CBSA East South Central,3A,3A,Hour12,3,...,0.000000,0.000000,11296.731129,0.0,0.000000,0.000000,4339.903757,8193.678510,7254.665194,11296.731129
4,307,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour0,4,...,0.000000,0.000000,8750.011820,0.0,0.000000,0.000000,3345.187937,6249.625611,5587.946834,8750.011820
5,409,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour20,2,...,1642.477930,0.000000,5725.103641,0.0,1642.477930,0.000000,3784.993820,5489.549041,5164.458936,7367.581571
7,517,0,242.131013,True,1220.0,Non-CBSA East South Central,3A,3A,Hour1,3,...,0.000000,0.000000,8932.439414,0.0,0.000000,0.000000,3441.414837,6415.303853,5721.073473,8932.439414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548907,548226,0,242.131013,True,2176.0,Non-CBSA Mountain,6B,6B,Hour3,4,...,15324.356044,0.000000,2313.298407,0.0,15324.356044,0.000000,16802.023908,16545.095582,16472.920871,17637.654451
548908,548228,0,242.131013,True,1690.0,Non-CBSA Mountain,6B,6B,Hour6,4,...,8192.601682,0.000000,1889.439924,0.0,8192.601682,0.000000,9394.122057,9195.903552,9129.578822,10082.041606
548910,548417,0,242.131013,True,885.0,Non-CBSA Mountain,6B,6B,Hour18,2,...,5212.758359,0.000000,2112.907195,0.0,5212.758359,0.000000,6546.826589,6318.763521,6253.986448,7325.665554
548914,549740,0,242.131013,True,1220.0,Non-CBSA Mountain,7B,7B,Hour4,2,...,0.000000,268.627834,11423.104685,0.0,0.000000,268.627834,7173.561517,5931.433285,4609.821155,11691.732519


## Project Future Energy Consumption Using EIA Heating Degree Day (HDD) Forecasted Data (Factors)

In [8]:
# Factors for 2022 to 2050
filename = 'aeo_projections_2022_2050.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_hdd_projection_factors = pd.read_excel(io=file_path, sheet_name='hdd_factors_2022_2050')

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Convert the factors dataframe into a lookup dictionary
hdd_factor_lookup = df_hdd_projection_factors.set_index(['census_division']).to_dict('index')
hdd_factor_lookup

Retrieved data for filename: aeo_projections_2022_2050.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\aeo_projections_2022_2050.xlsx


{'National': {2022: 1,
  2023: 1.0028349414260749,
  2024: 0.9389536266963965,
  2025: 0.9344844368179533,
  2026: 0.9300828169743566,
  2027: 0.9257070259326153,
  2028: 0.9212779053519207,
  2029: 0.9168538581973203,
  2030: 0.9124351151864318,
  2031: 0.9080377039911245,
  2032: 0.9036541297129915,
  2033: 0.8992492812396443,
  2034: 0.894875507855348,
  2035: 0.8904758482849783,
  2036: 0.8860390020882589,
  2037: 0.8817285900905196,
  2038: 0.877365886428882,
  2039: 0.8729314040841085,
  2040: 0.8685839209369028,
  2041: 0.8642702226890459,
  2042: 0.8599120736340495,
  2043: 0.8555441810694344,
  2044: 0.8511753084862802,
  2045: 0.8468232704962843,
  2046: 0.8425090534289743,
  2047: 0.8382247585710751,
  2048: 0.8339389072548168,
  2049: 0.8297055204635582,
  2050: 0.8255002687057338},
 'East North Central': {2022: 1,
  2023: 0.9811731756651626,
  2024: 0.9307608526528707,
  2025: 0.928426948809709,
  2026: 0.9262486385560915,
  2027: 0.9239147347129298,
  2028: 0.921580830869

In [9]:
print("""
-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------
""")

# df_baseline_enduse(df_baseline, df_enduse, category, fuel_filter='Yes', tech_filter='Yes')
df_euss_am_baseline_home = df_enduse_refactored(df_baseline = df_euss_am_baseline,
                                                fuel_filter = 'Yes',
                                                tech_filter = 'Yes')

# Project Future Energy Consumption
df_euss_am_baseline_home = project_future_consumption(df=df_euss_am_baseline_home, hdd_factor_lookup=hdd_factor_lookup, menu_mp=menu_mp)
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------

Processing column: in.clothes_dryer
Initial data types: object
Data types after processing: object
Processing column: in.cooking_range
Initial data types: object
Data types after processing: object
331531 rows remain after applying total heating consumption calculation
Filtered for the following fuels: ['Natural Gas', 'Electricity', 'Propane', 'Fuel Oil']
321357 rows remain after applying heating fuel filter
Filtered for the following Heating technologies: ['Electricity ASHP', 'Electricity Baseboard', 'Electricity Electric Boiler', 'Electricity Electric Furnace', 'Fuel Oil Fuel Boiler', 'Fuel Oil Fuel Furnace', 'Natural Gas Fuel Boiler', 'Natural Gas Fuel Furnace', 'Propane Fuel Boiler', 'Propane Fuel Furnace']
291558 rows remain after applying heating techn

Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,reeds_balancing_area,state,city,county,...,baseline_2029_cooking_consumption,baseline_2030_cooking_consumption,baseline_2031_cooking_consumption,baseline_2032_cooking_consumption,baseline_2033_cooking_consumption,baseline_2034_cooking_consumption,baseline_2035_cooking_consumption,baseline_2036_cooking_consumption,baseline_2037_cooking_consumption,baseline_2038_cooking_consumption
2,239,1690.0,South,East South Central,East South Central,Hot-Humid,90,AL,Not in a census Place,G0100390,...,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44,979.44
3,273,1690.0,South,East South Central,East South Central,Mixed-Humid,90,AL,In another census Place,G0100150,...,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20
4,307,1220.0,South,East South Central,East South Central,Hot-Humid,90,AL,Not in a census Place,G0100850,...,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20,537.20
5,409,1220.0,South,East South Central,East South Central,Hot-Humid,90,AL,Not in a census Place,G0100050,...,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63,1009.63
7,517,1220.0,South,East South Central,East South Central,Mixed-Humid,89,AL,In another census Place,G0101270,...,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18,377.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548905,548109,1690.0,West,Mountain,Mountain North,Cold,23,WY,In another census Place,G5600050,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548907,548226,2176.0,West,Mountain,Mountain North,Cold,23,WY,In another census Place,G5600050,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548908,548228,1690.0,West,Mountain,Mountain North,Cold,24,WY,Not in a census Place,G5600010,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
548910,548417,885.0,West,Mountain,Mountain North,Cold,24,WY,Casper,G5600250,...,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50,486.50


# Public Perspective: Monetized Marginal Damages from Emissions

## Fossil Fuels: All Pollutants

In [12]:
# Calculate emissions factors for fossil fuels
# This is before adjusting for natural gas leakage
# Note: We use electricity marginal damages directly instead of multiplying
# CEDM emissions factors by the EASIUR marginal damages. 
def calculate_fossilFuel_emission_factor(fuel_type, so2_factor, nox_factor, pm25_factor, fuelConversion_factor1, fuelConversion_factor2):
    """
    Calculate Emissions Factors: FOSSIL FUELS
    Fossil Fuels (Natural Gas, Fuel Oil, Propane):
    - NOx, SO2, CO2: 
        - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
        - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
        - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
        - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
    - PM2.5: 
        - A National Methodology and Emission Inventory for Residential Fuel Combustion
        - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
    """
    
    # Create an empty dictionary called margEmis_factors to store the values
    margEmis_factors = {}

    # SO2, NOx, CO2: (_ lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
    # PM2.5 - FUEL OIL: 0.83 lb/thousand gallons * (1 thousand gallons / 1000 gallons) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
    # PM2.5 - NATURAL GAS: 1.9 lb/million cf * (million cf/1000000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
    # PM2.5 - PROPANE: 0.17 lb/thousand gallons * (1 thousand gallons / 1000 gallons) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
    margEmis_factors[f"{fuel_type}_so2"] = so2_factor * (1 / 1000000) * (3412 / 1)
    margEmis_factors[f"{fuel_type}_nox"] = nox_factor * (1 / 1000000) * (3412 / 1)
    margEmis_factors[f"{fuel_type}_pm25"] = pm25_factor * (1 / fuelConversion_factor1) * (1 / fuelConversion_factor2) * (3412 / 1)

    # NATURAL GAS LEAKAGE: NATURAL GAS INFRASTRUCTURE
    # leakage rate for natural gas infrastructure
    # 1 Therm = 29.30 kWh --> 1.27 kg CO2e/therm * (1 therm/29.30 kWh) = 0.043 kg CO2e/kWh = 0.095 lb CO2e/kWh
    naturalGas_leakage_mtCO2e_perkWh = 0.043 * (1/1000)

    # CO2e include pre- and post-combustion emissions
    margEmis_factors[f"naturalGas_co2e"] = (228.5 * (1/1000) * (1/1000)) + naturalGas_leakage_mtCO2e_perkWh
    margEmis_factors[f"propane_co2e"]  = 275.8 * (1/1000) * (1/1000)
    margEmis_factors[f"fuelOil_co2e"]  = 303.9 * (1/1000) * (1/1000)

    return margEmis_factors

print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------
""")

fuelOil_factors = calculate_fossilFuel_emission_factor(fuel_type="fuelOil", so2_factor=0.0015, nox_factor=0.1300, pm25_factor=0.83, fuelConversion_factor1=1000, fuelConversion_factor2=138500)
naturalGas_factors = calculate_fossilFuel_emission_factor(fuel_type="naturalGas", so2_factor=0.0006, nox_factor=0.0922, pm25_factor=1.9, fuelConversion_factor1=1000000, fuelConversion_factor2=1039)
propane_factors = calculate_fossilFuel_emission_factor(fuel_type="propane", so2_factor=0.0002, nox_factor=0.1421, pm25_factor=0.17, fuelConversion_factor1=1000, fuelConversion_factor2=91452)

all_factors = {**fuelOil_factors, **naturalGas_factors, **propane_factors}

df_margEmis_factors = pd.DataFrame.from_dict(all_factors, orient="index", columns=["value"])
df_margEmis_factors.reset_index(inplace=True)
df_margEmis_factors.columns = ["pollutant", "value"]
df_margEmis_factors[["fuel_type", "pollutant"]] = df_margEmis_factors["pollutant"].str.split("_", expand=True)
# df_margEmis_factors["unit"] = "[lb/kWh]"

# Update the units to metric tons per kWh
df_margEmis_factors["unit"] = "[mt/kWh]"

# Convert the values from lb/kWh to mt/kWh
lb_to_mt = 0.00045359237
df_margEmis_factors["value"] = df_margEmis_factors["value"] * lb_to_mt

# Add the 'state' column and assign 'National' to every row
df_margEmis_factors = df_margEmis_factors.assign(state='National')

df_margEmis_factors = df_margEmis_factors[["state", "fuel_type", "pollutant", "value", "unit"]]
df_margEmis_factors


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------



Unnamed: 0,state,fuel_type,pollutant,value,unit
0,National,fuelOil,so2,2.321486e-09,[mt/kWh]
1,National,fuelOil,nox,2.011954e-07,[mt/kWh]
2,National,fuelOil,pm25,9.274769e-09,[mt/kWh]
3,National,naturalGas,co2e,1.231503e-07,[mt/kWh]
4,National,propane,co2e,1.251008e-07,[mt/kWh]
5,National,fuelOil,co2e,1.378467e-07,[mt/kWh]
6,National,naturalGas,so2,9.285943e-10,[mt/kWh]
7,National,naturalGas,nox,1.42694e-07,[mt/kWh]
8,National,naturalGas,pm25,2.830172e-09,[mt/kWh]
9,National,propane,so2,3.095314e-10,[mt/kWh]


## Method 1: CAMBIUM and 

## Step 1: Calculate emissions factors for different fuel sources

In [13]:
print("""
-------------------------------------------------------------------------------------------------------
Public Perspective: Monetized Marginal Damages from Emissions
-------------------------------------------------------------------------------------------------------
Step 1: Calculate emissions factors for different fuel sources
- Electricity
- Natural Gas
- Fuel Oil 
- Propane
      
Step 2: Adjust Natural Gas & Electricity Emissions Factors for Natural Gas Leakage
- CAMBIUM includes fugitive emissions along with other pre-combustion and combustion emissions
- Natural Gas Leakage: Deetjen et al 2021
      
Step 3: Multiply Emissions by Marginal Social Cost
- Only CO2e in the updated methods due to data quality issues for 
-------------------------------------------------------------------------------------------------------
""")


-------------------------------------------------------------------------------------------------------
Public Perspective: Monetized Marginal Damages from Emissions
-------------------------------------------------------------------------------------------------------
Step 1: Calculate emissions factors for different fuel sources
- Electricity
- Natural Gas
- Fuel Oil 
- Propane
      
Step 2: Adjust Natural Gas & Electricity Emissions Factors for Natural Gas Leakage
- CAMBIUM includes fugitive emissions along with other pre-combustion and combustion emissions
- Natural Gas Leakage: Deetjen et al 2021
      
Step 3: Multiply Emissions by Marginal Social Cost
- Only CO2e in the updated methods due to data quality issues for 
-------------------------------------------------------------------------------------------------------



## Electricity CO2e LRMER from CAMBIUM (includes pre-combustion (fugitive) and combustion)

### Pre-IRA Scenario: Obtain Data from Cambium 2021 (MidCase)

In [14]:
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd

def calculate_electricity_emission_factor_cambium(df_cambium_import):
    """
    Interpolates Cambium electricity emission factors and converts units.

    This function takes a dataframe containing Cambium electricity emission factors and performs the following:
    - Interpolates the Long Run Marginal Emissions Rates (LRMER) and Short Run Marginal Emissions Rates (SRMER)
      values for each scenario, state, and balancing area on an annual basis.
    - Converts the LRMER and SRMER values from kg per MWh to tons per MWh and tons per kWh.

    Parameters
    ----------
    df_cambium_import : pandas.DataFrame
        DataFrame containing Cambium electricity emission factors with the following columns:
        - 'scenario': Scenario name or identifier.
        - 'state': State abbreviation.
        - 'reeds_balancing_area': REEDS balancing area identifier.
        - 'year': Year of the data.
        - 'lrmer_co2e_kg_per_MWh': Long Run Marginal Emissions Rate in kg CO2e per MWh.
        - 'srmer_co2e_kg_per_MWh': Short Run Marginal Emissions Rate in kg CO2e per MWh.

    Returns
    -------
    df_cambium_import_copy : pandas.DataFrame
        DataFrame with interpolated LRMER and SRMER values for each year and additional columns for emission factors
        converted to tons per MWh and tons per kWh.

    Notes
    -----
    - The interpolation is performed linearly between the available years for each unique combination of scenario,
      state, and balancing area.
    - The converted emission factors are added as new columns:
        - 'lrmer_co2e_ton_per_MWh'
        - 'lrmer_co2e_ton_per_kWh'
        - 'srmer_co2e_ton_per_MWh'
        - 'srmer_co2e_ton_per_kWh'
    - The conversion from kg to tons is done by dividing by 1,000 (1 ton = 1,000 kg).
    - The conversion from MWh to kWh is done by dividing by 1,000 (1 MWh = 1,000 kWh).

    Examples
    --------
    >>> df_cambium_import = pd.DataFrame({
    ...     'scenario': ['S1', 'S1'],
    ...     'state': ['CA', 'CA'],
    ...     'reeds_balancing_area': ['BA1', 'BA1'],
    ...     'year': [2020, 2025],
    ...     'lrmer_co2e_kg_per_MWh': [500, 450],
    ...     'srmer_co2e_kg_per_MWh': [300, 280]
    ... })
    >>> df_result = calculate_electricity_emission_factor_cambium(df_cambium_import)
    >>> df_result.head()
      scenario state reeds_balancing_area  year  lrmer_co2e_kg_per_MWh  srmer_co2e_kg_per_MWh  lrmer_co2e_ton_per_MWh  lrmer_co2e_ton_per_kWh  srmer_co2e_ton_per_MWh  srmer_co2e_ton_per_kWh
    0       S1    CA                  BA1  2020                   500.0                   300.0                   0.5000                0.000500                  0.3000               0.000300
    1       S1    CA                  BA1  2021                   490.0                   296.0                   0.4900                0.000490                  0.2960               0.000296
    ...

    """
    # Create a copy of the dataframe
    df_cambium_import_copy = df_cambium_import.copy()

    # Create a new DataFrame to store interpolated results
    interpolated_data = []

    # Group by 'scenario', 'state', and 'reeds_balancing_area'
    grouped = df_cambium_import_copy.groupby(['scenario', 'state', 'reeds_balancing_area'])

    for (scenario, state, reeds_balancing_area), group in grouped:
        years = group['year'].values

        # Interpolate for LRMER (Long Run Marginal Emissions Rates)
        lrmer_values = group['lrmer_co2e_kg_per_MWh'].values
        lrmer_interp_func = interp1d(years, lrmer_values, kind='linear')

        # Interpolate for SRMER (Short Run Marginal Emissions Rates)
        srmer_values = group['srmer_co2e_kg_per_MWh'].values
        srmer_interp_func = interp1d(years, srmer_values, kind='linear')

        # Generate new years in 1-year increments
        new_years = np.arange(years.min(), years.max() + 1)

        # Interpolate the LRMER and SRMER values for these new years
        new_lrmer_values = lrmer_interp_func(new_years)
        new_srmer_values = srmer_interp_func(new_years)

        # Store the results in a DataFrame
        interpolated_group = pd.DataFrame({
            'scenario': scenario,
            'state': state,
            'reeds_balancing_area': reeds_balancing_area,
            'year': new_years,
            'lrmer_co2e_kg_per_MWh': new_lrmer_values,
            'srmer_co2e_kg_per_MWh': new_srmer_values
        })

        interpolated_data.append(interpolated_group)

    # Concatenate all the interpolated data into a single DataFrame
    df_cambium_import_copy = pd.concat(interpolated_data).reset_index(drop=True)

    # Convert both LRMER and SRMER values to tons per MWh and tons per kWh
    df_cambium_import_copy['lrmer_co2e_ton_per_MWh'] = df_cambium_import_copy['lrmer_co2e_kg_per_MWh'] / 1000
    df_cambium_import_copy['lrmer_co2e_ton_per_kWh'] = df_cambium_import_copy['lrmer_co2e_kg_per_MWh'] / 1_000_000

    df_cambium_import_copy['srmer_co2e_ton_per_MWh'] = df_cambium_import_copy['srmer_co2e_kg_per_MWh'] / 1000
    df_cambium_import_copy['srmer_co2e_ton_per_kWh'] = df_cambium_import_copy['srmer_co2e_kg_per_MWh'] / 1_000_000

    return df_cambium_import_copy

def create_cambium_emission_factor_lookup(df_cambium_processed):
    """
    Creates a nested lookup dictionary for Cambium emission factors.

    This function takes a processed dataframe containing Cambium emission factors and constructs a nested dictionary
    that allows quick lookup of LRMER and SRMER emission factors based on scenario, state, balancing area, and year.

    Parameters
    ----------
    df_cambium_processed : pandas.DataFrame
        DataFrame containing processed Cambium emission factors with the following columns:
        - 'scenario': Scenario name or identifier.
        - 'state': State abbreviation.
        - 'reeds_balancing_area': REEDS balancing area identifier.
        - 'year': Year of the data.
        - 'lrmer_co2e_ton_per_kWh': Long Run Marginal Emissions Rate in tons CO2e per kWh.
        - 'srmer_co2e_ton_per_kWh': Short Run Marginal Emissions Rate in tons CO2e per kWh.

    Returns
    -------
    emis_scenario_cambium_lookup : dict
        Nested dictionary structured as:
        {
            (scenario, state, reeds_balancing_area): {
                year: {
                    'lrmer_co2e': lrmer_value,
                    'srmer_co2e': srmer_value
                },
                ...
            },
            ...
        }

    Notes
    -----
    - The outer keys of the dictionary are tuples containing (scenario, state, reeds_balancing_area).
    - The inner dictionary maps years to a dictionary containing both LRMER and SRMER values.
    - This structure allows efficient retrieval of emission factors based on scenario, location, and year.

    Examples
    --------
    >>> df_cambium_processed = pd.DataFrame({
    ...     'scenario': ['S1', 'S1'],
    ...     'state': ['CA', 'CA'],
    ...     'reeds_balancing_area': ['BA1', 'BA1'],
    ...     'year': [2020, 2021],
    ...     'lrmer_co2e_ton_per_kWh': [0.0005, 0.00049],
    ...     'srmer_co2e_ton_per_kWh': [0.0003, 0.00029]
    ... })
    >>> lookup = create_cambium_emission_factor_lookup(df_cambium_processed)
    >>> lookup[('S1', 'CA', 'BA1')][2020]
    {'lrmer_co2e': 0.0005, 'srmer_co2e': 0.0003}

    """
    # Create a copy of the dataframe
    df_cambium_processed_copy = df_cambium_processed.copy()

    # Create the nested lookup dictionary for both LRMER and SRMER in tons CO2e per kWh
    emis_scenario_cambium_lookup = {}

    # Populate the dictionary
    for _, row in df_cambium_processed_copy.iterrows():
        outer_key = (row['scenario'], row['state'], row['reeds_balancing_area'])
        year = row['year']

        # Extract both LRMER and SRMER values in tons per kWh
        lrmer_value = row['lrmer_co2e_ton_per_kWh']
        srmer_value = row['srmer_co2e_ton_per_kWh']

        # Initialize the outer key if not already present
        if outer_key not in emis_scenario_cambium_lookup:
            emis_scenario_cambium_lookup[outer_key] = {}

        # Assign both LRMER and SRMER values in the inner dictionary for each year
        emis_scenario_cambium_lookup[outer_key][year] = {
            'lrmer_co2e': lrmer_value,
            'srmer_co2e': srmer_value
        }

    return emis_scenario_cambium_lookup


In [15]:
# INTERPOLATE ANNUAL DATA BETWEEN 5-YEAR TIME STEPS
print("""
-------------------------------------------------------------------------------------------------------
CLIMATE DAMAGES FROM CAMBIUM
-------------------------------------------------------------------------------------------------------
- Load CSV
- Convert MWh --> kWh and kg --> metric tons (mt)
- Inflate updated Social Cost of Carbon from $190 USD2020 to $USD2023
- Convert SCC to $USD2023/lb
- Calculate damage factors for CO2e: LRMER[lb/kWh] * SCC[$USD2023/lb] = $USD2023/kWh
- Map state, county pairs to Cambium 2023 GEA region

Possibly for other emissions projections:
- Calculate projection factors for each group of scenario and GEA region for 2025 to 2050 (normalize all annual data in group by 2025 value) 
-------------------------------------------------------------------------------------------------------
""")


-------------------------------------------------------------------------------------------------------
CLIMATE DAMAGES FROM CAMBIUM
-------------------------------------------------------------------------------------------------------
- Load CSV
- Convert MWh --> kWh and kg --> metric tons (mt)
- Inflate updated Social Cost of Carbon from $190 USD2020 to $USD2023
- Convert SCC to $USD2023/lb
- Calculate damage factors for CO2e: LRMER[lb/kWh] * SCC[$USD2023/lb] = $USD2023/kWh
- Map state, county pairs to Cambium 2023 GEA region

Possibly for other emissions projections:
- Calculate projection factors for each group of scenario and GEA region for 2025 to 2050 (normalize all annual data in group by 2025 value) 
-------------------------------------------------------------------------------------------------------



In [16]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

print("""
-------------------------------------------------------------------------------------------------------
PRE-IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2021 FOR PRE-IRA SCENARIO
filename = 'cambium21_midCase_ba.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium21_margEmis_electricity = pd.read_excel(io=file_path, sheet_name='cambium21_midCase_ba')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}

Loading dataframe ...
Creating lookup dictionary for LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------
""")

# Calculate electricity emission factors for Cambium 2021
# Process the data using the provided function to interpolate and convert units
df_cambium21_processed = calculate_electricity_emission_factor_cambium(df_cambium21_margEmis_electricity)

# Display the processed DataFrame
df_cambium21_processed


-------------------------------------------------------------------------------------------------------
PRE-IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2021 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium21_midCase_ba.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium21_midCase_ba.xlsx

Loading dataframe ...
Creating lookup dictionary for LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------



Unnamed: 0,scenario,state,reeds_balancing_area,year,lrmer_co2e_kg_per_MWh,srmer_co2e_kg_per_MWh,lrmer_co2e_ton_per_MWh,lrmer_co2e_ton_per_kWh,srmer_co2e_ton_per_MWh,srmer_co2e_ton_per_kWh
0,MidCase,AL,89,2022,703.10,877.7,0.70310,0.000703,0.8777,0.000878
1,MidCase,AL,89,2023,686.20,887.5,0.68620,0.000686,0.8875,0.000888
2,MidCase,AL,89,2024,669.30,897.3,0.66930,0.000669,0.8973,0.000897
3,MidCase,AL,89,2025,613.15,895.5,0.61315,0.000613,0.8955,0.000896
4,MidCase,AL,89,2026,557.00,893.7,0.55700,0.000557,0.8937,0.000894
...,...,...,...,...,...,...,...,...,...,...
3881,MidCase,WY,24,2046,223.60,709.7,0.22360,0.000224,0.7097,0.000710
3882,MidCase,WY,24,2047,229.30,700.0,0.22930,0.000229,0.7000,0.000700
3883,MidCase,WY,24,2048,235.00,690.3,0.23500,0.000235,0.6903,0.000690
3884,MidCase,WY,24,2049,229.20,689.0,0.22920,0.000229,0.6890,0.000689


In [17]:
# Create the lookup dictionary using the create_cambium_emission_factor_lookup function
emis_preIRA_cambium21_lookup = create_cambium_emission_factor_lookup(df_cambium21_processed)

# Display the lookup dictionary
emis_preIRA_cambium21_lookup

{('MidCase',
  'AL',
  89): {2022: {'lrmer_co2e': 0.0007031,
   'srmer_co2e': 0.0008777}, 2023: {'lrmer_co2e': 0.0006862000000000001, 'srmer_co2e': 0.0008875}, 2024: {'lrmer_co2e': 0.0006693,
   'srmer_co2e': 0.0008973}, 2025: {'lrmer_co2e': 0.00061315,
   'srmer_co2e': 0.0008955}, 2026: {'lrmer_co2e': 0.000557,
   'srmer_co2e': 0.0008937000000000001}, 2027: {'lrmer_co2e': 0.0005158,
   'srmer_co2e': 0.00088895}, 2028: {'lrmer_co2e': 0.00047460000000000004,
   'srmer_co2e': 0.0008842}, 2029: {'lrmer_co2e': 0.0004573,
   'srmer_co2e': 0.00087045}, 2030: {'lrmer_co2e': 0.00044,
   'srmer_co2e': 0.0008567000000000001}, 2031: {'lrmer_co2e': 0.00043595,
   'srmer_co2e': 0.00084995}, 2032: {'lrmer_co2e': 0.0004319,
   'srmer_co2e': 0.0008432}, 2033: {'lrmer_co2e': 0.000459,
   'srmer_co2e': 0.00084375}, 2034: {'lrmer_co2e': 0.0004861,
   'srmer_co2e': 0.0008443}, 2035: {'lrmer_co2e': 0.0004675,
   'srmer_co2e': 0.0008354500000000001}, 2036: {'lrmer_co2e': 0.00044889999999999996,
   'srmer_co

### IRA-Reference Scenario: 
### Obtain 2024 Data from Cambium 2022

In [28]:
import os
import pandas as pd

print("""
-------------------------------------------------------------------------------------------------------
IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2022 FOR IRA SCENARIO
filename = 'cambium22_allScenarios_ba.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium22_2024_margEmis_electricity = pd.read_excel(io=file_path, sheet_name='cambium22_scenarios_2024_ba')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}

Loading dataframe ...
Creating lookup dictionary for 2024 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------
""")

# THIS DOES NOT USE THE INTERPOLATION FUNCTION AS THERE IS ONLY ONE YEAR OF DATA!!!
df_cambium22_2024_processed = df_cambium22_2024_margEmis_electricity.copy()

# Convert kg/MWh to tons/MWh and tons/kWh for LRMER
df_cambium22_2024_processed['lrmer_co2e_ton_per_MWh'] = df_cambium22_2024_processed['lrmer_co2e_kg_per_MWh'] * (1 / 1000)
df_cambium22_2024_processed['lrmer_co2e_ton_per_kWh'] = df_cambium22_2024_processed['lrmer_co2e_kg_per_MWh'] * (1 / 1000) * (1 / 1000)

# Convert kg/MWh to tons/MWh and tons/kWh for SRMER if available
if 'srmer_co2e_kg_per_MWh' in df_cambium22_2024_processed.columns:
    df_cambium22_2024_processed['srmer_co2e_ton_per_MWh'] = df_cambium22_2024_processed['srmer_co2e_kg_per_MWh'] * (1 / 1000)
    df_cambium22_2024_processed['srmer_co2e_ton_per_kWh'] = df_cambium22_2024_processed['srmer_co2e_kg_per_MWh'] * (1 / 1000) * (1 / 1000)

# Display the df
df_cambium22_2024_processed


-------------------------------------------------------------------------------------------------------
IRA LONG RUN AND SHORT RUN MARGINAL EMISSIONS RATES (LRMER, SRMER) FROM CAMBIUM 2022 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium22_allScenarios_ba.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium22_allScenarios_ba.xlsx

Loading dataframe ...
Creating lookup dictionary for 2024 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------



Unnamed: 0,scenario,reeds_balancing_area,state,year,lrmer_co2e_kg_per_MWh,srmer_co2e_kg_per_MWh,lrmer_co2e_ton_per_MWh,lrmer_co2e_ton_per_kWh,srmer_co2e_ton_per_MWh,srmer_co2e_ton_per_kWh
0,MidCase,1,WA,2024,283.2,751.7,0.2832,0.000283,0.7517,0.000752
1,MidCase,10,CA,2024,207.6,765.1,0.2076,0.000208,0.7651,0.000765
2,MidCase,100,VA,2024,389.0,871.8,0.3890,0.000389,0.8718,0.000872
3,MidCase,101,FL,2024,288.4,775.9,0.2884,0.000288,0.7759,0.000776
4,MidCase,102,FL,2024,283.9,766.7,0.2839,0.000284,0.7667,0.000767
...,...,...,...,...,...,...,...,...,...,...
397,MidCase95by2050,95,SC,2024,331.8,867.9,0.3318,0.000332,0.8679,0.000868
398,MidCase95by2050,96,SC,2024,332.8,858.8,0.3328,0.000333,0.8588,0.000859
399,MidCase95by2050,97,NC,2024,323.1,878.6,0.3231,0.000323,0.8786,0.000879
400,MidCase95by2050,98,NC,2024,324.1,869.7,0.3241,0.000324,0.8697,0.000870


In [29]:
# Create the lookup dictionary using the create_cambium_emission_factor_lookup function
emis_IRA_2024_cambium22_lookup = create_cambium_emission_factor_lookup(df_cambium22_2024_processed)

# Display the lookup dictionary
emis_IRA_2024_cambium22_lookup

{('MidCase',
  'WA',
  1): {2024: {'lrmer_co2e': 0.0002832, 'srmer_co2e': 0.0007517}},
 ('MidCase',
  'CA',
  10): {2024: {'lrmer_co2e': 0.0002076, 'srmer_co2e': 0.0007651}},
 ('MidCase',
  'VA',
  100): {2024: {'lrmer_co2e': 0.000389, 'srmer_co2e': 0.0008718}},
 ('MidCase',
  'FL',
  101): {2024: {'lrmer_co2e': 0.0002884, 'srmer_co2e': 0.0007759}},
 ('MidCase',
  'FL',
  102): {2024: {'lrmer_co2e': 0.0002839, 'srmer_co2e': 0.0007667}},
 ('MidCase',
  'MI',
  103): {2024: {'lrmer_co2e': 0.0004359, 'srmer_co2e': 0.0009234}},
 ('MidCase',
  'MI',
  104): {2024: {'lrmer_co2e': 0.00043680000000000005,
   'srmer_co2e': 0.0009228999999999999}},
 ('MidCase',
  'IN',
  105): {2024: {'lrmer_co2e': 0.00045400000000000003,
   'srmer_co2e': 0.0009211}},
 ('MidCase',
  'IN',
  106): {2024: {'lrmer_co2e': 0.000453, 'srmer_co2e': 0.0009214000000000001}},
 ('MidCase',
  'IN',
  107): {2024: {'lrmer_co2e': 0.0004529, 'srmer_co2e': 0.0009281000000000001}},
 ('MidCase',
  'KY',
  108): {2024: {'lrmer_co2

### Obtain 2025-2050 Data from Cambium 2023

In [30]:
print("""
-------------------------------------------------------------------------------------------------------
IRA REFERENCE SCENARIO LRMER AND SRMER (2025-2050) FROM CAMBIUM 2023 RELEASE
-------------------------------------------------------------------------------------------------------
""")

# CAMBIUM 2023 FOR IRA REFERENCE SCENARIO
filename = 'cambium23_allScenarios_ba.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_cambium23_margEmis_electricity = pd.read_excel(io=file_path, sheet_name='cambium23_allScenarios_ba')

print(f"""
Retrieved data for filename: {filename}
Located at filepath: {file_path}

Loading dataframe ...
Creating lookup dictionary for 2025-2050 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------
""")

# Calculate electricity emission factors for Cambium 2021
# Process the data using the provided function to interpolate and convert units
df_cambium23_processed = calculate_electricity_emission_factor_cambium(df_cambium23_margEmis_electricity)

# Display the processed DataFrame
df_cambium23_processed


-------------------------------------------------------------------------------------------------------
IRA REFERENCE SCENARIO LRMER AND SRMER (2025-2050) FROM CAMBIUM 2023 RELEASE
-------------------------------------------------------------------------------------------------------


Retrieved data for filename: cambium23_allScenarios_ba.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\cambium23_allScenarios_ba.xlsx

Loading dataframe ...
Creating lookup dictionary for 2025-2050 LRMER and SRMER ...
-------------------------------------------------------------------------------------------------------



Unnamed: 0,scenario,state,reeds_balancing_area,year,lrmer_co2e_kg_per_MWh,srmer_co2e_kg_per_MWh,lrmer_co2e_ton_per_MWh,lrmer_co2e_ton_per_kWh,srmer_co2e_ton_per_MWh,srmer_co2e_ton_per_kWh
0,Decarb100by2035,AL,89,2025,591.60,842.80,0.59160,0.000592,0.84280,0.000843
1,Decarb100by2035,AL,89,2026,520.28,748.64,0.52028,0.000520,0.74864,0.000749
2,Decarb100by2035,AL,89,2027,448.96,654.48,0.44896,0.000449,0.65448,0.000654
3,Decarb100by2035,AL,89,2028,377.64,560.32,0.37764,0.000378,0.56032,0.000560
4,Decarb100by2035,AL,89,2029,306.32,466.16,0.30632,0.000306,0.46616,0.000466
...,...,...,...,...,...,...,...,...,...,...
27867,MidCase,WY,24,2046,122.38,122.86,0.12238,0.000122,0.12286,0.000123
27868,MidCase,WY,24,2047,152.46,120.12,0.15246,0.000152,0.12012,0.000120
27869,MidCase,WY,24,2048,182.54,117.38,0.18254,0.000183,0.11738,0.000117
27870,MidCase,WY,24,2049,212.62,114.64,0.21262,0.000213,0.11464,0.000115


In [31]:
# Create the lookup dictionary using the create_cambium_emission_factor_lookup function
emis_IRA_2025_2050_cambium23_lookup = create_cambium_emission_factor_lookup(df_cambium23_processed)

# Display the lookup dictionary
emis_IRA_2025_2050_cambium23_lookup

{('Decarb100by2035',
  'AL',
  89): {2025: {'lrmer_co2e': 0.0005916000000000001,
   'srmer_co2e': 0.0008428}, 2026: {'lrmer_co2e': 0.00052028,
   'srmer_co2e': 0.00074864}, 2027: {'lrmer_co2e': 0.00044896000000000004,
   'srmer_co2e': 0.00065448}, 2028: {'lrmer_co2e': 0.00037764,
   'srmer_co2e': 0.0005603199999999999}, 2029: {'lrmer_co2e': 0.00030632,
   'srmer_co2e': 0.00046615999999999997}, 2030: {'lrmer_co2e': 0.000235,
   'srmer_co2e': 0.000372}, 2031: {'lrmer_co2e': 0.000188,
   'srmer_co2e': 0.00030896}, 2032: {'lrmer_co2e': 0.000141,
   'srmer_co2e': 0.00024592}, 2033: {'lrmer_co2e': 9.4e-05,
   'srmer_co2e': 0.00018287999999999998}, 2034: {'lrmer_co2e': 4.7e-05,
   'srmer_co2e': 0.00011984}, 2035: {'lrmer_co2e': 0.0,
   'srmer_co2e': 5.680000000000001e-05}, 2036: {'lrmer_co2e': 0.0,
   'srmer_co2e': 5.2379999999999997e-05}, 2037: {'lrmer_co2e': 0.0,
   'srmer_co2e': 4.796e-05}, 2038: {'lrmer_co2e': 0.0,
   'srmer_co2e': 4.354e-05}, 2039: {'lrmer_co2e': 0.0,
   'srmer_co2e': 3.

## HEALTH-RELATED EMISSIONS

### Electricity - Method 1: Schmitt et al 2024 Study (Assumes GEA Region and EPA eGRID subregions are the same - which they aren't)

In [40]:
# Adjust for regional cost differences with RSMeans
filename = "grid_mix_reg_full_delta.csv"
relative_path = os.path.join(r"projections\schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_grid_mix = pd.read_csv(file_path)

df_grid_mix = pd.DataFrame({
    'year': df_grid_mix['Year'],
    'cambium_gea_region': df_grid_mix['Cambium.GEA'],
    'fuel_source': df_grid_mix['Source'],
    'fraction_generation': df_grid_mix['Fraction'],
})
df_grid_mix

Retrieved data for filename: grid_mix_reg_full_delta.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\grid_mix_reg_full_delta.csv




Unnamed: 0,year,cambium_gea_region,fuel_source,fraction_generation
0,2022,AZNMc,Coal,0.000000
1,2023,AZNMc,Coal,0.000000
2,2024,AZNMc,Coal,0.000000
3,2025,AZNMc,Coal,0.137832
4,2026,AZNMc,Coal,0.275665
...,...,...,...,...
2895,2046,SRVCc,Renewable,0.868251
2896,2047,SRVCc,Renewable,0.883341
2897,2048,SRVCc,Renewable,0.898431
2898,2049,SRVCc,Renewable,0.913521


In [35]:
# Adjust for regional cost differences with RSMeans
filename = "ef_pollutants_egrid.csv"
relative_path = os.path.join(r"projections\schmitt_ev_study", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_grid_emis_factors = pd.read_csv(file_path)

df_grid_emis_factors = pd.DataFrame({
    'cambium_gea_region': df_grid_emis_factors['eGRID_subregion'],
    'fuel_source': df_grid_emis_factors['Fuel'],
    'pollutant': df_grid_emis_factors['Pollutant'],
    'emis_rate': df_grid_emis_factors['Emission_rate'],
    'unit': df_grid_emis_factors['Unit'],
})

mapping = {
    'AKGD': None,       # Alaska Grid - Not included
    'AKMS': None,       # Alaska Miscellaneous - Not included
    'AZNM': 'AZNMc',    # Arizona/New Mexico Power Area
    'CAMX': 'CAMXc',    # California Mexico
    'ERCT': 'ERCTc',    # Electric Reliability Council of Texas
    'FRCC': 'FRCCc',    # Florida Reliability Coordinating Council
    'HIMS': None,       # Hawaii Maui Subregion - Not included
    'HIOA': None,       # Hawaii Oahu Subregion - Not included
    'MROE': 'MROEc',    # Midwest Reliability Organization East
    'MROW': 'MROWc',    # Midwest Reliability Organization West
    'NEWE': 'NEWEc',    # New England
    'NWPP': 'NWPPc',    # Northwest Power Pool
    'NYCW': 'NYSTc',    # New York City/Westchester mapped to New York State
    'NYLI': 'NYSTc',    # New York Long Island mapped to New York State
    'NYUP': 'NYSTc',    # New York Upstate mapped to New York State
    'PRMS': None,       # Puerto Rico Miscellaneous - Not included
    'RFCE': 'RFCEc',    # ReliabilityFirst Corporation East
    'RFCM': 'RFCMc',    # ReliabilityFirst Corporation Midwest
    'RFCW': 'RFCWc',    # ReliabilityFirst Corporation West
    'RMPA': 'RMPAc',    # Rocky Mountain Power Area
    'SPNO': 'SPNOc',    # Southwest Power Pool North
    'SPSO': 'SPSOc',    # Southwest Power Pool South
    'SRMV': 'SRMVc',    # SERC Reliability Corporation Mississippi Valley
    'SRMW': 'SRMWc',    # SERC Reliability Corporation Midwest
    'SRSO': 'SRSOc',    # SERC Reliability Corporation South
    'SRTV': 'SRTVc',    # SERC Reliability Corporation Tennessee Valley
    'SRVC': 'SRVCc',    # SERC Reliability Corporation Virginia/Carolina
}

# Apply the mapping to the 'cambium_gea_region' column
df_grid_emis_factors['cambium_gea_region'] = df_grid_emis_factors['cambium_gea_region'].map(mapping)

# Drop rows where 'cambium_gea_region' is None (regions not included in the mapping)
df_grid_emis_factors = df_grid_emis_factors.dropna(subset=['cambium_gea_region']).reset_index(drop=True)

# Conversion factor from pounds to metric tons
lb_to_mt = 0.00045359237
perMWh_to_perkWh = 1/1000

# Apply the conversion where the unit is 'lb/MWh'
df_grid_emis_factors.loc[df_grid_emis_factors['unit'] == 'lb/MWh', 'emis_rate'] *= (lb_to_mt * perMWh_to_perkWh)
df_grid_emis_factors.loc[df_grid_emis_factors['unit'] == 'lb/MWh', 'unit'] = 'mt/kWh'

df_grid_emis_factors

Retrieved data for filename: ef_pollutants_egrid.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\schmitt_ev_study\ef_pollutants_egrid.csv




Unnamed: 0,cambium_gea_region,fuel_source,pollutant,emis_rate,unit
0,AZNMc,Coal,NOx,7.171295e-07,mt/kWh
1,CAMXc,Coal,NOx,1.243750e-06,mt/kWh
2,ERCTc,Coal,NOx,5.030339e-07,mt/kWh
3,FRCCc,Coal,NOx,2.326929e-07,mt/kWh
4,MROEc,Coal,NOx,3.451838e-07,mt/kWh
...,...,...,...,...,...
545,SRMVc,Renewables,VOC,0.000000e+00,mt/kWh
546,SRMWc,Renewables,VOC,0.000000e+00,mt/kWh
547,SRSOc,Renewables,VOC,0.000000e+00,mt/kWh
548,SRTVc,Renewables,VOC,0.000000e+00,mt/kWh


In [36]:
def process_emissions_data(df_grid_mix, df_grid_emis_factors):
    # Check unique fuel sources in both dataframes
    fuel_sources_mix = set(df_grid_mix['fuel_source'].unique())
    fuel_sources_emis = set(df_grid_emis_factors['fuel_source'].unique())

    print("Fuel sources in df_grid_mix:", fuel_sources_mix)
    print("Fuel sources in df_grid_emis_factors:", fuel_sources_emis)

    # Merge the dataframes
    df_combined = pd.merge(
        df_grid_mix,
        df_grid_emis_factors,
        on=['cambium_gea_region', 'fuel_source'],
        how='inner'
    )

    # Calculate emissions contribution
    df_combined['emis_contribution'] = df_combined['fraction_generation'] * df_combined['emis_rate']

    # Sum emissions contributions
    df_emis_factors = df_combined.groupby(
        ['year', 'cambium_gea_region', 'pollutant']
    )['emis_contribution'].sum().reset_index()

    # Pivot the dataframe
    df_emis_factors_pivot = df_emis_factors.pivot_table(
        index=['year', 'cambium_gea_region'],
        columns='pollutant',
        values='emis_contribution'
    ).reset_index()

    # Rename columns
    df_emis_factors_pivot.rename(columns={
        'NH3': 'delta_egrid_nh3',
        'NOx': 'delta_egrid_nox',
        'PM25': 'delta_egrid_pm25',
        'SO2': 'delta_egrid_so2',
        'VOC': 'delta_egrid_voc'
    }, inplace=True)

    return df_emis_factors_pivot

# Example usage
df_emis_factors_epa_egrid = process_emissions_data(df_grid_mix, df_grid_emis_factors)
df_emis_factors_epa_egrid

Fuel sources in df_grid_mix: {'Coal', 'Renewable', 'Natural Gas', 'Nuclear', 'Oil'}
Fuel sources in df_grid_emis_factors: {'Coal', 'Renewables', 'Natural Gas', 'Nuclear', 'Oil'}


pollutant,year,cambium_gea_region,delta_egrid_nh3,delta_egrid_nox,delta_egrid_pm25,delta_egrid_so2,delta_egrid_voc
0,2022,AZNMc,4.284055e-11,9.898048e-09,4.965068e-11,3.457965e-09,1.953264e-11
1,2022,CAMXc,7.522755e-09,2.069686e-07,9.580178e-09,3.556412e-09,2.775051e-09
2,2022,ERCTc,4.432087e-09,4.835162e-07,4.317288e-08,1.320681e-06,1.101529e-08
3,2022,FRCCc,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
4,2022,MROEc,4.849361e-09,5.447874e-07,1.078161e-08,8.474062e-08,9.076871e-09
...,...,...,...,...,...,...,...
575,2050,SRMVc,1.411854e-09,1.876759e-08,2.205976e-09,5.428756e-09,6.989808e-10
576,2050,SRMWc,6.359354e-10,1.704447e-08,9.728582e-10,4.954324e-09,2.334395e-10
577,2050,SRSOc,4.354236e-10,1.003789e-08,1.331772e-09,3.118447e-09,4.730064e-10
578,2050,SRTVc,2.546688e-09,2.709200e-08,4.472789e-09,2.509969e-08,1.490707e-09


In [43]:
# Convert the emissions factors dataframe into a lookup dictionary
lookup_electricity_emissions_egrid = df_emis_factors_epa_egrid.set_index(['year', 'cambium_gea_region']).to_dict('index')

# Display the lookup dictionary
lookup_electricity_emissions_egrid

{(2022, 'AZNMc'): {'delta_egrid_nh3': 4.2840552430687625e-11,
  'delta_egrid_nox': 9.898047990490191e-09,
  'delta_egrid_pm25': 4.965068338564084e-11,
  'delta_egrid_so2': 3.457964622852321e-09,
  'delta_egrid_voc': 1.9532640138985512e-11},
 (2022, 'CAMXc'): {'delta_egrid_nh3': 7.522755410773406e-09,
  'delta_egrid_nox': 2.0696856018167551e-07,
  'delta_egrid_pm25': 9.580177724431295e-09,
  'delta_egrid_so2': 3.55641151857646e-09,
  'delta_egrid_voc': 2.7750506632228643e-09},
 (2022, 'ERCTc'): {'delta_egrid_nh3': 4.432086644184407e-09,
  'delta_egrid_nox': 4.835161961260566e-07,
  'delta_egrid_pm25': 4.3172882994071317e-08,
  'delta_egrid_so2': 1.3206809363102089e-06,
  'delta_egrid_voc': 1.101529215996984e-08},
 (2022, 'FRCCc'): {'delta_egrid_nh3': 0.0,
  'delta_egrid_nox': 0.0,
  'delta_egrid_pm25': 0.0,
  'delta_egrid_so2': 0.0,
  'delta_egrid_voc': 0.0},
 (2022, 'MROEc'): {'delta_egrid_nh3': 4.849361388656854e-09,
  'delta_egrid_nox': 5.447873793436593e-07,
  'delta_egrid_pm25': 1.

In [None]:
def calculate_co2e_and_climate_damages(df_copy, df_summary, df_new_columns, category, year_label, consumption_col, hdd_factor, td_losses_multiplier, emis_electricity_lookup, mer_type, policy_scenario, cambium_scenario, scenario_prefix, menu_mp):
    # ELECTRICITY EMISSIONS FOR CO2e
    if policy_scenario != 'No Inflation Reduction Act':
        if year_label == 2024:
            emis_electricity_lookup = emis_IRA_2024_cambium22_lookup
        else:
            emis_electricity_lookup = emis_IRA_2025_2050_cambium23_lookup

    emis_electricity = (
        df_copy[consumption_col] *
        hdd_factor *
        td_losses_multiplier *
        df_copy.apply(
            lambda row: emis_electricity_lookup.get(
                (cambium_scenario, row['state'], row['gea_region']), {}
            ).get(year_label, {}).get(f'{mer_type}_co2e', np.nan),
            axis=1
        ).fillna(0)
    )

    # FOSSIL FUEL EMISSIONS FOR CO2e (Only for baseline scenario)
    if menu_mp == 0:
        emis_naturalGas = df_copy[f'base_naturalGas_{category}_consumption'] * hdd_factor * emis_factor_co2e_naturalGas_ton_perkWh
        emis_propane = df_copy[f'base_propane_{category}_consumption'] * hdd_factor * emis_factor_co2e_propane_ton_perkWh

        if 'cooking' in category or 'clothesDrying' in category:
            fossilFuel_emissions = emis_naturalGas.fillna(0) + emis_propane.fillna(0)
        else:
            emis_fuelOil = df_copy[f'base_fuelOil_{category}_consumption'] * hdd_factor * emis_factor_co2e_fuelOil_ton_perkWh
            fossilFuel_emissions = emis_naturalGas.fillna(0) + emis_propane.fillna(0) + emis_fuelOil.fillna(0)

        total_emissions = fossilFuel_emissions + emis_electricity
    else:
        total_emissions = emis_electricity

    total_damages = total_emissions * epa_scc_usd2023_per_ton

    emis_col = f'{scenario_prefix}{year_label}_{category}_tons_co2e_{mer_type}'
    damage_col = f'{scenario_prefix}{year_label}_{category}_damages_climate_{mer_type}'

    df_new_columns[emis_col] = np.round(total_emissions, 2)
    df_new_columns[damage_col] = np.round(total_damages, 2)

    return total_emissions, total_damages

def calculate_health_emissions_and_damages(df_copy, df_summary, df_new_columns, category, year_label, consumption_col, hdd_factor, td_losses_multiplier, emis_electricity_lookup, pollutants, emis_factors_fossil_fuels, damage_costs_per_ton, mer_type, scenario_prefix, menu_mp):
    lifetime_emissions_pollutants = {pollutant: 0 for pollutant in pollutants}
    lifetime_damages_pollutants = {pollutant: 0 for pollutant in pollutants}

    for pollutant in pollutants:
        # ELECTRICITY EMISSIONS FOR POLLUTANTS
        emis_electricity_pollutant = (
            df_copy[consumption_col] *
            hdd_factor *
            td_losses_multiplier *
            df_copy.apply(
                lambda row: emis_electricity_lookup.get(
                    (year_label, row['gea_region']), {}
                ).get(f'delta_egrid_{pollutant}', np.nan),
                axis=1
            ).fillna(0)
        )

        # FOSSIL FUEL EMISSIONS FOR POLLUTANTS (Only for baseline scenario)
        if menu_mp == 0:
            emis_naturalGas_pollutant = df_copy[f'base_naturalGas_{category}_consumption'] * hdd_factor * emis_factors_fossil_fuels['naturalGas'][pollutant]
            emis_propane_pollutant = df_copy[f'base_propane_{category}_consumption'] * hdd_factor * emis_factors_fossil_fuels['propane'][pollutant]

            if 'cooking' in category or 'clothesDrying' in category:
                fossilFuel_emissions_pollutant = emis_naturalGas_pollutant.fillna(0) + emis_propane_pollutant.fillna(0)
            else:
                emis_fuelOil_pollutant = df_copy[f'base_fuelOil_{category}_consumption'] * hdd_factor * emis_factors_fossil_fuels['fuelOil'][pollutant]
                fossilFuel_emissions_pollutant = emis_naturalGas_pollutant.fillna(0) + emis_propane_pollutant.fillna(0) + emis_fuelOil_pollutant.fillna(0)

            total_emissions_pollutant = fossilFuel_emissions_pollutant + emis_electricity_pollutant
        else:
            total_emissions_pollutant = emis_electricity_pollutant

        total_damages_pollutant = total_emissions_pollutant * damage_costs_per_ton[pollutant]

        emis_col_pollutant = f'{scenario_prefix}{year_label}_{category}_tons_{pollutant}_{mer_type}'
        damage_col_pollutant = f'{scenario_prefix}{year_label}_{category}_damages_{pollutant}_{mer_type}'

        df_new_columns[emis_col_pollutant] = np.round(total_emissions_pollutant, 6)
        df_new_columns[damage_col_pollutant] = np.round(total_damages_pollutant, 2)

        lifetime_emissions_pollutants[pollutant] += total_emissions_pollutant
        lifetime_damages_pollutants[pollutant] += total_damages_pollutant

    return lifetime_emissions_pollutants, lifetime_damages_pollutants

def calculate_damages_grid_scenario(df_copy, df_summary, menu_mp, td_losses_multiplier, emis_electricity_lookup, policy_scenario, cambium_scenario, scenario_prefix, hdd_factors_per_year):
    """
    Calculate damages for the specified electricity grid policy_scenario.

    Parameters:
        df_copy (DataFrame): The DataFrame containing consumption data.
        df_summary (DataFrame): The DataFrame for summarizing results.
        menu_mp (int): The menu number for the measure package.
        td_losses_multiplier (float): Transmission and distribution losses multiplier.
        emis_electricity_lookup (dict): Lookup table for emissions.
        policy_scenario (str): The policy scenario.
        cambium_scenario (str): The Cambium scenario.
        scenario_prefix (str): Prefix for the scenario.
        hdd_factors_per_year (dict): Heating Degree Day factors per year.

    Returns:
        DataFrame: The DataFrame with calculated damages.
    """
    new_columns_data = {}

    # Define pollutants and associated data
    pollutants = ['nox', 'pm25', 'so2']

    # Emission factors for fossil fuels in tons per kWh
    emis_factors_fossil_fuels = {
        'naturalGas': {'nox': emis_factor_nox_naturalGas_ton_perkWh, 'pm25': emis_factor_pm25_naturalGas_ton_perkWh, 'so2': emis_factor_so2_naturalGas_ton_perkWh},
        'propane': {'nox': emis_factor_nox_propane_ton_perkWh, 'pm25': emis_factor_pm25_propane_ton_perkWh, 'so2': emis_factor_so2_propane_ton_perkWh},
        'fuelOil': {'nox': emis_factor_nox_fuelOil_ton_perkWh, 'pm25': emis_factor_pm25_fuelOil_ton_perkWh, 'so2': emis_factor_so2_fuelOil_ton_perkWh},
    }

    # Damage costs per ton for pollutants in USD
    damage_costs_per_ton = {
        'nox': damage_cost_nox_usd_per_ton,
        'pm25': damage_cost_pm25_usd_per_ton,
        'so2': damage_cost_so2_usd_per_ton,
    }

    for category, lifetime in equipment_specs.items():
        print(f"Calculating marginal emissions and marginal damages for {category}")

        # Perform calculations for both LRMER and SRMER
        for mer_type in ['lrmer', 'srmer']:
            print(f"For Marginal Emissions Factor: {mer_type}")

            lifetime_emissions = 0
            lifetime_damages = 0
            lifetime_emissions_pollutants = {pollutant: 0 for pollutant in pollutants}
            lifetime_damages_pollutants = {pollutant: 0 for pollutant in pollutants}

            for year in range(1, lifetime + 1):
                year_label = year + 2023
                if menu_mp == 0:
                    consumption_col = f'base_electricity_{category}_consumption'
                else:
                    consumption_col = f'mp{menu_mp}_{year_label}_{category}_consumption'

                # Get precomputed hdd_factor for the current year
                if category in ['heating', 'waterHeating']:
                    hdd_factor = hdd_factors_per_year[year_label]
                else:
                    hdd_factor = 1.0

                # Calculate CO2e emissions and climate damages
                df_new_columns = {}
                total_emissions, total_damages = calculate_co2e_and_climate_damages(
                    df_copy, df_summary, df_new_columns, category, year_label, consumption_col, hdd_factor, td_losses_multiplier,
                    emis_electricity_lookup, mer_type, policy_scenario, cambium_scenario, scenario_prefix, menu_mp
                )

                lifetime_emissions += total_emissions
                lifetime_damages += total_damages

                # Calculate health emissions and damages
                lifetime_emissions_pollutant, lifetime_damages_pollutant = calculate_health_emissions_and_damages(
                    df_copy, df_summary, df_new_columns, category, year_label, consumption_col, hdd_factor, td_losses_multiplier,
                    emis_electricity_lookup, pollutants, emis_factors_fossil_fuels, damage_costs_per_ton, mer_type, scenario_prefix, menu_mp
                )

                for pollutant in pollutants:
                    lifetime_emissions_pollutants[pollutant] += lifetime_emissions_pollutant[pollutant]
                    lifetime_damages_pollutants[pollutant] += lifetime_damages_pollutant[pollutant]

                # Update new_columns_data with results from helper functions
                new_columns_data.update(df_new_columns)

            # Columns for Lifetime Emissions and Damages
            lifetime_emissions_col = f'{scenario_prefix}{category}_lifetime_tons_co2e_{mer_type}'
            lifetime_damages_col = f'{scenario_prefix}{category}_lifetime_damages_climate_{mer_type}'

            new_columns_data[lifetime_emissions_col] = np.round(lifetime_emissions, 2)
            new_columns_data[lifetime_damages_col] = np.round(lifetime_damages, 2)

            df_summary[lifetime_emissions_col] = np.round(lifetime_emissions, 2)
            df_summary[lifetime_damages_col] = np.round(lifetime_damages, 2)

            if menu_mp != 0:
                avoided_emissions_col = f'{scenario_prefix}{category}_avoided_tons_co2e_{mer_type}'
                avoided_damages_col = f'{scenario_prefix}{category}_avoided_damages_climate_{mer_type}'

                new_columns_data[avoided_emissions_col] = np.round(df_copy[f'baseline_{category}_lifetime_tons_co2e_{mer_type}'] - lifetime_emissions, 2)
                new_columns_data[avoided_damages_col] = np.round(df_copy[f'baseline_{category}_lifetime_damages_climate_{mer_type}'] - lifetime_damages, 2)

                df_summary[avoided_emissions_col] = new_columns_data[avoided_emissions_col]
                df_summary[avoided_damages_col] = new_columns_data[avoided_damages_col]

            # Lifetime Emissions and Damages for Pollutants
            for pollutant in pollutants:
                lifetime_emissions_col_pollutant = f'{scenario_prefix}{category}_lifetime_tons_{pollutant}_{mer_type}'
                lifetime_damages_col_pollutant = f'{scenario_prefix}{category}_lifetime_damages_{pollutant}_{mer_type}'

                new_columns_data[lifetime_emissions_col_pollutant] = np.round(lifetime_emissions_pollutants[pollutant], 6)
                new_columns_data[lifetime_damages_col_pollutant] = np.round(lifetime_damages_pollutants[pollutant], 2)

                df_summary[lifetime_emissions_col_pollutant] = new_columns_data[lifetime_emissions_col_pollutant]
                df_summary[lifetime_damages_col_pollutant] = new_columns_data[lifetime_damages_col_pollutant]

                if menu_mp != 0:
                    avoided_emissions_col_pollutant = f'{scenario_prefix}{category}_avoided_tons_{pollutant}_{mer_type}'
                    avoided_damages_col_pollutant = f'{scenario_prefix}{category}_avoided_damages_{pollutant}_{mer_type}'

                    new_columns_data[avoided_emissions_col_pollutant] = np.round(df_copy[f'baseline_{category}_lifetime_tons_{pollutant}_{mer_type}'] - lifetime_emissions_pollutants[pollutant], 6)
                    new_columns_data[avoided_damages_col_pollutant] = np.round(df_copy[f'baseline_{category}_lifetime_damages_{pollutant}_{mer_type}'] - lifetime_damages_pollutants[pollutant], 2)

                    df_summary[avoided_emissions_col_pollutant] = new_columns_data[avoided_emissions_col_pollutant]
                    df_summary[avoided_damages_col_pollutant] = new_columns_data[avoided_damages_col_pollutant]

    df_new_columns = pd.DataFrame(new_columns_data, index=df_copy.index)

    return df_new_columns, df_summary


In [None]:
# # Check unique fuel sources in both dataframes
# fuel_sources_mix = set(df_grid_mix['fuel_source'].unique())
# fuel_sources_emis = set(df_grid_emis_factors['fuel_source'].unique())

# print("Fuel sources in df_grid_mix:", fuel_sources_mix)
# print("Fuel sources in df_grid_emis_factors:", fuel_sources_emis)

# # Merge the dataframes
# df_combined = pd.merge(
#     df_grid_mix,
#     df_grid_emis_factors,
#     on=['cambium_gea_region', 'fuel_source'],
#     how='inner'
# )

# # Calculate emissions contribution
# df_combined['emis_contribution'] = df_combined['fraction_generation'] * df_combined['emis_rate']

# # Sum emissions contributions
# df_emis_factors = df_combined.groupby(
#     ['year', 'cambium_gea_region', 'pollutant']
# )['emis_contribution'].sum().reset_index()
# df_emis_factors

### Electricity - Method 2: CEDM Marginal Emissions Factors and EASIUR (Coal Generation Reduction)

### Electricity - Method 3: CEDM Marginal Emissions Factors and EASIUR (Use Cambium CO2e reductions as a proxy)

### Step 3: Obtain CPI-U Inflation Data
- Series Id:	CUUR0000SA0
- Not Seasonally Adjusted
- Series Title:	All items in U.S. city average, all urban consumers, not seasonally adjusted
- Area:	U.S. city average
- Item:	All items
- Base Period:	1982-84=100

In [None]:
# Load the BLS Inflation Data
filename = 'bls_cpiu_2005-2023.xlsx'
relative_path = os.path.join(r"inflation_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Create a pandas dataframe
df_bls_cpiu = pd.read_excel(file_path, sheet_name='bls_cpiu')

df_bls_cpiu = pd.DataFrame({
    'year': df_bls_cpiu['Year'],
    'cpiu_annual': df_bls_cpiu['Annual']
})

# Obtain the Annual CPIU values for the years of interest
bls_cpi_annual_2008 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2008)].item()
bls_cpi_annual_2010 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2010)].item()
bls_cpi_annual_2013 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2013)].item()
bls_cpi_annual_2018 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2018)].item()
bls_cpi_annual_2019 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2019)].item()
bls_cpi_annual_2020 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2020)].item()
bls_cpi_annual_2021 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2021)].item()
bls_cpi_annual_2022 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2022)].item()
bls_cpi_annual_2023 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2023)].item()

# Precompute constant values
cpi_ratio_2023_2023 = bls_cpi_annual_2023 / bls_cpi_annual_2023
cpi_ratio_2023_2022 = bls_cpi_annual_2023 / bls_cpi_annual_2022
cpi_ratio_2023_2021 = bls_cpi_annual_2023 / bls_cpi_annual_2021  # For EPA VSL (11.3M USD-2021)
cpi_ratio_2023_2020 = bls_cpi_annual_2023 / bls_cpi_annual_2020  # For SCC
cpi_ratio_2023_2019 = bls_cpi_annual_2023 / bls_cpi_annual_2019 
cpi_ratio_2023_2018 = bls_cpi_annual_2023 / bls_cpi_annual_2018 
cpi_ratio_2023_2013 = bls_cpi_annual_2023 / bls_cpi_annual_2013
cpi_ratio_2023_2010 = bls_cpi_annual_2023 / bls_cpi_annual_2010
cpi_ratio_2023_2008 = bls_cpi_annual_2023 / bls_cpi_annual_2008  # For EPA VSL and SCC

### Step 4: Use the updated Social Cost of Carbon (190 USD-2020/ton co2e) and inflate to USD-2023
- EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
- 190 USD-2020 has some inconsistency with the VSL being used. An old study and 2008 VSL is noted
- 190 USD value and inflate to USD 2023 because there is a clear source and ease of replicability.

In [None]:
# For co2e adjust SCC
epa_scc_usd2023_per_ton = 190 * cpi_ratio_2023_2020

print(f"""
Steps 3 and 4: Obtain BLS CPI-U Data and Inflate Current Social Cost of Carbon (SCC) to USD2023
      
EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
Inflate 190 $USD-2020 Social Cost of Carbon to $USD-2023

SCC Value used in analysis is: ${round(epa_scc_usd2023_per_ton, 2)} per mt CO2e
""")

### Step 5: Calculate End-use specific marginal damages

### Baseline Marginal Damages: WHOLE-HOME

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")
# Make copies from scenario consumption to keep df smaller
print("\n", "Creating dataframe to store marginal damages calculations ...")
df_baseline_scenario_damages = df_euss_am_baseline_home.copy()

# calculate_marginal_damages(df, menu_mp, policy_scenario)
df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      menu_mp=menu_mp,
                                                      policy_scenario='No Inflation Reduction Act',
                                                      df_summary=df_baseline_scenario_damages
                                                     )
df_euss_am_baseline_home

## Private Perspective: Annual Energy Costs

### Step 1: Obtain Level Energy Fuel Cost Data from the EIA
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Private Perspective: Annual Energy Costs
-------------------------------------------------------------------------------------------------------
- Step 1: Obtain Level Energy Fuel Cost Data from the EIA
- Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Step 1: Obtain Level Energy Fuel Cost Data from the EIA
-------------------------------------------------------------------------------------------------------
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data
-------------------------------------------------------------------------------------------------------
""")

filename = 'fuel_prices_nominal.csv'
relative_path = os.path.join(r"fuel_prices", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_perkWh = pd.read_csv(file_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# New units for the converted and inflated prices below
# $USD-2023, PREVIOUSLY USED $USD-2021
df_fuelPrices_perkWh['units'] = 'USD2022 per kWh'

years = ['2018', '2019', '2020', '2021', '2022']

# Take dataframe with nominal prices in their base units and convert to $/kWh equivalent
# https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
for year in years:
    for index, row in df_fuelPrices_perkWh.iterrows():
        
        # Propane: (dollars per gallon) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
        if row['fuel_type'] == 'propane':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/91452) * (3412/1)
        
        # Fuel Oil: (dollars/gallon) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'fuelOil':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/138500) * (3412/1)
        
        # Natural Gas: (dollars/cf) * (thousand cf/1000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'naturalGas':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/1000) * (1/1039) * (3412/1)
        
        # Electricity: convert cents per kWh to $ per kWh
        elif row['fuel_type'] == 'electricity':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] / 100

# Convert nominal dollars to real 2022 US dollars (USD2022)
# $USD-2023, PREVIOUSLY USED $USD-2021
df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] * cpi_ratio_2023_2018
df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] * cpi_ratio_2023_2019
df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] * cpi_ratio_2023_2020
df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] * cpi_ratio_2023_2021
df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] * cpi_ratio_2023_2022

# Original dictionary mapping census divisions to states
map_states_census_divisions = {
    "New England": ["CT", "ME", "MA", "NH", "RI", "VT"],
    "Middle Atlantic": ["NJ", "NY", "PA"],
    "East North Central": ["IN", "IL", "MI", "OH", "WI"],
    "West North Central": ["IA", "KS", "MN", "MO", "NE", "ND", "SD"],
    "South Atlantic": ["DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV"],
    "East South Central": ["AL", "KY", "MS", "TN"],
    "West South Central": ["AR", "LA", "OK", "TX"],
    "Mountain": ["AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY"],
    "Pacific": ["AK", "CA", "HI", "OR", "WA"]
}

# Reverse the mapping to create a state-to-census-division map
state_to_census_division = {}
for division, states in map_states_census_divisions.items():
    for state in states:
        state_to_census_division[state] = division

# Function to map location to census division
def map_location_to_census_division(location):
    if location in state_to_census_division:
        return state_to_census_division[location]
    return location

# Apply the function to map locations using .loc
df_fuelPrices_perkWh.loc[:, 'census_division'] = df_fuelPrices_perkWh['location_map'].apply(map_location_to_census_division)
# print(df_fuelPrices_perkWh)

In [None]:
# Project Fuel Prices from 2022 to 2050
filename = 'aeo_projections_2022_2050.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_projection_factors = pd.read_excel(io=file_path, sheet_name='fuel_price_factors_2022_2050')

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
# print(df_fuelPrices_projection_factors)

# Convert the factors dataframe into a lookup dictionary including policy_scenario
factor_dict = df_fuelPrices_projection_factors.set_index(['region', 'fuel_type', 'policy_scenario']).to_dict('index')
# print(factor_dict)

In [None]:
# Pre-IRA policy_scenario: No Inflation Reduction Act
# Pass the desired policy_scenario as a parameter when applying the function
preIRA_projected_prices_df = df_fuelPrices_perkWh.apply(lambda row: project_future_prices(row, factor_dict, 'No Inflation Reduction Act'), axis=1)

# Concatenate the projected prices with the original DataFrame
df_fuelPrices_perkWh_preIRA = pd.concat([df_fuelPrices_perkWh, preIRA_projected_prices_df], axis=1)

# Create Fuel Price Lookup with the policy_scenario included
preIRA_fuel_price_lookup = create_fuel_price_lookup(df_fuelPrices_perkWh_preIRA, 'No Inflation Reduction Act')
# print(preIRA_fuel_price_lookup)

In [None]:
# IRA-Reference policy_scenario: AEO2023 Reference Case
# Pass the desired policy_scenario as a parameter when applying the function
iraRef_projected_prices_df = df_fuelPrices_perkWh.apply(lambda row: project_future_prices(row, factor_dict, 'AEO2023 Reference Case'), axis=1)

# Concatenate the projected prices with the original DataFrame
df_fuelPrices_perkWh_iraRef = pd.concat([df_fuelPrices_perkWh, iraRef_projected_prices_df], axis=1)

# Create Fuel Price Lookup with the policy_scenario included
iraRef_fuel_price_lookup = create_fuel_price_lookup(df_fuelPrices_perkWh_iraRef, 'AEO2023 Reference Case')
# print(iraRef_fuel_price_lookup)

### Step 2: Calculate Annual Operating (Fuel) Costs

### Baseline Fuel Cost: WHOLE-HOME

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
- Create a mapping dictionary for fuel types
- Create new merge columns to ensure a proper match.
- Merge df_copy with df_fuel_prices to get fuel prices for electricity, natural gas, propane, and fuel oil
- Calculate the per kWh fuel costs for each fuel type and region
- Calculate the baseline fuel cost 
-------------------------------------------------------------------------------------------------------
""")
# calculate_annual_fuelCost(df, menu_mp, policy_scenario, drop_fuel_cost_columns)
df_euss_am_baseline_home = calculate_annual_fuelCost(df=df_euss_am_baseline_home,
                                                     menu_mp=menu_mp,
                                                     policy_scenario='No Inflation Reduction Act',
                                                     drop_fuel_cost_columns=False
                                                     )
df_euss_am_baseline_home

## Area Median Income Data Used to determine LMI Designation and IRA Rebates Eligibility/Amount

### PUMA Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0003_ds261_2022_puma.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_puma_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_puma_medianIncome = df_puma_medianIncome.drop(0)
df_puma_medianIncome = df_puma_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB', 'PUMAA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_puma_medianIncome = df_puma_medianIncome[cols_interest]
df_puma_medianIncome = df_puma_medianIncome.rename(columns={"GISJOIN": "gis_joinID_puma", "STUSAB": "state_abbrev", "PUMAA": "puma_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_puma_medianIncome['median_income_USD2023'] = round((df_puma_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_puma_medianIncome

### County Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0005_ds261_2022_county.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_county_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_county_medianIncome = df_county_medianIncome.drop(0)
df_county_medianIncome = df_county_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB', 'COUNTYA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_county_medianIncome = df_county_medianIncome[cols_interest]
df_county_medianIncome = df_county_medianIncome.rename(columns={"GISJOIN": "gis_joinID_county", "STUSAB": "state_abbrev", "COUNTYA": "county_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_county_medianIncome['median_income_USD2023'] = round((df_county_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_county_medianIncome

### State Median Income

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0004_ds261_2022_state.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_state_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
# df_state_medianIncome = df_state_medianIncome.drop(0)
df_state_medianIncome = df_state_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB','STATEA', 'NAME_E', 'AP2PE001', 'AP2PM001']
df_state_medianIncome = df_state_medianIncome[cols_interest]
df_state_medianIncome = df_state_medianIncome.rename(columns={"GISJOIN": "gis_joinID_state", "STUSAB": "state_abbrev", "STATEA": "state_code", "NAME_E": "name_estimate", "AP2PE001": "median_income_USD2022", "AP2PM001": "median_income_USD2022_marginOfError"})
df_state_medianIncome['median_income_USD2023'] = round((df_state_medianIncome['median_income_USD2022'] * cpi_ratio_2023_2022), 2)
df_state_medianIncome

### Adjustment Factors for Construction: 
#### RSMeans City Cost Index
#### Consumer Price Index for All Urban Consumers (CPI, CPI-U)

In [None]:
# Adjust for regional cost differences with RSMeans
filename = "rsMeans_cityCostIndex.csv"
relative_path = os.path.join(r"inflation_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_rsMeans_cityCostIndex = pd.read_csv(file_path)

df_rsMeans_cityCostIndex = pd.DataFrame({
    'State': df_rsMeans_cityCostIndex['State'],
    'City': df_rsMeans_cityCostIndex['City'],
    'Material': (df_rsMeans_cityCostIndex['Material']).round(2),
    'Installation': (df_rsMeans_cityCostIndex['Installation']).round(2),
    'Average': (df_rsMeans_cityCostIndex['Average']).round(2),
})
df_rsMeans_cityCostIndex

# Model Runtime

In [None]:
# Get the current datetime again
end_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Calculate the elapsed time
elapsed_time = datetime.strptime(end_time, "%Y-%m-%d_%H-%M-%S") - datetime.strptime(start_time, "%Y-%m-%d_%H-%M-%S")

# Format the elapsed time
elapsed_seconds = elapsed_time.total_seconds()
elapsed_minutes = int(elapsed_seconds // 60)
elapsed_seconds = int(elapsed_seconds % 60)

# Print the elapsed time
print(f"The code took {elapsed_minutes} minutes and {elapsed_seconds} seconds to execute.")