# Load Util File with TARE Model Functions

In [1]:
import os

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

# Get the current working directory of the project
project_root = os.path.abspath(os.getcwd())
print(f"Project root directory: {project_root}")

# Relative path to the file from the project root
relative_path = r"tare_model_functions_v3.ipynb"

# Construct the absolute path to the file
file_path = os.path.join(project_root, relative_path)
print(f"File path: {file_path}")

# Run the notebook and import variables
if os.path.exists(relative_path):
    get_ipython().run_line_magic('run', f'-i "{relative_path}"')
    print("Loaded All TARE Model Functions")
else:
    print(f"File not found: {relative_path}")

Project root directory: c:\Users\14128\Research\cmu-tare-model
File path: c:\Users\14128\Research\cmu-tare-model\tare_model_functions_v3.ipynb
Loaded All TARE Model Functions


In [2]:
# Storing Result Outputs in output_results folder
relative_path = r"output_results"
output_folder_path = os.path.join(project_root, relative_path)
print(f"Result outputs will be exported here: {output_folder_path}")

Result outputs will be exported here: c:\Users\14128\Research\cmu-tare-model\output_results


# Baseline: 

## Simulate Residential Energy Consumption
- Filter EUSS Data: Only occupied units and Single Family Homes



In [3]:
# The ``inline`` flag will use the appropriate backend to make figures appear inline in the notebook.  
%matplotlib inline

import pandas as pd
import numpy as np

# `plt` is an alias for the `matplotlib.pyplot` module
import matplotlib.pyplot as plt

# import seaborn library (wrapper of matplotlib)
import seaborn as sns
sns.set(style="darkgrid")

# For regex, import re
import re

from datetime import datetime

# Get the current datetime
# Start the timer
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

In [4]:
print("""
-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------
""")

# Measure Package 0: Baseline
menu_mp = 0
input_mp = 'baseline'

filename = "baseline_metadata_and_annual_results.csv"
relative_path = os.path.join(r"euss_data\resstock_amy2018_release_1.1\state", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

print("""
-------------------------------------------------------------------------------------------------------
Data Filters: Only occupied units and Single Family Homes
-------------------------------------------------------------------------------------------------------
""")

# Fix DtypeWarning error in columns:
# 'in.neighbors', 'in.geometry_stories_low_rise', 'in.iso_rto_region', 'in.pv_orientation', 'in.pv_system_size'
columns_to_string = {11: str, 61: str, 121: str, 103: str, 128: str, 129: str}
df_euss_am_baseline = pd.read_csv(file_path, dtype=columns_to_string)
occupancy_filter = df_euss_am_baseline['in.vacancy_status'] == 'Occupied'
df_euss_am_baseline = df_euss_am_baseline.loc[occupancy_filter]

# Filter for single family home building type
house_type_list = ['Single-Family Attached', 'Single-Family Detached']
house_type_filter = df_euss_am_baseline['in.geometry_building_type_recs'].isin(house_type_list)
df_euss_am_baseline = df_euss_am_baseline.loc[house_type_filter]
# df_euss_am_baseline


-------------------------------------------------------------------------------------------------------
Welcome to the Trade-off Analysis of residential Retrofits for energy Equity Tool (TARE Model)
Let's start by reading the data from the NREL EUSS Database.

Make sure that the zipped folders stay organized as they are once unzipped.
If changes are made to the file path, then the program will not run properly.
-------------------------------------------------------------------------------------------------------

-------------------------------------------------------------------------------------------------------
BASELINE (Measure Package 0)
-------------------------------------------------------------------------------------------------------

Retrieved data for filename: baseline_metadata_and_annual_results.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\euss_data\resstock_amy2018_release_1.1\state\baseline_metadata_and_annual_results.csv

------------------------

In [5]:
# # Make a copy of the dataframe
# df_euss_am_baseline = df_euss_am_baseline.copy()

# Choose between national or sub-national level analysis
menu_state = get_menu_choice(menu_prompt, {'N', 'Y'})   # This code is only run in baseline

# National Level 
if menu_state == 'N':
    print("You chose to analyze all of the United States.")
    input_state = 'National'

# Filter down to state or city
else:
    input_state = get_state_choice(df_euss_am_baseline)
    print(f"You chose to filter for: {input_state}")
    state_filter = df_euss_am_baseline['in.state'].eq(input_state)
    df_euss_am_baseline = df_euss_am_baseline.loc[state_filter]

    print(city_prompt)
    print(df_euss_am_baseline['in.city'].value_counts())

    menu_city = get_menu_choice(city_menu_prompt, {'N', 'Y'})

    # Filter for the entire selected state
    if menu_city == 'N':
        print(f"You chose to analyze all of state: {input_state}")
        
    # Filter to a city within the selected state
    else:
        input_cityFilter = get_city_choice(df_euss_am_baseline, input_state)
        print(f"You chose to filter for: {input_state}, {input_cityFilter}")
        city_filter = df_euss_am_baseline['in.city'].eq(f"{input_state}, {input_cityFilter}")
        df_euss_am_baseline = df_euss_am_baseline.loc[city_filter]

# Display the filtered dataframe
df_euss_am_baseline

You chose to filter for: PA

To accurately characterize load profile, it is recommended to select subsets of data with >= 1000 models (~240,000 representative dwelling units).

The following cities (number of models also shown) are available for this state:

in.city
Not in a census Place      7035
In another census Place    5851
PA, Philadelphia           1631
PA, Pittsburgh              329
PA, Erie                    105
PA, Allentown               104
PA, Reading                  92
PA, Bethlehem                85
PA, Scranton                 80
PA, Harrisburg               63
PA, Lancaster                58
PA, Levittown                58
PA, Altoona                  56
PA, York                     56
PA, Wilkes-Barre             48
Name: count, dtype: int64
You chose to analyze all of state: PA


Unnamed: 0,bldg_id,upgrade,weight,applicability,in.sqft,in.ahs_region,in.ashrae_iecc_climate_zone_2004,in.ashrae_iecc_climate_zone_2004_2_a_split,in.bathroom_spot_vent_hour,in.bedrooms,...,out.emissions.natural_gas.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.propane.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.electricity.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.fuel_oil.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.natural_gas.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.propane.lrmer_mid_case_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_95_decarb_by_2035_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_15_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_low_re_cost_25_2025_start.co2e_kg,out.emissions.all_fuels.lrmer_mid_case_15_2025_start.co2e_kg
410846,119,0,242.131013,True,2152.0,"CBSA Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",4A,4A,Hour1,4,...,5540.594966,0.000000,4179.118870,0.000000,5540.594966,0.000000,7380.371515,9495.197860,9035.427108,9719.713836
410847,122,0,242.131013,True,2176.0,"CBSA Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",4A,4A,Hour18,4,...,0.000000,0.000000,3794.589113,13038.928166,0.000000,0.000000,14675.825549,16590.276103,16204.985672,16833.517280
410848,150,0,242.131013,True,1690.0,Non-CBSA Middle Atlantic,5A,5A,Hour2,4,...,0.000000,0.000000,11378.222173,0.000000,0.000000,0.000000,4530.910597,10702.844907,9290.264827,11378.222173
410849,153,0,242.131013,True,2176.0,"CBSA Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",4A,4A,Hour6,3,...,0.000000,0.000000,4612.318634,14589.636195,0.000000,0.000000,16564.727059,18949.747039,18437.677994,19201.954829
410850,162,0,242.131013,True,2663.0,"CBSA Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",4A,4A,Hour8,5,...,890.321536,0.000000,4330.249044,15002.759507,890.321536,0.000000,17837.666007,20060.715408,19596.115712,20223.330087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433889,549882,0,242.131013,True,1202.0,Non-CBSA Middle Atlantic,5A,5A,Hour5,2,...,6675.629132,0.000000,2916.093184,0.000000,6675.629132,0.000000,7916.182492,9422.157695,9095.470037,9591.722316
433891,549915,0,242.131013,True,2176.0,Non-CBSA Middle Atlantic,5A,5A,Hour0,5,...,0.000000,0.000000,16728.421288,0.000000,0.000000,0.000000,6870.847124,15814.400458,13772.445542,16728.421288
433892,549937,0,242.131013,True,885.0,Non-CBSA Middle Atlantic,5A,5A,Hour20,4,...,7002.945922,0.000000,3823.559604,0.000000,7002.945922,0.000000,8577.560537,10516.078249,10222.759114,10826.505527
433893,549963,0,242.131013,True,1690.0,Non-CBSA Middle Atlantic,5A,5A,Hour13,3,...,8277.210267,0.000000,3992.118611,0.000000,8277.210267,0.000000,9951.214681,11921.778240,11608.667963,12269.328878


## Baseline Energy Consumption
### Factors to Project Future Energy Consumption Using EIA Heating Degree Day (HDD) Forecasted Data

In [6]:
# Factors for 2022 to 2050
filename = 'aeo_projections_2022_2050.xlsx'
relative_path = os.path.join(r"projections", filename)
file_path = os.path.join(project_root, relative_path)
df_hdd_projection_factors = pd.read_excel(io=file_path, sheet_name='hdd_factors_2022_2050')

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Convert the factors dataframe into a lookup dictionary
hdd_factor_lookup = df_hdd_projection_factors.set_index(['census_division']).to_dict('index')
hdd_factor_lookup

Retrieved data for filename: aeo_projections_2022_2050.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\projections\aeo_projections_2022_2050.xlsx


{'National': {2022: 1,
  2023: 1.0028349414260749,
  2024: 0.9389536266963965,
  2025: 0.9344844368179533,
  2026: 0.9300828169743566,
  2027: 0.9257070259326153,
  2028: 0.9212779053519207,
  2029: 0.9168538581973203,
  2030: 0.9124351151864318,
  2031: 0.9080377039911245,
  2032: 0.9036541297129915,
  2033: 0.8992492812396443,
  2034: 0.894875507855348,
  2035: 0.8904758482849783,
  2036: 0.8860390020882589,
  2037: 0.8817285900905196,
  2038: 0.877365886428882,
  2039: 0.8729314040841085,
  2040: 0.8685839209369028,
  2041: 0.8642702226890459,
  2042: 0.8599120736340495,
  2043: 0.8555441810694344,
  2044: 0.8511753084862802,
  2045: 0.8468232704962843,
  2046: 0.8425090534289743,
  2047: 0.8382247585710751,
  2048: 0.8339389072548168,
  2049: 0.8297055204635582,
  2050: 0.8255002687057338},
 'East North Central': {2022: 1,
  2023: 0.9811731756651626,
  2024: 0.9307608526528707,
  2025: 0.928426948809709,
  2026: 0.9262486385560915,
  2027: 0.9239147347129298,
  2028: 0.921580830869

In [7]:
print("""
-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------
""")

# df_baseline_enduse(df_baseline, df_enduse, category, fuel_filter='Yes', tech_filter='Yes')
df_euss_am_baseline_home = df_enduse_refactored(df_baseline = df_euss_am_baseline,
                                                fuel_filter = 'Yes',
                                                tech_filter = 'Yes')

# Project Future Energy Consumption
df_euss_am_baseline_home = project_future_consumption(df=df_euss_am_baseline_home, hdd_factor_lookup=hdd_factor_lookup, menu_mp=menu_mp)
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------

Processing column: in.clothes_dryer
Initial data types: object
Data types after processing: object
Processing column: in.cooking_range
Initial data types: object
Data types after processing: object
15651 rows remain after applying total heating consumption calculation
Filtered for the following fuels: ['Natural Gas', 'Electricity', 'Propane', 'Fuel Oil']
14853 rows remain after applying heating fuel filter
Filtered for the following Heating technologies: ['Electricity ASHP', 'Electricity Baseboard', 'Electricity Electric Boiler', 'Electricity Electric Furnace', 'Fuel Oil Fuel Boiler', 'Fuel Oil Fuel Furnace', 'Natural Gas Fuel Boiler', 'Natural Gas Fuel Furnace', 'Propane Fuel Boiler', 'Propane Fuel Furnace']
12950 rows remain after applying heating technolo

Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,cambium_GEA_region,state,city,county,...,baseline_2027_cooking_consumption,baseline_2028_cooking_consumption,baseline_2029_cooking_consumption,baseline_2030_cooking_consumption,baseline_2031_cooking_consumption,baseline_2032_cooking_consumption,baseline_2033_cooking_consumption,baseline_2034_cooking_consumption,baseline_2035_cooking_consumption,baseline_2036_cooking_consumption
410846,119,2152.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,986.18,986.18,986.18,986.18,986.18,986.18,986.18,986.18,986.18,986.18
410847,122,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,In another census Place,G4200450,...,339.67,339.67,339.67,339.67,339.67,339.67,339.67,339.67,339.67,339.67
410848,150,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,In another census Place,G4201190,...,324.43,324.43,324.43,324.43,324.43,324.43,324.43,324.43,324.43,324.43
410850,162,2663.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,429.94,429.94,429.94,429.94,429.94,429.94,429.94,429.94,429.94,429.94
410851,181,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Not in a census Place,G4200270,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433884,549748,3301.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200910,...,841.41,841.41,841.41,841.41,841.41,841.41,841.41,841.41,841.41,841.41
433891,549915,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Lancaster,G4200710,...,603.14,603.14,603.14,603.14,603.14,603.14,603.14,603.14,603.14,603.14
433892,549937,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4200050,...,1341.39,1341.39,1341.39,1341.39,1341.39,1341.39,1341.39,1341.39,1341.39,1341.39
433893,549963,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4201290,...,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61,405.61


## Factors to Project Future Energy Consumption Using EIA Heating Degree Day (HDD) Forecasted Data

In [8]:
print("""
-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------
""")



# df_baseline_enduse(df_baseline, df_enduse, category, fuel_filter='Yes', tech_filter='Yes')
df_euss_am_baseline_home = df_enduse_refactored(df_baseline = df_euss_am_baseline,
                                                fuel_filter = 'Yes',
                                                tech_filter = 'Yes')
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Baseline Consumption:
-------------------------------------------------------------------------------------------------------

Processing column: in.clothes_dryer
Initial data types: object
Data types after processing: object
Processing column: in.cooking_range
Initial data types: object
Data types after processing: object
15651 rows remain after applying total heating consumption calculation
Filtered for the following fuels: ['Natural Gas', 'Electricity', 'Propane', 'Fuel Oil']
14853 rows remain after applying heating fuel filter
Filtered for the following Heating technologies: ['Electricity ASHP', 'Electricity Baseboard', 'Electricity Electric Boiler', 'Electricity Electric Furnace', 'Fuel Oil Fuel Boiler', 'Fuel Oil Fuel Furnace', 'Natural Gas Fuel Boiler', 'Natural Gas Fuel Furnace', 'Propane Fuel Boiler', 'Propane Fuel Furnace']
12950 rows remain after applying heating technolo

Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,cambium_GEA_region,state,city,county,...,base_naturalGas_clothesDrying_consumption,base_propane_clothesDrying_consumption,base_cooking_fuel,base_electricity_cooking_consumption,base_naturalGas_cooking_consumption,base_propane_cooking_consumption,baseline_heating_consumption,baseline_waterHeating_consumption,baseline_clothesDrying_consumption,baseline_cooking_consumption
410846,119,2152.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,629.809729,0.0,Natural Gas,32.530889,953.653262,0.00000,17917.779078,4801.969482,677.287243,986.184151
410847,122,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,In another census Place,G4200450,...,0.000000,0.0,Electricity,339.669370,0.000000,0.00000,40613.202738,2391.459931,275.779877,339.669370
410848,150,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,In another census Place,G4201190,...,0.000000,0.0,Electricity,324.429674,0.000000,0.00000,20677.336273,1339.920932,320.033608,324.429674
410850,162,2663.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,0.000000,0.0,Electricity,429.935260,0.000000,0.00000,49481.533317,3905.172008,989.701003,429.935260
410851,181,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Not in a census Place,G4200270,...,0.000000,0.0,Electricity,405.610361,0.000000,0.00000,21317.110419,2506.636862,400.042011,405.610361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433884,549748,3301.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200910,...,475.068204,0.0,Natural Gas,27.841752,813.565290,0.00000,19924.729765,4080.135437,510.822875,841.407042
433891,549915,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Lancaster,G4200710,...,0.000000,0.0,Electricity,603.140262,0.000000,0.00000,22397.663454,4859.704483,773.121483,603.140262
433892,549937,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4200050,...,1762.529415,0.0,Natural Gas,44.253732,1297.132556,0.00000,15632.703945,12024.999073,1895.290610,1341.386287
433893,549963,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4201290,...,0.000000,0.0,Electricity,405.610361,0.000000,0.00000,32303.172549,4003.643887,400.042011,405.610361


## Public Perspective: Monetized Marginal Damages from Emissions

### Step 1: Calculate emissions factors for different fuel sources

### Marginal Emissions Factors
#### Electricity
- STATE Regional Aggregation is what is used in the Parth Analysis 
- "Marginal Emissions Factors for Electricity"
- Factor Type: Marginal
- Calculation Method: Regression
- Metric: Emissions [kg/MWh]")
- Predictor: Year")
- Pollutants: SO2, NOx, PM2.5, CO2")
#### Fossil Fuels
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf

In [9]:
print("""
-------------------------------------------------------------------------------------------------------
Public Perspective: Monetized Marginal Damages from Emissions
-------------------------------------------------------------------------------------------------------
Step 1: Calculate emissions factors for different fuel sources
- Electricity
- Natural Gas
- Fuel Oil 
- Propane
-------------------------------------------------------------------------------------------------------
""")


-------------------------------------------------------------------------------------------------------
Public Perspective: Monetized Marginal Damages from Emissions
-------------------------------------------------------------------------------------------------------
Step 1: Calculate emissions factors for different fuel sources
- Electricity
- Natural Gas
- Fuel Oil 
- Propane
-------------------------------------------------------------------------------------------------------



In [10]:
print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: ELECTRICITY
-------------------------------------------------------------------------------------------------------
Electricity Marginal Emissions Factors:
- STATE Regional Aggregation is what is used in the Parth Analysis 
- "Marginal Emissions Factors for Electricity"
- Factor Type: Marginal
- Calculation Method: Regression
- Metric: Emissions [kg/MWh]
- Predictor: Year")
- Pollutants: SO2, NOx, PM2.5, CO2
-------------------------------------------------------------------------------------------------------
""")
filename = 'Generation-MARREG-EMIT-state-byYear.csv'
relative_path = os.path.join(r"margEmis_electricity", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_margEmissions = pd.read_csv(file_path, index_col=0)

# Convert from kg/MWh to lb/kWh
# Obtain value from the CSV file and convert to lbs pollutant per kWh 
df_margEmis_electricity = pd.DataFrame({
    'state': df_margEmissions['region'],
    'fuel_type': 'electricity',
    'pollutant': df_margEmissions['pollutant'],
    'value': df_margEmissions['factor'] * (2.20462/1) * (1/1000),
    'unit': '[lb/kWh]'
})
df_margEmis_electricity


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: ELECTRICITY
-------------------------------------------------------------------------------------------------------
Electricity Marginal Emissions Factors:
- STATE Regional Aggregation is what is used in the Parth Analysis 
- "Marginal Emissions Factors for Electricity"
- Factor Type: Marginal
- Calculation Method: Regression
- Metric: Emissions [kg/MWh]
- Predictor: Year")
- Pollutants: SO2, NOx, PM2.5, CO2
-------------------------------------------------------------------------------------------------------

Retrieved data for filename: Generation-MARREG-EMIT-state-byYear.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margEmis_electricity\Generation-MARREG-EMIT-state-byYear.csv




Unnamed: 0,state,fuel_type,pollutant,value,unit
1,AL,electricity,so2,0.000131,[lb/kWh]
2,AL,electricity,nox,0.000440,[lb/kWh]
3,AL,electricity,pm25,0.000140,[lb/kWh]
4,AL,electricity,co2,1.172667,[lb/kWh]
5,AR,electricity,so2,0.002555,[lb/kWh]
...,...,...,...,...,...
184,WV,electricity,co2,1.713400,[lb/kWh]
185,WY,electricity,so2,0.001223,[lb/kWh]
186,WY,electricity,nox,0.001602,[lb/kWh]
187,WY,electricity,pm25,0.000259,[lb/kWh]


In [11]:
print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------
""")

fuelOil_factors = calculate_fossilFuel_emission_factor("fuelOil", 0.0015, 0.1300, 0.83, 161.0, 1000, 138500)
naturalGas_factors = calculate_fossilFuel_emission_factor("naturalGas", 0.0006, 0.0922, 1.9, 117.6, 1000000, 1039)
propane_factors = calculate_fossilFuel_emission_factor("propane", 0.0002, 0.1421, 0.17, 136.6, 1000, 91452)

all_factors = {**fuelOil_factors, **naturalGas_factors, **propane_factors}

df_margEmis_factors = pd.DataFrame.from_dict(all_factors, orient="index", columns=["value"])
df_margEmis_factors.reset_index(inplace=True)
df_margEmis_factors.columns = ["pollutant", "value"]
df_margEmis_factors[["fuel_type", "pollutant"]] = df_margEmis_factors["pollutant"].str.split("_", expand=True)
df_margEmis_factors["unit"] = "[lb/kWh]"

# Add the 'state' column and assign 'National' to every row
df_margEmis_factors = df_margEmis_factors.assign(state='National')

df_margEmis_factors = df_margEmis_factors[["state", "fuel_type", "pollutant", "value", "unit"]]
df_margEmis_factors


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
Fossil Fuels (Natural Gas, Fuel Oil, Propane):
- NOx, SO2, CO2: 
    - RESNET Table 7.1.2 Emissions Factors for Household Combustion Fuels
    - Source: https://www.resnet.us/wp-content/uploads/ANSIRESNETICC301-2022_resnetpblshd.pdf
    - All factors are in units of lb/Mbtu so energy consumption in kWh need to be converted to kWh 
    - (1 lb / Mbtu) * (1 Mbtu / 1x10^6 Btu) * (3412 Btu / 1 kWh)
- PM2.5: 
    - A National Methodology and Emission Inventory for Residential Fuel Combustion
    - Source: https://www3.epa.gov/ttnchie1/conference/ei12/area/haneke.pdf
-------------------------------------------------------------------------------------------------------



Unnamed: 0,state,fuel_type,pollutant,value,unit
0,National,fuelOil,so2,5.118e-06,[lb/kWh]
1,National,fuelOil,nox,0.00044356,[lb/kWh]
2,National,fuelOil,pm25,2.044736e-05,[lb/kWh]
3,National,fuelOil,co2,0.549332,[lb/kWh]
4,National,naturalGas,so2,2.0472e-06,[lb/kWh]
5,National,naturalGas,nox,0.0003145864,[lb/kWh]
6,National,naturalGas,pm25,6.239461e-06,[lb/kWh]
7,National,naturalGas,co2,0.4012512,[lb/kWh]
8,National,propane,so2,6.824e-07,[lb/kWh]
9,National,propane,nox,0.0004848452,[lb/kWh]


### Step 2: Adjust Natural Gas & Electricity Emissions Factors for Natural Gas Leakage

In [12]:
print("""
-------------------------------------------------------------------------------------------------------
Step 2: Adjust Natural Gas & Electricity Emissions Factors for Natural Gas Leakage
-------------------------------------------------------------------------------------------------------
Natural Gas (Deetjen et al.): 
"To account for the natural gas infrastructure's leakage of the greenhouse gas methane, 
we estimate the amount of methane leaked per therm of natural gas consumed for heating and 
convert to CO2-equivalent emissions via the GWP of methane. We assume that for every therm of 
natural gas consumed for heating, 0.023 therms of methane escape to the atmosphere [28]. 
Using the energy density of natural gas, we convert from therms to kilograms and multiply 
by 28—the GWP of methane [29]—to calculate a rate of 1.27 kg CO2-equivalent per therm of 
consumed natural gas."

Electricity NERC Regions (Deetjen et al): 
"To account for the natural gas infrastructure's leakage of the greenhouse gas methane, 
we estimate the amount of methane leaked per MWh of electricity generation in each NERC 
region and convert to CO2-equivalent emissions via the global warming potential (GWP) of methane. 
For example, we find that in 2017, the states comprising the western region (WECC) of 
the US electric grid consumed 1.45 million MMcf of natural gas in the power sector [27]. 
We assume that for every MMcf of consumed natural gas, 0.023 MMcf of methane is leaked into 
the atmosphere [28]. By multiplying that leakage rate by the 1.45 million MMcf of consumed 
natural gas, converting to tonnes, and multiplying by a GWP of 28 [29], we estimate 
that the 2017 WECC power sector contributed to methane leakage amounting to 18.6 Mt CO2-equivalent.
By dividing this 18.6 Mt by the 724 TWh of the WECC states' generated electricity [27], we 
calculate a methane leakage rate factor of 25.7 kg MWh−1. In the same manner, we calculate the 
methane leakage rate factors for the other NERC regions. We use the 100 years GWP value of 28 
for methane. Although there have been proposals to use 20 years GWP values, recent research 
shows that the benefits of this alternative 20 years time from are overstated [30]."
-------------------------------------------------------------------------------------------------------
""")
filename = 'natural_gas_leakage_rate.csv'
relative_path = os.path.join(r"margEmis_electricity", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_naturalGas_leakage_rate = pd.read_csv(file_path)

state_abbreviations = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'District of Columbia': 'DC',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# Map full state names to abbreviations
df_naturalGas_leakage_rate['state'] = df_naturalGas_leakage_rate['state_name'].map(state_abbreviations)

# thousand Mcf * (0.023 Mcf leak/1 Mcf) * (19.3 tonnes/1000 Mcf) * (1000 kg/1 tonne) * (2.205 lb/1 kg)) / (thousand MWh * (1000 MWh/thousand MWh)) 
df_naturalGas_leakage_rate['naturalGas_leakage_lbCH4_perMWh'] = (df_naturalGas_leakage_rate['naturalGas_electricity_generation'] * (0.023/1) * (19.3/1) * (1000/1) * (2.205/1)) / (df_naturalGas_leakage_rate['net_generation'] * (1000/1)) 

# (lb CH4/MWh) * (28 lb CO2e/1 lb CH4)
df_naturalGas_leakage_rate['naturalGas_leakage_lbCO2e_perMWh'] = df_naturalGas_leakage_rate['naturalGas_leakage_lbCH4_perMWh'] * (28/1)

# (lb CO2e/MWh) * (1 MWh / 1000 kWh)
df_naturalGas_leakage_rate['naturalGas_leakage_lbCO2e_perkWh'] = df_naturalGas_leakage_rate['naturalGas_leakage_lbCO2e_perMWh'] * (1/1000)
df_naturalGas_leakage_rate


-------------------------------------------------------------------------------------------------------
Step 2: Adjust Natural Gas & Electricity Emissions Factors for Natural Gas Leakage
-------------------------------------------------------------------------------------------------------
Natural Gas (Deetjen et al.): 
"To account for the natural gas infrastructure's leakage of the greenhouse gas methane, 
we estimate the amount of methane leaked per therm of natural gas consumed for heating and 
convert to CO2-equivalent emissions via the GWP of methane. We assume that for every therm of 
natural gas consumed for heating, 0.023 therms of methane escape to the atmosphere [28]. 
Using the energy density of natural gas, we convert from therms to kilograms and multiply 
by 28—the GWP of methane [29]—to calculate a rate of 1.27 kg CO2-equivalent per therm of 
consumed natural gas."

Electricity NERC Regions (Deetjen et al): 
"To account for the natural gas infrastructure's leakage of the

Unnamed: 0,state_name,naturalGas_electricity_generation,units,net_generation,units.1,state,naturalGas_leakage_lbCH4_perMWh,naturalGas_leakage_lbCO2e_perMWh,naturalGas_leakage_lbCO2e_perkWh
0,Connecticut,135274,thousand Mcf,38376,thousand megawatthours,CT,3.450233,96.606511,0.096607
1,Maine,13718,thousand Mcf,9308,thousand megawatthours,ME,1.442541,40.391148,0.040391
2,Massachusetts,128810,thousand Mcf,26263,thousand megawatthours,MA,4.800638,134.417872,0.134418
3,New Hampshire,21563,thousand Mcf,16988,thousand megawatthours,NH,1.242398,34.787138,0.034787
4,Rhode Island,57260,thousand Mcf,8170,thousand megawatthours,RI,6.859983,192.079518,0.19208
5,Vermont,11,thousand Mcf,2175,thousand megawatthours,VT,0.00495,0.138607,0.000139
6,New Jersey,267122,thousand Mcf,73727,thousand megawatthours,NJ,3.546311,99.296711,0.099297
7,New York,390742,thousand Mcf,130301,thousand megawatthours,NY,2.935189,82.185295,0.082185
8,Pennsylvania,530196,thousand Mcf,212285,thousand megawatthours,PA,2.444617,68.449284,0.068449
9,Illinois,135512,thousand Mcf,184791,thousand megawatthours,IL,0.717779,20.097809,0.020098


In [13]:
# NATURAL GAS LEAKAGE: NATURAL GAS USED IN ELECTRICITY GENERATION
if 'naturalGas_leakage_lbCO2e_perkWh' in df_margEmis_electricity.columns:
    df_margEmis_electricity.drop(columns=['naturalGas_leakage_lbCO2e_perkWh'], inplace=True)

df_margEmis_electricity = df_margEmis_electricity.merge(
    df_naturalGas_leakage_rate[['state', 'naturalGas_leakage_lbCO2e_perkWh']],
    how='left',  # Use a left join to keep all rows from df_margEmis_electricity
    on=['state']  # Merge on the 'state' column
)
# Set 'naturalGas_leakage_lbCO2e_perkWh' to zero where 'pollutant' is not 'co2'
df_margEmis_electricity.loc[df_margEmis_electricity['pollutant'] != 'co2', 'naturalGas_leakage_lbCO2e_perkWh'] = 0.0

# Calculate adjusted marginal emissions factore with natural gas fugitive emissions
df_margEmis_electricity['margEmis_factor_adjusted'] = df_margEmis_electricity['value'] + df_margEmis_electricity['naturalGas_leakage_lbCO2e_perkWh'] 

# Create a factor to multiply marginal damages by
df_margEmis_electricity['naturalGas_leakage_factor'] = df_margEmis_electricity['margEmis_factor_adjusted'] / df_margEmis_electricity['value']

# Reorder columns
df_margEmis_electricity = df_margEmis_electricity[['state', 'fuel_type', 'pollutant', 'value', 'unit', 'naturalGas_leakage_lbCO2e_perkWh', 'margEmis_factor_adjusted', 'naturalGas_leakage_factor']]
df_margEmis_electricity

Unnamed: 0,state,fuel_type,pollutant,value,unit,naturalGas_leakage_lbCO2e_perkWh,margEmis_factor_adjusted,naturalGas_leakage_factor
0,AL,electricity,so2,0.000131,[lb/kWh],0.000000,0.000131,1.000000
1,AL,electricity,nox,0.000440,[lb/kWh],0.000000,0.000440,1.000000
2,AL,electricity,pm25,0.000140,[lb/kWh],0.000000,0.000140,1.000000
3,AL,electricity,co2,1.172667,[lb/kWh],0.080872,1.253539,1.068964
4,AR,electricity,so2,0.002555,[lb/kWh],0.000000,0.002555,1.000000
...,...,...,...,...,...,...,...,...
183,WV,electricity,co2,1.713400,[lb/kWh],0.004440,1.717840,1.002591
184,WY,electricity,so2,0.001223,[lb/kWh],0.000000,0.001223,1.000000
185,WY,electricity,nox,0.001602,[lb/kWh],0.000000,0.001602,1.000000
186,WY,electricity,pm25,0.000259,[lb/kWh],0.000000,0.000259,1.000000


In [14]:
# NATURAL GAS LEAKAGE: NATURAL GAS INFRASTRUCTURE
# leakage rate for natural gas infrastructure
# 1 Therm = 29.30 kWh --> 1.27 kg CO2e/therm * (1 therm/29.30 kWh) = 0.043 kg CO2e/kWh = 0.095 lb CO2e/kWh
df_margEmis_factors['naturalGas_leakage_lbCO2e_perkWh'] = 0.095

# Set 'naturalGas_leakage_lbCO2e_perkWh' to zero where 'pollutant' is not 'co2'
df_margEmis_factors.loc[df_margEmis_factors['pollutant'] != 'co2', 'naturalGas_leakage_lbCO2e_perkWh'] = 0.0

# Set 'naturalGas_leakage_lbCO2e_perkWh' to zero where 'fuel_type' is not 'naturalGas'
df_margEmis_factors.loc[df_margEmis_factors['fuel_type'] != 'naturalGas', 'naturalGas_leakage_lbCO2e_perkWh'] = 0.0

# Calculate adjusted marginal emissions factor with natural gas fugitive emissions
df_margEmis_factors['margEmis_factor_adjusted'] = df_margEmis_factors['value'] + df_margEmis_factors['naturalGas_leakage_lbCO2e_perkWh'] 

# Create a factor to multiply marginal damages by
df_margEmis_factors['naturalGas_leakage_factor'] = df_margEmis_factors['margEmis_factor_adjusted'] / df_margEmis_factors['value']

# Reorder columns
df_margEmis_factors = df_margEmis_factors[['state', 'fuel_type', 'pollutant', 'value', 'unit', 'naturalGas_leakage_lbCO2e_perkWh', 'margEmis_factor_adjusted', 'naturalGas_leakage_factor']]
df_margEmis_factors

Unnamed: 0,state,fuel_type,pollutant,value,unit,naturalGas_leakage_lbCO2e_perkWh,margEmis_factor_adjusted,naturalGas_leakage_factor
0,National,fuelOil,so2,5.118e-06,[lb/kWh],0.0,5.118e-06,1.0
1,National,fuelOil,nox,0.00044356,[lb/kWh],0.0,0.00044356,1.0
2,National,fuelOil,pm25,2.044736e-05,[lb/kWh],0.0,2.044736e-05,1.0
3,National,fuelOil,co2,0.549332,[lb/kWh],0.0,0.549332,1.0
4,National,naturalGas,so2,2.0472e-06,[lb/kWh],0.0,2.0472e-06,1.0
5,National,naturalGas,nox,0.0003145864,[lb/kWh],0.0,0.0003145864,1.0
6,National,naturalGas,pm25,6.239461e-06,[lb/kWh],0.0,6.239461e-06,1.0
7,National,naturalGas,co2,0.4012512,[lb/kWh],0.095,0.4962512,1.236759
8,National,propane,so2,6.824e-07,[lb/kWh],0.0,6.824e-07,1.0
9,National,propane,nox,0.0004848452,[lb/kWh],0.0,0.0004848452,1.0


In [15]:
# Append df_margEmissions_electricity to df_margEmis_factors
# This produces a dataframe of marginal emissions rates for various fuel types
df_margEmis_factors = pd.concat([df_margEmis_factors, df_margEmis_electricity], ignore_index=True)
df_margEmis_factors

Unnamed: 0,state,fuel_type,pollutant,value,unit,naturalGas_leakage_lbCO2e_perkWh,margEmis_factor_adjusted,naturalGas_leakage_factor
0,National,fuelOil,so2,0.000005,[lb/kWh],0.000000,0.000005,1.000000
1,National,fuelOil,nox,0.000444,[lb/kWh],0.000000,0.000444,1.000000
2,National,fuelOil,pm25,0.000020,[lb/kWh],0.000000,0.000020,1.000000
3,National,fuelOil,co2,0.549332,[lb/kWh],0.000000,0.549332,1.000000
4,National,naturalGas,so2,0.000002,[lb/kWh],0.000000,0.000002,1.000000
...,...,...,...,...,...,...,...,...
195,WV,electricity,co2,1.713400,[lb/kWh],0.004440,1.717840,1.002591
196,WY,electricity,so2,0.001223,[lb/kWh],0.000000,0.001223,1.000000
197,WY,electricity,nox,0.001602,[lb/kWh],0.000000,0.001602,1.000000
198,WY,electricity,pm25,0.000259,[lb/kWh],0.000000,0.000259,1.000000


### Step 3: Quantify monitized damages using EASIUR Marginal Social Cost Factors
#### THE STEPS BELOW SUMMARIZE WHAT WAS DONE TO OBTAIN ALL NATIONAL EASIUR VALUES INCLUDED ON GITHUB
- Obtain all of the dwelling unit latitude and longitude values from the metadata columns
- Make a new dataframe of just the longitude and latitude values 
    - Make sure that the order is (longitude, latitude)
    - Do not include the index or column name when exporting 
- Export the CSV
- **Upload csv to EASIUR Website:**
    - Website: https://barney.ce.cmu.edu/~jinhyok/easiur/online/
    - See inputs in respective sections
- Download the file and put it in the 'easiur_batchConversion_download' folder
- Copy and paste the name of the file EASIUR generated when prompted
- Copy and paste the name of the filepath for the 'easiur_batchConversion_download' folder when prompted
- Match up the longitude and latitudes for each dwelling unit with the selected damages

### Fossil Fuels: EASIUR Marginal Damage (Social Cost) Factors Info
- Factor Type: Marginal Social Cost
- Calculation Method: Regression
- Metric: Marginal Social Cost [USD per metric ton]
- Dollar Year: 2010
- Income Year: 2018
- Population Year: 2018
- Aggregation: Longitude, and Latitude Coordinates
- Pollutants: Primary PM2.5, Sulfur Dioxide (SO2), Nitrogen Oxides (NOx), Ammonia (NH3)
- Elevation (Ground, 150m, 300m) and Seasons (Winter, Spring, Summer, Fall)

In [16]:
# Create a dataframe containing just the longitude and Latitude
df_EASIUR_batchConversion = pd.DataFrame({
    'Longitude':df_euss_am_baseline['in.weather_file_longitude'],
    'Latitude':df_euss_am_baseline['in.weather_file_latitude'],
})

# Drop duplicate rows based on 'Longitude' and 'Latitude' columns
df_EASIUR_batchConversion.drop_duplicates(subset=['Longitude', 'Latitude'], keep='first', inplace=True)

# Create a location ID for the name of the batch conversion file
while True:
    if menu_state == 'N':
        location_id = 'National'
        print("You chose to analyze all of the United States.")
        break
    elif menu_state == 'Y':
        if menu_city == 'N':
            try:
                location_id = str(input_state)
                print(f"Location ID is: {location_id}")
                break
            except ValueError:
                print("Invalid input for state!")
        elif menu_city == 'Y':
            try:
                location_id = input_cityFilter.replace(', ', '_').strip()
                print(f"Location ID is: {location_id}")
                break
            except AttributeError:
                print("Invalid input for city filter!")
        else:
            print("Incorrect state or city filter assignment!")
    else:
        print("Invalid data location. Check your inputs at the beginning of this notebook!")

# Updated GitHub code has EASIUR file with all unique latitude, longitude coordinates in the US
filename = 'easiur_National2024-06-1421-22.csv'
relative_path = os.path.join(r"margDamages_EASIUR\easiur_batchConversion_download", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_margSocialCosts = pd.read_csv(file_path)

# Convert from kg/MWh to lb/kWh
# Obtain value from the CSV file and convert to lbs pollutant per kWh 

# Define df_margSocialCosts_EASIUR DataFrame first
df_margSocialCosts_EASIUR = pd.DataFrame({
    'Longitude': df_margSocialCosts['Longitude'],
    'Latitude': df_margSocialCosts['Latitude']
})
df_margSocialCosts_EASIUR

Location ID is: PA
Retrieved data for filename: easiur_National2024-06-1421-22.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\easiur_batchConversion_download\easiur_National2024-06-1421-22.csv




Unnamed: 0,Longitude,Latitude
0,-87.04,31.42
1,-85.86,33.59
2,-86.39,32.30
3,-85.45,31.32
4,-86.75,33.56
...,...,...
1024,-106.72,44.38
1025,-105.54,44.34
1026,-107.95,43.97
1027,-108.08,44.52


### Step 4: Inflate Marginal Social Cost (Damage) Factors using BLS CPI for All Urban Consumers (CPI-U)
- Series Id:	CUUR0000SA0
- Not Seasonally Adjusted
- Series Title:	All items in U.S. city average, all urban consumers, not seasonally adjusted
- Area:	U.S. city average
- Item:	All items
- Base Period:	1982-84=100

### Use the updated Social Cost of Carbon (190 USD-2020/ton CO2) and inflate to USD-2022
- EPA Median for 2% near term discount rate and most commonly mentioned value is 190 USD-2020 using the GIVE model.
- 190 USD-2020 has some inconsistency with the VSL being used. An old study and 2008 VSL is noted
- 190 USD value and inflate to USD 2022 because there is a clear source and ease of replicability.

### Adjustment for VSL
- EASIUR uses a VSL of 8.8M USD-2010 
- New EPA VSL is 11.3M USD-2021
- INFLATE TO $USD-2022

### ALL DOLLAR VALUES ARE NOW IN USD2022, PREVIOUSLY USED $USD-2021

In [17]:
# Load the BLS Inflation Data
filename = 'bls_cpiu_2005-2023.xlsx'
relative_path = os.path.join(r"inflation_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# Create a pandas dataframe
df_bls_cpiu = pd.read_excel(file_path, sheet_name='bls_cpiu')

df_bls_cpiu = pd.DataFrame({
    'year': df_bls_cpiu['Year'],
    'cpiu_annual': df_bls_cpiu['Annual']
})

# Obtain the Annual CPIU values for the years of interest
bls_cpi_annual_2008 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2008)].item()
bls_cpi_annual_2010 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2010)].item()
bls_cpi_annual_2013 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2013)].item()
bls_cpi_annual_2018 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2018)].item()
bls_cpi_annual_2019 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2019)].item()
bls_cpi_annual_2020 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2020)].item()
bls_cpi_annual_2021 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2021)].item()
bls_cpi_annual_2022 = df_bls_cpiu['cpiu_annual'].loc[(df_bls_cpiu['year'] == 2022)].item()

# Precompute constant values
cpi_ratio_2022_2022 = bls_cpi_annual_2022 / bls_cpi_annual_2022
cpi_ratio_2022_2021 = bls_cpi_annual_2022 / bls_cpi_annual_2021  # For EPA VSL (11.3M USD-2021)
cpi_ratio_2022_2020 = bls_cpi_annual_2022 / bls_cpi_annual_2020  # For SCC
cpi_ratio_2022_2019 = bls_cpi_annual_2022 / bls_cpi_annual_2019 
cpi_ratio_2022_2018 = bls_cpi_annual_2022 / bls_cpi_annual_2018 
cpi_ratio_2022_2013 = bls_cpi_annual_2022 / bls_cpi_annual_2013
cpi_ratio_2022_2010 = bls_cpi_annual_2022 / bls_cpi_annual_2010
cpi_ratio_2022_2008 = bls_cpi_annual_2022 / bls_cpi_annual_2008  # For EPA VSL and SCC

# 2021 US EPA VSL is $11.3M in 2021 
# INFLATE TO USD2022, PREVIOUSLY USD2021
df_margSocialCosts_EASIUR['current_VSL_USD2022'] = 11.3 * cpi_ratio_2022_2021

# Easiur uses a VSL of $8.8 M USD2010
# Inflate to 2022 $USD
# PREVIOUSLY USD2021
df_margSocialCosts_EASIUR['easiur_VSL_USD2022'] = 8.8 * cpi_ratio_2022_2010

# Use df_margSocialCosts_EASIUR in the calculation of other columns
# Also adjust the VSL
df_margSocialCosts_EASIUR['margSocialCosts_pm25'] = round((df_margSocialCosts['PM25 Annual Ground'] * (1/2204.6) * (df_margSocialCosts_EASIUR['current_VSL_USD2022']/df_margSocialCosts_EASIUR['easiur_VSL_USD2022'])), 2)
df_margSocialCosts_EASIUR['margSocialCosts_so2'] = round((df_margSocialCosts['SO2 Annual Ground'] * (1/2204.6) * (df_margSocialCosts_EASIUR['current_VSL_USD2022']/df_margSocialCosts_EASIUR['easiur_VSL_USD2022'])), 2)
df_margSocialCosts_EASIUR['margSocialCosts_nox'] = round((df_margSocialCosts['NOX Annual Ground'] * (1/2204.6) * (df_margSocialCosts_EASIUR['current_VSL_USD2022']/df_margSocialCosts_EASIUR['easiur_VSL_USD2022'])), 2)

# Note that SCC of $190 USD-2020 has some inconsistency with the VSL being used. An old study and 2008 VSL is noted
# We use the $190 USD value and inflate to USD 2022 because there is a clear source and ease of replicability.
# PREVIOUSLY USED USD2021
df_margSocialCosts_EASIUR['margSocialCosts_co2'] = round((190 * cpi_ratio_2022_2020 * (1/2204.6)), 2)
df_margSocialCosts_EASIUR['unit'] = '[$USD2022/lb]'
df_margSocialCosts_EASIUR

Retrieved data for filename: bls_cpiu_2005-2023.xlsx
Located at filepath: c:\Users\14128\Research\cmu-tare-model\inflation_data\bls_cpiu_2005-2023.xlsx


Unnamed: 0,Longitude,Latitude,current_VSL_USD2022,easiur_VSL_USD2022,margSocialCosts_pm25,margSocialCosts_so2,margSocialCosts_nox,margSocialCosts_co2,unit
0,-87.04,31.42,12.204309,11.810562,38.00,9.35,1.70,0.1,[$USD2022/lb]
1,-85.86,33.59,12.204309,11.810562,57.22,10.19,2.64,0.1,[$USD2022/lb]
2,-86.39,32.30,12.204309,11.810562,52.07,9.70,2.13,0.1,[$USD2022/lb]
3,-85.45,31.32,12.204309,11.810562,43.54,9.76,1.86,0.1,[$USD2022/lb]
4,-86.75,33.56,12.204309,11.810562,98.71,11.19,3.00,0.1,[$USD2022/lb]
...,...,...,...,...,...,...,...,...,...
1024,-106.72,44.38,12.204309,11.810562,10.33,5.02,0.76,0.1,[$USD2022/lb]
1025,-105.54,44.34,12.204309,11.810562,11.44,6.05,0.96,0.1,[$USD2022/lb]
1026,-107.95,43.97,12.204309,11.810562,8.85,4.47,0.61,0.1,[$USD2022/lb]
1027,-108.08,44.52,12.204309,11.810562,9.29,5.74,0.82,0.1,[$USD2022/lb]


## Electricity CEDM-EASIUR Marginal Damages: Current and Decarbonizing Grid
- Factor Type: Marginal
- Calculation Method: Regression
- Metric: Marginal Damages EASIUR [USD per MWh or kWh]
- Year: 2018
- Regional Aggregation: eGRID subregion (all regions)
- Pollutants: SO2, NOx, PM2.5 CO2

SCC Adjustment: We use the EPA suggested 190 USD-2020 value for the social cost of carbon and inflate to 2022 USD. **PREVIOUSLY USED 2021 USD**

VSL: "We use a value of a statistical life (VSL) of USD 8.8 million (in 2010 dollars) for both our AP2 and EASIUR calculations. EASIUR reports damage intensities in USD/metric ton using this VSL and dollar year."

Bistline et al. 2023 (ERL Paper): 
- ERL Paper: https://iopscience.iop.org/article/10.1088/1748-9326/ad0d3b/meta#erlad0d3bs6
- Data on Zenodo: https://zenodo.org/records/8322973
- Health related emissions reductions from 2021 levels (assumed mostly coal):
    - "In 2035, unabated coal reductions from 2021 levels range from 44%–100% with IRA (84% average) versus 12%–63% in the reference (38% average)."
    - Pre-IRA Scenario: -38% from 2021 levels
    - IRA-Ref Scenario: -84% from 2021 levels
    - IRA-High Scenario: -100% from 2021 levels
- CO2 reductions from 2005 (see image):
    - Pre-IRA Scenario: -53% from 2005 levels
    - IRA-Ref Scenario: -78% from 2005 levels
    - IRA-High Scenario: -87% from 2005 levels

![grid_decarb_assumptions.png](attachment:grid_decarb_assumptions.png)

In [18]:
# For CO2 adjust SCC
# Create an adjustment factor for the new Social Cost of Carbon (SCC)
epa_scc = 190 * cpi_ratio_2022_2020
old_scc = 40 * cpi_ratio_2022_2010
scc_adjustment_factor = epa_scc / old_scc

# For Health-Related Emissions Adjust for different Value of a Statistical Life (VSL) values
# Current VSL is $11.3 M USD2021
# INFLATE TO USD2022, PREVIOUSLY USD2021
current_VSL_USD2022 = 11.3 * cpi_ratio_2022_2021

# Easiur uses a VSL of $8.8 M USD2010
# INFLATE TO USD2022, PREVIOUSLY USD2021
easiur_VSL_USD2022 = 8.8 * (cpi_ratio_2022_2010)

# Calculate VSL adjustment factor
vsl_adjustment_factor = current_VSL_USD2022 / easiur_VSL_USD2022

### Damages from Climate Related Emissions

In [19]:
# Note only 2006 data available, used in place of 2005
filename = 'Generation-MARREG-DAMEASIUR-egrid-byYear_climate2006.csv'
relative_path = os.path.join(r"margDamages_EASIUR", filename)
file_path = os.path.join(project_root, relative_path)
df_margDamages_climate2006 = pd.read_csv(file_path, index_col=0)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

Retrieved data for filename: Generation-MARREG-DAMEASIUR-egrid-byYear_climate2006.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\Generation-MARREG-DAMEASIUR-egrid-byYear_climate2006.csv


In [20]:
# Note 2018 start year
filename = 'Generation-MARREG-DAMEASIUR-egrid-byYear_climate2018.csv'
relative_path = os.path.join(r"margDamages_EASIUR", filename)
file_path = os.path.join(project_root, relative_path)
df_margDamages_climate2018 = pd.read_csv(file_path, index_col=0)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

Retrieved data for filename: Generation-MARREG-DAMEASIUR-egrid-byYear_climate2018.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\Generation-MARREG-DAMEASIUR-egrid-byYear_climate2018.csv


In [21]:
# # Marginal damages [$/kWh]
# # Inflate from 2010 to 2021
# # Note only 2006 data available, used in place of 2005
# df_margDamages_EASIUR_climate = pd.DataFrame({
#     'subregion_eGRID': df_margDamages_climate2006['region'],
#     'pollutant': df_margDamages_climate2006['pollutant'],
#     'unit': '[$/kWh]',
#     '2030_decarb': '68% from 2005',
#     'margDamages_dollarPerkWh_adjustVSL_ref': (df_margDamages_climate2006['factor'] * (scc_adjustment_factor) * (1/1000)) * (cpi_ratio_2021_2010),
#     'margDamages_dollarPerkWh_adjustVSL_2018': (df_margDamages_climate2018['factor'] * (scc_adjustment_factor) * (1/1000)) * (cpi_ratio_2021_2010)
# })
# df_margDamages_EASIUR_climate['margDamages_decarb_2030'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.68)
# df_margDamages_EASIUR_climate['reduction_margDamages_2030'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_climate['margDamages_decarb_2030']
# df_margDamages_EASIUR_climate['reduction_margDamages_annual'] = df_margDamages_EASIUR_climate['reduction_margDamages_2030'] / 12 # Relative to 2018, 
# # df_margDamages_EASIUR_climate

In [22]:
# Inflate from $USD2010 to $USD2022
# Note only 2006 data available, used in place of 2005
df_margDamages_EASIUR_climate = pd.DataFrame({
    'subregion_eGRID': df_margDamages_climate2006['region'],
    'pollutant': df_margDamages_climate2006['pollutant'],
    'unit': '[$USD2022/kWh]',
    'preIRA_2035_decarb': '53% from 2005',
    'iraRef_2035_decarb': '78% from 2005',
    'iraHigh_2035_decarb': '87% from 2005',
    'margDamages_dollarPerkWh_adjustVSL_ref': (df_margDamages_climate2006['factor'] * (scc_adjustment_factor) * (1/1000)) * (cpi_ratio_2022_2010),
    'margDamages_dollarPerkWh_adjustVSL_2018': (df_margDamages_climate2018['factor'] * (scc_adjustment_factor) * (1/1000)) * (cpi_ratio_2022_2010)
})

# Pre-IRA Scenario: CO2 emissions -53% from 2005 levels
df_margDamages_EASIUR_climate['preIRA_margDamages_decarb_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.53)
df_margDamages_EASIUR_climate['preIRA_reduction_margDamages_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_climate['preIRA_margDamages_decarb_2035']
df_margDamages_EASIUR_climate['preIRA_reduction_margDamages_annual'] = df_margDamages_EASIUR_climate['preIRA_reduction_margDamages_2035'] / 17 # Relative to 2018, 

# IRA-Ref Scenario: CO2 emissions -78% from 2005 levels
df_margDamages_EASIUR_climate['iraRef_margDamages_decarb_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.78)
df_margDamages_EASIUR_climate['iraRef_reduction_margDamages_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_climate['iraRef_margDamages_decarb_2035']
df_margDamages_EASIUR_climate['iraRef_reduction_margDamages_annual'] = df_margDamages_EASIUR_climate['iraRef_reduction_margDamages_2035'] / 17 # Relative to 2018, 

# IRA-High Scenario: CO2 emissions -87% from 2005 levels
df_margDamages_EASIUR_climate['iraHigh_margDamages_decarb_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.87)
df_margDamages_EASIUR_climate['iraHigh_reduction_margDamages_2035'] = df_margDamages_EASIUR_climate['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_climate['iraHigh_margDamages_decarb_2035']
df_margDamages_EASIUR_climate['iraHigh_reduction_margDamages_annual'] = df_margDamages_EASIUR_climate['iraHigh_reduction_margDamages_2035'] / 17 # Relative to 2018, 

print(df_margDamages_EASIUR_climate)

   subregion_eGRID pollutant            unit preIRA_2035_decarb  \
1             AZNM       co2  [$USD2022/kWh]      53% from 2005   
2             CAMX       co2  [$USD2022/kWh]      53% from 2005   
3             ERCT       co2  [$USD2022/kWh]      53% from 2005   
4             FRCC       co2  [$USD2022/kWh]      53% from 2005   
5             MROE       co2  [$USD2022/kWh]      53% from 2005   
6             MROW       co2  [$USD2022/kWh]      53% from 2005   
7             NEWE       co2  [$USD2022/kWh]      53% from 2005   
8             NWPP       co2  [$USD2022/kWh]      53% from 2005   
9             NYCW       co2  [$USD2022/kWh]      53% from 2005   
10            NYLI       co2  [$USD2022/kWh]      53% from 2005   
11            NYUP       co2  [$USD2022/kWh]      53% from 2005   
12            RFCE       co2  [$USD2022/kWh]      53% from 2005   
13            RFCM       co2  [$USD2022/kWh]      53% from 2005   
14            RFCW       co2  [$USD2022/kWh]      53% from 200

### Damages from Health Related Emissions

In [23]:
filename = 'Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv'
relative_path = os.path.join(r"margDamages_EASIUR", filename)
file_path = os.path.join(project_root, relative_path)
df_margDamages_health2018 = pd.read_csv(file_path, index_col=0)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

Retrieved data for filename: Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv
Located at filepath: c:\Users\14128\Research\cmu-tare-model\margDamages_EASIUR\Generation-MARREG-DAMEASIUR-egrid-byYear_health2018.csv


In [24]:
# # Marginal damages [$/kWh]
# # Inflate from 2010 to 2021
# # Note only 2018 data available, used in place of 2021
# df_margDamages_EASIUR_health = pd.DataFrame({
#     'subregion_eGRID': df_margDamages_health2018['region'],
#     'pollutant': df_margDamages_health2018['pollutant'],
#     'unit': '[$/kWh]',
#     '2030_decarb': '65% from 2021',
#     'margDamages_dollarPerkWh_adjustVSL_ref': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2021_2010),
#     'margDamages_dollarPerkWh_adjustVSL_2018': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2021_2010)
# })
# df_margDamages_EASIUR_health['margDamages_decarb_2030'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.65)
# df_margDamages_EASIUR_health['reduction_margDamages_2030'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_health['margDamages_decarb_2030']
# df_margDamages_EASIUR_health['reduction_margDamages_annual'] = df_margDamages_EASIUR_health['reduction_margDamages_2030'] / 9
# # df_margDamages_EASIUR_health

In [25]:
# Marginal damages [$/kWh]
# Inflate from 2010 to 2022
# Note only 2018 data available, used in place of 2021
df_margDamages_EASIUR_health = pd.DataFrame({
    'subregion_eGRID': df_margDamages_health2018['region'],
    'pollutant': df_margDamages_health2018['pollutant'],
    'unit': '[$USD2022/kWh]',
    'preIRA_2035_decarb': '38% from 2021',
    'iraRef_2035_decarb': '84% from 2021',
    'iraHigh_2035_decarb': '100% from 2021',   
    'margDamages_dollarPerkWh_adjustVSL_ref': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2022_2010),
    'margDamages_dollarPerkWh_adjustVSL_2018': (df_margDamages_health2018['factor'] * (vsl_adjustment_factor) * (1/1000)) * (cpi_ratio_2022_2010)
})

# Pre-IRA Scenario: Health-Related emissions (Unabated Coal) -38% from 2021 levels
df_margDamages_EASIUR_health['preIRA_margDamages_decarb_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.38)
df_margDamages_EASIUR_health['preIRA_reduction_margDamages_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_health['preIRA_margDamages_decarb_2035']
df_margDamages_EASIUR_health['preIRA_reduction_margDamages_annual'] = df_margDamages_EASIUR_health['preIRA_reduction_margDamages_2035'] / 14 # Relative to 2021, 

# IRA-Ref Scenario: Health-Related emissions (Unabated Coal) -84% from 2021 levels
df_margDamages_EASIUR_health['iraRef_margDamages_decarb_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] * 0.84)
df_margDamages_EASIUR_health['iraRef_reduction_margDamages_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_health['iraRef_margDamages_decarb_2035']
df_margDamages_EASIUR_health['iraRef_reduction_margDamages_annual'] = df_margDamages_EASIUR_health['iraRef_reduction_margDamages_2035'] / 14 # Relative to 2021, 

# IRA-High Scenario: Health-Related emissions (Unabated Coal) -100% from 2021 levels
df_margDamages_EASIUR_health['iraHigh_margDamages_decarb_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] - (df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_ref'] * 1.0)
df_margDamages_EASIUR_health['iraHigh_reduction_margDamages_2035'] = df_margDamages_EASIUR_health['margDamages_dollarPerkWh_adjustVSL_2018'] - df_margDamages_EASIUR_health['iraHigh_margDamages_decarb_2035']
df_margDamages_EASIUR_health['iraHigh_reduction_margDamages_annual'] = df_margDamages_EASIUR_health['iraHigh_reduction_margDamages_2035'] / 14 # Relative to 2021, 

df_margDamages_EASIUR_health

Unnamed: 0,subregion_eGRID,pollutant,unit,preIRA_2035_decarb,iraRef_2035_decarb,iraHigh_2035_decarb,margDamages_dollarPerkWh_adjustVSL_ref,margDamages_dollarPerkWh_adjustVSL_2018,preIRA_margDamages_decarb_2035,preIRA_reduction_margDamages_2035,preIRA_reduction_margDamages_annual,iraRef_margDamages_decarb_2035,iraRef_reduction_margDamages_2035,iraRef_reduction_margDamages_annual,iraHigh_margDamages_decarb_2035,iraHigh_reduction_margDamages_2035,iraHigh_reduction_margDamages_annual
1,AZNM,so2,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.001691,0.001691,0.001048,0.000643,0.000046,0.000271,0.001420,0.000101,0.0,0.001691,0.000121
2,AZNM,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.000552,0.000552,0.000342,0.000210,0.000015,0.000088,0.000464,0.000033,0.0,0.000552,0.000039
3,AZNM,pm25,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.001946,0.001946,0.001206,0.000739,0.000053,0.000311,0.001635,0.000117,0.0,0.001946,0.000139
4,CAMX,so2,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.000575,0.000575,0.000356,0.000218,0.000016,0.000092,0.000483,0.000034,0.0,0.000575,0.000041
5,CAMX,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.000685,0.000685,0.000425,0.000260,0.000019,0.000110,0.000576,0.000041,0.0,0.000685,0.000049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,SRTV,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002670,0.002670,0.001656,0.001015,0.000072,0.000427,0.002243,0.000160,0.0,0.002670,0.000191
63,SRTV,pm25,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007134,0.007134,0.004423,0.002711,0.000194,0.001142,0.005993,0.000428,0.0,0.007134,0.000510
64,SRVC,so2,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007901,0.007901,0.004899,0.003002,0.000214,0.001264,0.006637,0.000474,0.0,0.007901,0.000564
65,SRVC,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002335,0.002335,0.001448,0.000887,0.000063,0.000374,0.001961,0.000140,0.0,0.002335,0.000167


In [26]:
# Combine them top to bottom
df_margDamages_EASIUR = pd.concat([df_margDamages_EASIUR_climate, df_margDamages_EASIUR_health], ignore_index=True)
df_margDamages_EASIUR

Unnamed: 0,subregion_eGRID,pollutant,unit,preIRA_2035_decarb,iraRef_2035_decarb,iraHigh_2035_decarb,margDamages_dollarPerkWh_adjustVSL_ref,margDamages_dollarPerkWh_adjustVSL_2018,preIRA_margDamages_decarb_2035,preIRA_reduction_margDamages_2035,preIRA_reduction_margDamages_annual,iraRef_margDamages_decarb_2035,iraRef_reduction_margDamages_2035,iraRef_reduction_margDamages_annual,iraHigh_margDamages_decarb_2035,iraHigh_reduction_margDamages_2035,iraHigh_reduction_margDamages_annual
0,AZNM,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.100830,0.116539,0.047390,0.069149,0.004068,0.022183,0.094356,0.005550,0.013108,0.103431,0.006084
1,CAMX,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.095056,0.095870,0.044676,0.051194,0.003011,0.020912,0.074958,0.004409,0.012357,0.083513,0.004913
2,ERCT,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.115046,0.119261,0.054071,0.065190,0.003835,0.025310,0.093951,0.005527,0.014956,0.104305,0.006136
3,FRCC,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.132399,0.100881,0.062228,0.038653,0.002274,0.029128,0.071753,0.004221,0.017212,0.083669,0.004922
4,MROE,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.171261,0.170812,0.080492,0.090320,0.005313,0.037677,0.133135,0.007831,0.022264,0.148548,0.008738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,SRTV,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002670,0.002670,0.001656,0.001015,0.000072,0.000427,0.002243,0.000160,0.000000,0.002670,0.000191
84,SRTV,pm25,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007134,0.007134,0.004423,0.002711,0.000194,0.001142,0.005993,0.000428,0.000000,0.007134,0.000510
85,SRVC,so2,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007901,0.007901,0.004899,0.003002,0.000214,0.001264,0.006637,0.000474,0.000000,0.007901,0.000564
86,SRVC,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002335,0.002335,0.001448,0.000887,0.000063,0.000374,0.001961,0.000140,0.000000,0.002335,0.000167


In [27]:
# # Marginal Damages for a Gradually Decarbonizing Grid
# df_margDamages_gridDecarb = df_margDamages_EASIUR.copy()

# years = list(range(2019, 2051))

# # Apply reductions
# for year in years:
#     column_name = f'margDamages_dollarPerkWh_adjustVSL_{year}'
#     df_margDamages_gridDecarb[column_name] = df_margDamages_gridDecarb['margDamages_dollarPerkWh_adjustVSL_ref']  # Initialize

#     for index, row in df_margDamages_gridDecarb.iterrows():  # Correctly unpack the index and row
#         if year <= 2030:
#             # Climate reduction (C02) applicable from 2019 to 2030
#             # No Health reductions before 2022
#             if 2019 <= year < 2022:
#                 if row['pollutant'] == 'co2':
#                     df_margDamages_gridDecarb.at[index, column_name] = df_margDamages_gridDecarb.at[index, f'margDamages_dollarPerkWh_adjustVSL_{year-1}'] - df_margDamages_gridDecarb.at[index, 'reduction_margDamages_annual']
#                 else:
#                     df_margDamages_gridDecarb.at[index, column_name] = df_margDamages_gridDecarb.at[index, f'margDamages_dollarPerkWh_adjustVSL_{year-1}']
            
#             # Health reduction applicable from 2022 to 2030
#             # Climate reductions continue
#             elif year >= 2022:
#                 df_margDamages_gridDecarb.at[index, column_name] = df_margDamages_gridDecarb.at[index, f'margDamages_dollarPerkWh_adjustVSL_{year-1}'] - df_margDamages_gridDecarb.at[index, 'reduction_margDamages_annual']

#         # Post-2030, damage values should be at the 2030 level
#         else:
#             df_margDamages_gridDecarb.at[index, column_name] = df_margDamages_gridDecarb.at[index, f'margDamages_dollarPerkWh_adjustVSL_2030']

# df_margDamages_gridDecarb

In [28]:
# Marginal Damages for a Gradually Decarbonizing Grid
df_margDamages_gridDecarb = df_margDamages_EASIUR.copy()

# Assuming df_margDamages_gridDecarb is already a copy of df_margDamages_EASIUR
years = list(range(2018, 2051))
scenario_list = ['preIRA_', 'iraRef_', 'iraHigh_']

# Initialize columns for each scenario and year based on 2018 reference
for scenario in scenario_list:
    for year in years:
        # Define the name of the new column for the current year and scenario
        column_name = f'{scenario}margDamages_dollarPerkWh_adjustVSL_{year}'
        
        if year == 2018:
            # For the base year 2018, use the existing marginal damages as the starting point
            df_margDamages_gridDecarb[column_name] = df_margDamages_gridDecarb['margDamages_dollarPerkWh_adjustVSL_2018']
        else:
            # Initialize other years' columns to None, to be filled in later steps
            df_margDamages_gridDecarb[column_name] = None

# Apply reductions iteratively for each year and scenario
for scenario in scenario_list:
    for year in years:
        # Define the name of the current year's column and the previous year's column
        column_name = f'{scenario}margDamages_dollarPerkWh_adjustVSL_{year}'
        previous_year_column = f'{scenario}margDamages_dollarPerkWh_adjustVSL_{year-1}'

        # Handle reductions for years between 2019 and 2035
        if year > 2018 and year <= 2035:
            # Determine if the pollutant is CO2 (climate-related) for applying early reductions
            is_climate = df_margDamages_gridDecarb['pollutant'] == 'co2'
            
            # Determine if the year is 2022 or later, when health-related reductions begin
            is_health_period = year >= 2022

            # Retrieve the annual reduction factor for the current scenario
            reduction = df_margDamages_gridDecarb[f'{scenario}reduction_margDamages_annual']
            
            # Apply the reduction only if it's relevant:
            # - For CO2 (climate) reductions start from 2019 onwards.
            # - For health-related pollutants, reductions start from 2022 onwards.
            df_margDamages_gridDecarb[column_name] = df_margDamages_gridDecarb[previous_year_column] - reduction * (is_climate | is_health_period)

        # For years after 2035, no further reductions are applied
        elif year > 2035:
            # Set the value to be the same as the 2035 value, maintaining it constant
            df_margDamages_gridDecarb[column_name] = df_margDamages_gridDecarb[f'{scenario}margDamages_dollarPerkWh_adjustVSL_2035']

# Ensure all values in the numeric columns are non-negative by clipping at zero
numeric_columns = df_margDamages_gridDecarb.select_dtypes(include=['float64', 'int64']).columns
df_margDamages_gridDecarb[numeric_columns] = df_margDamages_gridDecarb[numeric_columns].clip(lower=0)

# Output the final DataFrame to check the calculated marginal damages
df_margDamages_gridDecarb

  df_margDamages_gridDecarb[column_name] = None


Unnamed: 0,subregion_eGRID,pollutant,unit,preIRA_2035_decarb,iraRef_2035_decarb,iraHigh_2035_decarb,margDamages_dollarPerkWh_adjustVSL_ref,margDamages_dollarPerkWh_adjustVSL_2018,preIRA_margDamages_decarb_2035,preIRA_reduction_margDamages_2035,...,iraHigh_margDamages_dollarPerkWh_adjustVSL_2041,iraHigh_margDamages_dollarPerkWh_adjustVSL_2042,iraHigh_margDamages_dollarPerkWh_adjustVSL_2043,iraHigh_margDamages_dollarPerkWh_adjustVSL_2044,iraHigh_margDamages_dollarPerkWh_adjustVSL_2045,iraHigh_margDamages_dollarPerkWh_adjustVSL_2046,iraHigh_margDamages_dollarPerkWh_adjustVSL_2047,iraHigh_margDamages_dollarPerkWh_adjustVSL_2048,iraHigh_margDamages_dollarPerkWh_adjustVSL_2049,iraHigh_margDamages_dollarPerkWh_adjustVSL_2050
0,AZNM,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.100830,0.116539,0.047390,0.069149,...,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02,1.310794e-02
1,CAMX,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.095056,0.095870,0.044676,0.051194,...,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02,1.235723e-02
2,ERCT,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.115046,0.119261,0.054071,0.065190,...,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02,1.495594e-02
3,FRCC,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.132399,0.100881,0.062228,0.038653,...,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02,1.721193e-02
4,MROE,co2,[$USD2022/kWh],53% from 2005,78% from 2005,87% from 2005,0.171261,0.170812,0.080492,0.090320,...,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02,2.226387e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,SRTV,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002670,0.002670,0.001656,0.001015,...,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20,5.421011e-20
84,SRTV,pm25,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007134,0.007134,0.004423,0.002711,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
85,SRVC,so2,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.007901,0.007901,0.004899,0.003002,...,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18,1.517883e-18
86,SRVC,nox,[$USD2022/kWh],38% from 2021,84% from 2021,100% from 2021,0.002335,0.002335,0.001448,0.000887,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00


In [29]:
# Create a lookup dictionary for the state-specific emissions factors for electricity
electricity_factors = df_margEmis_factors[df_margEmis_factors['fuel_type'] == 'electricity']
emis_electricity_lookup = {(row['pollutant'], row['state']): row['margEmis_factor_adjusted'] for _, row in electricity_factors.iterrows()}

# ELECTRICITY DAMAGES LOOKUP: Previously damages_CEDM_lookup
# Create empty dictionaries to store the lookup data for each scenario
preIRA_damages_electricity_lookup = {}
iraRef_damages_electricity_lookup = {}
iraHigh_damages_electricity_lookup = {}

# Assuming df_margDamages_gridDecarb is already a copy of df_margDamages_EASIUR
years = list(range(2018, 2051))
scenario_list = ['preIRA_', 'iraRef_', 'iraHigh_']

# Define a mapping for scenario descriptions
scenario_description_map = {
    'preIRA_': 'No Inflation Reduction Act',
    'iraRef_': 'AEO2023 Reference Case',
    'iraHigh_': 'High Uptake of Inflation Reduction Act'
}

for scenario in scenario_list:
    for year in years:
        # Create an empty dictionary for the current year
        year_lookup = {}

        for _, row in df_margDamages_gridDecarb.iterrows():
            # Include the scenario description in the key
            key = (row['pollutant'], row['subregion_eGRID'], scenario_description_map[scenario])

            if scenario == 'preIRA_':
                year_lookup[key] = row[f'preIRA_margDamages_dollarPerkWh_adjustVSL_{year}']
                preIRA_damages_electricity_lookup[year] = year_lookup

            elif scenario == 'iraRef_':
                year_lookup[key] = row[f'iraRef_margDamages_dollarPerkWh_adjustVSL_{year}']
                iraRef_damages_electricity_lookup[year] = year_lookup

            elif scenario == 'iraHigh_':
                year_lookup[key] = row[f'iraHigh_margDamages_dollarPerkWh_adjustVSL_{year}']
                iraHigh_damages_electricity_lookup[year] = year_lookup

# Now, you have three dictionaries for lookup:
# preIRA_damages_electricity_lookup, iraRef_damages_electricity_lookup, iraHigh_damages_electricity_lookup
# iraRef_damages_electricity_lookup

In [30]:
# # Create an empty dictionary to store the lookup data
# dict_margDamages_gridDecarb = {}

# for year in years:
#     # Create an empty dictionary for the current year
#     year_lookup = {}
    
#     for _, row in df_margDamages_gridDecarb.iterrows():
#         year_lookup[(row['subregion_eGRID'], row['pollutant'])] = row[f'margDamages_dollarPerkWh_adjustVSL_{str(year)}']
    
#     # Add the year-specific lookup to the main lookup_data dictionary
#     dict_margDamages_gridDecarb[year] = year_lookup

# # Now, you have a lookup_data dictionary containing emissions factors for each state and year
# dict_margDamages_gridDecarb

In [31]:
print("""
-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------
""")
pollutants = ['so2', 'nox', 'pm25', 'co2']

# Create a lookup dictionary of emissions factors for fossil fuels
fossilFuel_factors = df_margEmis_factors[df_margEmis_factors['state'] == 'National']
emis_fossilFuel_lookup = {(row['fuel_type'], row['pollutant']): row['margEmis_factor_adjusted'] for _, row in fossilFuel_factors.iterrows()}

# FOSSIL FUELS DAMAGES LOOKUP
# Create a damages_fossilFuel_lookup dictionary from df_margSocialCosts_EASIUR
damages_fossilFuel_lookup = df_margSocialCosts_EASIUR.groupby(['Longitude', 'Latitude']).first().to_dict()


-------------------------------------------------------------------------------------------------------
Calculate Emissions Factors: FOSSIL FUELS
-------------------------------------------------------------------------------------------------------



In [32]:
# print("""
# -------------------------------------------------------------------------------------------------------
# Calculate Emissions Factors: FOSSIL FUELS
# -------------------------------------------------------------------------------------------------------
# """)

# # Create a lookup dictionary for the national emissions factors
# national_factors = df_margEmis_factors[df_margEmis_factors['state'] == 'National']
# national_lookup = {(row['fuel_type'], row['pollutant']): row['margEmis_factor_adjusted'] for _, row in national_factors.iterrows()}

# # Create a lookup dictionary for the state-specific emissions factors for electricity
# electricity_factors = df_margEmis_factors[df_margEmis_factors['fuel_type'] == 'electricity']
# electricity_lookup = {(row['pollutant'], row['state']): row['margEmis_factor_adjusted'] for _, row in electricity_factors.iterrows()}

# pollutants = ['so2', 'nox', 'pm25', 'co2']

# # ELECTRICITY CEDM DAMAGES LOOKUP
# damages_CEDM_lookup = {(row['pollutant'], row['subregion_eGRID']): row['margDamages_dollarPerkWh_adjustVSL_ref'] for _, row in df_margDamages_EASIUR.iterrows()}

# # FOSSIL FUELS DAMAGES LOOKUP
# # Create a damages_fossilFuel_lookup dictionary from df_margSocialCosts_EASIUR
# damages_fossilFuel_lookup = df_margSocialCosts_EASIUR.groupby(['Longitude', 'Latitude']).first().to_dict()

In [63]:
def calculate_marginal_damages(df, scenario, emis_fossilFuel_lookup, damages_electricity_lookup, grid_decarb=False):
    """
    Calculate the marginal damages of different pollutants based on various conditions and mappings.
    
    Parameters:
    - df (DataFrame): The primary data frame containing pollutant emissions data and other relevant attributes.
    - scenario (str): The decarbonization scenario name.
    - emis_fossilFuel_lookup (dict): Lookup table for fossil fuel emission factors.
    - damages_electricity_lookup (dict): Lookup table for electricity-related emission damages.
    - grid_decarb (bool): Flag to determine if grid decarbonization calculations are to be applied.
    
    Returns:
    - DataFrame: The updated data frame with calculated marginal damages and potentially new columns.
    """
    df_copy = df.copy()
    
    pollutants = ['so2', 'nox', 'pm25', 'co2']
    
    # Map GEA regions to eGRID subregions
    df_copy['subregion_eGRID'] = df_copy['cambium_GEA_region'].map(gea_eGRID_mapping)
    
    # Map state to natural gas leakage factor
    state_to_factor = dict(zip(df_margEmis_factors['state'], df_margEmis_factors['naturalGas_leakage_factor']))
    df_copy['naturalGas_leakage_factor'] = df_copy['state'].map(state_to_factor)

    # Ensure the marginal social costs are present
    for pollutant in pollutants:
        if f'margSocialCosts_{pollutant}' not in df_copy.columns:
            df_copy[f'margSocialCosts_{pollutant}'] = df_copy.apply(
                lambda row: emis_fossilFuel_lookup.get(f'margSocialCosts_{pollutant}', {}).get(
                    (row['Longitude'], row['Latitude']), np.nan), axis=1)
    
    td_losses = 0.06  # Transmission and distribution losses

    if grid_decarb:
        new_columns_df = calculate_damages_decarb_grid(df_copy, menu_mp, years, td_losses, damages_electricity_lookup, scenario)
    else:
        new_columns_df = calculate_damages_current_grid(df_copy, menu_mp, pollutants, td_losses, emis_fossilFuel_lookup, damages_electricity_lookup, scenario)

    columns_to_add = new_columns_df.columns.difference(df_copy.columns)
    df_copy = pd.concat([df_copy, new_columns_df[columns_to_add]], axis=1)

    return df_copy


In [64]:

def calculate_damages_current_grid(df_copy, menu_mp, pollutants, td_losses, emis_fossilFuel_lookup, damages_electricity_lookup, scenario):
    """
    Calculate damages for the current electricity grid scenario.

    Parameters:
        df_copy (DataFrame): The DataFrame containing consumption data.
        menu_mp (int): The menu number for the measure package.
        pollutants (list): List of pollutants.
        td_losses (float): Transmission and distribution losses.
        emis_fossilFuel_lookup (dict): Lookup table for national emissions factors.
        damages_electricity_lookup (dict): Lookup table for damages.

    Returns:
        DataFrame: The DataFrame with calculated damages.
    """
    new_columns = {}  # Dictionary to hold new columns for efficient concatenation

    if menu_mp == 0:
        for category, lifetime in equipment_specs.items():
            for year in range(1, lifetime + 1):
                year_label = year + 2021

                hdd_factor = 1.0  # Default heating degree day factor
                if category in ['heating', 'waterHeating']:
                    hdd_factor = df_copy['census_division'].map(lambda x: hdd_factor_lookup.get(x, {}).get(year_label, hdd_factor_lookup['National'][year_label]))

                for pollutant in pollutants:
                    emis_naturalGas = (df_copy[f'base_naturalGas_{category}_consumption'] * hdd_factor) * emis_fossilFuel_lookup.get(('naturalGas', pollutant), np.nan)
                    emis_propane = (df_copy[f'base_propane_{category}_consumption'] * hdd_factor) * emis_fossilFuel_lookup.get(('propane', pollutant), np.nan)

                    if 'cooking' in category or 'clothesDrying' in category:
                        total_emissions = emis_naturalGas.fillna(0) + emis_propane.fillna(0)
                    else:
                        emis_fuelOil = (df_copy[f'base_fuelOil_{category}_consumption'] * hdd_factor) * emis_fossilFuel_lookup.get(('fuelOil', pollutant), np.nan)
                        total_emissions = emis_naturalGas.fillna(0) + emis_propane.fillna(0) + emis_fuelOil.fillna(0)

                    fossilFuel_damages = total_emissions * df_copy[f'margSocialCosts_{pollutant}']
                    
                    def calculate_electricity_damage(row):
                        damage_value = (row[f'base_electricity_{category}_consumption'] * hdd_factor * row['naturalGas_leakage_factor']) * (1/(1-td_losses)) * damages_electricity_lookup.get(year_label, {}).get(
                            (pollutant, row['subregion_eGRID'], scenario), np.nan)
                        return damage_value if np.isscalar(damage_value) else np.nan  # Ensure scalar

                    electricity_damages = df_copy.apply(calculate_electricity_damage, axis=1).fillna(0)

                    # Calculate and store total damages in the new_columns dictionary
                    result_damages = fossilFuel_damages + electricity_damages
                    new_columns[f'baseline_{year_label}_{category}_damages_{pollutant}'] = round(result_damages, 2)

                # Sum health-related damages
                new_columns[f'baseline_{year_label}_{category}_damages_health'] = round(
                    new_columns[f'baseline_{year_label}_{category}_damages_so2'] + 
                    new_columns[f'baseline_{year_label}_{category}_damages_nox'] + 
                    new_columns[f'baseline_{year_label}_{category}_damages_pm25'], 2)
                
                # Climate-related damages
                new_columns[f'baseline_{year_label}_{category}_damages_climate'] = round(new_columns[f'baseline_{year_label}_{category}_damages_co2'], 2)
    
    else:
        for category, lifetime in equipment_specs.items():
            for year in range(1, lifetime + 1):
                year_label = year + 2021
                
                for pollutant in pollutants:
                    new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_{pollutant}'] = df_copy.apply(
                        lambda row: row[f'mp{menu_mp}_{year_label}_{category}_consumption'] * (1/(1-td_losses)) * damages_electricity_lookup.get(year_label, {}).get(
                            (pollutant, row['subregion_eGRID'], scenario), np.nan), axis=1).fillna(0).round(2)

                    new_columns[f'mp{menu_mp}_{year_label}_{category}_reduction_damages_{pollutant}'] = round(
                        new_columns[f'baseline_{year_label}_{category}_damages_{pollutant}'] - new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_{pollutant}'], 2)

                new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_health'] = round(
                    new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_so2'] + 
                    new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_nox'] + 
                    new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_pm25'], 2)
                
                new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_climate'] = round(new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_co2'], 2)

                new_columns[f'mp{menu_mp}_{year_label}_{category}_reduction_damages_health'] = round(
                    new_columns[f'baseline_{year_label}_{category}_damages_health'] - new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_health'], 2)
                
                new_columns[f'mp{menu_mp}_{year_label}_{category}_reduction_damages_climate'] = round(
                    new_columns[f'baseline_{year_label}_{category}_damages_climate'] - new_columns[f'mp{menu_mp}_{year_label}_{category}_damages_climate'], 2)

    # Add all new columns at once to avoid fragmentation
    df_copy = pd.concat([df_copy, pd.DataFrame(new_columns, index=df_copy.index)], axis=1)

    return df_copy

In [65]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")

# # calculate_marginal_damages(df, scenario, emis_fossilFuel_lookup, damages_electricity_lookup, grid_decarb=False)
# df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
#                                                       grid_decarb=False
#                                                      )
# df_euss_am_baseline_home

df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      scenario= 'No Inflation Reduction Act',
                                                      emis_fossilFuel_lookup=emis_fossilFuel_lookup,
                                                      damages_electricity_lookup=preIRA_damages_electricity_lookup,
                                                      grid_decarb=False,
                                                     )
df_euss_am_baseline_home


-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------



Unnamed: 0,bldg_id,square_footage,census_region,census_division,census_division_recs,building_america_climate_zone,cambium_GEA_region,state,city,county,...,baseline_2035_cooking_damages_pm25,baseline_2035_cooking_damages_co2,baseline_2035_cooking_damages_health,baseline_2035_cooking_damages_climate,baseline_2036_cooking_damages_so2,baseline_2036_cooking_damages_nox,baseline_2036_cooking_damages_pm25,baseline_2036_cooking_damages_co2,baseline_2036_cooking_damages_health,baseline_2036_cooking_damages_climate
410846,119,2152.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,,,,,,,,,,
410847,122,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,In another census Place,G4200450,...,,,,,,,,,,
410848,150,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,In another census Place,G4201190,...,,,,,,,,,,
410850,162,2663.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200450,...,,,,,,,,,,
410851,181,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Not in a census Place,G4200270,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433884,549748,3301.0,Northeast,Middle Atlantic,Middle Atlantic,Mixed-Humid,RFCEc,PA,Not in a census Place,G4200910,...,,,,,,,,,,
433891,549915,2176.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCEc,PA,Lancaster,G4200710,...,,,,,,,,,,
433892,549937,885.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4200050,...,,,,,,,,,,
433893,549963,1690.0,Northeast,Middle Atlantic,Middle Atlantic,Cold,RFCWc,PA,In another census Place,G4201290,...,,,,,,,,,,


In [60]:
print(df_euss_am_baseline_home)

        bldg_id  square_footage census_region  census_division  \
410846      119          2152.0     Northeast  Middle Atlantic   
410847      122          2176.0     Northeast  Middle Atlantic   
410848      150          1690.0     Northeast  Middle Atlantic   
410850      162          2663.0     Northeast  Middle Atlantic   
410851      181           885.0     Northeast  Middle Atlantic   
...         ...             ...           ...              ...   
433884   549748          3301.0     Northeast  Middle Atlantic   
433891   549915          2176.0     Northeast  Middle Atlantic   
433892   549937           885.0     Northeast  Middle Atlantic   
433893   549963          1690.0     Northeast  Middle Atlantic   
433894   549989          1220.0     Northeast  Middle Atlantic   

       census_division_recs building_america_climate_zone cambium_GEA_region  \
410846      Middle Atlantic                   Mixed-Humid              RFCEc   
410847      Middle Atlantic                   M

### Step 5: Calculate End-use specific marginal damages
**I used the total emissions column for each of the end uses for the following reasons:**
- Most homes only have 1 of each end-use, so it is unlikely that the homes have a significant consumption values from different fuel types. Thus, the total consumption and total emissions column (sum of each dwelling units consumption by end-use for each fuel) is fine to use to calculate marginal damages (social cost)
- We can visualize the emissions in 2 by 2 grid (CO2, PM25, SO2, NOx) with each appliance's heating fuel in a different shape or color. 

### Baseline Marginal Damages: WHOLE-HOME

In [None]:
# Set columns in display
pd.set_option('display.max_columns', None)
# pd.reset_option('display.max_columns') # Reset options to default

# Set rows in display
# pd.set_option('display.max_rows', None)
# pd.reset_option('display.max_rows') # Reset options to default

In [None]:
# # Specifications for equipment lifetimes in years
# equipment_specs = {
#     'heating': 15,
#     'waterHeating': 12,
#     'clothesDrying': 13,
#     'cooking': 15
# }

# # List of considered pollutants
# pollutants = ['so2', 'nox', 'pm25', 'co2']

# # Transmission and distribution losses
# td_losses = 0.06

# # def calculate_marginal_damages(df, grid_decarb=False, scenario="No Inflation Reduction Act"):
# def calculate_marginal_damages(df, scenario, emis_fossilFuel_lookup, damages_electricity_lookup, grid_decarb=False):
#     """
#     Calculate the marginal damages of different pollutants based on various conditions and mappings.
    
#     Parameters:
#     - df (DataFrame): The primary data frame containing pollutant emissions data and other relevant attributes.
#     - grid_decarb (bool): Flag to determine if grid decarbonization calculations are to be applied.
#     - scenario (str): The decarbonization scenario name.

#     Returns:
#     - DataFrame: The updated data frame with calculated marginal damages and potentially new columns.
#     """
#     df_copy = df.copy()

#     # Map 'cambium_GEA_region' to 'subregion_eGRID' for regional breakdown
#     df_copy['subregion_eGRID'] = df_copy['cambium_GEA_region'].map(gea_eGRID_mapping)

#     # Map 'state' to 'naturalGas_leakage_factor'
#     state_to_factor = dict(zip(df_margEmis_factors['state'], df_margEmis_factors['naturalGas_leakage_factor']))
#     df_copy['naturalGas_leakage_factor'] = df_copy['state'].map(state_to_factor)

#     for pollutant in pollutants:
#         if f'margSocialCosts_{pollutant}' not in df_copy.columns:
#             df_copy[f'margSocialCosts_{pollutant}'] = df_copy.apply(
#                 lambda row: damages_fossilFuel_lookup[f'margSocialCosts_{pollutant}'].get(
#                     (row['Longitude'], row['Latitude']), np.nan), axis=1)
        
#         if f'margDamage_factor_{pollutant}' not in df_copy.columns:
#             df_copy[f'margDamage_factor_{pollutant}'] = df_copy.apply(
#                 lambda row: preIRA_damages_electricity_lookup.get(2022, {}).get(
#                     (pollutant, row['subregion_eGRID'], scenario), None), axis=1)

#     new_columns_df = pd.DataFrame(index=df_copy.index)

#     if grid_decarb:
#         new_columns_df = calculate_damages_decarb_grid(df_copy, menu_mp, equipment_specs.keys(), range(2022, 2022 + max(equipment_specs.values())), td_losses, dict_margDamages_gridDecarb)
#     else:
#         new_columns_df = calculate_damages_current_grid(df_copy, menu_mp, td_losses, national_lookup, electricity_lookup, preIRA_damages_electricity_lookup, scenario)

#     columns_to_add = new_columns_df.columns.difference(df_copy.columns)
#     df_copy = pd.concat([df_copy, new_columns_df[columns_to_add]], axis=1)

#     return df_copy

# def calculate_damages_current_grid(df_copy, menu_mp, td_losses, national_lookup, electricity_lookup, damages_lookup, scenario):
#     """
#     Calculate damages for the current electricity grid scenario.

#     Parameters:
#         df_copy (DataFrame): The DataFrame containing consumption data.
#         menu_mp (int): The menu number for the measure package.
#         td_losses (float): Transmission and distribution losses.
#         national_lookup (dict): Lookup table for national emissions factors.
#         electricity_lookup (dict): Lookup table for electricity emissions.
#         damages_lookup (dict): Lookup table for damages.
#         scenario (str): The decarbonization scenario name.

#     Returns:
#         DataFrame: The DataFrame with calculated damages.
#     """
#     if menu_mp == 0:
#         for category, lifetime in equipment_specs.items():
#             for year in range(1, lifetime + 1):
#                 year_label = year + 2021
#                 for pollutant in pollutants:
#                     emis_naturalGas = df_copy[f'base_naturalGas_{category}_consumption'] * national_lookup.get(('naturalGas', pollutant), np.nan)
#                     emis_propane = df_copy[f'base_propane_{category}_consumption'] * national_lookup.get(('propane', pollutant), np.nan)
                    
#                     df_copy['electricity_lookup_values'] = df_copy.apply(
#                         lambda row: electricity_lookup.get((pollutant, row['state']), np.nan), axis=1)
#                     emis_electricity = df_copy[f'base_electricity_{category}_consumption'] * (1 / (1 - td_losses)) * df_copy['electricity_lookup_values']

#                     if 'cooking' in category or 'clothesDrying' in category:
#                         total_emissions = emis_electricity.fillna(0) + emis_naturalGas.fillna(0) + emis_propane.fillna(0)
#                     else:
#                         emis_fuelOil = df_copy[f'base_fuelOil_{category}_consumption'] * national_lookup.get(('fuelOil', pollutant), np.nan)
#                         total_emissions = emis_electricity.fillna(0) + emis_naturalGas.fillna(0) + emis_propane.fillna(0) + emis_fuelOil.fillna(0)

#                     damage_factor = damages_lookup.get(year_label, {}).get(
#                         (pollutant, row['subregion_eGRID'], scenario), np.nan)
#                     df_copy[f'baseline_{category}_damages_{pollutant}_{year_label}'] = round(total_emissions * damage_factor, 2)

#             df_copy[f'baseline_{category}_damages_health'] = round(df_copy[[f'baseline_{category}_damages_so2_{year_label}' for year_label in range(2022, 2022 + lifetime)]].sum(axis=1), 2)
#             df_copy[f'baseline_{category}_damages_climate'] = round(df_copy[[f'baseline_{category}_damages_co2_{year_label}' for year_label in range(2022, 2022 + lifetime)]].sum(axis=1), 2)
#     else:
#         for category, lifetime in equipment_specs.items():
#             for year in range(1, lifetime + 1):
#                 year_label = year + 2021
#                 for pollutant in pollutants:                
#                     df_copy[f'mp{menu_mp}_{category}_damages_{pollutant}_{year_label}'] = df_copy.apply(
#                         lambda row: row[f'mp{menu_mp}_{category}_consumption'] * (1/(1-td_losses)) * damages_lookup.get(year_label, {}).get(
#                             (pollutant, row['subregion_eGRID'], scenario), np.nan), axis=1).fillna(0).round(2)

#                     df_copy[f'mp{menu_mp}_{category}_reduction_damages_{pollutant}_{year_label}'] = (
#                         df_copy[f'baseline_{category}_damages_{pollutant}_{year_label}'] - df_copy[f'mp{menu_mp}_{category}_damages_{pollutant}_{year_label}']).round(2)

#             df_copy[f'mp{menu_mp}_{category}_damages_health'] = df_copy[[f'mp{menu_mp}_{category}_damages_so2_{year_label}' for year_label in range(2022, 2022 + lifetime)]].sum(axis=1).round(2)
#             df_copy[f'mp{menu_mp}_{category}_damages_climate'] = df_copy[[f'mp{menu_mp}_{category}_damages_co2_{year_label}' for year_label in range(2022, 2022 + lifetime)]].sum(axis=1).round(2)

#             df_copy[f'mp{menu_mp}_{category}_reduction_damages_health'] = (
#                 df_copy[f'baseline_{category}_damages_health'] - df_copy[f'mp{menu_mp}_{category}_damages_health']).round(2)
#             df_copy[f'mp{menu_mp}_{category}_reduction_damages_climate'] = (
#                 df_copy[f'baseline_{category}_damages_climate'] - df_copy[f'mp{menu_mp}_{category}_damages_climate']).round(2)

#     return df_copy


In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 5: Calculate End-use specific marginal damages
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Baseline Marginal Damages: WHOLE-HOME
-------------------------------------------------------------------------------------------------------
""")

# calculate_marginal_damages(df, grid_decarb=False)
df_euss_am_baseline_home = calculate_marginal_damages(df=df_euss_am_baseline_home,
                                                      grid_decarb=False
                                                     )
df_euss_am_baseline_home

## Private Perspective: Annual Energy Costs

### Step 1: Obtain Level Energy Fuel Cost Data from the EIA
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data

In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Private Perspective: Annual Energy Costs
-------------------------------------------------------------------------------------------------------
- Step 1: Obtain Level Energy Fuel Cost Data from the EIA
- Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
      
-------------------------------------------------------------------------------------------------------
Step 1: Obtain Level Energy Fuel Cost Data from the EIA
-------------------------------------------------------------------------------------------------------
**Data Sources for Excel workbook containing state average Residential fuel cost for each fuel in 2018**
- EIA State Electricity Price: https://www.eia.gov/electricity/state/archive/2018/
- EIA Natural Gas Prices: https://www.eia.gov/dnav/ng/ng_pri_sum_dcu_SPA_a.htm
- Propane and Fuel Oil: EIA March 2023 Short Term Energy Outlook
    - https://www.eia.gov/outlooks/steo/pdf/wf01.pdf
    - Table WF01: Average Consumer Prices and Expenditures for Heating Fuels During the Winter
    - US Average: 2018-2019 Data
-------------------------------------------------------------------------------------------------------
""")

filename = 'fuel_prices_nominal.csv'
relative_path = os.path.join(r"fuel_prices", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_perkWh = pd.read_csv(file_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# New units for the converted and inflated prices below
df_fuelPrices_perkWh['units'] = 'USD2021 per kWh'

years = ['2018', '2019', '2020', '2021', '2022']

# Take dataframe with nominal prices in their base units and convert to $/kWh equivalent
# https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
for year in years:
    for index, row in df_fuelPrices_perkWh.iterrows():
        
        # Propane: (dollars per gallon) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
        if row['fuel_type'] == 'propane':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/91452) * (3412/1)
        
        # Fuel Oil: (dollars/gallon) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'fuelOil':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/138500) * (3412/1)
        
        # Natural Gas: (dollars/cf) * (thousand cf/1000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
        elif row['fuel_type'] == 'naturalGas':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] * (1/1000) * (1/1039) * (3412/1)
        
        # Electricity: convert cents per kWh to $ per kWh
        elif row['fuel_type'] == 'electricity':
            df_fuelPrices_perkWh.at[index, f'{year}_fuelPrice_perkWh'] = row[f'{year}_nominal_unit_price'] / 100

# Convert nominal dollars to real 2021 US dollars (USD2021)
df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2018_fuelPrice_perkWh'] * cpi_ratio_2021_2018
df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2019_fuelPrice_perkWh'] * cpi_ratio_2021_2019
df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2020_fuelPrice_perkWh'] * cpi_ratio_2021_2020
df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2021_fuelPrice_perkWh'] * cpi_ratio_2021_2021
df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] = df_fuelPrices_perkWh['2022_fuelPrice_perkWh'] * cpi_ratio_2021_2022

# Display the df
df_fuelPrices_perkWh

In [None]:
filename = 'fuel_price_projection_factors.csv'
relative_path = os.path.join(r"fuel_prices", filename)
file_path = os.path.join(project_root, relative_path)
df_fuelPrices_projection_factors = pd.read_csv(file_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")

# # Display the df
# df_fuelPrices_projection_factors

# Convert the factors dataframe into a lookup dictionary
factor_dict = df_fuelPrices_projection_factors.set_index(['region', 'fuel_type']).to_dict('index')
factor_dict

In [None]:
# Original dictionary mapping census divisions to states
map_states_census_divisions = {
    "New England": ["CT", "ME", "MA", "NH", "RI", "VT"],
    "Middle Atlantic": ["NJ", "NY", "PA"],
    "East North Central": ["IN", "IL", "MI", "OH", "WI"],
    "West North Central": ["IA", "KS", "MN", "MO", "NE", "ND", "SD"],
    "South Atlantic": ["DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV"],
    "East South Central": ["AL", "KY", "MS", "TN"],
    "West South Central": ["AR", "LA", "OK", "TX"],
    "Mountain": ["AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY"],
    "Pacific": ["AK", "CA", "HI", "OR", "WA"]
}

# Reverse the mapping to create a state-to-census-division map
state_to_census_division = {}
for division, states in map_states_census_divisions.items():
    for state in states:
        state_to_census_division[state] = division

# Function to map location to census division
def map_location_to_census_division(location):
    if location in state_to_census_division:
        return state_to_census_division[location]
    return location

# Apply the function to map locations using .loc
df_fuelPrices_perkWh.loc[:, 'census_division'] = df_fuelPrices_perkWh['location_map'].apply(map_location_to_census_division)

# Display the updated DataFrame
df_fuelPrices_perkWh

In [None]:
# Define function to project future prices
def project_future_prices(row, factor_dict):
    loc = row['census_division']
    fuel = row['fuel_type']
    price_2022 = row['2022_fuelPrice_perkWh']
    projection_factors = factor_dict.get((loc, fuel))

    # print(f"Processing location: {loc}, fuel: {fuel}, price 2022: {price_2022}")
    # if projection_factors:
    #     print(f"Found projection factors for {loc}, {fuel}: {projection_factors}")
    # else:
    #     print(f"No projection factors found for {loc}, {fuel}, using default 2022 price.")

    future_prices = {}
    if projection_factors:
        for year in range(2023, 2051):
            factor = projection_factors.get(str(year), 1.0)
            future_price = price_2022 * factor
            future_prices[f'{year}_fuelPrice_perkWh'] = future_price
            # print(f"Year: {year}, Factor: {factor}, Future Price: {future_price}")
    else:
        # If no factors found, use the 2022 price as default
        for year in range(2023, 2051):
            future_prices[f'{year}_fuelPrice_perkWh'] = price_2022
            # print(f"Year: {year}, Default Future Price: {price_2022}")
    
    return pd.Series(future_prices)

# Apply the function to each row in the DataFrame
projected_prices_df = df_fuelPrices_perkWh.apply(lambda row: project_future_prices(row, factor_dict), axis=1)

# Concatenate the projected prices with the original DataFrame
df_fuelPrices_perkWh_projected = pd.concat([df_fuelPrices_perkWh, projected_prices_df], axis=1)

# Display the final DataFrame
df_fuelPrices_perkWh_projected

### Step 2: Calculate Annual Operating (Fuel) Costs

### Baseline Fuel Cost: WHOLE-HOME

In [None]:
# Assuming df is the DataFrame with the data provided
def create_fuel_price_lookup(df):
    lookup_dict = {}
    
    for _, row in df.iterrows():
        location = row['location_map']
        fuel_type = row['fuel_type']
        
        if location not in lookup_dict:
            lookup_dict[location] = {}
        
        if fuel_type not in lookup_dict[location]:
            lookup_dict[location][fuel_type] = {}
        
        for year in range(2018, 2051):
            column_name = f"{year}_fuelPrice_perkWh"
            lookup_dict[location][fuel_type][year] = row[column_name]
    
    return lookup_dict

# Example usage
# df = pd.read_csv('fuel_prices.csv')  # Load your DataFrame
fuel_price_lookup = create_fuel_price_lookup(df_fuelPrices_perkWh_projected)
fuel_price_lookup


In [None]:
print("""
-------------------------------------------------------------------------------------------------------
Step 2: Calculate Annual Operating (Fuel) Costs
-------------------------------------------------------------------------------------------------------
- Create a mapping dictionary for fuel types
- Create new merge columns to ensure a proper match.
- Merge df_copy with df_fuel_prices to get fuel prices for electricity, natural gas, propane, and fuel oil
- Calculate the per kWh fuel costs for each fuel type and region
- Calculate the baseline fuel cost 
-------------------------------------------------------------------------------------------------------
""")
# df_euss_am_baseline_home = df_euss_am_baseline_home.copy()
# calculate_annual_fuelCost(df, fuel_price_lookup, lifetime)
df_euss_am_baseline_home = calculate_annual_fuelCost(df=df_euss_am_baseline_home,
                                                     fuel_price_lookup=fuel_price_lookup,
                                                     )
df_euss_am_baseline_home

### Area Median Income Data Used to determine LMI Designation and IRA Rebates Eligibility/Amount

In [None]:
# Collect Area Median Income Data at PUMA-resolution
filename = "nhgis0002_ds239_20185_puma.csv"
relative_path = os.path.join(r"equity_data", filename)
file_path = os.path.join(project_root, relative_path)

print(f"Retrieved data for filename: {filename}")
print(f"Located at filepath: {file_path}")
print("\n")

df_county_medianIncome = pd.read_csv(file_path, encoding='ISO-8859-1')
df_county_medianIncome = df_county_medianIncome.drop(0)
df_county_medianIncome = df_county_medianIncome.reset_index(drop=True)

cols_interest = ['GISJOIN', 'STUSAB', 'STATE', 'PUMAA', 'GEOID', 'NAME_E', 'AJZAE001', 'AJZAM001']
df_county_medianIncome = df_county_medianIncome[cols_interest]
df_county_medianIncome = df_county_medianIncome.rename(columns={"GISJOIN": "gis_joinID_puma", "STUSAB": "state_abbrev", "STATE": "state", "PUMAA": "puma_code", "GEOID": "census_geoID", "NAME_E": "name_estimate", "AJZAE001": "median_income_USD2018", "AJZAM001": "median_income_USD2018_marginOfError"})
df_county_medianIncome

# Model Runtime

In [None]:
# Get the current datetime again
end_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Calculate the elapsed time
elapsed_time = datetime.strptime(end_time, "%Y-%m-%d_%H-%M-%S") - datetime.strptime(start_time, "%Y-%m-%d_%H-%M-%S")

# Format the elapsed time
elapsed_seconds = elapsed_time.total_seconds()
elapsed_minutes = int(elapsed_seconds // 60)
elapsed_seconds = int(elapsed_seconds % 60)

# Print the elapsed time
print(f"The code took {elapsed_minutes} minutes and {elapsed_seconds} seconds to execute.")