In [19]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
import json

In [3]:
# Multiplier applied to the "(MBtu)" rows of results_annual.csv. 293.07107 is the number of kWh in one
# million Btu. NOTE(review): the original author's note ("convert MMBtu to kWh? or Wh?") suggests the unit
# of the CSV rows was never confirmed -- verify that they really are million Btu.
_MMBTU_TO_KWH = 293.07107


def _append_metadata_row(metadata_dict, previous_data):
    """Turn one metadata dict into a single-row DataFrame and stack it under ``previous_data``."""
    row = pd.DataFrame.from_dict({k: [v] for k, v in metadata_dict.items()}, orient="columns")
    # pd.concat silently drops None entries, so previous_data=None simply yields the new row.
    return pd.concat([previous_data, row])


def _append_failed_row(metadata_dict, previous_data, message):
    """Print ``message``, flag the row as not applicable, and append whatever was collected so far."""
    print(message)
    metadata_dict["applicable_upgrade"] = False
    return _append_metadata_row(metadata_dict, previous_data)


def _copy_json_properties(json_data, properties, metadata_dict):
    """Copy ``json_data[section][field]`` into ``metadata_dict`` for every requested property (best effort)."""
    for k, v in properties.items():
        try:
            metadata_dict[k] = json_data[v[0]][v[1]]
        except (LookupError, TypeError):
            # Missing section/field: report it and leave the key out of the row.
            print(f"couldn't find {k} by indexing {v} from json_data")


def _copy_annual_csv_properties(csv_file_path, properties, metadata_dict):
    """Read a header-less results_annual.csv and copy the requested rows, scaled by _MMBTU_TO_KWH (best effort)."""
    # Column 0 holds the row label; the first remaining column holds the value.
    csv_data = pd.read_csv(csv_file_path, header=None).set_index(0)
    for k, v in properties.items():
        try:
            metadata_dict[k] = csv_data.loc[v].iloc[0] * _MMBTU_TO_KWH
        except (LookupError, TypeError):
            # Missing row (or a non-numeric value): report it and leave the key out of the row.
            print(f"couldn't find {k} by indexing {v} from csv_data")


def _sum_or_nan(metadata_dict, keys):
    """Sum the given entries in order; any entry that could not be loaded makes the total NaN."""
    return sum(metadata_dict.get(k, float("nan")) for k in keys)


def load_resstock_run_annual_metadata(data_folder_file_path, input_mp, previous_data=None, baseline_run=1, upgrade_run=2):
    """Load one building's annual metadata from a ResStock run folder and append it as a row.

    The function reads ``run{N}/run/data_point_out.json`` and ``run{N}/run/results_annual.csv``
    below ``data_folder_file_path``; ``N`` is ``baseline_run`` when ``input_mp == 0`` and
    ``upgrade_run`` otherwise.

    Args:
        data_folder_file_path: Folder that contains the ``run{N}`` sub-folders.
        input_mp: 0 loads the baseline properties; any other value loads the upgrade
            properties and names the totals ``mp{input_mp}_heating_consumption`` /
            ``mp{input_mp}_cooling_consumption``.
        previous_data: DataFrame of rows collected so far (or None); the new row is
            concatenated underneath it.
        baseline_run: Run number used for the baseline branch.
        upgrade_run: Run number used for the upgrade branch.

    Returns:
        ``previous_data`` with one extra row. The row always has ``applicable_upgrade``;
        it is False (and the row only carries what was read before the failure) when a
        required file is missing or, for upgrades, when ``UpgradeCosts.applicable`` is
        not True. The original index is kept (rows are not re-numbered).

    Notes:
        Individual JSON fields / CSV rows that cannot be found are reported with a
        printed message and omitted from the row. A total whose component is missing is
        NaN instead of raising ``KeyError``.
    """
    # Have this in a standard format required for my data parsing functions
    metadata_dict = {"applicable_upgrade": True}

    if input_mp == 0:
        run_folder = os.path.join(data_folder_file_path, f"run{baseline_run}", "run")
        failure_message = "Baseline run failed, returning empty row"

        # from data_point_out.json
        json_properties = {
            "bldg_id":("BuildExistingModel","building_id"),
            "census_division":("BuildExistingModel","census_division"),
            "state":("BuildExistingModel","state"),
            "city":("BuildExistingModel","city"),
            "puma":("BuildExistingModel","puma"),
            "county":("BuildExistingModel","county"),
            "reeds_balancing_area":("BuildExistingModel","reeds_balancing_area"),
            "income":("BuildExistingModel","income"),
            "hvac_heating_efficiency":("BuildExistingModel","hvac_heating_efficiency"),
            "hvac_cooling_type":("BuildExistingModel","hvac_cooling_type"),
            "hvac_cooling_efficiency":("BuildExistingModel","hvac_cooling_efficiency"),
            "hvac_has_ducts":("BuildExistingModel","hvac_has_ducts"),
            "heating_type":("BuildExistingModel","hvac_heating_type_and_fuel"),
            "base_heating_fuel":("BuildExistingModel","heating_fuel"),
        }

        # from results_annual.csv
        csv_properties = {
            "base_electricity_heating_consumption":"End Use: Electricity: Heating (MBtu)",
            "base_naturalGas_heating_consumption": "End Use: Natural Gas: Heating (MBtu)",
            "base_propane_heating_consumption":"End Use: Propane: Heating (MBtu)",
            "base_fuelOil_heating_consumption":"End Use: Fuel Oil: Heating (MBtu)",
            "base_electricity_cooling_consumption":"End Use: Electricity: Cooling (MBtu)",
        }

        heating_total_key = "baseline_heating_consumption"
        heating_component_keys = (
            "base_naturalGas_heating_consumption",
            "base_electricity_heating_consumption",
            "base_propane_heating_consumption",
            "base_fuelOil_heating_consumption",
        )
        cooling_total_key = "baseline_cooling_consumption"
        cooling_component_key = "base_electricity_cooling_consumption"
    else:
        run_folder = os.path.join(data_folder_file_path, f"run{upgrade_run}", "run")
        failure_message = "Upgrade not applicable, returning empty row"

        json_properties = {
            "bldg_id":("BuildExistingModel","building_id"),
            "baseline_heating_type":("BuildExistingModel","hvac_heating_type_and_fuel"),
            "size_heating_system_primary_k_btu_h":("UpgradeCosts","size_heating_system_primary_k_btu_h"),
            "size_cooling_system_primary_k_btu_h":("UpgradeCosts","size_cooling_system_primary_k_btu_h"),
            "size_heat_pump_backup_primary_k_btu_h":("UpgradeCosts","size_heat_pump_backup_primary_k_btu_h"),
            "size_heating_system_secondary_k_btu_h":("UpgradeCosts","size_heating_system_secondary_k_btu_h"),
            "upgrade_hvac_heating_efficiency":("BuildExistingModel","hvac_heating_efficiency"),
        }

        csv_properties = {
            "upgrade_natgas_heating_consumption":"End Use: Natural Gas: Heating (MBtu)",
            "upgrade_electricity_heating_consumption":"End Use: Electricity: Heating (MBtu)",
            "upgrade_electricity_cooling_consumption":"End Use: Electricity: Cooling (MBtu)",
        }

        heating_total_key = f"mp{input_mp}_heating_consumption"
        # NOTE(review): the upgrade total only adds natural gas and electricity (no propane / fuel oil),
        # exactly as before -- confirm this is intended for every upgrade package.
        heating_component_keys = (
            "upgrade_natgas_heating_consumption",
            "upgrade_electricity_heating_consumption",
        )
        cooling_total_key = f"mp{input_mp}_cooling_consumption"
        cooling_component_key = "upgrade_electricity_cooling_consumption"

    json_file_path = os.path.join(run_folder, "data_point_out.json")
    if not os.path.exists(json_file_path):
        return _append_failed_row(metadata_dict, previous_data, failure_message)

    with open(json_file_path, "r") as f:
        json_data = json.load(f)

    if input_mp != 0:
        # An upgrade only counts when the run reports UpgradeCosts.applicable == True; a missing
        # "UpgradeCosts" section or a missing "applicable" flag is treated as not applicable.
        upgrade_costs = json_data.get("UpgradeCosts")
        if not isinstance(upgrade_costs, dict) or upgrade_costs.get("applicable") != True:
            return _append_failed_row(metadata_dict, previous_data, failure_message)

    _copy_json_properties(json_data, json_properties, metadata_dict)

    csv_file_path = os.path.join(run_folder, "results_annual.csv")
    if not os.path.exists(csv_file_path):
        return _append_failed_row(metadata_dict, previous_data, failure_message)

    _copy_annual_csv_properties(csv_file_path, csv_properties, metadata_dict)

    metadata_dict[heating_total_key] = _sum_or_nan(metadata_dict, heating_component_keys)
    metadata_dict[cooling_total_key] = metadata_dict.get(cooling_component_key, float("nan"))

    return _append_metadata_row(metadata_dict, previous_data)

# load_resstock_run_annual_metadata(os.path.abspath(os.getcwd()), 8)

In [None]:
def load_multiple_resstock_run_annual_metadata(data_folder_file_path, input_mp, num_datapoints, baseline_data=None):
    """Load annual metadata for runs 1..num_datapoints and optionally join it onto baseline data.

    Args:
        data_folder_file_path: Folder that contains the ``run{N}`` sub-folders.
        input_mp: Passed through to ``load_resstock_run_annual_metadata`` (0 = baseline).
        num_datapoints: Number of runs to load; run ``i + 1`` is used for datapoint ``i``
            for both the baseline and the upgrade run number.
        baseline_data: Optional DataFrame; when given, the loaded rows are inner-merged
            onto it on ``bldg_id``.

    Returns:
        DataFrame with a fresh 0..n-1 index. An empty DataFrame is returned when
        ``num_datapoints`` is 0, since there is nothing to load or merge.
    """
    df_mp = None

    for i in range(num_datapoints):
        df_mp = load_resstock_run_annual_metadata(
            data_folder_file_path,
            input_mp,
            previous_data=df_mp,
            baseline_run=i + 1,
            upgrade_run=i + 1,
        )

    if df_mp is None:
        # No datapoints requested: previously this fell through to a call on None and crashed.
        return pd.DataFrame()

    if baseline_data is not None:
        df_mp = pd.merge(baseline_data, df_mp, how='inner', on='bldg_id')

    # Rows were stacked with their original (repeated) index; hand back a clean one.
    return df_mp.reset_index(drop=True)

In [4]:
def load_hdd_factors(project_root):
    """Read the 'hdd_factors_2022_2050' sheet and return it as a per-census-division lookup.

    Args:
        project_root: Folder that contains ``projections/aeo_projections_2022_2050.xlsx``.

    Returns:
        dict mapping each ``census_division`` value to a dict of that row's remaining
        columns (column label -> cell value). Per the sheet name these are factors for
        2022 to 2050.
    """
    workbook_path = os.path.join(
        project_root,
        os.path.join(r"projections", 'aeo_projections_2022_2050.xlsx'),
    )
    hdd_sheet = pd.read_excel(io=workbook_path, sheet_name='hdd_factors_2022_2050')

    # One dict entry per census division, holding that division's row.
    by_division = hdd_sheet.set_index(['census_division'])
    return by_division.to_dict('index')

In [None]:
def load_cdd_factors(project_root):
    """Read the 'cdd_factors_2022_2050' sheet and return it as a per-census-division lookup.

    Args:
        project_root: Folder that contains ``projections/aeo_projections_2022_2050.xlsx``.

    Returns:
        dict mapping each ``census_division`` value to a dict of that row's remaining
        columns (column label -> cell value). Per the sheet name these are factors for
        2022 to 2050.
    """
    workbook_path = os.path.join(
        project_root,
        os.path.join(r"projections", 'aeo_projections_2022_2050.xlsx'),
    )
    cdd_sheet = pd.read_excel(io=workbook_path, sheet_name='cdd_factors_2022_2050')

    # One dict entry per census division, holding that division's row.
    by_division = cdd_sheet.set_index(['census_division'])
    return by_division.to_dict('index')

In [6]:
def load_df_puma_medianIncome(project_root, cpi_ratio_2023_2022):
    """Load area median income at PUMA resolution and add a CPI-scaled 2023 column.

    Args:
        project_root: Folder that contains ``equity_data/nhgis0003_ds261_2022_puma.csv``.
        cpi_ratio_2023_2022: Multiplier applied to ``median_income_USD2022``.

    Returns:
        DataFrame with the columns gis_joinID_puma, state_abbrev, puma_code,
        name_estimate, median_income_USD2022, median_income_USD2022_marginOfError and
        median_income_USD2023 (the 2022 value times the ratio, rounded to 2 decimals).
    """
    # Source column -> output column, in output order.
    column_renames = {
        "GISJOIN": "gis_joinID_puma",
        "STUSAB": "state_abbrev",
        "PUMAA": "puma_code",
        "NAME_E": "name_estimate",
        "AP2PE001": "median_income_USD2022",
        "AP2PM001": "median_income_USD2022_marginOfError",
    }

    source_csv = os.path.join(project_root, os.path.join(r"equity_data", "nhgis0003_ds261_2022_puma.csv"))
    raw = pd.read_csv(source_csv, encoding='ISO-8859-1').reset_index(drop=True)

    income = raw[list(column_renames)].rename(columns=column_renames)
    income['median_income_USD2023'] = (income['median_income_USD2022'] * cpi_ratio_2023_2022).round(2)
    return income

In [5]:
def load_df_county_medianIncome(project_root, cpi_ratio_2023_2022):
    """Load area median income at county resolution and add a CPI-scaled 2023 column.

    Args:
        project_root: Folder that contains ``equity_data/nhgis0005_ds261_2022_county.csv``.
        cpi_ratio_2023_2022: Multiplier applied to ``median_income_USD2022``.

    Returns:
        DataFrame with the columns gis_joinID_county, state_abbrev, county_code,
        name_estimate, median_income_USD2022, median_income_USD2022_marginOfError and
        median_income_USD2023 (the 2022 value times the ratio, rounded to 2 decimals).
    """
    # Source column -> output column, in output order.
    column_renames = {
        "GISJOIN": "gis_joinID_county",
        "STUSAB": "state_abbrev",
        "COUNTYA": "county_code",
        "NAME_E": "name_estimate",
        "AP2PE001": "median_income_USD2022",
        "AP2PM001": "median_income_USD2022_marginOfError",
    }

    source_csv = os.path.join(project_root, os.path.join(r"equity_data", "nhgis0005_ds261_2022_county.csv"))
    raw = pd.read_csv(source_csv, encoding='ISO-8859-1').reset_index(drop=True)

    income = raw[list(column_renames)].rename(columns=column_renames)
    income['median_income_USD2023'] = (income['median_income_USD2022'] * cpi_ratio_2023_2022).round(2)
    return income

In [None]:
def load_df_state_medianIncome(project_root, cpi_ratio_2023_2022):
    """Load area median income at state resolution and add a CPI-scaled 2023 column.

    Args:
        project_root: Folder that contains ``equity_data/nhgis0004_ds261_2022_state.csv``.
        cpi_ratio_2023_2022: Multiplier applied to ``median_income_USD2022``.

    Returns:
        DataFrame with the columns gis_joinID_state, state_abbrev, state_code,
        name_estimate, median_income_USD2022, median_income_USD2022_marginOfError and
        median_income_USD2023 (the 2022 value times the ratio, rounded to 2 decimals).
    """
    # Source column -> output column, in output order.
    column_renames = {
        "GISJOIN": "gis_joinID_state",
        "STUSAB": "state_abbrev",
        "STATEA": "state_code",
        "NAME_E": "name_estimate",
        "AP2PE001": "median_income_USD2022",
        "AP2PM001": "median_income_USD2022_marginOfError",
    }

    source_csv = os.path.join(project_root, os.path.join(r"equity_data", "nhgis0004_ds261_2022_state.csv"))
    raw = pd.read_csv(source_csv, encoding='ISO-8859-1').reset_index(drop=True)

    income = raw[list(column_renames)].rename(columns=column_renames)
    income['median_income_USD2023'] = (income['median_income_USD2022'] * cpi_ratio_2023_2022).round(2)
    return income

In [7]:
def load_rsMeans_CCI_values(project_root, df_resstock_run):
    """Attach the RSMeans city cost index (average) to every row of ``df_resstock_run``.

    Reads ``inflation_data/rsMeans_cityCostIndex.csv`` under ``project_root`` and looks up
    each row's ``city`` in its ``City`` column. Cities without an entry of their own
    (including placeholders such as 'Not in a census Place') receive the value stored
    under '+30 City Average'.

    Args:
        project_root: Folder that contains the ``inflation_data`` directory.
        df_resstock_run: DataFrame with a ``city`` column. It is modified in place.

    Returns:
        The same ``df_resstock_run`` object with a new ``rsMeans_CCI_avg`` column.
    """
    cci_csv = os.path.join(project_root, os.path.join(r"inflation_data", "rsMeans_cityCostIndex.csv"))
    cci = pd.read_csv(cci_csv)[['State', 'City', 'Material', 'Installation', 'Average']].copy()

    # Index values are kept to two decimals.
    for index_column in ('Material', 'Installation', 'Average'):
        cci[index_column] = cci[index_column].round(2)

    city_to_average = cci.set_index('City')['Average'].to_dict()
    fallback_average = city_to_average.get('+30 City Average')

    def average_for(city):
        # Exact city match first; everything else uses the fallback row.
        return city_to_average.get(city, fallback_average)

    df_resstock_run['rsMeans_CCI_avg'] = df_resstock_run['city'].apply(average_for)
    return df_resstock_run

In [8]:
def load_rsMeans_national_avg(cpi_ratio_2023_2019):
    """Return 3.00 scaled by ``cpi_ratio_2023_2019``, rounded to two decimals.

    NOTE(review): 3.00 is presumably the RSMeans national-average index value in 2019
    terms -- the source of the constant is not recorded here; confirm.
    """
    base_value = 3.00
    inflated_value = base_value * (cpi_ratio_2023_2019)
    return round(inflated_value, 2)

In [None]:
def create_fuel_price_lookup(df, policy_scenario):
    """Build a nested fuel-price lookup from a wide price table.

    The policy scenario is supplied by the caller rather than read from each row.

    Args:
        df: DataFrame with ``location_map``, ``fuel_type`` and one
            ``{year}_fuelPrice_perkWh`` column for every year from 2022 to 2050.
        policy_scenario: Key under which all prices of this table are stored.

    Returns:
        dict shaped as ``{location: {fuel_type: {policy_scenario: {year: price}}}}``.
        When a (location, fuel_type) pair occurs in several rows the last row wins.
    """
    price_years = range(2022, 2051)
    lookup_dict = {}

    for _, record in df.iterrows():
        # Walk (and create on demand) the location -> fuel -> scenario levels.
        by_fuel = lookup_dict.setdefault(record['location_map'], {})
        by_scenario = by_fuel.setdefault(record['fuel_type'], {})
        prices_by_year = by_scenario.setdefault(policy_scenario, {})

        for yr in price_years:
            prices_by_year[yr] = record[f"{yr}_fuelPrice_perkWh"]

    return lookup_dict

def project_future_prices(row, factor_dict, policy_scenario):
    """Project a row's 2022 fuel price to 2022-2050 using per-year factors.

    Factors are looked up under ``(census_division, fuel_type, policy_scenario)``; when
    that entry is missing or empty, the ``('National', fuel_type, policy_scenario)``
    entry is used instead.

    Args:
        row: Mapping/Series with ``census_division``, ``fuel_type`` and
            ``2022_fuelPrice_perkWh``.
        factor_dict: dict keyed by ``(region, fuel_type, policy_scenario)`` whose values
            map a year to its multiplier.
        policy_scenario: Scenario name used in the factor key.

    Returns:
        pandas Series indexed by ``{year}_fuelPrice_perkWh`` holding
        ``2022 price * factor`` for each year that has a factor. Years without a factor
        are skipped (a message is printed). An empty float64 Series is returned when
        neither the regional nor the national factors exist.
    """
    loc = row['census_division']
    fuel = row['fuel_type']
    price_2022 = row['2022_fuelPrice_perkWh']

    # Prefer the row's own region; remember which region actually supplied the factors so
    # that diagnostics name the right one.
    factor_region = loc
    projection_factors = factor_dict.get((loc, fuel, policy_scenario))

    if not projection_factors:
        factor_region = 'National'
        projection_factors = factor_dict.get(('National', fuel, policy_scenario))

    if not projection_factors:
        # Nothing to project with; explicit dtype avoids the dtype-less empty Series.
        return pd.Series(dtype="float64")

    future_prices = {}
    for year in range(2022, 2051):
        if year in projection_factors:
            future_prices[f'{year}_fuelPrice_perkWh'] = price_2022 * projection_factors[year]
        else:
            print(f"Missing factor for year {year} in {factor_region}, {fuel}, {policy_scenario}. Skipping this year.")

    return pd.Series(future_prices)

In [9]:
def load_fuel_price_lookups(project_root, cpi_ratio_2023_2018, cpi_ratio_2023_2019, cpi_ratio_2023_2020, cpi_ratio_2023_2021, cpi_ratio_2023_2022):
    """Build the pre-IRA and IRA-reference fuel price lookups (price per kWh, 2022-2050).

    Steps:
      1. Read ``fuel_prices/fuel_prices_nominal.csv`` and convert the nominal 2018-2022
         unit prices of each fuel to a per-kWh price.
      2. Scale each year's price by the matching ``cpi_ratio_2023_<year>`` argument.
      3. Map state abbreviations in ``location_map`` to their census division.
      4. Project the 2022 price with the factors in sheet 'fuel_price_factors_2022_2050'
         of ``projections/aeo_projections_2022_2050.xlsx`` for the scenarios
         'No Inflation Reduction Act' and 'AEO2023 Reference Case'.

    Args:
        project_root: Folder containing the ``fuel_prices`` and ``projections`` folders.
        cpi_ratio_2023_2018 ... cpi_ratio_2023_2022: Multipliers applied to the prices of
            the respective year (by their names, CPI ratios that express each year in
            2023 dollars).

    Returns:
        Tuple ``(preIRA_fuel_price_lookup, iraRef_fuel_price_lookup)``; each is the nested
        dict produced by ``create_fuel_price_lookup``.
    """
    filename = 'fuel_prices_nominal.csv'
    file_path = os.path.join(project_root, os.path.join(r"fuel_prices", filename))
    df_fuelPrices_perkWh = pd.read_csv(file_path)

    # Units of the converted and inflated prices below. Every deflator passed in is a
    # cpi_ratio_2023_<year>, so the result is labelled as 2023 dollars (the label used to read
    # 'USD2022 per kWh'). The column is informational only and is not part of the returned lookups.
    df_fuelPrices_perkWh['units'] = 'USD2023 per kWh'

    cpi_ratio_by_year = {
        '2018': cpi_ratio_2023_2018,
        '2019': cpi_ratio_2023_2019,
        '2020': cpi_ratio_2023_2020,
        '2021': cpi_ratio_2023_2021,
        '2022': cpi_ratio_2023_2022,
    }

    # Nominal unit price -> $/kWh equivalent, per fuel type.
    # https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
    unit_converters = {
        # Propane: (dollars per gallon) * (1 gallon propane/91,452 BTU) * (3412 BTU/1 kWh)
        'propane': lambda price: price * (1/91452) * (3412/1),
        # Fuel Oil: (dollars/gallon) * (1 gallon heating oil/138,500 BTU) * (3412 BTU/1 kWh)
        'fuelOil': lambda price: price * (1/138500) * (3412/1),
        # Natural Gas: (dollars/thousand cf) * (thousand cf/1000 cf) * (1 cf natural gas/1039 BTU) * (3412 BTU/1 kWh)
        'naturalGas': lambda price: price * (1/1000) * (1/1039) * (3412/1),
        # Electricity: convert cents per kWh to $ per kWh
        'electricity': lambda price: price / 100,
    }

    fuel_types = df_fuelPrices_perkWh['fuel_type']
    for year, cpi_ratio in cpi_ratio_by_year.items():
        nominal_price = df_fuelPrices_perkWh[f'{year}_nominal_unit_price']

        # Rows whose fuel type has no converter stay NaN.
        price_per_kwh = pd.Series(float("nan"), index=df_fuelPrices_perkWh.index, dtype="float64")
        for fuel_type, convert in unit_converters.items():
            is_fuel = fuel_types == fuel_type
            price_per_kwh.loc[is_fuel] = convert(nominal_price.loc[is_fuel])

        # Convert nominal dollars to real dollars with the year's CPI ratio.
        df_fuelPrices_perkWh[f'{year}_fuelPrice_perkWh'] = price_per_kwh * cpi_ratio

    # Census divisions and their states
    map_states_census_divisions = {
        "New England": ["CT", "ME", "MA", "NH", "RI", "VT"],
        "Middle Atlantic": ["NJ", "NY", "PA"],
        "East North Central": ["IN", "IL", "MI", "OH", "WI"],
        "West North Central": ["IA", "KS", "MN", "MO", "NE", "ND", "SD"],
        "South Atlantic": ["DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV"],
        "East South Central": ["AL", "KY", "MS", "TN"],
        "West South Central": ["AR", "LA", "OK", "TX"],
        "Mountain": ["AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY"],
        "Pacific": ["AK", "CA", "HI", "OR", "WA"]
    }

    # Reverse the mapping to create a state-to-census-division map
    state_to_census_division = {
        state: division
        for division, states in map_states_census_divisions.items()
        for state in states
    }

    # Locations that are not a known state abbreviation are kept unchanged.
    df_fuelPrices_perkWh['census_division'] = df_fuelPrices_perkWh['location_map'].apply(
        lambda location: state_to_census_division.get(location, location)
    )

    # Project Fuel Prices from 2022 to 2050
    filename = 'aeo_projections_2022_2050.xlsx'
    file_path = os.path.join(project_root, os.path.join(r"projections", filename))
    df_fuelPrices_projection_factors = pd.read_excel(io=file_path, sheet_name='fuel_price_factors_2022_2050')

    # Convert the factors dataframe into a lookup dictionary including policy_scenario
    factor_dict = df_fuelPrices_projection_factors.set_index(['region', 'fuel_type', 'policy_scenario']).to_dict('index')

    def build_lookup(policy_scenario):
        """Project prices for one scenario and turn the combined table into a lookup."""
        projected = df_fuelPrices_perkWh.apply(
            lambda row: project_future_prices(row, factor_dict, policy_scenario), axis=1
        )

        # The projection can emit columns that already exist in the base table (notably
        # '2022_fuelPrice_perkWh'). Concatenating both would create duplicate column labels, and a
        # per-row lookup of such a label returns a Series instead of one price. Keep a single
        # column: the projected value, falling back to the base value where nothing was projected.
        overlap = [c for c in projected.columns if c in df_fuelPrices_perkWh.columns]
        for column in overlap:
            projected[column] = projected[column].fillna(df_fuelPrices_perkWh[column])

        combined = pd.concat([df_fuelPrices_perkWh.drop(columns=overlap), projected], axis=1)
        return create_fuel_price_lookup(combined, policy_scenario)

    # Pre-IRA policy_scenario: No Inflation Reduction Act
    preIRA_fuel_price_lookup = build_lookup('No Inflation Reduction Act')

    # IRA-Reference policy_scenario: AEO2023 Reference Case
    iraRef_fuel_price_lookup = build_lookup('AEO2023 Reference Case')

    return preIRA_fuel_price_lookup, iraRef_fuel_price_lookup

In [11]:
def _cambium_interpolate_annual(df_margEmis):
    """Linearly interpolate ``lrmer_co2e_kg_per_MWh`` to 1-year steps per (scenario, state, balancing area)."""
    interpolated_data = []

    grouped = df_margEmis.groupby(['scenario', 'state', 'reeds_balancing_area'])
    for (scenario, state, reeds_balancing_area), group in grouped:
        years = group['year'].values
        values = group['lrmer_co2e_kg_per_MWh'].values

        # Every year between the first and last known year of this group
        new_years = np.arange(years.min(), years.max() + 1)

        if len(years) < 2:
            # interp1d needs at least two points; a single known year is passed through as is.
            new_values = values.astype(float)
        else:
            new_values = interp1d(years, values, kind='linear')(new_years)

        interpolated_data.append(pd.DataFrame({
            'scenario': scenario,
            'state': state,
            'reeds_balancing_area': reeds_balancing_area,
            'year': new_years,
            'lrmer_co2e_kg_per_MWh': new_values
        }))

    return pd.concat(interpolated_data).reset_index(drop=True)


def _cambium_build_lookup(df_margEmis):
    """Return ``{(scenario, state, reeds_balancing_area): {year: value}}`` with values in ton per kWh."""
    # kg per MWh -> ton per MWh (1/1000) -> ton per kWh (1/1000)
    ton_per_kwh = df_margEmis['lrmer_co2e_kg_per_MWh'] * (1/1000) * (1/1000)

    lookup = {}
    rows = zip(
        df_margEmis['scenario'],
        df_margEmis['state'],
        df_margEmis['reeds_balancing_area'],
        df_margEmis['year'],
        ton_per_kwh,
    )
    for scenario, state, reeds_balancing_area, year, co2e_value in rows:
        lookup.setdefault((scenario, state, reeds_balancing_area), {})[year] = co2e_value
    return lookup


def load_cambium_lookup(project_root):
    """Load the three Cambium LRMER tables and return them as lookup dictionaries.

    Files read from ``<project_root>/projections``:
      * ``cambium21_midCase_ba.xlsx`` (sheet 'cambium21_midCase_ba') -- pre-IRA scenario;
        interpolated to yearly values.
      * ``cambium22_allScenarios_ba.xlsx`` (sheet 'cambium22_scenarios_2024_ba') -- 2024
        values; used as provided, without interpolation.
      * ``cambium23_allScenarios_ba.xlsx`` (sheet 'cambium23_allScenarios_ba') -- IRA
        reference scenario for 2025-2050; interpolated to yearly values.

    Each sheet must provide the columns scenario, state, reeds_balancing_area, year and
    lrmer_co2e_kg_per_MWh.

    Args:
        project_root: Folder that contains the ``projections`` directory.

    Returns:
        Tuple ``(emis_preIRA_cambium21_lookup, emis_IRA_2024_cambium22_lookup,
        emis_IRA_2025_2050_cambium23_lookup)``. Each maps
        ``(scenario, state, reeds_balancing_area)`` to ``{year: LRMER}`` where the value is
        the sheet's kg-per-MWh figure divided by 1,000,000 (ton CO2e per kWh).
    """
    projections_dir = os.path.join(project_root, r"projections")

    # CAMBIUM 2021 FOR PRE-IRA SCENARIO
    df_cambium21_margEmis_electricity = pd.read_excel(
        io=os.path.join(projections_dir, 'cambium21_midCase_ba.xlsx'),
        sheet_name='cambium21_midCase_ba',
    )
    emis_preIRA_cambium21_lookup = _cambium_build_lookup(
        _cambium_interpolate_annual(df_cambium21_margEmis_electricity)
    )

    # CAMBIUM 2022 FOR THE 2024 VALUES OF THE IRA SCENARIO (no interpolation)
    df_cambium22_2024_margEmis_electricity = pd.read_excel(
        io=os.path.join(projections_dir, 'cambium22_allScenarios_ba.xlsx'),
        sheet_name='cambium22_scenarios_2024_ba',
    )
    emis_IRA_2024_cambium22_lookup = _cambium_build_lookup(df_cambium22_2024_margEmis_electricity)

    # CAMBIUM 2023 FOR IRA REFERENCE SCENARIO
    df_cambium23_margEmis_electricity = pd.read_excel(
        io=os.path.join(projections_dir, 'cambium23_allScenarios_ba.xlsx'),
        sheet_name='cambium23_allScenarios_ba',
    )
    emis_IRA_2025_2050_cambium23_lookup = _cambium_build_lookup(
        _cambium_interpolate_annual(df_cambium23_margEmis_electricity)
    )

    return emis_preIRA_cambium21_lookup, emis_IRA_2024_cambium22_lookup, emis_IRA_2025_2050_cambium23_lookup

In [12]:
def load_emis_factors():
    """Return the CO2e emission factors for natural gas, propane, and fuel oil.

    Each hard-coded factor is scaled by (1/1000) twice before being returned.
    NOTE(review): the result names say "ton per kWh", so the raw constants
    are presumably kg CO2e per MWh -- confirm units and data source.

    Returns
    -------
    tuple of float
        (natural gas, propane, fuel oil) factors, in that order.
    """
    def _scale(raw_factor):
        # Two separate multiplications, in this order, so the floating-point
        # result is not perturbed by pre-combining the scale factors.
        return raw_factor * (1/1000) * (1/1000)

    natural_gas_factor = _scale(228.5)
    propane_factor = _scale(275.8)
    fuel_oil_factor = _scale(303.9)

    return natural_gas_factor, propane_factor, fuel_oil_factor


In [13]:
def load_scc(cpi_ratio_2023_2020):
    """Return the social cost of carbon scaled by the supplied CPI ratio.

    Parameters
    ----------
    cpi_ratio_2023_2020 : float
        Multiplier applied to the base value; the name indicates the ratio
        CPI(2023) / CPI(2020).

    Returns
    -------
    float
        190 multiplied by ``cpi_ratio_2023_2020``.
    """
    # NOTE(review): 190 is presumably the EPA SCC in 2020 USD per ton of
    # CO2e -- confirm against the source document.
    scc_base_per_ton = 190
    return scc_base_per_ton * cpi_ratio_2023_2020

In [14]:
def load_cpi_data(project_root):
    """Load annual CPI-U values and return CPI(2023) / CPI(base year) ratios.

    Reads sheet 'bls_cpiu' of
    ``<project_root>/inflation_data/bls_cpiu_2005-2023.xlsx`` and uses its
    'Year' and 'Annual' columns.

    Parameters
    ----------
    project_root : path-like
        Directory that contains the ``inflation_data`` folder.

    Returns
    -------
    tuple of float
        Nine ratios ``CPI(2023) / CPI(base_year)``, ordered by base year:
        2023, 2022, 2021, 2020, 2019, 2018, 2013, 2010, 2008.

    Raises
    ------
    ValueError
        From ``Series.item()`` when a required year does not match exactly
        one row of the sheet.
    """
    workbook_path = os.path.join(project_root, "inflation_data", 'bls_cpiu_2005-2023.xlsx')

    # Keep only the two columns needed, under lower-case names.
    sheet = pd.read_excel(workbook_path, sheet_name='bls_cpiu')
    cpi_table = pd.DataFrame({
        'year': sheet['Year'],
        'cpiu_annual': sheet['Annual']
    })

    # Pull the single annual CPI-U value for each year of interest.
    annual_cpi = {}
    for yr in (2008, 2010, 2013, 2018, 2019, 2020, 2021, 2022, 2023):
        matching_rows = cpi_table.loc[cpi_table['year'] == yr, 'cpiu_annual']
        annual_cpi[yr] = matching_rows.item()

    # Ratios are returned from the most recent base year to the oldest.
    # Per the original notes: 2021 is used for EPA VSL, 2020 for SCC,
    # and 2008 for EPA VSL and SCC.
    target_year_cpi = annual_cpi[2023]
    base_years = (2023, 2022, 2021, 2020, 2019, 2018, 2013, 2010, 2008)
    return tuple(target_year_cpi / annual_cpi[base] for base in base_years)

In [16]:
def load_dict_heating_equipment_cost(project_root):
    """Load the heating retrofit cost sheet as a nested lookup dictionary.

    Reads sheet 'heating_costs' of
    ``<project_root>/retrofit_costs/tare_retrofit_costs_cpi.xlsx``.

    Parameters
    ----------
    project_root : path-like
        Directory that contains the ``retrofit_costs`` folder.

    Returns
    -------
    dict
        Maps each ``(technology, efficiency)`` pair to a dict of the
        remaining columns of that row (``{column_name: value}``).
    """
    workbook_path = os.path.join(project_root, "retrofit_costs", "tare_retrofit_costs_cpi.xlsx")

    heating_costs = pd.read_excel(io=workbook_path, sheet_name='heating_costs')

    # Key each row by its (technology, efficiency) pair; every other column
    # becomes an entry of the inner dict.
    keyed_costs = heating_costs.set_index(['technology', 'efficiency'])
    return keyed_costs.to_dict(orient='index')

In [None]:
def load_dict_cooling_equipment_cost(project_root):
    """Load the cooling retrofit cost sheet as a nested lookup dictionary.

    Reads sheet 'cooling_costs' of
    ``<project_root>/retrofit_costs/tare_retrofit_costs_cpi.xlsx``.

    Parameters
    ----------
    project_root : path-like
        Directory that contains the ``retrofit_costs`` folder.

    Returns
    -------
    dict
        Maps each ``(technology, efficiency)`` pair to a dict of the
        remaining columns of that row (``{column_name: value}``).
    """
    workbook_path = os.path.join(project_root, "retrofit_costs", "tare_retrofit_costs_cpi.xlsx")

    cooling_costs = pd.read_excel(io=workbook_path, sheet_name='cooling_costs')

    # Key each row by its (technology, efficiency) pair; every other column
    # becomes an entry of the inner dict.
    keyed_costs = cooling_costs.set_index(['technology', 'efficiency'])
    return keyed_costs.to_dict(orient='index')

In [None]:
def write_TARE_results(df, output_file_path):
    """Write ``df`` to ``output_file_path`` as CSV, creating parent folders.

    Parameters
    ----------
    df : pandas.DataFrame
        Results to write; ``df.to_csv`` is called with its default options.
    output_file_path : str or path-like
        Destination CSV file. Any missing parent directories are created.
        A bare filename (no directory part) writes into the current
        working directory.
    """
    parent_dir = os.path.dirname(output_file_path)

    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create directories when there is a
    # directory component in the path.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(output_file_path)
