# Function Definitions

In [349]:
import pandas as pd
import numpy as np
from pathlib import Path

def list_available_columns(df):
    """Print the shape of ``df`` followed by its column names as a pasteable list.

    The column names are printed one per line, quoted and comma-terminated,
    between ``optional_cols = [`` and a closing bracket, so the output can be
    copied straight into a code cell and trimmed by hand.

    Parameters
    ----------
    df : object with ``shape`` and ``columns.to_list()`` (e.g. a pandas DataFrame)

    Returns
    -------
    None
    """
    report_lines = [f"Dimensions of data: {df.shape}", "optional_cols = ["]
    report_lines.extend(f"\t\"{name}\"," for name in df.columns.to_list())
    report_lines.append("\t]")
    print("\n".join(report_lines))


def take_differences_over_columns(base_data, simulation_results, base_scenario, columns_to_take_difference):
    """Subtract the base-scenario values from every other weather scenario.

    For each value of ``'Weather Scenario'`` other than ``base_scenario``, the
    scenario rows and the base rows are both sorted by
    ``['bldg_id', 'Year', 'Month']``, re-indexed from zero, and the columns in
    ``columns_to_take_difference`` are replaced by ``scenario - base``.
    All other columns keep the scenario's own values.

    Rows are paired by position after sorting (pandas aligns on the fresh
    0..n-1 index), so a scenario is expected to have the same number of rows
    as the base data; positions without a counterpart come out as NaN.

    Parameters
    ----------
    base_data : pandas.DataFrame
        Rows of the base scenario. Not modified.
    simulation_results : pandas.DataFrame
        Rows of all scenarios, with a ``'Weather Scenario'`` column. Not modified.
    base_scenario : str
        The scenario label to exclude from the output.
    columns_to_take_difference : list of str
        Numeric columns to difference against the base scenario.

    Returns
    -------
    pandas.DataFrame
        The per-scenario difference frames concatenated in order of each
        scenario's first appearance in ``simulation_results``. If there is no
        scenario other than ``base_scenario``, an empty frame with the columns
        of ``simulation_results`` is returned.
    """
    sort_keys = ['bldg_id', 'Year', 'Month']

    # The sorted baseline is the same for every scenario, so build it once.
    base_sorted = base_data.sort_values(by=sort_keys).reset_index(drop=True)

    # unique() keeps first-appearance order, which makes the output row order
    # reproducible (a set difference would not).
    scenarios = [
        scenario
        for scenario in simulation_results['Weather Scenario'].unique()
        if scenario != base_scenario
    ]

    differences = []
    # for each scenario in scenarios minus the base scenario
    for scenario in scenarios:
        scenario_rows = simulation_results[simulation_results['Weather Scenario'] == scenario]
        scenario_sorted = scenario_rows.sort_values(by=sort_keys).reset_index(drop=True)
        scenario_difference = scenario_sorted.copy(deep=True)
        scenario_difference[columns_to_take_difference] = (
            scenario_sorted[columns_to_take_difference] - base_sorted[columns_to_take_difference]
        )
        differences.append(scenario_difference)

    if not differences:
        # pd.concat raises on an empty list; return an empty frame instead.
        return simulation_results.iloc[0:0].copy()

    return pd.concat(differences)

# Join buildstock metadata onto the simulation results 
### Choose which metadata fields to keep 

In [350]:
# Locations of the buildstock metadata and of the simulation results summary.
buildstock_file = Path("/Users/camilotoruno/Documents/local_research_data/buildings_LA_Detroit/buildstock.csv")
simulation_file = Path("/Users/camilotoruno/Documents/local_research_data/simulations_five_cities/results_summary.csv")

# Load both tables; the metadata is joined onto the simulation results further down.
buildstock = pd.read_csv(buildstock_file)
simulation_results = pd.read_csv(simulation_file)

# Show every metadata column so the ones worth keeping can be chosen.
print("Buildstock available columns:")
list_available_columns(buildstock)

Buildstock available columns:
Dimensions of data: (739, 158)
optional_cols = [
	"bldg_id",
	"upgrade",
	"weight",
	"applicability",
	"in.sqft",
	"in.ahs_region",
	"in.ashrae_iecc_climate_zone_2004",
	"in.ashrae_iecc_climate_zone_2004_2_a_split",
	"in.bathroom_spot_vent_hour",
	"in.bedrooms",
	"in.building_america_climate_zone",
	"in.cec_climate_zone",
	"in.ceiling_fan",
	"in.census_division",
	"in.census_division_recs",
	"in.census_region",
	"in.city",
	"in.clothes_dryer",
	"in.clothes_washer",
	"in.clothes_washer_presence",
	"in.cooking_range",
	"in.cooling_setpoint",
	"in.cooling_setpoint_has_offset",
	"in.cooling_setpoint_offset_magnitude",
	"in.cooling_setpoint_offset_period",
	"in.corridor",
	"in.county",
	"in.county_and_puma",
	"in.dehumidifier",
	"in.dishwasher",
	"in.door_area",
	"in.doors",
	"in.ducts",
	"in.eaves",
	"in.electric_vehicle",
	"in.emissions_electricity_folders",
	"in.emissions_electricity_units",
	"in.emissions_electricity_values_or_filepaths",
	"in.emissions_fos

### Keep desired columns of buildstock metadata
Copy and paste the available optional columns below, then delete any columns you don't want saved with the table

In [351]:
# Metadata fields carried along with the simulation results ("bldg_id" is the join key).
buildstock_keep_columns = [
    "bldg_id",
    "in.cec_climate_zone",
    "in.city",
    "in.cooling_setpoint",
    "in.federal_poverty_level",
    "in.heating_fuel",
    "in.heating_setpoint",
    "in.hvac_cooling_efficiency",
    "in.hvac_cooling_type",
    "in.hvac_heating_efficiency",
    "in.hvac_heating_type",
    "in.income",
    "in.state",
    "in.income_recs_2015",
    "in.income_recs_2020",
    "in.iso_rto_region",
    "in.location_region",
    "in.occupants",
    "in.schedules",
    "in.vintage",
]

# Keep only the chosen metadata columns, in the order listed above.
buildstock = buildstock.loc[:, buildstock_keep_columns]

# Show the simulation result columns so the ones worth keeping can be chosen next.
print("Simulations results available columns:")
list_available_columns(simulation_results)

Simulations results available columns:
Dimensions of data: (1005408, 44)
optional_cols = [
	"Month",
	"Environment:Site Outdoor Air Drybulb Temperature [C](Monthly)",
	"Environment:Site Outdoor Air Wetbulb Temperature [C](Monthly)",
	"Heating:EnergyTransfer [J](Monthly)",
	"Heating:EnergyTransfer:Zone:CENTRAL AC AND FURNACE AIRLOOP RET AIR ZONE [J](Monthly)",
	"Heating:EnergyTransfer:Zone:LIVING SPACE [J](Monthly)",
	"Heating:NaturalGas [J](Monthly)",
	"Heating:Electricity [J](Monthly)",
	"Cooling:EnergyTransfer [J](Monthly)",
	"Cooling:EnergyTransfer:Zone:CENTRAL AC AND FURNACE AIRLOOP RET AIR ZONE [J](Monthly)",
	"Cooling:EnergyTransfer:Zone:LIVING SPACE [J](Monthly)",
	"Cooling:Electricity [J](Monthly)",
	"Electricity:Facility [J](Monthly)",
	"NaturalGas:Facility [J](Monthly)",
	"ElectricityPurchased:Facility [J](Monthly)",
	"ElectricitySurplusSold:Facility [J](Monthly)",
	"ElectricityNet:Facility [J](Monthly)",
	"bldg_id",
	"Year",
	"Weather Scenario",
	"DistrictHeating:Facility [J

### Choose the columns from the simulation results to keep 
Copy and paste the available optional columns below, then delete any columns you don't want. You can also reorder the list to change the column order of the table.

In [352]:
import calendar

# Columns kept from the simulation results, in the order they will appear in the table.
optional_cols = [
    "bldg_id",
    "Year",
    "Month",
    "Weather Scenario",

    "ElectricityPurchased:Facility [J](Monthly)",
    "ElectricitySurplusSold:Facility [J](Monthly)",
    "ElectricityNet:Facility [J](Monthly)",

    "Heating:NaturalGas [J](Monthly)",
    "Heating:Electricity [J](Monthly)",
    "Heating:DistrictHeating [J](Monthly)",
    "Heating:Propane [J](Monthly)",
    "Heating:FuelOilNo2 [J](Monthly)",

    "Cooling:Electricity [J](Monthly)",
    "Cooling:DistrictCooling [J](Monthly)",
]

# .copy() makes the subset an independent frame, so the 'Month' assignment
# below does not write into a slice of the original results.
simulation_results = simulation_results[optional_cols].copy()

# create month name to number dictionary ("January" -> 1, ..., "December" -> 12)
month_dictionary = {month: index for index, month in enumerate(calendar.month_name) if month}

# Only clean 'Month' while it still holds text. Once it has been mapped to
# numbers the .str accessor would raise, so this guard keeps the cell re-runnable.
if not pd.api.types.is_numeric_dtype(simulation_results['Month']):
    month_names = simulation_results['Month'].str.strip()   # strip empty space from month strings
    # Map names to numbers only if at least one value is a recognised month name.
    if month_names.isin(list(month_dictionary)).any():
        month_names = month_names.map(month_dictionary)
    simulation_results['Month'] = month_names



### Join buildstock onto simulations

In [353]:
# Left-join the buildstock metadata onto every simulation row, keyed on the building id.
rows_before_buildstock_join = len(simulation_results)
simulation_results = pd.merge(simulation_results, buildstock, how='left', on="bldg_id")

# A left join only gains rows when the right-hand table repeats a key, which
# would silently duplicate simulation results; fail loudly instead.
assert len(simulation_results) == rows_before_buildstock_join, (
    "buildstock contains duplicate bldg_id values: the join multiplied simulation rows"
)

# Cost Calculations
#### If the pricing data has not yet been averaged and tabulated for joining onto the simulation results, do that first using pricing_data_cleaning.ipynb
#### Load data

In [354]:
# Root folder holding one sub-folder of pricing tables per fuel.
pricing_directory = Path("/Volumes/seas-mtcraig/data_sharing/Energy Burdens Under Climate Change/Energy rates")

# Pricing table of each fuel, relative to the root folder.
nat_gas_pricing_output_table_file = pricing_directory / "Natural gas/natural_gas_pricing.csv"
fuel_oil_no2_pricing_file = pricing_directory / "Oil/Monthly_No._2_Heating_Oil_Residential_Price_interpolated.csv"
elec_pricing_file = pricing_directory / "Electricity/aggregate_data_By_(City,_Month).csv"
propane_pricing_file = pricing_directory / "Propane/Monthly_Propane_Price_Interpolated.csv"

# Load every pricing table into its own frame.
natural_gas_pricing = pd.read_csv(nat_gas_pricing_output_table_file)
fuel_oil_no2_pricing = pd.read_csv(fuel_oil_no2_pricing_file)
electricity_pricing = pd.read_csv(elec_pricing_file)
propane_pricing = pd.read_csv(propane_pricing_file)

### Join the pricing data onto simulation results 

In [355]:
# Keep only the part of 'in.city' after the ", " separator so it can be matched
# against the 'City' column of the electricity pricing table.
simulation_results['in.city'] = simulation_results['in.city'].str.split(', ').str[1]

# (pricing table, key columns in simulation_results, key columns in the pricing table)
# The order matters: it fixes the column order and the '_x'/'_y' suffixes of the result.
pricing_joins = [
    # Join the electricity pricing on the simulation data
    (electricity_pricing, ['in.city', 'Month'], ['City', 'Month']),
    # Join the natural gas pricing on the simulation data
    (natural_gas_pricing, ['in.state', 'Month'], ['State', 'Month']),
    # Join the fuel oil no. 2 pricing on the simulation data
    (fuel_oil_no2_pricing, ['Month'], ['Month']),
    # Join the propane pricing on the simulation data
    (propane_pricing, ['in.state', 'Month'], ['State', 'Month']),
]

rows_before_pricing_joins = len(simulation_results)
for pricing_table, simulation_keys, pricing_keys in pricing_joins:
    simulation_results = pd.merge(simulation_results, pricing_table, how='left',
                                  left_on=simulation_keys, right_on=pricing_keys)
    # A left join only gains rows when the pricing table repeats a key, which
    # would silently duplicate simulation results; fail loudly instead.
    assert len(simulation_results) == rows_before_pricing_joins, (
        f"pricing table joined on {pricing_keys} has duplicate keys: the join multiplied simulation rows"
    )

### Unit Conversions

In [356]:
# Unit conversions
# Every constant below is used as a divisor: joules per one unit of the target quantity.

# Electricity: joules per kWh  https://www.rapidtables.com/convert/energy/Joule_to_kWh.html
j_to_kwh = 3600000

# Natural gas: joules per thousand cubic feet (Mcf)  https://www.naturalgasintel.com/natural-gas-converter/
# NOTE(review): 1.0551 is also the GJ-per-MMBtu factor; confirm the source really gives GJ per Mcf.
Gj_to_Mcf = 1.0551
j_to_Mcf = 1e9 * Gj_to_Mcf

# Propane: joules per gallon, from Btu per gallon  https://www.eia.gov/energyexplained/units-and-calculators/
propane_btu_per_gallon = 91452   # Btu
j_per_btu = 1055.05585262        # J / BTU
j_per_gallon_propane = propane_btu_per_gallon * j_per_btu

# Number 2 fuel oil: joules per gallon
# https://www.convertunits.com/from/gallon+[U.S.]+of+distillate+no.+2+fuel+oil/to/joule
j_per_gallon_no2_fuel_oil = 146520000

# (new column, source column in joules, joules per target unit) — applied in this order
unit_conversions = [
    # Electricity
    ('Cooling:Electricity [kWh](Monthly)', 'Cooling:Electricity [J](Monthly)', j_to_kwh),
    ('Heating:Electricity [kWh](Monthly)', 'Heating:Electricity [J](Monthly)', j_to_kwh),
    ('ElectricityPurchased:Facility [kWh](Monthly)', 'ElectricityPurchased:Facility [J](Monthly)', j_to_kwh),
    # Natural Gas
    ('Heating:NaturalGas [Mcf](Monthly)', 'Heating:NaturalGas [J](Monthly)', j_to_Mcf),
    # Propane
    ('Heating:Propane [Gal](Monthly)', 'Heating:Propane [J](Monthly)', j_per_gallon_propane),
    # Fuel oil
    ('Heating:FuelOilNo2 [Gal](Monthly)', 'Heating:FuelOilNo2 [J](Monthly)', j_per_gallon_no2_fuel_oil),
]

for converted_column, joule_column, joules_per_unit in unit_conversions:
    simulation_results[converted_column] = simulation_results[joule_column] / joules_per_unit

### Cost Calculations

In [357]:
# Monthly cost of each energy use = consumed quantity * unit price joined from the pricing tables.
# (new cost column, quantity column, unit price column) — applied in this order
cost_definitions = [
    # Electricity
    ('Cost Cooling:Electricity [$](Monthly)',
     'Cooling:Electricity [kWh](Monthly)',
     "Mean(Price Electricity ($/kWh))"),
    ('Cost Heating:Electricity [$](Monthly)',
     'Heating:Electricity [kWh](Monthly)',
     "Mean(Price Electricity ($/kWh))"),
    ('Cost ElectricityPurchased:Facility [$](Monthly)',
     'ElectricityPurchased:Facility [kWh](Monthly)',
     "Mean(Price Electricity ($/kWh))"),
    # Natural gas
    ('Cost Heating:NaturalGas [$](Monthly)',
     'Heating:NaturalGas [Mcf](Monthly)',
     "Price of Natural Gas Delivered to Residential Consumers (Dollars per Thousand Cubic Feet)"),
    # Propane
    ('Cost Heating:Propane [$](Monthly)',
     'Heating:Propane [Gal](Monthly)',
     'Monthly U.S. Propane Residential Price (Dollars per Gallon)'),
    # Fuel oil
    ('Cost Heating:FuelOilNo2 [$](Monthly)',
     'Heating:FuelOilNo2 [Gal](Monthly)',
     'Monthly No. 2 Heating Oil Residential Price Dollars per Gallon'),
]

for cost_column, quantity_column, price_column in cost_definitions:
    simulation_results[cost_column] = simulation_results[quantity_column] * simulation_results[price_column]

#### Define which columns to sum for total heating and cooling costs 

In [358]:
# Print the table's shape and column names so the cost columns to sum can be picked in the next cell.
list_available_columns(simulation_results)

Dimensions of data: (1005408, 53)
optional_cols = [
	"bldg_id",
	"Year",
	"Month",
	"Weather Scenario",
	"ElectricityPurchased:Facility [J](Monthly)",
	"ElectricitySurplusSold:Facility [J](Monthly)",
	"ElectricityNet:Facility [J](Monthly)",
	"Heating:NaturalGas [J](Monthly)",
	"Heating:Electricity [J](Monthly)",
	"Heating:DistrictHeating [J](Monthly)",
	"Heating:Propane [J](Monthly)",
	"Heating:FuelOilNo2 [J](Monthly)",
	"Cooling:Electricity [J](Monthly)",
	"Cooling:DistrictCooling [J](Monthly)",
	"in.cec_climate_zone",
	"in.city",
	"in.cooling_setpoint",
	"in.federal_poverty_level",
	"in.heating_fuel",
	"in.heating_setpoint",
	"in.hvac_cooling_efficiency",
	"in.hvac_cooling_type",
	"in.hvac_heating_efficiency",
	"in.hvac_heating_type",
	"in.income",
	"in.state",
	"in.income_recs_2015",
	"in.income_recs_2020",
	"in.iso_rto_region",
	"in.location_region",
	"in.occupants",
	"in.schedules",
	"in.vintage",
	"City",
	"N Rows",
	"Mean(Price Electricity ($/kWh))",
	"State_x",
	"Price of Natur

In [359]:
# Cost columns that make up the heating total.
heating_cols = [
    "Cost Heating:Electricity [$](Monthly)",
    "Cost Heating:NaturalGas [$](Monthly)",
    "Cost Heating:Propane [$](Monthly)",
    "Cost Heating:FuelOilNo2 [$](Monthly)",
]

# Cost columns that make up the cooling total.
cooling_cols = [
    "Cost Cooling:Electricity [$](Monthly)",
]

# replace NaN values with zero and sum all heating / cooling columns row by row
simulation_results['Total Cost Heating [$](Monthly)'] = simulation_results[heating_cols].fillna(0).sum(axis=1)
simulation_results['Total Cost Cooling [$](Monthly)'] = simulation_results[cooling_cols].fillna(0).sum(axis=1)

# Space conditioning = heating + cooling.
total_cost_columns = ['Total Cost Heating [$](Monthly)', 'Total Cost Cooling [$](Monthly)']
simulation_results['Total Cost Space Conditioning [$](Monthly)'] = simulation_results[total_cost_columns].sum(axis=1)

# TODO: open question — why do so many months have zero total space conditioning cost?
# The value printed below is the share of rows whose total cost is greater than zero.
# NOTE(review): fillna(0) above turns a missing component cost (for example a price
# that found no match in the left joins) into a zero cost, which is one possible
# cause — verify by counting NaNs in the individual cost columns.
positive_cost_share = (simulation_results['Total Cost Space Conditioning [$](Monthly)'] > 0).mean()
print(float(positive_cost_share))

0.33854017473503295


### Energy Burdens Calculations
Also Calculate More Granular Income

In [360]:
# Convert annual income bins to ranges
# Split the column on the delimiter - or < or >, replace empty cells with 0, and cast as integer
# NOTE(review): a bin written as "<N" or ">N" both parse to low=0, high=N — confirm
# that is the intended reading for an open-ended upper bin.
simulation_results[["Income - Low [Annual]", "Income - High [Annual]"]] = simulation_results["in.income"].str.split(expand=True, pat='[-<>]').replace('', 0).astype(int)

# Monthly income bounds = annual income bounds / 12
simulation_results[["Income - Low [Monthly]", "Income - High [Monthly]"]] = simulation_results[["Income - Low [Annual]", "Income - High [Annual]"]] / 12

# Energy burden = total monthly space conditioning cost / monthly income, computed
# once against the low end of the income bin and once against the high end.
simulation_results["Energy Burden - Low [Monthly]"] = simulation_results['Total Cost Space Conditioning [$](Monthly)'] / simulation_results["Income - Low [Monthly]"]
simulation_results["Energy Burden - High [Monthly]"] = simulation_results['Total Cost Space Conditioning [$](Monthly)'] / simulation_results["Income - High [Monthly]"]

# A zero income bound makes the burden infinite. Mark those values as missing (NaN)
# rather than as an empty string: the columns stay numeric, and NaN is still
# written as an empty field by to_csv.
simulation_results = simulation_results.replace([np.inf, -np.inf], np.nan)

### Calculate More Granular Income
# Normalize income by number of residents
# NOTE(review): this division assumes "in.occupants" is numeric — confirm against the buildstock file.
simulation_results["Income per Occupant -  High [Annual]"] = simulation_results["Income - High [Annual]"] / simulation_results["in.occupants"]

# Take differences across scenarios
### Define which columns to take difference over
Copy and paste the available optional columns below, then delete any columns you don't want differenced with the base scenario
### Take differences over those columns between scenarios 

In [361]:
# Print the table's shape and column names so the columns to difference against the base scenario can be picked below.
list_available_columns(simulation_results)

Dimensions of data: (1005408, 63)
optional_cols = [
	"bldg_id",
	"Year",
	"Month",
	"Weather Scenario",
	"ElectricityPurchased:Facility [J](Monthly)",
	"ElectricitySurplusSold:Facility [J](Monthly)",
	"ElectricityNet:Facility [J](Monthly)",
	"Heating:NaturalGas [J](Monthly)",
	"Heating:Electricity [J](Monthly)",
	"Heating:DistrictHeating [J](Monthly)",
	"Heating:Propane [J](Monthly)",
	"Heating:FuelOilNo2 [J](Monthly)",
	"Cooling:Electricity [J](Monthly)",
	"Cooling:DistrictCooling [J](Monthly)",
	"in.cec_climate_zone",
	"in.city",
	"in.cooling_setpoint",
	"in.federal_poverty_level",
	"in.heating_fuel",
	"in.heating_setpoint",
	"in.hvac_cooling_efficiency",
	"in.hvac_cooling_type",
	"in.hvac_heating_efficiency",
	"in.hvac_heating_type",
	"in.income",
	"in.state",
	"in.income_recs_2015",
	"in.income_recs_2020",
	"in.iso_rto_region",
	"in.location_region",
	"in.occupants",
	"in.schedules",
	"in.vintage",
	"City",
	"N Rows",
	"Mean(Price Electricity ($/kWh))",
	"State_x",
	"Price of Natur

In [362]:
# Numeric columns whose values are differenced against the base scenario.
columns_to_take_difference = [
    "ElectricityPurchased:Facility [J](Monthly)",
    "ElectricitySurplusSold:Facility [J](Monthly)",
    "ElectricityNet:Facility [J](Monthly)",
    "Heating:NaturalGas [J](Monthly)",
    "Heating:Electricity [J](Monthly)",
    "Heating:DistrictHeating [J](Monthly)",
    "Heating:Propane [J](Monthly)",
    "Heating:FuelOilNo2 [J](Monthly)",
    "Cooling:Electricity [J](Monthly)",
    "Cooling:DistrictCooling [J](Monthly)",
    "Cooling:Electricity [kWh](Monthly)",
    "Heating:Electricity [kWh](Monthly)",
    "ElectricityPurchased:Facility [kWh](Monthly)",
    "Heating:NaturalGas [Mcf](Monthly)",
    "Heating:Propane [Gal](Monthly)",
    "Heating:FuelOilNo2 [Gal](Monthly)",
    "Cost Cooling:Electricity [$](Monthly)",
    "Cost Heating:Electricity [$](Monthly)",
    "Cost ElectricityPurchased:Facility [$](Monthly)",
    "Cost Heating:NaturalGas [$](Monthly)",
    "Cost Heating:Propane [$](Monthly)",
    "Cost Heating:FuelOilNo2 [$](Monthly)",
    "Total Cost Heating [$](Monthly)",
    "Total Cost Cooling [$](Monthly)",
    "Total Cost Space Conditioning [$](Monthly)",
    "Energy Burden - Low [Monthly]",
    "Energy Burden - High [Monthly]",
]

# Scenario every other weather scenario is compared against.
base_scenario = "historical_1980-2020"

# Make sure the columns are numeric before subtracting. An empty-string placeholder
# (written where a value was infinite) becomes NaN — i.e. "undefined" — rather than 0,
# so an undefined energy burden is not reported as a zero burden.
simulation_results[columns_to_take_difference] = simulation_results[columns_to_take_difference].replace('', np.nan).astype(float)

# Difference every non-base scenario against the base scenario rows.
base_scenario_data = simulation_results[simulation_results['Weather Scenario'] == base_scenario].copy(deep=True)
differences = take_differences_over_columns(base_scenario_data, simulation_results, base_scenario, columns_to_take_difference)


### Save data for analysis

In [363]:
# Folder that receives both output tables.
output_folder = Path("/Users/camilotoruno/Documents/local_research_data/simulations_five_cities")

# Write the costed simulation results and the scenario differences, without the index column.
simulations_costs_file = output_folder / "simulations_costs.csv"
differences_file = output_folder / "differences.csv"
simulation_results.to_csv(simulations_costs_file, index=False)
differences.to_csv(differences_file, index=False)