# Function Definitions

In [110]:
import pandas as pd
from pathlib import Path

def list_available_columns(df):
    print(f"Dimensions of data: {df.shape}")
    print(f'optional_cols = [')
    for col in df.columns.to_list():
        print(f"\t\"{col}\",")
    print("\t]")


def tabularize_natural_gas(nat_gas_pricing_file, nat_gas_pricing_table_file):
    natural_gas_pricing = pd.read_csv(nat_gas_pricing_file)
    # set the column names for the natural gas 
    national_and_cities_columns = set(natural_gas_pricing.columns) - set("Date")  
    dataframes_list = []
    for column in national_and_cities_columns: 
        for state in us_state_to_abbrev.keys():
            if state in column: 
                # print(f"state: {state} column: {column}")
                temp_df = pd.DataFrame()
                temp_df["Month"] = natural_gas_pricing["Month"].to_list()
                temp_df["State"] = us_state_to_abbrev.get(state)
                temp_df['Price of Natural Gas Delivered to Residential Consumers (Dollars per Thousand Cubic Feet)'] = natural_gas_pricing[state].to_list()
                dataframes_list.append(temp_df)
                del temp_df

    natural_gas_pricing = pd.concat(dataframes_list)
    natural_gas_pricing.to_csv(nat_gas_pricing_table_file, index=False)



def take_differences_over_columns(base_data, simulation_results, base_scenario, columns_to_take_difference):
    columns_to_NOT_take_difference = list(set(base_data.columns) - set(columns_to_take_difference))
    differences = []
    simulation_results = simulation_results.copy(deep=True)
    # for each scenario in senarios minus historical 
    for scenario in list(set(simulation_results['Weather Scenario'].unique()) - set([base_scenario])):
        scenario_data = simulation_results[simulation_results['Weather Scenario'] == scenario].copy(deep=True)

        scenario_data = scenario_data.sort_values(by=['bldg_id', 'Year', 'Month']).reset_index(drop=True)
        base_data = base_data.sort_values(by=['bldg_id', 'Year', 'Month']).reset_index(drop=True)

        print(f"scenario_data.iloc[:,1]: \n{scenario_data.iloc[:,1]}")
        print(f"base_data.iloc[:,1]: \n{base_data.iloc[:,1]}")
        print(f"base_data.iloc[:,1] - base_data.iloc[:,1]: \n{base_data.iloc[:,1] - base_data.iloc[:,1]}")
        print(f"scenario_data.iloc[:,1] - base_data.iloc[:,1]: \n{scenario_data.iloc[:,1] - base_data.iloc[:,1]}")
        print(f"scenario_data.iloc[:,1] - scenario_data.iloc[:,1]: \n{scenario_data.iloc[:,1] - scenario_data.iloc[:,1]}")

        scenario_difference = scenario_data.copy(deep=True)
        print(scenario_difference[columns_to_NOT_take_difference])
        scenario_difference[columns_to_take_difference] = scenario_data[columns_to_take_difference] - base_data[columns_to_take_difference]

        # # for each building 
        # for bldg in scenario_data['bldg_id'].unique():
        #     scenario_data = scenario_data[scenario_data['bldg_id'] == bldg]
        #     print(bldg)
        #     for year in scenario_data['Year'].unique():
        #         scenario_data = scenario_data[scenario_data['Year'] == year]

        #         for month in scenario_data['Month'].unique():
        #             scenario_bldg_year_month = scenario_data[scenario_data['Month'] == month]

        # # for each column of data to take difference 
        # for column in set(columns_to_take_difference):
        #     scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == bldg, column] = scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == bldg, column] - base_data.loc[base_data['bldg_id'] == bldg, column]
                        
        #                 # print(f"scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == {bldg}, {column}]: \n{scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == bldg, column]}")
        #                 # print(f"base_data.loc[base_data['bldg_id'] == {bldg}, {column}]: \n{base_data.loc[base_data['bldg_id'] == bldg, column]}")
        #                 # print(f"scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == {bldg}, {column}]: \n{scenario_bldg_year_month.loc[scenario_bldg_year_month['bldg_id'] == bldg, column]}")

        #             counter +=1
        #             # append differences for scenaario (and all building)
        #             differences.append(scenario_bldg_year_month)

        differences.append(scenario_difference)

    print(f"len(differences): {len(differences)}")
    differences = pd.concat(differences)
    return differences


# United States of America Python Dictionary to translate States,
# Districts & Territories to Two-Letter codes and vice versa.
#
# Canonical URL: https://gist.github.com/rogerallen/1583593
#
# Dedicated to the public domain.  To the extent possible under law,
# Roger Allen has waived all copyright and related or neighboring
# rights to this code.  Data originally from Wikipedia at the url:
# https://en.wikipedia.org/wiki/ISO_3166-2:US
#
# Automatically Generated 2021-09-11 18:04:36 via Jupyter Notebook from
# https://gist.github.com/rogerallen/d75440e8e5ea4762374dfd5c1ddf84e0 

us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}
    
# invert the dictionary
abbrev_to_us_state = dict(map(reversed, us_state_to_abbrev.items()))


# Join buildstock metadata onto the simulation results 
### Choose which metadata fields to keep 

In [111]:
# join buildstock metadata onto the simulation results 
buildstock_file = Path("/Users/camilotoruno/Documents/local_research_data/buildings_LA_Detroit/buildstock.csv")
simulation_file = Path("/Users/camilotoruno/Documents/local_research_data/simulations_LA_Detroit/results_summary.csv")

simulation_results = pd.read_csv(simulation_file)
buildstock = pd.read_csv(buildstock_file)

print("Buildstock available columns:")
list_available_columns(buildstock)

Buildstock available columns:
Dimensions of data: (345, 158)
optional_cols = [
	"bldg_id",
	"upgrade",
	"weight",
	"applicability",
	"in.sqft",
	"in.ahs_region",
	"in.ashrae_iecc_climate_zone_2004",
	"in.ashrae_iecc_climate_zone_2004_2_a_split",
	"in.bathroom_spot_vent_hour",
	"in.bedrooms",
	"in.building_america_climate_zone",
	"in.cec_climate_zone",
	"in.ceiling_fan",
	"in.census_division",
	"in.census_division_recs",
	"in.census_region",
	"in.city",
	"in.clothes_dryer",
	"in.clothes_washer",
	"in.clothes_washer_presence",
	"in.cooking_range",
	"in.cooling_setpoint",
	"in.cooling_setpoint_has_offset",
	"in.cooling_setpoint_offset_magnitude",
	"in.cooling_setpoint_offset_period",
	"in.corridor",
	"in.county",
	"in.county_and_puma",
	"in.dehumidifier",
	"in.dishwasher",
	"in.door_area",
	"in.doors",
	"in.ducts",
	"in.eaves",
	"in.electric_vehicle",
	"in.emissions_electricity_folders",
	"in.emissions_electricity_units",
	"in.emissions_electricity_values_or_filepaths",
	"in.emissions_fos

### Keep desired columns of buildstock metadata
Copy and paste the available optional columns below, then delete any columns you don't want saved with the table

In [112]:
buildstock_keep_columns = [
	"bldg_id",
	"in.cec_climate_zone",
	"in.city",
	"in.cooling_setpoint",
	"in.federal_poverty_level",
	"in.heating_fuel",
	"in.heating_setpoint",
	"in.hvac_cooling_efficiency",
	"in.hvac_cooling_type",
	"in.hvac_heating_efficiency",
	"in.hvac_heating_type",
	"in.income",
	"in.state",
	"in.income_recs_2015",
	"in.income_recs_2020",
	"in.iso_rto_region",
	"in.location_region",
	"in.occupants",
	"in.schedules",
	"in.vintage",
	]

buildstock = buildstock[buildstock_keep_columns]

print("Simulations results available columns:")
list_available_columns(simulation_results)

Simulations results available columns:
Dimensions of data: (331200, 38)
optional_cols = [
	"Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)",
	"Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly)",
	"Heating:EnergyTransfer [J](Hourly)",
	"Heating:EnergyTransfer:Zone:CENTRAL AC AND FURNACE AIRLOOP RET AIR ZONE [J](Hourly)",
	"Heating:EnergyTransfer:Zone:LIVING SPACE [J](Hourly)",
	"Heating:NaturalGas [J](Hourly)",
	"Heating:Electricity [J](Hourly)",
	"Cooling:EnergyTransfer [J](Hourly)",
	"Cooling:EnergyTransfer:Zone:CENTRAL AC AND FURNACE AIRLOOP RET AIR ZONE [J](Hourly)",
	"Cooling:EnergyTransfer:Zone:LIVING SPACE [J](Hourly)",
	"Cooling:Electricity [J](Hourly)",
	"Electricity:Facility [J](Hourly)",
	"NaturalGas:Facility [J](Hourly)",
	"Date",
	"Month",
	"Day",
	"bldg_id",
	"Year",
	"Weather Scenario",
	"Cooling:EnergyTransfer:Zone:FURNACE AIRLOOP RET AIR ZONE [J](Hourly)",
	"Heating:EnergyTransfer:Zone:FURNACE AIRLOOP RET AIR ZONE [J](Hourly)",
	"Cooling:Distric

### Choose the columns from the simulation results to keep 
Copy and paste the available optional columns below, then delete any columns you don't want
### Join buildstock onto simulations

In [113]:
optional_cols = [
	"Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)",
	"Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly)",
	"Heating:NaturalGas [J](Hourly)",
	"Heating:Electricity [J](Hourly)",
	"Cooling:Electricity [J](Hourly)",
	"Electricity:Facility [J](Hourly)",
	"NaturalGas:Facility [J](Hourly)",
	"Month",
	"bldg_id",
	"Year",
	"Weather Scenario",
	"Cooling:DistrictCooling [J](Hourly)",
	"Heating:DistrictHeating [J](Hourly)",
	"Heating:Propane [J](Hourly)",
	]

simulation_results = simulation_results[optional_cols]

simulation_results = pd.merge(simulation_results, buildstock, how='left', on="bldg_id", 
                                  left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=None, indicator=False, validate=None)

# Cost Calculations
#### If pricing data for natural gas not yet aggregated for joining onto simulation - tabluarize natural gas data
#### Load natural gas price table 

In [114]:
nat_gas_pricing_source_file = Path("/Volumes/seas-mtcraig/data_sharing/Energy Burdens Under Climate Change/Energy rates/Natural gas/NG_PRI_SUM_A_EPG0_PRS_DMCF_M By (Month).csv")
nat_gas_pricing_output_table_file = Path("/Users/camilotoruno/Documents/local_research_data/simulations_LA_Detroit", "natural_gas_pricing.csv")


# # (If not done yet) Convert source pricing table (with some minor excel modifications) to table for merging on results 
# tabularize_natural_gas(nat_gas_pricing_source_file, nat_gas_pricing_output_table_file)     

# Load natural gas pricing table for merging onto results
natural_gas_pricing = pd.read_csv(nat_gas_pricing_output_table_file)

### Join the pricing data onto simulation results 

In [115]:
# Enter the pricing data files to join onto the simulation results
elec_pricing_file = Path("/Volumes/seas-mtcraig/data_sharing/Energy Burdens Under Climate Change/Energy rates/Electricity/aggregate_data_By_(City,_Month).csv")

propane_pricing_file = Path("")

# Join the electricity pricing on the simulation data 
electricity_pricing = pd.read_csv(elec_pricing_file)
simulation_results['in.city'] = simulation_results['in.city'].str.split(', ').str[1]
simulation_results = pd.merge(simulation_results, electricity_pricing, how='left', left_on=['in.city', 'Month'], right_on=['City', 'Month'], 
                                  left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=None, indicator=False, validate=None)

# Join the natural gas pricing on the simulation data
simulation_results = pd.merge(simulation_results, natural_gas_pricing, how='left', left_on=['in.state', 'Month'], right_on=['State', 'Month'], 
                                  left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=None, indicator=False, validate=None)

# # Join the propoane pricing on the simulation data 
# propane_pricing = pd.read_csv(propane_pricing_file)
# simulation_results = pd.merge(simulation_results, propane_pricing, how='left', left_on=['in.city', 'Month'], right_on=['City', 'Month'], 
#                                   left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=None, indicator=False, validate=None)

### Unit Conversions

In [116]:
# Unit coversions 
j_to_kwh = 36000 # Joule / kWh electricity
Gj_to_Mcf = 1.0551 # https://www.naturalgasintel.com/natural-gas-converter/

j_to_Mcf = 1e9 * Gj_to_Mcf

# Electricity 
simulation_results['Cooling:Electricity [kWh](Monthly)'] = simulation_results['Cooling:Electricity [J](Hourly)'] / j_to_kwh
simulation_results['Heating:Electricity [kWh](Monthly)'] = simulation_results['Heating:Electricity [J](Hourly)'] / j_to_kwh

# Natural Gas
simulation_results['Heating:NaturalGas [Mcf](Monthly)'] = simulation_results['Heating:NaturalGas [J](Hourly)'] / j_to_Mcf

# Propane

# Fuel oil 

### Cost Calculuations

In [117]:
# Electricity
simulation_results['Cost Cooling:Electricity [$](Monthly)'] = simulation_results['Cooling:Electricity [kWh](Monthly)'] * simulation_results["Mean(Price Electricity ($/kWh))"]
simulation_results['Cost Heating:Electricity [$](Monthly)'] = simulation_results['Heating:Electricity [kWh](Monthly)'] * simulation_results["Mean(Price Electricity ($/kWh))"]

# Natural gas 
simulation_results['Cost Heating:NaturalGas [$](Monthly)'] = simulation_results['Heating:NaturalGas [Mcf](Monthly)'] * simulation_results["Price of Natural Gas Delivered to Residential Consumers (Dollars per Thousand Cubic Feet)"]

# Propane

# Fuel oil 


# Take differences across scenarios
### Define which columns to take difference over
Copy and paste the available optional columns below, then delete any columns you don't want differenced with the base scenario
### Take differences over those columns between scenarios 

In [118]:
list_available_columns(simulation_results)

Dimensions of data: (331200, 44)
optional_cols = [
	"Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)",
	"Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly)",
	"Heating:NaturalGas [J](Hourly)",
	"Heating:Electricity [J](Hourly)",
	"Cooling:Electricity [J](Hourly)",
	"Electricity:Facility [J](Hourly)",
	"NaturalGas:Facility [J](Hourly)",
	"Month",
	"bldg_id",
	"Year",
	"Weather Scenario",
	"Cooling:DistrictCooling [J](Hourly)",
	"Heating:DistrictHeating [J](Hourly)",
	"Heating:Propane [J](Hourly)",
	"in.cec_climate_zone",
	"in.city",
	"in.cooling_setpoint",
	"in.federal_poverty_level",
	"in.heating_fuel",
	"in.heating_setpoint",
	"in.hvac_cooling_efficiency",
	"in.hvac_cooling_type",
	"in.hvac_heating_efficiency",
	"in.hvac_heating_type",
	"in.income",
	"in.state",
	"in.income_recs_2015",
	"in.income_recs_2020",
	"in.iso_rto_region",
	"in.location_region",
	"in.occupants",
	"in.schedules",
	"in.vintage",
	"City",
	"N Rows",
	"Mean(Price Electricity ($/kWh))",
	"Sta

In [119]:
columns_to_take_difference = [
    "Environment:Site Outdoor Air Drybulb Temperature [C](Hourly)",
	"Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly)",
	"Electricity:Facility [J](Hourly)",
	"NaturalGas:Facility [J](Hourly)",
	"Cooling:DistrictCooling [J](Hourly)",
	"Heating:DistrictHeating [J](Hourly)",
	"Heating:Propane [J](Hourly)",
	"Cooling:Electricity [kWh](Monthly)",
	"Heating:Electricity [kWh](Monthly)",
	"Heating:NaturalGas [Mcf](Monthly)",
	"Cost Cooling:Electricity [$](Monthly)",
	"Cost Heating:Electricity [$](Monthly)",
	"Cost Heating:NaturalGas [$](Monthly)",
]
base_scenario = "historical_1980-2020"

# print(base_scenario[columns_to_take_difference].subtract(base_scenario[columns_to_take_difference], fill_value = 0 ))

# df1 = pd.DataFrame([1, 2, 3])

# print(df1.subtract(df1))
simulation_results[columns_to_take_difference] = simulation_results[columns_to_take_difference].astype(float)
base_scenario_data = simulation_results[simulation_results['Weather Scenario'] == base_scenario].copy(deep=True)
differences = take_differences_over_columns(base_scenario_data, simulation_results, base_scenario, columns_to_take_difference)


scenario_data.iloc[:,1]: 
0         11.602538
1         12.131902
2         11.676407
3         12.504233
4         13.445647
            ...    
165595    18.330496
165596    18.503205
165597    15.076788
165598    14.175961
165599    12.899745
Name: Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly), Length: 165600, dtype: float64
base_data.iloc[:,1]: 
0         10.927370
1         11.555815
2         10.971812
3         11.844596
4         12.440647
            ...    
165595    17.710322
165596    17.616457
165597    14.145188
165598    12.717995
165599    11.339611
Name: Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly), Length: 165600, dtype: float64
base_data.iloc[:,1] - base_data.iloc[:,1]: 
0         0.0
1         0.0
2         0.0
3         0.0
4         0.0
         ... 
165595    0.0
165596    0.0
165597    0.0
165598    0.0
165599    0.0
Name: Environment:Site Outdoor Air Wetbulb Temperature [C](Hourly), Length: 165600, dtype: float64
scenario_data.ilo

### Save data for analysis

In [120]:
output_folder = Path("/Users/camilotoruno/Documents/local_research_data/simulations_LA_Detroit")
simulation_results.to_csv(Path(output_folder, "simulations_costs.csv"), index=False)
differences.to_csv(Path(output_folder, "differences.csv"), index=False)