In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam


In [2]:
from data_preprocessing import process_resstock_data
from dataset import ResStockSetpointPredictionDataset

from models import SetpointPredictionMLPModel

# Load data

### Setpoint data

In [3]:
# Load full heating and cooling timeseries
with open('/Users/adrian/Documents/ICAI/TFG/Space-Conditioning-Electrification/data/consumption/full_heating_timeseries.pkl', 'rb') as f:
    full_heating_timeseries = pickle.load(f)
with open('/Users/adrian/Documents/ICAI/TFG/Space-Conditioning-Electrification/data/consumption/full_cooling_timeseries.pkl', 'rb') as f:
    full_cooling_timeseries = pickle.load(f)

In [4]:
heating_timeseries = full_heating_timeseries[1000] # Builiding 1000
cooling_timeseries = full_cooling_timeseries[1000]

In [5]:
heating_timeseries = pd.DataFrame(heating_timeseries)
cooling_timeseries = pd.DataFrame(cooling_timeseries)

In [6]:
heating_timeseries.head()

Unnamed: 0_level_0,setpoint
timestamp,Unnamed: 1_level_1
2018-01-01 00:00:00,76.0
2018-01-01 01:00:00,76.0
2018-01-01 02:00:00,76.0
2018-01-01 03:00:00,76.0
2018-01-01 04:00:00,76.0


### Weather data

In [7]:
weather_data = pd.read_csv("/Users/adrian/Documents/ICAI/TFG/Space-Conditioning-Electrification/data/consumption/G2500170_2018.csv")

In [8]:
weather_data.head()

Unnamed: 0,date_time,Dry Bulb Temperature [°C],Relative Humidity [%],Wind Speed [m/s],Wind Direction [Deg],Global Horizontal Radiation [W/m2],Direct Normal Radiation [W/m2],Diffuse Horizontal Radiation [W/m2]
0,2018-01-01 01:00:00,-17.2,47.0,7.2,310.0,0.0,0.0,0.0
1,2018-01-01 02:00:00,-17.8,46.83,6.7,300.0,0.0,0.0,0.0
2,2018-01-01 03:00:00,-18.9,55.19,2.1,260.0,0.0,0.0,0.0
3,2018-01-01 04:00:00,-18.9,51.97,4.1,280.0,0.0,0.0,0.0
4,2018-01-01 05:00:00,-19.4,54.51,3.6,280.0,0.0,0.0,0.0


### ResStock

In [9]:
resstock = pd.read_parquet("/Users/adrian/Documents/ICAI/TFG/Space-Conditioning-Electrification/data/consumption/MA_baseline_metadata_and_annual_results.parquet")

In [10]:
resstock.head()

Unnamed: 0_level_0,upgrade,weight,applicability,in.sqft,in.representative_income,in.ahs_region,in.aiannh_area,in.area_median_income,in.ashrae_iecc_climate_zone_2004,in.ashrae_iecc_climate_zone_2004_2_a_split,...,out.emissions.propane.lrmer_high_re_cost_15.co2e_kg,out.emissions.propane.lrmer_low_re_cost_15.co2e_kg,out.emissions.propane.lrmer_mid_case_15.co2e_kg,out.emissions.propane.lrmer_mid_case_25.co2e_kg,out.bills.all_fuels.usd,out.bills.electricity.usd,out.bills.fuel_oil.usd,out.bills.natural_gas.usd,out.bills.propane.usd,out.energy_burden.percentage
bldg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0,252.301639,True,1228,268295.0,"CBSA Boston-Cambridge-Newton, MA-NH",No,150%+,5A,5A,...,0.0,0.0,0.0,0.0,3661.499038,1991.889038,1669.61,0.0,0.0,1.36
122,0,252.301639,True,623,64407.0,"CBSA Boston-Cambridge-Newton, MA-NH",No,80-100%,5A,5A,...,0.0,0.0,0.0,0.0,1911.148724,1216.668724,0.0,694.48,0.0,2.97
129,0,252.301639,True,623,74751.0,"CBSA Boston-Cambridge-Newton, MA-NH",No,80-100%,5A,5A,...,0.0,0.0,0.0,0.0,1920.134683,1032.114683,0.0,888.02,0.0,2.57
150,0,252.301639,True,1138,32204.0,"CBSA Boston-Cambridge-Newton, MA-NH",No,30-60%,5A,5A,...,3193.077096,3193.077096,3193.077096,3193.077096,3095.921472,1478.831472,0.0,174.22,1442.87,9.61
183,0,252.301639,True,1228,55104.0,"CBSA Boston-Cambridge-Newton, MA-NH",No,60-80%,5A,5A,...,0.0,0.0,0.0,0.0,3015.087949,675.007949,0.0,2340.08,0.0,5.47


### Building timeseries

In [11]:
building_data = pd.read_parquet("/Users/adrian/Documents/ICAI/TFG/Space-Conditioning-Electrification/data/consumption/1000-0.parquet")

In [12]:
building_data.head()

Unnamed: 0_level_0,timestamp,out.electricity.ceiling_fan.energy_consumption,out.electricity.ceiling_fan.energy_consumption_intensity,out.electricity.clothes_dryer.energy_consumption,out.electricity.clothes_dryer.energy_consumption_intensity,out.electricity.clothes_washer.energy_consumption,out.electricity.clothes_washer.energy_consumption_intensity,out.electricity.cooling.energy_consumption,out.electricity.cooling.energy_consumption_intensity,out.electricity.cooling_fans_pumps.energy_consumption,...,out.total.lrmer_midcase_15__electricity.co2e_kg,out.total.lrmer_midcase_15__fuel_oil.co2e_kg,out.total.lrmer_midcase_15__natural_gas.co2e_kg,out.total.lrmer_midcase_15__propane.co2e_kg,out.total.lrmer_midcase_15.co2e_kg,out.total.lrmer_midcase_25__electricity.co2e_kg,out.total.lrmer_midcase_25__fuel_oil.co2e_kg,out.total.lrmer_midcase_25__natural_gas.co2e_kg,out.total.lrmer_midcase_25__propane.co2e_kg,out.total.lrmer_midcase_25.co2e_kg
bldg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,2018-01-01 00:15:00,0.001,5.945303e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.016193,0.0,1.06894,0.0,1.085138,0.015404,0.0,1.06894,0.0,1.084344
1000,2018-01-01 00:30:00,0.001,5.945303e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.016166,0.0,1.063561,0.0,1.079731,0.015377,0.0,1.063561,0.0,1.078937
1000,2018-01-01 00:45:00,0.001,5.945303e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01623,0.0,1.076493,0.0,1.092727,0.015436,0.0,1.076493,0.0,1.091933
1000,2018-01-01 01:00:00,0.001,5.945303e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.016375,0.0,1.107155,0.0,1.123535,0.015576,0.0,1.107155,0.0,1.122732
1000,2018-01-01 01:15:00,0.001,5.945303e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.018466,0.0,1.153703,0.0,1.172169,0.017436,0.0,1.153703,0.0,1.171139


# Process ResStock data

In [None]:
columns = [
    "in.bedrooms",              # int
    "in.duct_leakage_and_insulation",    # Separate into leakage / insulation: X% / (Uniinsulated/R-n)
    "in.duct_location",         # Places of the house
    "in.geometry_floor_area",   # 1000-1499, ...
    "in.geometry_stories",      # int
    "in.geometry_wall_type",    # Materials
    "in.ground_thermal_conductivity",   # float
    "in.hvac_has_ducts",    # Yes / No
    "in.insulation_ceiling",    # R-n / Uninsulated
    "in.insulation_floor",      # R-n / Uninsulated
    "in.insulation_foundation_wall",    # Wall R-n, Exterior
    "in.insulation_roof",       # Finished / Unfinished, R-n
    "in.insulation_wall",       # Material, Uninsulated / R-n
    "in.occupants",         # int
    "in.orientation",       # North, Northwest...
    "in.roof_material",     # Material
    "in.sqft",              # int
    "in.windows"    # Single/Double/Triple, Low-E/Clear, Metal/Non-metal, Air/Exterior Clear Storm, (L/M-Gain)
    "in.window_areas",
    "in.vintage",
    "in.doors", # Ver cómo afecta
    "in.door_area", # Ver cómo afecta
]

In [None]:
columns_processed = [
    "in.area_median_income_processed",    # 0-30%, 80-100% ...
    "in.bedrooms_processed",              # int
    "in.duct_leakage_processed",
    "in.duct_insulation_processed",    # Separate into leakage / insulation: X% / (Uniinsulated/R-n)
    "in.duct_location_processed",         # Places of the house
    "in.federal_poverty_level_processed", # 100-150%, 400%+...
    "in.geometry_floor_area_processed",   # 1000-1499, ...
    "in.geometry_stories_processed",      # int
    "in.geometry_wall_type_processed",    # Materials
    "in.ground_thermal_conductivity_processed",   # float
    "in.heating_fuel_processed",      # Fuel type
    "in.hvac_cooling_efficiency_processed",   # Format: AC, (S)EER 10.7
    "in.hvac_has_ducts_processed",    # Yes / No
    "in.hvac_heating_efficiency_processed",   # Format: Heater type, 80% AFUE/EFFICIENCY/HSPF
    "in.income_processed",            # 70000-79999 
    "in.insulation_ceiling_processed",    # R-n / Uninsulated
    "in.insulation_floor_processed",      # R-n / Uninsulated
    "in.insulation_foundation_wall_processed",    # Wall R-n, Exterior
    "in.insulation_roof_processed",       # Finished / Unfinished, R-n
    "in.insulation_wall_processed",       # Material, Uninsulated / R-n
    "in.occupants_processed",         # int
    "in.orientation_processed",       # North, Northwest...
    "in.roof_material_processed",     # Material
    "in.sqft_processed",              # int
    "in.windows_processed",   # Single/Double/Triple, Low-E/Clear, Metal/Non-metal, Air/Exterior Clear Storm, (L/M-Gain)
    
]

In [15]:
resstock["in.heating_setpoint"].value_counts()

in.heating_setpoint
70F    3068
68F    2762
72F    1615
55F    1125
75F     921
67F     780
65F     702
60F     256
76F     150
62F     128
78F     104
80F      96
Name: count, dtype: int64

In [16]:
resstock_processed = process_resstock_data(resstock)

In [17]:
resstock_processed[columns_processed].head()

Unnamed: 0_level_0,in.area_median_income_processed,in.bedrooms_processed,in.duct_leakage_processed,in.duct_insulation_processed,in.duct_location_processed,in.federal_poverty_level_processed,in.geometry_floor_area_processed,in.geometry_stories_processed,in.geometry_wall_type_processed,in.ground_thermal_conductivity_processed,...,in.insulation_ceiling_processed,in.insulation_floor_processed,in.insulation_foundation_wall_processed,in.insulation_roof_processed,in.insulation_wall_processed,in.occupants_processed,in.orientation_processed,in.roof_material_processed,in.sqft_processed,in.windows_processed
bldg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,150,3,10,0,4,400,1749,1,1,1.4,...,5,1,1,0,0,6,1,3,1228,2
122,130,1,0,0,0,400,874,5,1,1.4,...,0,1,1,6,0,1,5,0,623,2
129,130,1,0,0,0,400,874,2,2,1.1,...,0,1,1,5,1,1,4,0,623,1
150,60,2,0,0,0,350,1749,3,1,1.1,...,0,0,0,3,4,1,3,0,1138,2
183,100,3,0,0,0,500,1749,1,1,1.4,...,1,1,1,0,0,2,3,1,1228,2


In [17]:
resstock["in.duct_leakage_and_insulation"].value_counts()

in.duct_leakage_and_insulation
None                                   5340
0% Leakage to Outside, Uninsulated     2834
20% Leakage to Outside, Uninsulated    1163
30% Leakage to Outside, Uninsulated     716
10% Leakage to Outside, Uninsulated     644
20% Leakage to Outside, R-4             330
10% Leakage to Outside, R-4             173
30% Leakage to Outside, R-4             171
20% Leakage to Outside, R-6              82
20% Leakage to Outside, R-8              67
30% Leakage to Outside, R-6              53
10% Leakage to Outside, R-6              51
30% Leakage to Outside, R-8              43
10% Leakage to Outside, R-8              40
Name: count, dtype: int64

# Torch Dataset Creation

In [18]:
resstock_processed[columns_processed].info()

<class 'pandas.core.frame.DataFrame'>
Index: 11707 entries, 10 to 549997
Data columns (total 25 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   in.area_median_income_processed           11707 non-null  int64  
 1   in.bedrooms_processed                     11707 non-null  int64  
 2   in.duct_leakage_processed                 11707 non-null  int64  
 3   in.duct_insulation_processed              11707 non-null  int64  
 4   in.duct_location_processed                11707 non-null  int64  
 5   in.federal_poverty_level_processed        11707 non-null  int64  
 6   in.geometry_floor_area_processed          11707 non-null  int64  
 7   in.geometry_stories_processed             11707 non-null  int64  
 8   in.geometry_wall_type_processed           11707 non-null  int64  
 9   in.ground_thermal_conductivity_processed  11707 non-null  float64
 10  in.heating_fuel_processed            

In [20]:
data_tensor = torch.from_numpy(resstock_processed[columns_processed].values)
heating_labels_tensor = torch.from_numpy(resstock_processed["heating_targets"].values)
cooling_labels_tensor = torch.from_numpy(resstock_processed["cooling_targets"].values)

In [22]:
resstock_heating_dataset = ResStockSetpointPredictionDataset(
    data=data_tensor, 
    labels=heating_labels_tensor
)
resstock_cooling_dataset = ResStockSetpointPredictionDataset(
    data=data_tensor, 
    labels=cooling_labels_tensor
)

In [25]:
batch_size = 32

resstock_heating_loader = DataLoader(
    resstock_heating_dataset, 
    batch_size=batch_size, 
    shuffle=True
)
resstock_cooling_loader = DataLoader(
    resstock_cooling_dataset, 
    batch_size=batch_size, 
    shuffle=True
)

# Model definition

In [30]:
input_size = resstock_heating_dataset[0][0].shape[0]
hidden_size = 64
output_size = 1

heating_model = SetpointPredictionMLPModel(
    input_size=input_size, 
    hidden_size=hidden_size, 
    output_size=output_size
)
cooling_model = SetpointPredictionMLPModel(
    input_size=input_size, 
    hidden_size=hidden_size, 
    output_size=output_size
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
heating_model.to(device)
cooling_model.to(device)

SetpointPredictionMLPModel(
  (fc1): Linear(in_features=25, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
)

In [32]:
# Define loss function and optimizer
criterion = nn.MSELoss()
heating_optimizer = Adam(heating_model.parameters(), lr=0.001)
cooling_optimizer = Adam(cooling_model.parameters(), lr=0.001)

In [33]:
# Heating train loop
num_epochs = 100
heating_losses = []
heating_model.train()

for epoch in range(num_epochs):
    for i, (data, labels) in enumerate(resstock_heating_loader):
        data = data.float().to(device)
        labels = labels.float().to(device)

        # Forward pass
        outputs = heating_model(data)
        loss = criterion(outputs, labels.view(-1, 1))

        # Backward and optimize
        heating_optimizer.zero_grad()
        loss.backward()
        heating_optimizer.step()

        heating_losses.append(loss.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 118.7272
Epoch [20/100], Loss: 318.1083
Epoch [30/100], Loss: 134.5502
Epoch [40/100], Loss: 57.9319
Epoch [50/100], Loss: 115.2597
Epoch [60/100], Loss: 60.7151
Epoch [70/100], Loss: 52.8330
Epoch [80/100], Loss: 95.6553
Epoch [90/100], Loss: 66.2456
Epoch [100/100], Loss: 69.9092
