In [5]:
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

In [6]:
# Add project root to sys.path for module imports
import sys
import os

# Resolve the directory two levels above the current working directory and put it
# at the front of sys.path (once) so project modules can be imported.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [7]:
from src.utils.data_processing import create_mlp_train_and_test_datasets, create_lr_train_and_test_datasets, process_weather_data
from src.utils.models import MLP, LinearRegressionModel
from src.utils.train_functions import train, evaluate, predict

# Load data

## Load parameter data

In [8]:
# Load the per-building parameter table and key it by building_id
building_parameters = pd.read_csv(
    "/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/building_parameters/building_parameters.csv"
).set_index("building_id")

In [9]:
# Preview the first rows of the parameter table
building_parameters.head()

Unnamed: 0_level_0,indoor_temp_param,consumption_param,ambient_temp_param,direct_solar_radiation_param,ambient_temp_lag_param
building_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,0.988916,0.163115,-0.074399,0.000153,0.083117
122,0.955652,1.843034,-0.022571,0.000265,0.067082
129,0.951854,1.66178,0.196741,-0.000472,-0.139513
150,0.975725,0.863512,-0.194279,0.000443,0.214467
183,0.924559,1.000348,-0.118723,0.000107,0.203062


In [10]:
# NOTE(review): this writes to the same file path the table is read from, with
# building_id emitted as the index column — confirm that overwriting the source CSV
# on every run is intended.
building_parameters.to_csv("/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/building_parameters/building_parameters.csv")

## ResStock data

In [11]:
# Load the processed ResStock table, keyed by bldg_id and ordered by that index
processed_resstock = (
    pd.read_csv("/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/resstock/resstock_processed.csv")
    .set_index("bldg_id")
    .sort_index()
)

# Create datasets

In [15]:
# Build the MLP train/test datasets: ResStock rows are passed as features and the
# per-building parameters as labels. test_size=0.2 is forwarded to the helper
# (presumably the held-out fraction — confirm in src.utils.data_processing).
train_dataset, test_dataset = create_mlp_train_and_test_datasets(
    features=processed_resstock,
    labels=building_parameters,
    test_size=0.2,
)

In [16]:
# Mini-batch size shared by both loaders
batch_size = 16

# Only the training batches are shuffled; the test loader keeps dataset order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define model

In [17]:
# HYPERPARAMETERS
# model hyperparameters
input_size = len(processed_resstock.columns)  # one input per ResStock feature column
hidden_sizes = [64, 128, 128]
output_size = len(building_parameters.columns)  # one output per building parameter column

# training hyperparameters
num_epochs = 60
learning_rate = 0.0004

# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
# On machines without CUDA this selects exactly what the MPS/CPU-only check did.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [None]:
# Instantiate the MLP with the configured layer sizes and move it to the selected device
parameter_prediction_model = MLP(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
).to(device)

In [None]:
# Mean-squared-error loss, optimised with Adam at the configured learning rate
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(parameter_prediction_model.parameters(), lr=learning_rate)

## Train model

In [21]:
# File name for the saved weights; the TensorBoard run directory name is derived from it
model_name = "parameter_prediction_model.pth"

In [None]:
# TensorBoard writer logging to a run directory named after model_name with '.pth' removed
writer = SummaryWriter(log_dir=f"/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/runs/{model_name.replace('.pth', '')}")

In [None]:
# Train the MLP on the training batches for num_epochs epochs; the TensorBoard
# writer is handed to train() (what it logs is defined in src.utils.train_functions).
train(
    model=parameter_prediction_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    device=device,
    writer=writer,
    verbose=True
)

## Evaluate model

In [None]:
# Score the trained model on the test batches using the same loss criterion
eval_loss = evaluate(
    model=parameter_prediction_model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)

In [None]:
# Display the evaluation loss
eval_loss

## Save model

In [22]:
# Directory that holds saved model weights. exist_ok=True creates it (and any missing
# parents) when absent and is a no-op when it already exists, without the
# check-then-create race of a separate os.path.exists() test.
models_path = "/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/models"
os.makedirs(models_path, exist_ok=True)

In [None]:
# Persist only the weights (state_dict) under models_path, then close the TensorBoard writer
torch.save(parameter_prediction_model.state_dict(), os.path.join(models_path, model_name))
writer.close()

## Load model

In [23]:
# Rebuild the same architecture, then restore the saved weights into it.
model = MLP(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint written on one
# backend (e.g. MPS) still loads on a machine where that backend is unavailable.
model.load_state_dict(
    torch.load(os.path.join(models_path, model_name), map_location=device)
)

<All keys matched successfully>

## Predict

In [24]:
# Take the first test item as an (inputs, labels) pair
sequences, labels = test_dataset[0]

In [25]:
# Run the reloaded model on that single test item
predictions = predict(
    model=model,
    sequences=sequences,
    device=device,
)

# Test against Linear Regression models

## Load model parameters

In [26]:
# Two identically configured, bias-free linear models with 5 inputs and 1 output:
# the first will carry the ground-truth parameters, the second the predicted ones.
# The generator builds them in order, each moved to the selected device.
actual_model, predicted_model = (
    LinearRegressionModel(input_size=5, output_size=1, bias=False).to(device)
    for _ in range(2)
)

In [27]:
# Overwrite each linear layer's weights in place: the test labels go into actual_model
# and the MLP's predictions into predicted_model. no_grad() keeps the in-place copies
# out of autograd tracking.
with torch.no_grad():
    actual_model.linear.weight.copy_(labels)
    predicted_model.linear.weight.copy_(predictions)

In [28]:
# Recover which building the sampled test labels belong to: find the single row of
# building_parameters whose values ALL match the label vector.
#
# The comparison is tolerance-based (torch.isclose defaults) and reduced per row.
# Indexing the frame with a 2-D element-wise `==` mask instead makes the result depend
# on how pandas interprets a 2-D boolean array and on exact float equality, and lets a
# row that matches in only one column slip through.
label_row = labels.detach().cpu().reshape(1, -1).to(torch.float64)
parameter_table = torch.as_tensor(building_parameters.to_numpy(dtype="float64"))
row_matches = torch.isclose(parameter_table, label_row).all(dim=1).numpy()
matching_bldg_ids = building_parameters.index[row_matches]
if len(matching_bldg_ids) != 1:
    # Same exception type that Index.item() raises for a non-singleton index,
    # but with a message that says what went wrong.
    raise ValueError(
        f"Expected exactly one building matching the test labels, found {len(matching_bldg_ids)}"
    )
predictions_bldg_id = matching_bldg_ids.item()

## Load data

In [29]:
# Raw ResStock table
resstock = pd.read_parquet(
    "/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/resstock/resstock.parquet"
)

# Weather file, read and passed straight through the project's preprocessing helper
weather = process_weather_data(
    pd.read_csv(
        "/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/weather/G2500170_2018.csv",
    )
)

# Time series for the building identified above; files are named "<bldg_id>-16.parquet"
building_data_path = "/Users/cocoloco/Library/Mobile Documents/com~apple~CloudDocs/Documents/ICAI/4o/AI Lab/data/building_data"
filename = f"{predictions_bldg_id}-16.parquet"
building_data = pd.read_parquet(
    os.path.join(building_data_path, filename)
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  weather.rename(columns={"Dry Bulb Temperature [°C]": "temperature"}, inplace=True)


In [None]:
# Only the first two return values (training data and labels) are kept;
# the remaining two are discarded.
train_data, train_labels, _, _ = create_lr_train_and_test_datasets(
    building_data,
    resstock,
    weather,
    predictions_bldg_id,
)