# Import Libraries

In [191]:
#Import libraries

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Define custom dataset class

In [192]:
class RetrofitDataset(Dataset):
    def __init__(self, data, targets):
        # Convert data and target into PyTorch tensors and store them
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = [torch.tensor(t, dtype=torch.float32) for t in targets]

    def __len__(self):
        # Return the size of the dataset
        return len(self.data)

    def __getitem__(self, idx):
        # Get the feature vector and corresponding targets for a given index
        x = self.data[idx]
        y = [t[idx] for t in self.targets]
        return x, y


# Load and preprocess data

In [193]:
## With the new input file given by Amin, these are the values: 
# windows_U_Factor
# groundfloor_thermal_resistance
# ext_walls_thermal_resistance
# roof_thermal_resistance


# Load data from CSV file into a Pandas DataFrame
def load_data(file_path=r"C:\Users\isabella.lucchese\Desktop\Graduation\Excel sheets\Inputs\Input Amin gives me for MTL\merged_output_SC_2024.csv"):
    data = pd.read_csv(file_path)
    return data

# Preprocess the data for MTL
def preprocess_data(data):
   
    # 1. Extract Features (X) - Only numeric columns
    
    X = data[['windows_U_Factor', 'groundfloor_thermal_resistance', 'ext_walls_thermal_resistance', 'roof_thermal_resistance']].select_dtypes(include='number')
    
    # Handle NaN and infinite values in X (replace NaN with 0, and infinite with large finite numbers)
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0)

   
    # 2. Cost Calculation
    
    def calculate_window_cost(windows_U_Factor):
        if windows_U_Factor == 2.7:
            return 0
        elif windows_U_Factor == 1.0:
            return 184
        elif windows_U_Factor == 1.6:
            return 485
        elif windows_U_Factor == 1.2:
            return 295
        elif windows_U_Factor == 1.5:
            return 622
        else:
            return 0  # Default

    def calculate_floor_cost(groundfloor_thickness):
        if groundfloor_thickness == 0.08:
            return 59.7
        elif groundfloor_thickness == 0.09:
            return 77
        elif groundfloor_thickness == 0.1:
            return 87.9
        elif groundfloor_thickness == 0.13:
            return 108
        else:
            return 0

    def calculate_facade_cost(ext_walls_thickness):
        if ext_walls_thickness == 0.05:
            return 182
        elif ext_walls_thickness == 0.1:
            return 179
        elif ext_walls_thickness == 0.13:
            return 200
        elif ext_walls_thickness == 0.21:
            return 222
        else:
            return 0

    def calculate_roof_cost(roof_thickness):
        if roof_thickness == 0.08:
            return 89.5
        elif roof_thickness == 0.11:
            return 105
        elif roof_thickness == 0.185:
            return 101
        elif roof_thickness == 0.23:
            return 139
        else:
            return 0

    # Calculate total cost
    def calculate_total_cost(row):
        window_cost = calculate_window_cost(row['windows_U_Factor'])
        floor_cost = calculate_floor_cost(row['groundfloor_thermal_resistance'])
        facade_cost = calculate_facade_cost(row['ext_walls_thermal_resistance'])
        roof_cost = calculate_roof_cost(row['roof_thermal_resistance'])
        return window_cost + floor_cost + facade_cost + roof_cost

    # Apply cost calculation
    data['Total_Cost'] = data.apply(calculate_total_cost, axis=1)
    

    # 3. Carbon Calculation
    
    def calculate_window_carbon(windows_U_Factor):
        if windows_U_Factor == 2.7:
            return 0
        elif windows_U_Factor == 1.0:
            return 70
        elif windows_U_Factor == 1.6:
            return 50
        elif windows_U_Factor == 1.2:
            return 150
        elif windows_U_Factor == 1.5:
            return 120
        else:
            return 0

    def calculate_floor_carbon(groundfloor_thickness):
        if groundfloor_thickness == 0.08:
            return 10
        elif groundfloor_thickness == 0.09:
            return 6
        elif groundfloor_thickness == 0.1:
            return 11
        elif groundfloor_thickness == 0.13:
            return 7
        else:
            return 0

    def calculate_facade_carbon(ext_walls_thickness):
        if ext_walls_thickness == 0.05:
            return 9
        elif ext_walls_thickness == 0.1:
            return 5
        elif ext_walls_thickness == 0.13:
            return 17
        elif ext_walls_thickness == 0.21:
            return 9
        else:
            return 0

    def calculate_roof_carbon(roof_thickness):
        if roof_thickness == 0.08:
            return 23
        elif roof_thickness == 0.11:
            return 5
        elif roof_thickness == 0.185:
            return 18
        elif roof_thickness == 0.23:
            return 11
        else:
            return 0

    # Calculate total carbon
    def calculate_total_carbon(row):
        window_carbon = calculate_window_carbon(row['windows_U_Factor'])
        floor_carbon = calculate_floor_carbon(row['groundfloor_thermal_resistance'])
        facade_carbon = calculate_facade_carbon(row['ext_walls_thermal_resistance'])
        roof_carbon = calculate_roof_carbon(row['roof_thermal_resistance'])
        return window_carbon + floor_carbon + facade_carbon + roof_carbon

    # Apply carbon calculation
    data['Total_Carbon'] = data.apply(calculate_total_carbon, axis=1)
    
  
    # 4. Energy Consumption Calculation (Only electricity and Gas)
   
    electricity_building_columns = data['index'].str.contains("Electricity Building", case=False)
    electricity_facility_columns = data['index'].str.contains("Electricity Facility", case=False)
    gas_consumption_columns = data['index'].str.contains("Gas Consumption", case=False)

    # Sum numeric columns only
    data['Energy_Consumption'] = data[electricity_building_columns].select_dtypes(include='number').sum(axis=1) + \
                                 data[electricity_facility_columns].select_dtypes(include='number').sum(axis=1) + \
                                 data[gas_consumption_columns].select_dtypes(include='number').sum(axis=1)
    












        # 5. Indoor Comfort Calculation
    zone_temperature_rows = data[data['index'].str.contains(r"Building \d+: Zone Mean Air Temperature \[C\]\(Daily\)", na=False)]
    
    # Filtra gli edifici solo nel range desiderato
    zone_temperature_rows = zone_temperature_rows[(zone_temperature_rows['Building ID'] >= 17000000.0) & 
                                                  (zone_temperature_rows['Building ID'] <= 17000851.0)]
    
    # Verifica se le righe sono state trovate
    if zone_temperature_rows.empty:
        print("Nessuna riga trovata con 'Zone Mean Air Temperature [C](Daily)' e Building ID tra 17000000.0 e 17000851.0")
        return None

    # Seleziona solo le colonne delle date del 2024
    columns_to_consider = [col for col in data.columns if '2024-' in col]
    zone_temperature_numeric = zone_temperature_rows[columns_to_consider]

    # Gestione dei NaN nelle temperature
    zone_temperature_numeric = zone_temperature_numeric.fillna(0)

    # Calcola se ogni valore di temperatura è compreso tra 16 e 24 gradi
    indoor_comfort = zone_temperature_numeric.applymap(lambda x: 16 <= x <= 24)

    indoor_comfort['Building ID'] = zone_temperature_rows['Building ID'].values

    # Visualizza i risultati finali
    print("Comfort per ogni giorno del 2024 per ciascun edificio:")
    print(indoor_comfort.head())  # Mostra le prime righe per confermare

     # Add Indoor_Comfort column to the original data
    data.loc[zone_temperature_rows.index, 'Indoor_Comfort'] = indoor_comfort.all(axis=1)


















    # 6. Define Target Variables (y)
    y = {
        'Total_Cost': data['Total_Cost'].values,
        'Total_Carbon': data['Total_Carbon'].values,
        'Energy_Consumption': data['Energy_Consumption'].values,
        'Indoor_Comfort': data['Indoor_Comfort'].fillna(False).values  # Binary target
    }
    
    # 7. Feature and Target Normalization
    # Normalize the features (X)
    scaler_X = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)

    # Normalize numeric target variables (skip Indoor_Comfort as it is binary)
    scalers_y = {key: StandardScaler() for key in y if key != 'Indoor_Comfort'}
    y_scaled = {key: scalers_y[key].fit_transform(y[key].reshape(-1, 1)) for key in y if key != 'Indoor_Comfort'}

    # Add Indoor_Comfort (binary) to y_scaled without normalization
    y_scaled['Indoor_Comfort'] = np.array(y['Indoor_Comfort']).reshape(-1, 1)
    
    return X_scaled, y_scaled, scalers_y, y

# Load and preprocess the data
data = load_data()
X, y_scaled, scalers_y, y = preprocess_data(data)





Comfort per ogni giorno del 2024 per ciascun edificio:
    2024-01-01  2024-01-02  2024-01-03  2024-01-04  2024-01-05  2024-01-06  \
4         True        True        True        True        True        True   
8         True        True        True        True        True        True   
12        True        True        True        True        True        True   
16        True        True        True        True        True        True   
20        True        True        True        True        True        True   

    2024-01-07  2024-01-08  2024-01-09  2024-01-10  ...  2024-12-23  \
4         True        True        True        True  ...        True   
8         True        True        True        True  ...        True   
12        True        True        True        True  ...        True   
16        True        True        True        True  ...        True   
20        True        True        True        True  ...        True   

    2024-12-24  2024-12-25  2024-12-26  2024-12-2

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction ** 2 / new_sample_count


In [194]:
data

Unnamed: 0,Building ID,index,2025-01-01,meestvoorkomendepostcode,function,building_type,age_range,height,area,perimeter,...,2024-12-26,2024-12-27,2024-12-28,2024-12-29,2024-12-30,2024-12-31,Total_Cost,Total_Carbon,Energy_Consumption,Indoor_Comfort
0,,Environment:Site Outdoor Air Drybulb Temperatu...,4.820312e+00,,,,,,,,...,5.902604e+00,2.542708e+00,1.693750e+00,-3.218750e-01,3.616146e+00,4.979167e+00,0,0,,
1,17000000.0,Building 17000000: Electricity Building [J](Da...,4.054158e+08,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,4.101213e+08,3.755554e+08,3.697181e+08,3.763681e+08,3.902811e+08,3.886045e+08,0,0,,
2,17000000.0,Building 17000000: Electricity Facility [J](Da...,5.721833e+08,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,5.733730e+08,5.413101e+08,5.436249e+08,5.618624e+08,5.684611e+08,5.470919e+08,0,0,,
3,17000000.0,Building 17000000: Gas Consumption [J](Daily),4.504524e+10,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,4.381733e+10,4.357936e+10,4.496502e+10,4.873113e+10,4.601358e+10,4.159716e+10,0,0,,
4,17000000.0,Building 17000000: Zone Mean Air Temperature [...,1.361695e+01,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,1.639317e+01,1.644141e+01,1.537377e+01,1.665507e+01,1.713764e+01,1.716735e+01,0,0,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12956,17003238.0,Building 17003238: Zone Mean Air Temperature [...,1.578443e+01,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,1.811956e+01,1.797301e+01,1.686782e+01,1.823636e+01,1.870731e+01,1.905176e+01,184,70,,
12957,17003239.0,Building 17003239: Electricity Building [J](Da...,4.075719e+08,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,4.181395e+08,3.839891e+08,3.845229e+08,3.804551e+08,3.928515e+08,4.002111e+08,184,70,,
12958,17003239.0,Building 17003239: Electricity Facility [J](Da...,5.988363e+08,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,6.128410e+08,5.695115e+08,5.699329e+08,5.716847e+08,5.778227e+08,5.860365e+08,184,70,,
12959,17003239.0,Building 17003239: Gas Consumption [J](Daily),4.726422e+10,4921AH,Residential,Apartment,1975 - 1991,7.4,162.0,53.0,...,4.789602e+10,4.672187e+10,4.793244e+10,4.948186e+10,4.642276e+10,4.593178e+10,184,70,,


# Define the MTL Model

In [195]:
class MultiTaskModel(nn.Module):
    def __init__(self, input_size):
        super(MultiTaskModel, self).__init__()
        
        # Define shared layers
        self.shared_fc1 = nn.Linear(input_size, 128)
        self.shared_fc2 = nn.Linear(128, 64)
        
        # Define task-specific layers for energy consumption
        self.energy_fc1 = nn.Linear(64, 32)
        self.energy_out = nn.Linear(32, 1)
        
        # Define task-specific layers for carbon emissions
        self.carbon_fc1 = nn.Linear(64, 32)
        self.carbon_out = nn.Linear(32, 1)
        
        # Define task-specific layers for costs
        self.costs_fc1 = nn.Linear(64, 32)
        self.costs_out = nn.Linear(32, 1)
        
        # Define task-specific layers for internal discomfort
        self.discomfort_fc1 = nn.Linear(64, 32)
        self.discomfort_out = nn.Linear(32, 1)

    def forward(self, x):
        # Pass input through shared layers
        x = torch.relu(self.shared_fc1(x))
        x = torch.relu(self.shared_fc2(x))
        
        # Pass shared representation through each task-specific head
        energy = torch.relu(self.energy_fc1(x))
        energy = self.energy_out(energy)
        
        carbon = torch.relu(self.carbon_fc1(x))
        carbon = self.carbon_out(carbon)
        
        costs = torch.relu(self.costs_fc1(x))
        costs = self.costs_out(costs)
        
        discomfort = torch.relu(self.discomfort_fc1(x))
        discomfort = self.discomfort_out(discomfort)
        
        # Return the outputs for all tasks
        return energy, carbon, costs, discomfort

# Example
input_size = X.shape[1]  # Number of input features
model = MultiTaskModel(input_size)  # Initialize the model


# Define the loss function and optimizer

In [196]:
def custom_loss_fn(predictions, targets, task_weights, indoor_comfort_weight=10.0):
    # Unpack predictions and targets
    energy_pred, carbon_pred, cost_pred, comfort_pred = predictions
    energy_target, carbon_target, cost_target, comfort_target = targets
    
    # Loss for energy, cost, and carbon (regression tasks)
    loss_energy = nn.MSELoss()(energy_pred, energy_target) * task_weights[0]
    loss_carbon = nn.MSELoss()(carbon_pred, carbon_target) * task_weights[1]
    loss_cost = nn.MSELoss()(cost_pred, cost_target) * task_weights[2]
    
    # Loss for indoor comfort (binary classification task)
    comfort_loss = nn.BCELoss()(torch.sigmoid(comfort_pred), comfort_target) * task_weights[3]
    
    # Penalize false indoor comfort values
    false_penalty = ((comfort_pred < 0.5) & (comfort_target == 1)).float().sum() * indoor_comfort_weight
    
    # Total loss is the sum of all losses
    total_loss = loss_energy + loss_carbon + loss_cost + comfort_loss + false_penalty
    return total_loss

def get_optimizer(model, lr=0.001):
    return optim.Adam(model.parameters(), lr=lr)




# Train the model

In [197]:
def train_model(model, dataloader, val_loader, optimizer, num_epochs=100, device='cuda', task_weights=None, indoor_comfort_weight=10.0):
    # Default task weights if none are provided
    if task_weights is None:
        task_weights = [1.0, 1.0, 1.0, 1.0]  # Modify based on importance of tasks

    # Move the model to the specified device (GPU or CPU)
    model = model.to(device)
    
    # Lists to store loss values
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        
        for batch in dataloader:
            X_batch, y_batch = batch
            X_batch = X_batch.to(device)
            y_batch = [target.to(device).unsqueeze(1) for target in y_batch]
            
            optimizer.zero_grad()
            
            # Forward pass
            predictions = model(X_batch)
            
            # Compute custom loss
            loss = custom_loss_fn(predictions, y_batch, task_weights, indoor_comfort_weight)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        avg_train_loss = total_loss / len(dataloader)
        train_losses.append(avg_train_loss)
        
        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for val_batch in val_loader:
                X_val, y_val = val_batch
                X_val = X_val.to(device)
                y_val = [target.to(device).unsqueeze(1) for target in y_val]
                
                val_predictions = model(X_val)
                
                val_loss += custom_loss_fn(val_predictions, y_val, task_weights, indoor_comfort_weight).item()

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}')

    return train_losses, val_losses




# Evaluate the model

In [198]:
def evaluate_model(model, dataloader, loss_fn, device='cuda'):
    model.eval()  # Set the model to evaluation mode
    model = model.to(device)
    total_loss = 0
    
    with torch.no_grad():
        for batch in dataloader:
            X_batch, y_batch = batch
            X_batch = X_batch.to(device)
            y_batch = [target.to(device).unsqueeze(1) for target in y_batch]
            
            predictions = model(X_batch)
            losses = [loss_fn(pred, target) for pred, target in zip(predictions, y_batch)]
            total_loss += sum(losses).item()
    
    print(f'Evaluation Loss: {total_loss/len(dataloader)}')

# Save and load the model

In [199]:
def save_model(model, path):
    torch.save(model.state_dict(), path)

def load_model(model, path):
    model.load_state_dict(torch.load(path))
    return model



# Main execution and running the pipeline

In [200]:
# Load and preprocess data
data = load_data()
X, y_scaled, scalers_y, y = preprocess_data(data)

# Define custom dataset class
class RetrofitDataset(Dataset):
    def __init__(self, data, targets):
        # Convert data and target into PyTorch tensors and store them
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = [torch.tensor(t, dtype=torch.float32) for t in targets.values()]

    def __len__(self):
        # Return the size of the dataset
        return len(self.data)

    def __getitem__(self, idx):
        # Get the feature vector and corresponding targets for a given index
        x = self.data[idx]
        y = [t[idx] for t in self.targets]
        return x, y

# Create dataset and dataloaders
# Create dataset and dataloaders
dataset = RetrofitDataset(X, y_scaled)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Build, compile, and train the model
input_size = X.shape[1]  # Number of input features
model = MultiTaskModel(input_size)  # Initialize the model
optimizer = get_optimizer(model)  # Get the optimizer

# Define the task weights and indoor comfort penalty
task_weights = [1.5, 1.0, 1.0, 1.0]  # Adjust based on priority for tasks
indoor_comfort_weight = 20.0  # Strong penalty for incorrect indoor comfort

# Train the model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_losses, val_losses = train_model(model, train_loader, val_loader, optimizer, num_epochs=100, device=device, task_weights=task_weights, indoor_comfort_weight=indoor_comfort_weight)

# Evaluate the model on the validation set
evaluate_model(model, val_loader, custom_loss_fn, device=device)

# Save the trained model
save_model(model, 'mtl_retrofit_model.pth')

# Load the model and make predictions on new data
model = load_model(MultiTaskModel(input_size), 'mtl_retrofit_model.pth')
model = model.to(device)
model.eval()

# Replace with actual new data (formatted similarly to the input data)
new_data = torch.tensor([[1, 2, 3, 0]], dtype=torch.float32).to(device)

with torch.no_grad():
    predictions = model(new_data)

# Inverse transform the predictions to real-world values using the correct scaler for each task
predictions_real = []
for i, (scaler_key, pred) in enumerate(zip(scalers_y.keys(), predictions)):
    if scaler_key != 'Indoor_Comfort':
        real_pred = scalers_y[scaler_key].inverse_transform(pred.cpu().numpy().flatten().reshape(-1, 1))
    else:
        real_pred = pred.cpu().numpy().flatten()
    predictions_real.append(real_pred)

# Print predictions for each task
print("Predictions (energy consumption, carbon emissions, costs, indoor comfort):")
for i, pred in enumerate(predictions_real):
    print(f"Task {i + 1} prediction: {pred.flatten()}")





Comfort per ogni giorno del 2024 per ciascun edificio:
    2024-01-01  2024-01-02  2024-01-03  2024-01-04  2024-01-05  2024-01-06  \
4         True        True        True        True        True        True   
8         True        True        True        True        True        True   
12        True        True        True        True        True        True   
16        True        True        True        True        True        True   
20        True        True        True        True        True        True   

    2024-01-07  2024-01-08  2024-01-09  2024-01-10  ...  2024-12-23  \
4         True        True        True        True  ...        True   
8         True        True        True        True  ...        True   
12        True        True        True        True  ...        True   
16        True        True        True        True  ...        True   
20        True        True        True        True  ...        True   

    2024-12-24  2024-12-25  2024-12-26  2024-12-2

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction ** 2 / new_sample_count
  return F.mse_loss(input, target, reduction=self.reduction)


ValueError: Using a target size (torch.Size([32, 1, 1])) that is different to the input size (torch.Size([32, 1])) is deprecated. Please ensure they have the same size.

# Printing graphs

In [134]:
# Train the model and collect the training and validation losses
train_losses, val_losses = train_model(model, train_loader, val_loader, custom_loss_fn, optimizer, num_epochs=100, device=device)

# Plot the training and validation loss over epochs
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.show()




TypeError: train_model() got multiple values for argument 'num_epochs'