In [29]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly_express as px

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

from preprocessing import *
from feature_engineering import *


# Preferred torch device string: the first CUDA GPU when one is available, otherwise the CPU.
# NOTE(review): no visible cell passes `device` to a model or tensor — confirm it is still needed.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [30]:
# Read the raw building measurements, then parse the timestamp column with pd.to_datetime.
df = pd.read_csv("E-Building_Data.csv")
df = df.assign(date_time=pd.to_datetime(df["date_time"]))

In [31]:
# Build the engineered feature frame; "season" is passed as a categorical feature.
# NOTE(review): the meaning of n=False is not visible in this notebook — confirm against FeatureEngineering.
fe = FeatureEngineering(df) # helper class for feature engineering (presumably from the star-imported feature_engineering module)
df = fe.feature_engineering(n=False, categorical_features=["season"])

In [32]:
# Latitude / longitude of the weather station in Karlsruhe
latitude = 49.0069
longitude = 8.4037
# Date range handed to the weather helper.
# NOTE(review): presumably chosen to cover the time span of the building data — confirm.
start_date = datetime(2022, 6, 2)
end_date = datetime(2023, 9, 30)
wf = WeatherFetcher(latitude, longitude, start_date, end_date) # helper class that fetches weather data through an API

In [33]:
# NOTE(review): `df` is rebound here from the engineered frame to the result of
# filter_rooms_by_prefix(). It is indexed by prefix string below, so it is presumably
# a mapping of floor prefix -> DataFrame — confirm against the helper.
df = fe.filter_rooms_by_prefix()
prefixes = ['e0', 'e1', 'e2', 'e3']
# One entry per floor prefix, in the order given by `prefixes`.
dataframes = [df[prefix] for prefix in prefixes]


dfe0, dfe1, dfe2, dfe3 = dataframes

In [34]:
def one_hot_encode_room_number(df):
    """Return `df` with the 'room_number' column expanded into integer one-hot columns."""
    return pd.get_dummies(df, columns=['room_number'], dtype="int")


# Combine the weather data with every floor frame, keeping the floor order e0..e3.
dataframes = [wf.combine_weather(floor_df) for floor_df in (dfe0, dfe1, dfe2, dfe3)]
dfe0, dfe1, dfe2, dfe3 = dataframes

# Encode the room identifier of every floor frame.
encoded_dataframes = list(map(one_hot_encode_room_number, dataframes))
dfe0, dfe1, dfe2, dfe3 = encoded_dataframes



In [35]:

def sliding_window_forecarst(data, label_name, n_in, n_out, dropna=True):
    """Reframe a time-indexed frame as a supervised-learning table.

    Every output row holds `n_in` lagged copies of all columns of `data`
    (named "<col>(t-k)", oldest lag first) followed by `n_out` values of
    `label_name` (named "<label>(t)", "<label>(t+1)", ...).

    Args:
        data: DataFrame whose rows are consecutive observations.
        label_name: Column of `data` to forecast.
        n_in: Number of lagged steps used as input.
        n_out: Number of forecast steps of the label, starting at t.
        dropna: When True, rows made incomplete by the shifting are removed.

    Returns:
        The reframed DataFrame.
    """
    lags = range(n_in, 0, -1)
    steps = range(n_out)

    # Lagged inputs first (t-n_in ... t-1), then the label at t, t+1, ...
    frames = [data.shift(lag) for lag in lags]
    frames.extend(data[[label_name]].shift(-step) for step in steps)

    header = []
    for lag in lags:
        header.extend(f"{col}(t-{lag})" for col in data.columns)
    for step in steps:
        header.append(f"{label_name}(t)" if step == 0 else f"{label_name}(t+{step})")

    reframed = pd.concat(frames, axis=1)
    reframed.columns = header

    return reframed.dropna() if dropna else reframed


In [36]:
# Use the timestamp as the index of every floor frame.
# The frames are modified in place so dfe0..dfe3 keep referring to the indexed data.
dataframes = [dfe0, dfe1, dfe2, dfe3]
for floor_df in dataframes:
    # Skip frames that are already indexed so re-running this cell does not raise KeyError.
    if "date_time" in floor_df.columns:
        floor_df.set_index("date_time", inplace=True)

[None, None, None, None]

In [37]:
# Reframe every floor so the previous datapoint (t-1) is the input and "tmp" at t is the label.
dataframes = [dfe0, dfe1, dfe2, dfe3]
transformed_dataframes = [
    sliding_window_forecarst(floor_df, "tmp", n_in=1, n_out=1)
    for floor_df in dataframes
]
dfe0, dfe1, dfe2, dfe3 = transformed_dataframes

In [43]:
# Set the most recent row of every floor aside as the input for a next-point prediction.
dfe_next_point0, dfe_next_point1, dfe_next_point2, dfe_next_point3 = (
    floor_df.iloc[[-1]] for floor_df in (dfe0, dfe1, dfe2, dfe3)
)

# Remove that row from the modelling frames so it stays unseen by the model.
dfe0, dfe1, dfe2, dfe3 = (
    floor_df.iloc[:-1] for floor_df in (dfe0, dfe1, dfe2, dfe3)
)

In [44]:
def prepare_data(df, return_scaler=False):
    """Turn a reframed floor frame into scaled train/test tensors.

    All columns except "tmp(t)" are the inputs; "tmp(t)" is the target.
    The first 80% of the rows form the training split and the remainder
    (minus the final row, which has no following target) the test split.
    The scaler is fitted on the training inputs only and applied to all rows.

    NOTE(review): the input of row t is paired with the target of row t + T
    (one row later). If `df` already pairs lagged features with "tmp(t)" in
    the same row (as sliding_window_forecarst produces), this adds one more
    row of lead — confirm that this is intended.

    Args:
        df: DataFrame containing a "tmp(t)" column plus the input columns.
        return_scaler: When True, the fitted scaler is returned as a fifth
            element so the same normalisation can be applied at inference time.

    Returns:
        (X_train, y_train, X_test, y_test) as float32 tensors with shapes
        (n, T, D) for inputs and (n, 1) for targets; with `return_scaler=True`
        the fitted scaler is appended.

    Raises:
        ValueError: If `df` has too few rows to form a training split.
    """
    input_data = df.drop(["tmp(t)"], axis=1).values
    targets = df[["tmp(t)"]].values
    T = 1  # Number of timesteps to look at while predicting
    D = input_data.shape[1]  # Dimensionality of the input
    N = len(input_data) - T

    # Train size: 80% of the total data size
    train_size = int(len(input_data) * 0.80)
    if train_size < 1:
        raise ValueError(
            f"prepare_data needs at least 2 rows to build a training split, got {len(input_data)}"
        )

    # Fit the normalisation on the training rows only to avoid leaking test statistics.
    scaler = StandardScaler()
    scaler.fit(input_data[:train_size + T - 1])
    input_data = scaler.transform(input_data)

    def _build_windows(first, count):
        """Collect `count` (window, next-row target) pairs starting at row `first`."""
        X = np.zeros((count, T, D))
        y = np.zeros((count, 1))
        for k in range(count):
            t = first + k
            X[k, :, :] = input_data[t:t + T]
            y[k] = targets[t + T]
        return X, y

    X_train, y_train = _build_windows(0, train_size)
    X_test, y_test = _build_windows(train_size, N - train_size)

    X_train = torch.from_numpy(X_train.astype(np.float32))
    y_train = torch.from_numpy(y_train.astype(np.float32))
    X_test = torch.from_numpy(X_test.astype(np.float32))
    y_test = torch.from_numpy(y_test.astype(np.float32))

    if return_scaler:
        return X_train, y_train, X_test, y_test, scaler
    return X_train, y_train, X_test, y_test

In [45]:
class LSTM(nn.Module):
    """LSTM regressor: stacked LSTM -> dropout -> linear layer on the last timestep.

    Args:
        input_dim: Number of features per timestep.
        hidden_dim: Size of the LSTM hidden state.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of values predicted per sequence.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        # Kept so forward() can size the initial hidden/cell states.
        self.M = hidden_dim
        self.L = layer_dim

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=layer_dim,
            batch_first=True,
        )
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, X):
        """Return predictions of shape (batch, output_dim).

        `X` is either (batch, sequence_length, input_dim) or an unbatched
        (sequence_length, input_dim) tensor, which is treated as a batch of one.
        """
        if X.dim() == 2:
            X = X[None, ...]

        # Fresh zero states for every call, created on the same device as the input.
        state_shape = (self.L, X.shape[0], self.M)
        h0 = torch.zeros(state_shape, device=X.device)
        c0 = torch.zeros(state_shape, device=X.device)

        seq_out, _ = self.lstm(X, (h0, c0))

        # Dropout is applied to the whole sequence output before the last step is selected.
        last_step = self.dropout(seq_out)[:, -1, :]
        return self.fc(last_step)


In [46]:

def train(model, learning_rate, X_train, y_train, X_test, y_test, batch_size=64, epochs=100):
    """Train `model` with Adam on an MSE objective and track per-epoch losses.

    Batches are served in their original order (no shuffling) and are moved to
    the device the model's parameters live on, so the function works whether
    the caller keeps the model on the CPU or has moved it to a GPU.

    Args:
        model: torch.nn.Module mapping a batch of inputs to predictions.
        learning_rate: Learning rate for the Adam optimizer.
        X_train, y_train: Training inputs and targets (first dimension = samples).
        X_test, y_test: Evaluation inputs and targets; may be empty.
        batch_size: Number of samples per batch.
        epochs: Number of passes over the training data.

    Returns:
        (train_losses, test_losses): two lists with one value per epoch, each
        the mean of the per-batch MSE losses. An epoch with no batches is
        recorded as NaN rather than raising ZeroDivisionError.
    """
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model_device = next(model.parameters()).device

    # The data is already in memory, so batches are loaded in the main process;
    # worker processes would be re-created every epoch without any benefit here.
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

    def _mean_batch_loss(total, n_batches):
        """Average of the accumulated batch losses, NaN when there were no batches."""
        return total / n_batches if n_batches else float("nan")

    train_losses = []
    test_losses = []

    for epoch in range(epochs):
        model.train()
        epoch_train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(model_device)
            batch_y = batch_y.to(model_device)

            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item()

        train_loss = _mean_batch_loss(epoch_train_loss, len(train_loader))
        train_losses.append(train_loss)

        # Evaluate without gradient tracking and with dropout disabled.
        model.eval()
        epoch_test_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                batch_X = batch_X.to(model_device)
                batch_y = batch_y.to(model_device)
                epoch_test_loss += criterion(model(batch_X), batch_y).item()

        test_loss = _mean_batch_loss(epoch_test_loss, len(test_loader))
        test_losses.append(test_loss)

        if (epoch + 1) % 10 == 0 or epoch == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

    return train_losses, test_losses


# Hyperparameters shared by every floor model.
hidden_size = 64
layer_size = 1
output_size = 1
learning_rate = 0.001
epochs = 100
batch_size = 32
seed = 42  # fixed so weight initialisation and dropout masks are repeatable between runs

floor_dfs = [dfe0, dfe1, dfe2, dfe3]
for i, df in enumerate(floor_dfs):
    # Reseed per floor so a model's result does not depend on which floors were trained before it.
    torch.manual_seed(seed + i)

    X_train, y_train, X_test, y_test = prepare_data(df)

    # Define the LSTM model; the input width differs per floor (last dimension of X_train).
    input_size = X_train.shape[2]
    model = LSTM(input_size, hidden_size, layer_size, output_size)

    # Train the model for the current floor
    train_losses, test_losses = train(
        model, learning_rate, X_train, y_train, X_test, y_test,
        epochs=epochs, batch_size=batch_size,
    )

    # Save the trained weights and the loss history (one value per line under each header).
    torch.save(model.state_dict(), f'floor{i}_lstm_model.pth')
    with open(f'floor{i}_losses.txt', 'w') as file:
        file.write("Train Losses:\n")
        file.writelines(f"{loss}\n" for loss in train_losses)
        file.write("\nTest Losses:\n")
        file.writelines(f"{loss}\n" for loss in test_losses)

    print(f'Model for floor {i} trained, saved, and losses recorded.')

Epoch [1/100], Train Loss: 172.0146, Test Loss: 13.2347
Epoch [10/100], Train Loss: 7.5735, Test Loss: 5.0160
Epoch [20/100], Train Loss: 5.4235, Test Loss: 4.2383
Epoch [30/100], Train Loss: 3.6968, Test Loss: 3.6978
Epoch [40/100], Train Loss: 2.5907, Test Loss: 2.4841
Epoch [50/100], Train Loss: 1.8231, Test Loss: 1.8563
Epoch [60/100], Train Loss: 1.4075, Test Loss: 1.5515
Epoch [70/100], Train Loss: 1.1897, Test Loss: 1.0763
Epoch [80/100], Train Loss: 1.0851, Test Loss: 0.7624
Epoch [90/100], Train Loss: 1.0487, Test Loss: 0.6946
Epoch [100/100], Train Loss: 1.0108, Test Loss: 0.6068
Model for floor 0 trained, saved, and losses recorded.
Epoch [1/100], Train Loss: 126.1628, Test Loss: 8.5189
Epoch [10/100], Train Loss: 6.8312, Test Loss: 2.8285
Epoch [20/100], Train Loss: 4.1913, Test Loss: 4.4376
Epoch [30/100], Train Loss: 2.5799, Test Loss: 3.3444
Epoch [40/100], Train Loss: 1.8085, Test Loss: 2.1939
Epoch [50/100], Train Loss: 1.4866, Test Loss: 1.4911
Epoch [60/100], Train L

In [47]:
# Floor frames in floor order e0..e3.
dataframes = [dfe0, dfe1, dfe2, dfe3]

# Collect the model inputs (everything except the label) and report their shapes;
# the column count varies with the number of rooms on each floor.
input_data_list = []
for i, df in enumerate(dataframes):
    input_data = df.drop(columns=["tmp(t)"])
    input_data_list.append(input_data)
    print(f"Shape of input_data{i+1}: {input_data.shape}")

Shape of input_data1: (41831, 29)
Shape of input_data2: (54988, 31)
Shape of input_data3: (32907, 29)
Shape of input_data4: (53656, 31)


In [48]:
# Checkpoint path and input width of every floor model.
# The width is derived from the floor frame (all columns except the label) instead of
# being copied from printed output, so it stays correct when the data changes.
model_details = [
    {"path": f"floor{floor}_lstm_model.pth", "input_size": len(floor_df.columns.drop("tmp(t)"))}
    for floor, floor_df in enumerate((dfe0, dfe1, dfe2, dfe3))
]

# Architecture settings; they must match the values used when the checkpoints were written.
hidden_size = 64
num_layers = 1
output_size = 1

models = []
for details in model_details:
    path = details["path"]
    input_size = details["input_size"]

    # Instantiate and load the model.
    # map_location keeps loading working on machines without the device used for saving.
    model = LSTM(input_size, hidden_size, num_layers, output_size)
    model.load_state_dict(torch.load(path, map_location="cpu"))
    model.eval()
    models.append(model)

model1, model2, model3, model4 = models[:4]

In [49]:
for i, df in enumerate(dataframes, start=0):
    # Read the loss history written for this floor during training.
    with open(f'floor{i}_losses.txt', 'r') as file:
        lines = file.readlines()

    train_losses = []
    test_losses = []
    is_test_loss = False

    # The file has a 'Train Losses:' section followed by a 'Test Losses:' section,
    # one value per line; blank lines are ignored.
    for line in lines:
        text = line.strip()
        if text == 'Train Losses:':
            is_test_loss = False
        elif text == 'Test Losses:':
            is_test_loss = True
        elif text != '':
            bucket = test_losses if is_test_loss else train_losses
            bucket.append(float(text))

    epochs = range(1, len(train_losses) + 1)
    loss_df = pd.DataFrame(
        {'Epochs': epochs, 'Train Loss': train_losses, 'Test Loss': test_losses}
    )

    # Plot both curves with Plotly Express.
    fig = px.line(loss_df, x='Epochs', y=['Train Loss', 'Test Loss'], title=f'Loss for Floor {i}')
    fig.update_layout(yaxis_title="Loss")
    fig.show()

In [50]:
# Remove the label column from every held-out row, leaving only the model inputs.
dfe_next_point0, dfe_next_point1, dfe_next_point2, dfe_next_point3 = (
    point.drop(columns="tmp(t)")
    for point in (dfe_next_point0, dfe_next_point1, dfe_next_point2, dfe_next_point3)
)

In [51]:
# Convert every held-out row into a float32 tensor.
dfe_next_point_test0, dfe_next_point_test1, dfe_next_point_test2, dfe_next_point_test3 = (
    torch.from_numpy(point.values.astype(np.float32))
    for point in (dfe_next_point0, dfe_next_point1, dfe_next_point2, dfe_next_point3)
)

In [54]:

dataframes = [dfe0, dfe1, dfe2, dfe3]
models = [model1, model2, model3, model4]

figures = []

# Compare predictions with the true targets on each floor's test split.
for floor, (df, model) in enumerate(zip(dataframes, models)):

    X_train, y_train, X_test, y_test = prepare_data(df)

    # Set model to evaluation mode (disables dropout)
    model.eval()

    # Run on the device the model actually lives on. Moving the input to CUDA
    # whenever a GPU exists fails with a device mismatch if the model is on the CPU.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        y_pred = model(X_test.to(model_device))

    # .cpu() is a no-op for tensors that are already on the CPU.
    y_pred = y_pred.cpu().numpy()
    y_test = y_test.cpu().numpy()

    # Create a DataFrame with true values and predicted values
    df_results = pd.DataFrame({
        'Index': range(len(y_test)),
        'True Values': y_test.flatten(),
        'Predicted Values': y_pred.flatten()
    })

    # Long format so both series can be drawn as separately coloured lines.
    df_melted = df_results.melt(id_vars=['Index'], value_vars=['True Values', 'Predicted Values'],
                                var_name='Type', value_name='Value')

    fig = px.line(df_melted, x='Index', y='Value', color='Type', title=f'Floor {floor}: True vs Predicted Values')
    figures.append(fig)

for fig in figures:
    fig.show()


In [104]:
def predict_next_values(models, dataframes, label_names, scalers=None):
    """Predict the label for the last row of each frame and print a summary.

    Args:
        models: One torch model per frame.
        dataframes: One DataFrame per model; the last row supplies the input
            features and the true label.
        label_names: Name of the label column in each frame.
        scalers: Optional sequence with one fitted scaler (an object with a
            `transform` method) or None per frame. When given, the features
            are transformed before prediction so they match the normalisation
            the model was trained with. Defaults to no scaling.

    Returns:
        A list of (prediction tensor on the CPU, true label, room column name)
        tuples, one per frame. The room entry is None when no column starting
        with "room_number_" equals 1 in the last row.

    Raises:
        ValueError: If `scalers` is given but its length differs from `models`.
    """
    if scalers is None:
        scalers = [None] * len(models)
    elif len(scalers) != len(models):
        raise ValueError(
            f"expected {len(models)} scalers (one per model), got {len(scalers)}"
        )

    predictions = []

    for i, (model, data, label_name, scaler) in enumerate(zip(models, dataframes, label_names, scalers)):
        last_row = data.iloc[-1]

        # True label of the last row
        next_value_label = last_row[label_name]

        # Input features: everything except the label, removed by name so the
        # result does not depend on the label's column position.
        features = last_row.drop(label_name)
        last_window = features.to_numpy(dtype=np.float32).reshape(1, -1)
        if scaler is not None:
            last_window = scaler.transform(last_window)

        # Active one-hot room column; other columns that happen to equal 1 are ignored.
        is_one = features.to_numpy() == 1
        room_columns = [
            col for col, hit in zip(features.index, is_one)
            if hit and str(col).startswith("room_number_")
        ]
        room_number = room_columns[-1] if room_columns else None

        last_window_tensor = torch.tensor(last_window, dtype=torch.float32)

        # Run on the model's own device instead of assuming it was moved to the GPU.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            last_window_tensor = last_window_tensor.to(first_param.device)

        # Set model to evaluation mode
        model.eval()

        with torch.no_grad():
            next_value_pred = model(last_window_tensor)

        # No-op when the prediction is already on the CPU.
        next_value_pred = next_value_pred.cpu()

        # Store the prediction along with the true label and room_number
        predictions.append((next_value_pred, next_value_label, room_number))

        # Print the predicted next value, true label, and room_number for each floor
        print(f'Forecast for Floor {i}, Room: {room_number}: Predicted next value: {round(next_value_pred.item(), 2)}°C, True Label: {round(next_value_label, 2)}°C')

    return predictions

In [105]:

# Run the per-floor forecast; the three lists are aligned by position (floor 0..3).
# NOTE(review): these frames had their final row removed in an earlier cell (kept as
# dfe_next_point*), so the row predicted here is the last remaining row, not the
# held-out one. The dfe_next_point_test* tensors are not used by any visible cell — confirm intent.
models = [model1, model2, model3, model4]
dataframes = [dfe0, dfe1, dfe2, dfe3]
label_names = ["tmp(t)"] * 4

predictions = predict_next_values(models, dataframes, label_names)

Forecast for Floor 0, Room: room_number_e003(t-1): Predicted next value: 18.68°C, True Label: 23.3°C
Forecast for Floor 1, Room: room_number_e103(t-1): Predicted next value: 20.1°C, True Label: 23.71°C
Forecast for Floor 2, Room: room_number_e206(t-1): Predicted next value: 20.77°C, True Label: 24.4°C
Forecast for Floor 3, Room: room_number_e302(t-1): Predicted next value: 19.59°C, True Label: 24.43°C
