In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly_express as px

import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

from preprocessing import *
from feature_engineering import *


device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [3]:
df = pd.read_csv("E-Building_Data.csv")
df['date_time'] = pd.to_datetime(df['date_time'])

In [4]:
fe = FeatureEngineering(df) # helper class for feature enginerring
df = fe.feature_engineering(n=False, categorical_features=["season"])

In [5]:
# lat, long for weather station in karlsruhe
latitude = 49.0069
longitude = 8.4037
start_date = datetime(2022, 6, 2)
end_date = datetime(2023, 9, 30)
wf = WeatherFetcher(latitude, longitude, start_date, end_date) # helper class to fetch weather

In [6]:
df = fe.filter_rooms_by_prefix()
prefixes = ['e0', 'e1', 'e2', 'e3']
dataframes = [df[prefix] for prefix in prefixes]


dfe0, dfe1, dfe2, dfe3 = dataframes

In [7]:
dataframes = [dfe0, dfe1, dfe2, dfe3]

for i in range(len(dataframes)):
    dataframes[i] = wf.combine_weather(dataframes[i])
dfe0, dfe1, dfe2, dfe3 = dataframes

def one_hot_encode_room_number(df):
    return pd.get_dummies(df, columns=['room_number'], dtype="int")
encoded_dataframes = [one_hot_encode_room_number(df) for df in dataframes]

dfe0, dfe1, dfe2, dfe3 = encoded_dataframes



In [8]:
import pandas as pd

def sliding_window_forecarst(data, label_name, n_in, n_out, dropna=True):
    
    cols = []

    # Input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(data.shift(i))

    # Forecast sequence (t, t+1, ... t+n)
    forecast_cols = [data[[label_name]].shift(-i) for i in range(n_out)]
    names = [f"{col}(t-{i})" for i in range(n_in, 0, -1) for col in data.columns] + \
            [f"{label_name}(t+{i})" if i > 0 else f"{label_name}(t)" for i in range(n_out)]

    # Combine all columns
    all_cols = cols + forecast_cols
    data_reframed = pd.concat(all_cols, axis=1)
    data_reframed.columns = names

  
    if dropna:
        data_reframed.dropna(inplace=True)

    return data_reframed


In [9]:
dataframes = [dfe0, dfe1, dfe2, dfe3]
[df.set_index("date_time", inplace=True) for df in dataframes]

[None, None, None, None]

In [10]:
dataframes = [dfe0, dfe1, dfe2, dfe3]
transformed_dataframes = [sliding_window_forecarst(df, "tmp", 1, 1) for df in dataframes] # forecarst the next datapoint with the previous datapoint
dfe0, dfe1, dfe2, dfe3 = transformed_dataframes

In [56]:
dfe_next_point0 = dfe0.iloc[[-1]]
dfe_next_point1 = dfe1.iloc[[-1]]
dfe_next_point2 = dfe2.iloc[[-1]]
dfe_next_point3 = dfe3.iloc[[-1]]

In [15]:
def prepare_data(df):
    input_data = df.drop(["tmp(t)"], axis=1).values
    targets = df[["tmp(t)"]].values
    T = 1  # Number of timesteps to look while predicting
    D = input_data.shape[1]  # Dimensionality of the input
    N = len(input_data) - T

    # Train size: 80% of the total data size
    train_size = int(len(input_data) * 0.80)

    # Normalization of the inputs
    scaler = StandardScaler()
    scaler.fit(input_data[:train_size + T - 1])
    input_data = scaler.transform(input_data)

    # Preparing X_train and y_train
    X_train = np.zeros((train_size, T, D))
    y_train = np.zeros((train_size, 1))

    for t in range(train_size):
        X_train[t, :, :] = input_data[t:t+T]
        y_train[t] = (targets[t+T])

    # Preparing X_test and y_test
    X_test = np.zeros((N - train_size, T, D))
    y_test = np.zeros((N - train_size, 1))

    for i in range(N - train_size):
        t = i + train_size
        X_test[i, :, :] = input_data[t:t+T]
        y_test[i] = (targets[t+T])

    # Convert to torch tensors
    X_train = torch.from_numpy(X_train.astype(np.float32))
    y_train = torch.from_numpy(y_train.astype(np.float32))
    X_test = torch.from_numpy(X_test.astype(np.float32))
    y_test = torch.from_numpy(y_test.astype(np.float32))

    return X_train, y_train, X_test, y_test

In [51]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTM, self).__init__()
        self.M = hidden_dim
        self.L = layer_dim

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=layer_dim,
            batch_first=True)
        
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, X):
        # Check if X is batched or unbatched
        if len(X.shape) == 2:  # Unbatched input: (sequence_length, input_dim)
            X = X.unsqueeze(0)  # Add batch dimension: (1, sequence_length, input_dim)

        batch_size = X.size(0)

        # Initialize hidden state and cell state
        h0 = torch.zeros(self.L, batch_size, self.M).to(X.device)  # (num_layers, batch_size, hidden_dim)
        c0 = torch.zeros(self.L, batch_size, self.M).to(X.device)  # (num_layers, batch_size, hidden_dim)

        # Forward pass through LSTM
        out, (hn, cn) = self.lstm(X, (h0.detach(), c0.detach()))

        # Apply dropout
        out = self.dropout(out)

        # Get output from the last time step
        out = self.fc(out[:, -1, :])
        return out


In [26]:
def train(model, learning_rate, X_train, y_train, X_test, y_test, epochs=100):
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    train_losses = []
    test_losses = []
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Compute train loss
        train_loss = loss.item()
        train_losses.append(train_loss)
        
        # Compute test loss
        model.eval()
        test_outputs = model(X_test)
        test_loss = criterion(test_outputs, y_test).item()
        test_losses.append(test_loss)
        
        if epoch % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')
    
    return train_losses, test_losses

floor_dfs = [dfe0, dfe1, dfe2, dfe3]
for i, df in enumerate(floor_dfs, start=0):
    # Prepare data for the current floor
    X_train, y_train, X_test, y_test = prepare_data(df)
    
    # Define the LSTM model
    input_size = X_train.shape[2]  # shape of D, input data
    hidden_size = 64
    layer_size = 1
    output_size = 1
    model = LSTM(input_size, hidden_size, layer_size, output_size)
    
    # Train the model for the current floor
    learning_rate = 0.001
    epochs = 2000
    train_losses, test_losses = train(model, learning_rate, X_train, y_train, X_test, y_test, epochs=epochs)
    
    # Save the trained model and losses
    torch.save(model.state_dict(), f'floor{i}_lstm_model.pth')
    with open(f'floor{i}_losses.txt', 'w') as file:
        file.write("Train Losses:\n")
        for loss in train_losses:
            file.write(f"{loss}\n")
        file.write("\nTest Losses:\n")
        for loss in test_losses:
            file.write(f"{loss}\n")
    
    print(f'Model for floor {i} trained, saved, and losses recorded.')

Epoch [1/2000], Train Loss: 501.0281, Test Loss: 651.7833
Epoch [101/2000], Train Loss: 412.6010, Test Loss: 493.1356
Epoch [201/2000], Train Loss: 258.9297, Test Loss: 298.0696
Epoch [301/2000], Train Loss: 144.0245, Test Loss: 175.7701
Epoch [401/2000], Train Loss: 74.1599, Test Loss: 101.6668
Epoch [501/2000], Train Loss: 36.3441, Test Loss: 59.0375
Epoch [601/2000], Train Loss: 17.5440, Test Loss: 34.5547
Epoch [701/2000], Train Loss: 8.8028, Test Loss: 20.4414
Epoch [801/2000], Train Loss: 4.8764, Test Loss: 12.3834
Epoch [901/2000], Train Loss: 3.1174, Test Loss: 7.8671
Epoch [1001/2000], Train Loss: 2.2529, Test Loss: 5.3738
Epoch [1101/2000], Train Loss: 1.7967, Test Loss: 3.9616
Epoch [1201/2000], Train Loss: 1.5490, Test Loss: 3.1558
Epoch [1301/2000], Train Loss: 1.4068, Test Loss: 2.6777
Epoch [1401/2000], Train Loss: 1.3159, Test Loss: 2.3740
Epoch [1501/2000], Train Loss: 1.2504, Test Loss: 2.1607
Epoch [1601/2000], Train Loss: 1.1991, Test Loss: 2.0074
Epoch [1701/2000],

In [17]:
# List of dataframes
dataframes = [dfe0, dfe1, dfe2, dfe3]

# Process each dataframe and print the shape
input_data_list = []
for i, df in enumerate(dataframes):
    input_data = df.drop(["tmp(t)"], axis=1)
    input_data_list.append(input_data)
    print(f"Shape of input_data{i+1}: {input_data.shape}") # depending of number of rooms in each floor

Shape of input_data1: (41833, 29)
Shape of input_data2: (54991, 31)
Shape of input_data3: (32910, 29)
Shape of input_data4: (53659, 31)


In [52]:
model_details = [
    {"path": "floor0_lstm_model.pth", "input_size": 29},
    {"path": "floor1_lstm_model.pth", "input_size": 31},
    {"path": "floor2_lstm_model.pth", "input_size": 29},
    {"path": "floor3_lstm_model.pth", "input_size": 31},
]


models = []
for details in model_details:
    path = details["path"]
    input_size = details["input_size"]
    hidden_size = 64
    num_layers = 1
    output_size = 1

    # Instantiate and load the model
    model = LSTM(input_size, hidden_size, num_layers, output_size)
    model.load_state_dict(torch.load(path))
    model.eval()
    models.append(model)

model1, model2, model3, model4 = models[:4]

In [53]:
for i, df in enumerate(dataframes, start=0):
    # Load losses from the file
    with open(f'floor{i}_losses.txt', 'r') as file:
        lines = file.readlines()
    
    
    train_losses = []
    test_losses = []
    is_test_loss = False
    
    for line in lines:
        if line.strip() == 'Train Losses:':
            is_test_loss = False
            continue
        elif line.strip() == 'Test Losses:':
            is_test_loss = True
            continue
        elif line.strip() == '':
            continue
        
        if not is_test_loss:
            train_losses.append(float(line.strip()))
        else:
            test_losses.append(float(line.strip()))
        
    epochs = range(1, len(train_losses) + 1)
    loss_data = {'Epochs': epochs, 'Train Loss': train_losses, 'Test Loss': test_losses}
    loss_df = pd.DataFrame(loss_data)
    
    # Plot using Plotly Express
    fig = px.line(loss_df, x='Epochs', y=['Train Loss', 'Test Loss'], title=f'Loss for Floor {i}')
    fig.update_layout(yaxis_title="Loss")
    fig.show()

In [57]:
dfe_next_point0 = dfe_next_point0.drop("tmp(t)", axis=1)
dfe_next_point1 = dfe_next_point1.drop("tmp(t)", axis=1)
dfe_next_point2 = dfe_next_point2.drop("tmp(t)", axis=1)
dfe_next_point3 = dfe_next_point3.drop("tmp(t)", axis=1)

In [58]:
dfe_next_point_test0 = torch.from_numpy(dfe_next_point0.values.astype(np.float32))
dfe_next_point_test1 = torch.from_numpy(dfe_next_point1.values.astype(np.float32))
dfe_next_point_test2 = torch.from_numpy(dfe_next_point2.values.astype(np.float32))
dfe_next_point_test3 = torch.from_numpy(dfe_next_point3.values.astype(np.float32))

In [78]:
y_pred0 = model1(dfe_next_point_test0)
y_pred1 = model2(dfe_next_point_test1)
y_pred2 = model3(dfe_next_point_test2)
y_pred3 = model4(dfe_next_point_test3)

for i in range(4):
    y_pred = locals()[f"y_pred{i}"]  
    temperature_forecast = y_pred.item()
    
    print(f'Temperature forecast of the next datapoint for floor {i}: {round(temperature_forecast, 2)}°C')


Temperature forecast of the next datapoint for floor 0: 13.03°C
Temperature forecast of the next datapoint for floor 1: 9.68°C
Temperature forecast of the next datapoint for floor 2: 5.81°C
Temperature forecast of the next datapoint for floor 3: 8.59°C
