In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly_express as px
from sklearn.preprocessing import StandardScaler



from preprocessing import *
from feature_engineering import *

import warnings
warnings.filterwarnings('ignore')
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [2]:
# from preprocessing import *

# extract_data = DataExtractor("D:\Test\IoT\hka-aqm-e", "D:\Test\IoT\historical_data")
# data = extract_data.create_df()

# preprocessing = DataPreprocessing(get_outliers_out=True, roll=True)
# df_csv = preprocessing.preprocess_df(data, "60min", "60min")
# df_csv.to_csv("E-Building_Data.csv", index=False)

In [3]:
df = pd.read_csv("E-Building_Data.csv")
df['date_time'] = pd.to_datetime(df['date_time'])

In [4]:
fe = FeatureEngineering(df) # helper class for feature enginerring
df = fe.feature_engineering(n=2, categorical_features=["season"])

In [5]:
# lat, long for weather station in karlsruhe
latitude = 49.0069
longitude = 8.4037
start_date = datetime(2022, 6, 2)
end_date = datetime(2023, 9, 30)
wf = WeatherFetcher(latitude, longitude, start_date, end_date) # helper class to fetch weather
df = wf.combine_weather(df)

In [6]:
df = fe.filter_rooms_by_prefix() # 1 floor = 1 model
dfe0 = df['e0']
dfe1 = df['e1']
dfe2 = df['e2']
dfe3 = df['e3']

In [7]:
dfe0_plot = dfe0.copy(deep=True)
dfe1_plot = dfe1.copy(deep=True)
dfe2_plot = dfe2.copy(deep=True)
dfe3_plot = dfe3.copy(deep=True)

In [8]:
floor_dfs = [dfe0, dfe1, dfe2, dfe3]
for i, df in enumerate(floor_dfs):
        df.drop(['room_number', 'hum', 'CO2', 'vis', 'tmp_diff', 'hum_diff',
       'CO2_diff', 'vis_diff', 'year', 'month', 'dayofweek', 'hour',
       'VOC_CO2_ratio'], axis=1, inplace=True)
        df.dropna(inplace=True)
        print(f'Dropped columns from dataframe {i} and dropped NaN rows') # NaN are just the first 2 rows due created tmp shifts



Dropped columns from dataframe 0 and dropped NaN rows
Dropped columns from dataframe 1 and dropped NaN rows
Dropped columns from dataframe 2 and dropped NaN rows
Dropped columns from dataframe 3 and dropped NaN rows


In [9]:
class LSTM(nn.Module):
  def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
    super(LSTM, self).__init__()
    self.M = hidden_dim
    self.L = layer_dim

    self.lstm = nn.LSTM(
        input_size=input_dim,
        hidden_size=hidden_dim,
        num_layers=layer_dim,
        batch_first=True)
    
    # Dropout layer
    self.dropout = nn.Dropout(p=0.5)
    
    #batch_first to have (batch_dim, seq_dim, feature_dim)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, X):
    # initial hidden state and cell state
    h0 = torch.zeros(self.L, X.size(0), self.M).to(device)
    c0 = torch.zeros(self.L, X.size(0), self.M).to(device)

    out, (hn, cn) = self.lstm(X, (h0.detach(), c0.detach()))

    # h(T) at the final time step
    out = self.fc(out[:, -1, :])
    return out

In [10]:
# Data preparation function
def prepare_data(df):
    input_data = df.drop(['date_time', "tmp"], axis=1).values
    targets = df['tmp'].values
    T = 1  # Number of timesteps to look while predicting
    D = input_data.shape[1]  # Dimensionality of the input
    N = len(input_data) - T

    # Train size: 80% of the total data size
    train_size = int(len(input_data) * 0.80)

    # Normalization of the inputs
    scaler = StandardScaler()
    scaler.fit(input_data[:train_size + T - 1])
    input_data = scaler.transform(input_data)

    # Preparing X_train and y_train
    X_train = np.zeros((train_size, T, D))
    y_train = np.zeros((train_size, 1))

    for t in range(train_size):
        X_train[t, :, :] = input_data[t:t+T]
        y_train[t] = (targets[t+T])

    # Preparing X_test and y_test
    X_test = np.zeros((N - train_size, T, D))
    y_test = np.zeros((N - train_size, 1))

    for i in range(N - train_size):
        t = i + train_size
        X_test[i, :, :] = input_data[t:t+T]
        y_test[i] = (targets[t+T])

    # Convert to torch tensors
    X_train = torch.from_numpy(X_train.astype(np.float32))
    y_train = torch.from_numpy(y_train.astype(np.float32))
    X_test = torch.from_numpy(X_test.astype(np.float32))
    y_test = torch.from_numpy(y_test.astype(np.float32))

    return X_train, y_train, X_test, y_test

In [11]:
def train(model, learning_rate, X_train, y_train, X_test, y_test, epochs=100):
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    train_losses = []
    test_losses = []
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Compute train loss
        train_loss = loss.item()
        train_losses.append(train_loss)
        
        # Compute test loss
        model.eval()
        test_outputs = model(X_test)
        test_loss = criterion(test_outputs, y_test).item()
        test_losses.append(test_loss)
        
        if epoch % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')
    
    return train_losses, test_losses

for i, df in enumerate(floor_dfs, start=0):
    # Prepare data for the current floor
    X_train, y_train, X_test, y_test = prepare_data(df)
    
    # Define the LSTM model
    input_size = X_train.shape[2]  # shape of D, input data
    hidden_size = 64
    layer_size = 1
    output_size = 1
    model = LSTM(input_size, hidden_size, layer_size, output_size)
    
    # Train the model for the current floor
    learning_rate = 0.001
    epochs = 2000
    train_losses, test_losses = train(model, learning_rate, X_train, y_train, X_test, y_test, epochs=epochs)
    
    # Save the trained model and losses
    torch.save(model.state_dict(), f'floor{i}_lstm_model.pth')
    with open(f'floor{i}_losses.txt', 'w') as file:
        file.write("Train Losses:\n")
        for loss in train_losses:
            file.write(f"{loss}\n")
        file.write("\nTest Losses:\n")
        for loss in test_losses:
            file.write(f"{loss}\n")
    
    print(f'Model for floor {i} trained, saved, and losses recorded.')

Epoch [1/2000], Train Loss: 501.6827, Test Loss: 650.3195
Epoch [101/2000], Train Loss: 439.6642, Test Loss: 519.5189
Epoch [201/2000], Train Loss: 282.7224, Test Loss: 302.1882
Epoch [301/2000], Train Loss: 148.9744, Test Loss: 162.9556
Epoch [401/2000], Train Loss: 71.2825, Test Loss: 85.9070
Epoch [501/2000], Train Loss: 33.1899, Test Loss: 45.1019
Epoch [601/2000], Train Loss: 15.6570, Test Loss: 24.1936
Epoch [701/2000], Train Loss: 7.6761, Test Loss: 13.5000
Epoch [801/2000], Train Loss: 4.0961, Test Loss: 7.9994
Epoch [901/2000], Train Loss: 2.5899, Test Loss: 5.1681
Epoch [1001/2000], Train Loss: 1.9817, Test Loss: 3.7346
Epoch [1101/2000], Train Loss: 1.7215, Test Loss: 2.9709
Epoch [1201/2000], Train Loss: 1.5903, Test Loss: 2.5243
Epoch [1301/2000], Train Loss: 1.5110, Test Loss: 2.2578
Epoch [1401/2000], Train Loss: 1.4497, Test Loss: 2.0635
Epoch [1501/2000], Train Loss: 1.3913, Test Loss: 1.8991
Epoch [1601/2000], Train Loss: 1.3390, Test Loss: 1.7747
Epoch [1701/2000], T

In [12]:
# Example list of model paths
model_paths = ["floor0_lstm_model.pth", "floor1_lstm_model.pth", "floor2_lstm_model.pth", "floor3_lstm_model.pth"]

# Loop through each model path and load the corresponding model
models = []
for path in model_paths:
    # Initialize model parameters
    input_size = X_train.shape[2]  # D, assuming X_train is defined and loaded
    hidden_size = 64
    layer_size = 1
    output_size = 1

    # Instantiate and load the model
    model = LSTM(input_size, hidden_size, layer_size, output_size)
    model.load_state_dict(torch.load(path))
    model.eval()
    models.append(model)

In [13]:
model1 = models[0] # floor 0
model2 = models[1] # floor 1
model3 = models[2] # floor 2
model4 = models[3] # floor 3

In [22]:
# Loop over each floor
for i, df in enumerate(floor_dfs, start=0):
    # Load losses from the file
    with open(f'floor{i}_losses.txt', 'r') as file:
        lines = file.readlines()
    
    # Extract train and test losses
    train_losses = []
    test_losses = []
    is_test_loss = False
    
    for line in lines:
        if line.strip() == 'Train Losses:':
            is_test_loss = False
            continue
        elif line.strip() == 'Test Losses:':
            is_test_loss = True
            continue
        elif line.strip() == '':
            continue
        
        if not is_test_loss:
            train_losses.append(float(line.strip()))
        else:
            test_losses.append(float(line.strip()))
    
    
    # Create a DataFrame for losses
    epochs = range(1, len(train_losses) + 1)
    loss_data = {'Epochs': epochs, 'Train Loss': train_losses, 'Test Loss': test_losses}
    loss_df = pd.DataFrame(loss_data)
    
    # Plot using Plotly Express
    fig = px.line(loss_df, x='Epochs', y=['Train Loss', 'Test Loss'], title=f'Loss for Floor {i}')
    fig.update_layout(yaxis_title="Loss")
    fig.show()

In [14]:
from sklearn.metrics import mean_squared_error

models = [model1, model2, model3, model4]  

for i, model in enumerate(models):
    train_rmse = mean_squared_error(model(X_train).detach().numpy(), y_train.numpy())
    test_rmse = mean_squared_error(model(X_test).detach().numpy(), y_test.numpy())
    
    print(f"Floor {i}: RMSE for trainset: {train_rmse}")
    print(f"Floor {i}: RMSE for testset: {test_rmse}")
    print()


Floor 0: RMSE for trainset: 2.950685739517212
Floor 0: RMSE for testset: 5.1349687576293945

Floor 1: RMSE for trainset: 2.2179911136627197
Floor 1: RMSE for testset: 4.399012565612793

Floor 2: RMSE for trainset: 2.8992397785186768
Floor 2: RMSE for testset: 3.5575618743896484

Floor 3: RMSE for trainset: 1.391347885131836
Floor 3: RMSE for testset: 2.497856616973877

