## Auslesen der Dateien ##

Achtung: Die Dateien aus dem ersten Verzeichnis (hier `D:/hka-aqm-m`) müssen bereits entpackt vorliegen, d. h. nicht als ZIP-Archiv.

hka-aqm-m

            > ... .dat

            > ... .dat

In [1]:
from ML_Preparation.Preprocessing_M import DataExtractor, DataPreprocessing

# Directory with the original (already unzipped) measurement files, and the
# directory that is checked for .dat files first.
RAW_DATA_DIR = "D:/hka-aqm-m"
DAT_FILE_DIR = "C:/Users/danie/building_M_data"

# Build the extractor and load the measurements into `df`.
de = DataExtractor(RAW_DATA_DIR, DAT_FILE_DIR)
df = de.create_df()

No .dat files found in C:/Users/danie/building_M_data. 
 Trying to extract files from the original directory D:/hka-aqm-m
Read data successfully.
Data contains 979166 data points and 18 columns.


## Preprocessing ##

df_preprocessed := Datensatz mit Wetterdaten und Jahreszeiten

df_preprocessed_without_weather := Datensatz ohne Wetterdaten, aber mit Jahreszeiten

df_preprocessed_without_seasons := Datensatz mit Wetterdaten, aber ohne Jahreszeiten

In [2]:
# Name of the target column; reused by the feature-engineering and model cells.
label = "CO2"

dp = DataPreprocessing(label=label)

# NOTE(review): the recorded output of this cell reports a negative number of
# detected outliers; this looks like a sign error inside the preprocessing
# code - confirm.
df_preprocessed = dp.preprocess_df(
    df,
    rolling_window="3d",
    sample_time="1d",
)

Number of outliers detected: -73438
Number of normal samples detected: 905728


In [3]:
# Show the column labels of the preprocessed frame.
df_preprocessed.columns

Index(['date_time', 'tmp', 'hum', 'CO2', 'VOC', 'vis', 'IR', 'BLE', 'tavg',
       'tmin', 'tmax', 'wdir', 'wspd', 'wpgt', 'pres', 'room_number',
       'tmp_diff', 'hum_diff', 'VOC_diff', 'vis_diff', 'IR_diff', 'BLE_diff',
       'tavg_diff', 'tmin_diff', 'tmax_diff', 'wdir_diff', 'wspd_diff',
       'wpgt_diff', 'pres_diff', 'year', 'dayofweek', 'hour', 'season',
       'VOC_CO2_ratio'],
      dtype='object')

In [4]:
# Weather columns together with their "_diff" counterparts.
weather_columns = [
    'tavg', 'tmin', 'tmax', 'wdir', 'wspd', 'wpgt', 'pres',
    'tavg_diff', 'tmin_diff', 'tmax_diff', 'wdir_diff', 'wspd_diff',
    'wpgt_diff', 'pres_diff',
]

# Variant without weather data (season column is kept).
df_preprocessed_without_weather = df_preprocessed.drop(columns=weather_columns)

# Variant without the season column (weather data is kept).
df_preprocessed_without_seasons = df_preprocessed.drop(columns=["season"])

## Feature Engineering ##

In [5]:
from ML_Preparation.Feature_Engineering import *

# Number of steps to forecast; also passed to the model constructors later on.
n_steps = 7

# Feature engineering on the full frame (weather data and season included).
fe = FeatureEngineering(
    df_preprocessed,
    label=label,
    categorical_features=["season", "room_number", "dayofweek"],
    automated_feature_engineering=False,
)

(X_train, X_val, X_test,
 y_train, y_val, y_test) = fe.feature_engineering(
    steps_to_forecast=n_steps,
    skip_scale=True,
)

In [6]:
# Feature engineering on the frame without weather columns.
fe_no_weather = FeatureEngineering(
    df_preprocessed_without_weather,
    label=label,
    categorical_features=["season", "room_number", "dayofweek"],
    automated_feature_engineering=False,
)

(X_train_nowe, X_val_nowe, X_test_nowe,
 y_train_nowe, y_val_nowe, y_test_nowe) = fe_no_weather.feature_engineering(
    steps_to_forecast=n_steps,
    skip_scale=True,
)

# Feature engineering on the frame without the season column; "season" is
# therefore not listed among the categorical features here.
fe_no_seasons = FeatureEngineering(
    df_preprocessed_without_seasons,
    label=label,
    categorical_features=["room_number", "dayofweek"],
    automated_feature_engineering=False,
)

(X_train_nose, X_val_nose, X_test_nose,
 y_train_nose, y_val_nose, y_test_nose) = fe_no_seasons.feature_engineering(
    steps_to_forecast=n_steps,
    skip_scale=True,
)

## Modelltraining ##

In [7]:
from Forecasting_Evaluation.Forecasting_Models import CO2_Forecasting_Model
from matplotlib import pyplot

Bereits trainierte Modelle laden. Schlägt das Laden fehl, werden in den folgenden Zellen neue Modelle trainiert.

In [8]:
import tensorflow as tf

# Flag read by the following cells: True means the saved models could not be
# loaded and new ones have to be trained.
train_models = False

try:
    model = tf.keras.models.load_model(f"{label}_Forecasting_Model.keras")
    model_no_weather = tf.keras.models.load_model(f"{label}_Forecasting_Model_no_weather.keras")
    model_no_seasons = tf.keras.models.load_model(f"{label}_Forecasting_Model_no_seasons.keras")
except Exception as err:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the cause is printed so that a broken file
    # can be told apart from a missing one.
    print("Models couldn't be loaded. Train new models.")
    print(f"Reason: {type(err).__name__}: {err}")
    train_models = True

In [9]:
def _fit_and_plot(forecaster, features, title):
    """Fit one forecasting model and plot its training and validation loss.

    Parameters
    ----------
    forecaster
        Object whose ``.model`` attribute provides ``fit``.
    features
        Object providing ``X_train``, ``y_train``, ``X_val`` and ``y_val``.
    title : str
        Title of the loss-curve figure.

    Returns
    -------
    The history object returned by ``fit``.
    """
    history = forecaster.model.fit(
        features.X_train, features.y_train,
        epochs=50, batch_size=7,
        validation_data=(features.X_val, features.y_val),
        verbose=0,
        shuffle=False,
    )

    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history["val_loss"], label="val")
    pyplot.title(title)
    pyplot.legend()
    pyplot.show()
    return history


if train_models:
    # One model per data variant; the identical fit/plot sequence lives in
    # `_fit_and_plot` instead of being repeated three times.
    model = CO2_Forecasting_Model(n_steps, fe)
    history = _fit_and_plot(model, fe, "Training mit Jahreszeiten und Wetterdaten")

    model_no_weather = CO2_Forecasting_Model(n_steps, fe_no_weather)
    history = _fit_and_plot(model_no_weather, fe_no_weather,
                            "Training mit Jahreszeiten und OHNE Wetterdaten")

    model_no_seasons = CO2_Forecasting_Model(n_steps, fe_no_seasons)
    history = _fit_and_plot(model_no_seasons, fe_no_seasons,
                            "Training OHNE Jahreszeiten und mit Wetterdaten")

In [10]:
if train_models:
    # Persist the freshly trained networks so that later runs can load them.
    model.model.save(f"{label}_Forecasting_Model.keras")
    model_no_weather.model.save(f"{label}_Forecasting_Model_no_weather.keras")
    model_no_seasons.model.save(f"{label}_Forecasting_Model_no_seasons.keras")

# Freshly trained models expose the network as `.model`; models loaded from
# disk are used directly.
if train_models:
    net, net_no_weather, net_no_seasons = (
        model.model, model_no_weather.model, model_no_seasons.model
    )
else:
    net, net_no_weather, net_no_seasons = model, model_no_weather, model_no_seasons

# Test-set predictions, one per data variant.
pred = net.predict(fe.X_test)
pred_no_weather = net_no_weather.predict(fe_no_weather.X_test)
pred_no_seasons = net_no_seasons.predict(fe_no_seasons.X_test)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step 
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step 
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step 


## Modellevaluation ##

In [11]:
import plotly.express as px
import seaborn as sns
from Forecasting_Evaluation.Evaluator import Evaluator


ev = Evaluator()

# (printed heading, feature-engineering object, predictions) for each variant,
# in the order in which they are evaluated.
variants = [
    ("Modell mit Wetterdaten und Jahreszeiten (rot)", fe, pred),
    ("Modell ohne Wetterdaten und mit Jahreszeiten (grün)", fe_no_weather, pred_no_weather),
    ("Modell mit Wetterdaten und ohne Jahreszeiten (violett)", fe_no_seasons, pred_no_seasons),
]

# One pass per column of y_test: print the metrics of every variant, then plot
# the true values next to the three predictions.
for col in range(fe.y_test.shape[1]):
    print("t +", col)
    for heading, features, predictions in variants:
        print(heading)
        ev.evaluate(y_true=features.y_test[:, col], y_pred=predictions[:, col])

    series = [
        fe.y_test[:, col],
        pred[:, col],
        pred_no_weather[:, col],
        pred_no_seasons[:, col],
    ]
    fig = px.line(
        y=series,
        labels={
            "wide_variable_0": "y_true",
            "wide_variable_1": "Modell 1 - Vorhersage (Wetterdaten + Jahreszeiten)",
            "wide_variable_2": "Modell 2 - Vorhersage (Jahreszeiten)",
            "wide_variable_3": "Modell 3 - Vorhersage (Wetterdaten)",
        },
        title=f"Prediction for CO2(t + {col})  blue = y_true, red = y_pred",
    )
    fig.show()


t + 0
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  7056.92
RMSE:  84.01
MAPE:  0.13
R2:  -0.4


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5724.02
RMSE:  75.66
MAPE:  0.12
R2:  -0.13


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5780.98
RMSE:  76.03
MAPE:  0.1
R2:  -0.15




t + 1
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6679.44
RMSE:  81.73
MAPE:  0.13
R2:  -0.32


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5627.29
RMSE:  75.02
MAPE:  0.12
R2:  -0.12


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5574.21
RMSE:  74.66
MAPE:  0.1
R2:  -0.1




t + 2
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6776.44
RMSE:  82.32
MAPE:  0.13
R2:  -0.34


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5623.43
RMSE:  74.99
MAPE:  0.12
R2:  -0.11


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5686.14
RMSE:  75.41
MAPE:  0.11
R2:  -0.12




t + 3
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6755.78
RMSE:  82.19
MAPE:  0.13
R2:  -0.33


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5577.12
RMSE:  74.68
MAPE:  0.12
R2:  -0.1


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5624.51
RMSE:  75.0
MAPE:  0.1
R2:  -0.11




t + 4
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6687.11
RMSE:  81.77
MAPE:  0.13
R2:  -0.32


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5661.29
RMSE:  75.24
MAPE:  0.12
R2:  -0.12


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5539.63
RMSE:  74.43
MAPE:  0.1
R2:  -0.09




t + 5
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6752.46
RMSE:  82.17
MAPE:  0.13
R2:  -0.33


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5711.11
RMSE:  75.57
MAPE:  0.12
R2:  -0.13


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5666.26
RMSE:  75.27
MAPE:  0.1
R2:  -0.12




t + 6
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6788.77
RMSE:  82.39
MAPE:  0.13
R2:  -0.34


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5650.21
RMSE:  75.17
MAPE:  0.12
R2:  -0.12


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5767.97
RMSE:  75.95
MAPE:  0.11
R2:  -0.14




## Deployment bzw. Inferenz mit dem trainierten Modell ##

In [12]:
from Deployment.ML_Deployment import *

# Give the feature-engineering object a copy of the frame read in the first
# cell before it is handed to the predictor.
# NOTE(review): presumably Predictor reads `fe.df` - confirm.
fe.df = df.copy()

# `label` and `n_steps` are the notebook-wide settings used for training;
# reusing them here (instead of repeating "CO2" and 7) keeps inference in sync
# with the trained models if either value is changed.
deployer = Predictor(
    data=df.head(10000),
    feature_engineering_class_object=fe,
    label=label,
    is_forecast=True,
    roll=True,
    steps_to_forecast=n_steps,
)


In [13]:
# Shape of the frame held by the predictor.
deployer.df.shape

(109, 33)

In [14]:
# Shape of the input array that is passed to `deployer.predict` below.
deployer.x.shape

(109, 1, 68)

In [15]:
# A freshly trained model exposes the network as `.model`; a model loaded from
# disk is passed on as it is.
inference_model = model.model if train_models else model
forecasted_pred = deployer.predict(x=deployer.x, model=inference_model)

forecasted_pred

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


Unnamed: 0_level_0,prediction_t+0,prediction_t+1,prediction_t+2,prediction_t+3,prediction_t+4,prediction_t+5,prediction_t+6,room_number
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-06-01,553.691833,552.947144,554.454712,551.022339,550.150635,551.586731,549.504150,m102
2022-06-02,477.645233,474.418488,478.908325,476.208252,475.927887,475.533264,480.280151,m102
2022-06-03,554.099976,553.805176,555.401794,551.680847,550.570374,552.596680,549.683533,m102
2022-06-04,523.866516,521.641113,523.778992,522.481995,520.589294,524.216431,524.368164,m102
2022-07-14,470.117828,467.416534,470.708618,467.934723,468.812622,467.769318,472.133240,m105
...,...,...,...,...,...,...,...,...
2023-01-25,524.970703,522.868164,525.079224,523.573914,521.724670,525.131348,525.238892,m001
2023-01-26,534.126099,533.120239,535.709351,532.507874,531.082764,532.800598,532.313293,m002
2023-01-27,521.165649,518.687378,521.461487,519.727844,518.008240,520.757568,521.802429,m003
2023-01-31,524.054871,522.129150,524.331909,522.728394,520.938538,524.127869,524.140808,m207
