## Auslesen der Dateien ##

Achtung: Die Dateien aus dem ersten Verzeichnis (hier D:/hka-aqm-m) müssen extrahiert vorliegen (d.h. nicht als zip-Ordner).

hka-aqm-m

            > ... .dat

            > ... .dat

In [1]:
from ML_Preparation.Preprocessing_M import DataExtractor, DataPreprocessing

# Directory holding the original (already unzipped) measurement files.
# NOTE(review): both paths are machine-specific absolute paths; adjust them
# before running the notebook on another machine.
original_dir = "D:/hka-aqm-m"
# Directory that is searched for .dat files first (see the recorded output below).
extracted_dir = "C:/Users/danie/building_M_data"

de = DataExtractor(original_dir, extracted_dir)

# Raw data frame that all later preprocessing steps start from.
df = de.create_df()

No .dat files found in C:/Users/danie/building_M_data. 
 Trying to extract files from the original directory D:/hka-aqm-m
Read data successfully.
Data contains 979166 data points and 18 columns.


## Preprocessing ##

df_preprocessed := Datensatz mit Wetterdaten und Jahreszeiten

df_preprocessed_without_weather := Datensatz ohne Wetterdaten, aber mit Jahreszeiten

df_preprocessed_without_seasons := Datensatz mit Wetterdaten, aber ohne Jahreszeiten

In [2]:
# Target column to forecast; reused by every later cell.
label = "CO2"

dp = DataPreprocessing(label=label)

# Options are forwarded unchanged to preprocess_df.
# NOTE(review): the recorded output of this cell reports a negative outlier
# count (-73438) - presumably a sign error in the library's log message; verify.
preprocess_options = {"rolling_window": "3d", "sample_time": "1d"}
df_preprocessed = dp.preprocess_df(df, **preprocess_options)

Number of outliers detected: -73438
Number of normal samples detected: 905728


In [3]:
# Display the column labels of the preprocessed frame; the drop lists in the
# next cell refer to these names.
df_preprocessed.columns

Index(['date_time', 'tmp', 'hum', 'CO2', 'VOC', 'vis', 'IR', 'BLE', 'tavg',
       'tmin', 'tmax', 'wdir', 'wspd', 'wpgt', 'pres', 'room_number',
       'tmp_diff', 'hum_diff', 'VOC_diff', 'vis_diff', 'IR_diff', 'BLE_diff',
       'tavg_diff', 'tmin_diff', 'tmax_diff', 'wdir_diff', 'wspd_diff',
       'wpgt_diff', 'pres_diff', 'year', 'dayofweek', 'hour', 'season',
       'VOC_CO2_ratio'],
      dtype='object')

In [4]:
# Base weather measurements; each one also has a "<name>_diff" companion column.
weather_columns = ['tavg', 'tmin', 'tmax', 'wdir', 'wspd', 'wpgt', 'pres']
weather_diff_columns = [f"{name}_diff" for name in weather_columns]

# Variant without any weather information (seasons are kept).
df_preprocessed_without_weather = df_preprocessed.drop(
    columns=weather_columns + weather_diff_columns
)

# Variant without the season column (weather data is kept).
df_preprocessed_without_seasons = df_preprocessed.drop(columns=["season"])

## Feature Engineering ##

In [5]:
from ML_Preparation.Feature_Engineering import *

# Number of forecast steps; also passed to the models and the evaluation loop.
n_steps = 7

categorical_columns = ["season", "room_number", "dayofweek"]

# Feature engineering on the full data set (weather data and seasons).
fe = FeatureEngineering(
    df_preprocessed,
    label=label,
    categorical_features=categorical_columns,
    automated_feature_engineering=False,
)

# feature_engineering returns the six train/validation/test arrays in this order.
splits = fe.feature_engineering(steps_to_forecast=n_steps, skip_scale=True)
X_train, X_val, X_test, y_train, y_val, y_test = splits

In [6]:
def _make_feature_set(frame, categorical_features):
    """Build a FeatureEngineering object for `frame` and run it.

    Returns the FeatureEngineering object together with the tuple of
    train/validation/test arrays produced by `feature_engineering`.
    """
    engineer = FeatureEngineering(
        frame,
        label=label,
        categorical_features=categorical_features,
        automated_feature_engineering=False,
    )
    arrays = engineer.feature_engineering(steps_to_forecast=n_steps, skip_scale=True)
    return engineer, arrays


# Data set without weather columns (seasons kept).
fe_no_weather, splits_nowe = _make_feature_set(
    df_preprocessed_without_weather, ["season", "room_number", "dayofweek"]
)
X_train_nowe, X_val_nowe, X_test_nowe, y_train_nowe, y_val_nowe, y_test_nowe = splits_nowe

# Data set without the season column, so "season" is not a categorical feature here.
fe_no_seasons, splits_nose = _make_feature_set(
    df_preprocessed_without_seasons, ["room_number", "dayofweek"]
)
X_train_nose, X_val_nose, X_test_nose, y_train_nose, y_val_nose, y_test_nose = splits_nose

## Modelltraining ##

In [7]:
from Forecasting_Evaluation.Forecasting_Models import CO2_Forecasting_Model
from matplotlib import pyplot

Trainierte Modelle laden (falls vorhanden); andernfalls werden die Modelle neu trainiert.

In [8]:
import tensorflow as tf

# Flag read by the following cells: True means the saved models could not be
# loaded and new ones have to be trained.
train_models = False

try:
    # All three saved models are needed; a failure on any of them triggers retraining.
    model = tf.keras.models.load_model(f"{label}_Forecasting_Model.keras")
    model_no_weather = tf.keras.models.load_model(f"{label}_Forecasting_Model_no_weather.keras")
    model_no_seasons = tf.keras.models.load_model(f"{label}_Forecasting_Model_no_seasons.keras")
except Exception as load_error:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate, and report the cause
    # instead of hiding it.
    print("Models couldn't be loaded. Train new models.")
    print(f"Reason: {type(load_error).__name__}: {load_error}")
    train_models = True

In [9]:
def _train_and_plot(feature_engineering, plot_title, epochs=50, batch_size=7):
    """Train one forecasting model and plot its loss curves.

    Parameters
    ----------
    feature_engineering : FeatureEngineering object providing X_train, y_train,
        X_val and y_val.
    plot_title : title of the training/validation loss plot.
    epochs, batch_size : forwarded to the Keras `fit` call.

    Returns
    -------
    (forecasting_model, history) : the CO2_Forecasting_Model wrapper (the Keras
        model is its `.model` attribute) and the History object returned by `fit`.
    """
    forecasting_model = CO2_Forecasting_Model(n_steps, feature_engineering)

    history = forecasting_model.model.fit(
        feature_engineering.X_train,
        feature_engineering.y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(feature_engineering.X_val, feature_engineering.y_val),
        verbose=0,
        # Samples are not shuffled between epochs
        # (presumably to keep the time order of the series - confirm).
        shuffle=False,
    )

    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history["val_loss"], label="val")
    pyplot.title(plot_title)
    pyplot.legend()
    pyplot.show()

    return forecasting_model, history


if train_models:
    # One model per data set variant; `history` ends up holding the last run,
    # as in the original cell.
    model, history = _train_and_plot(fe, "Training mit Jahreszeiten und Wetterdaten")
    model_no_weather, history = _train_and_plot(
        fe_no_weather, "Training mit Jahreszeiten und OHNE Wetterdaten"
    )
    model_no_seasons, history = _train_and_plot(
        fe_no_seasons, "Training OHNE Jahreszeiten und mit Wetterdaten"
    )

In [10]:
if train_models:
    # Freshly trained models are CO2_Forecasting_Model wrappers; persist the
    # Keras model they hold so the next run can load it.
    for wrapper, target_path in (
        (model, f"{label}_Forecasting_Model.keras"),
        (model_no_weather, f"{label}_Forecasting_Model_no_weather.keras"),
        (model_no_seasons, f"{label}_Forecasting_Model_no_seasons.keras"),
    ):
        wrapper.model.save(target_path)

# Loaded models are used directly; trained wrappers expose the Keras model
# through `.model`.
keras_models = [
    candidate.model if train_models else candidate
    for candidate in (model, model_no_weather, model_no_seasons)
]

# Predict on the test split that belongs to each model's own feature set.
pred, pred_no_weather, pred_no_seasons = [
    keras_model.predict(feature_set.X_test)
    for keras_model, feature_set in zip(keras_models, (fe, fe_no_weather, fe_no_seasons))
]


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step 
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step 


## Modellevaluation ##

In [11]:
import plotly.express as px
import seaborn as sns
from Forecasting_Evaluation.Evaluator import Evaluator


ev = Evaluator()

# Legend names for the traces. With list input, plotly express names the traces
# "wide_variable_<i>"; the `labels` argument only renames columns, not these
# trace names, so they are renamed explicitly after the figure is built.
trace_names = {
    "wide_variable_0": "y_true",
    "wide_variable_1": "Modell 1 - Vorhersage (Wetterdaten + Jahreszeiten)",
    "wide_variable_2": "Modell 2 - Vorhersage (Jahreszeiten)",
    "wide_variable_3": "Modell 3 - Vorhersage (Wetterdaten)",
}


def _rename_trace(trace):
    """Replace plotly's auto-generated trace name with its readable name."""
    readable_name = trace_names.get(trace.name, trace.name)
    trace.update(name=readable_name, legendgroup=readable_name)


# One evaluation and one plot per forecast step (one column of y_test each).
for col in range(fe.y_test.shape[1]):
    print("t +", col)
    print("Modell mit Wetterdaten und Jahreszeiten (rot)")
    ev.evaluate(y_true=fe.y_test[:, col], y_pred=pred[:, col])
    print("Modell ohne Wetterdaten und mit Jahreszeiten (grün)")
    ev.evaluate(y_true=fe_no_weather.y_test[:, col], y_pred=pred_no_weather[:, col])
    print("Modell mit Wetterdaten und ohne Jahreszeiten (violett)")
    ev.evaluate(y_true=fe_no_seasons.y_test[:, col], y_pred=pred_no_seasons[:, col])

    # Ground truth of the full model's test split against all three predictions.
    fig = px.line(
        y=[fe.y_test[:, col], pred[:, col], pred_no_weather[:, col], pred_no_seasons[:, col]],
        title=f"Prediction for CO2(t + {col})  blue = y_true, red = y_pred",
    )
    fig.for_each_trace(_rename_trace)
    fig.show()


t + 0
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6742.47
RMSE:  82.11
MAPE:  0.13
R2:  -0.36


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5633.44
RMSE:  75.06
MAPE:  0.12
R2:  -0.14


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5729.6
RMSE:  75.69
MAPE:  0.11
R2:  -0.16




t + 1
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6376.19
RMSE:  79.85
MAPE:  0.12
R2:  -0.29


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5467.24
RMSE:  73.94
MAPE:  0.12
R2:  -0.11


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5437.65
RMSE:  73.74
MAPE:  0.1
R2:  -0.1




t + 2
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6791.82
RMSE:  82.41
MAPE:  0.13
R2:  -0.38


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5556.7
RMSE:  74.54
MAPE:  0.12
R2:  -0.13


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5771.14
RMSE:  75.97
MAPE:  0.11
R2:  -0.17




t + 3
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6755.86
RMSE:  82.19
MAPE:  0.13
R2:  -0.37


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5515.49
RMSE:  74.27
MAPE:  0.12
R2:  -0.11


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5554.56
RMSE:  74.53
MAPE:  0.1
R2:  -0.12




t + 4
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6461.25
RMSE:  80.38
MAPE:  0.13
R2:  -0.31


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5463.03
RMSE:  73.91
MAPE:  0.12
R2:  -0.1


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5219.51
RMSE:  72.25
MAPE:  0.1
R2:  -0.06




t + 5
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6629.69
RMSE:  81.42
MAPE:  0.13
R2:  -0.34


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5582.74
RMSE:  74.72
MAPE:  0.12
R2:  -0.13


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5643.94
RMSE:  75.13
MAPE:  0.1
R2:  -0.14




t + 6
Modell mit Wetterdaten und Jahreszeiten (rot)
MSE:  6639.14
RMSE:  81.48
MAPE:  0.13
R2:  -0.34


Modell ohne Wetterdaten und mit Jahreszeiten (grün)
MSE:  5531.38
RMSE:  74.37
MAPE:  0.12
R2:  -0.12


Modell mit Wetterdaten und ohne Jahreszeiten (violett)
MSE:  5457.31
RMSE:  73.87
MAPE:  0.1
R2:  -0.1




## Deployment bzw. Inferenz mit dem trainierten Modell ##

In [12]:
from Deployment.ML_Deployment import *

# Hand the raw (unpreprocessed) frame to the feature-engineering object.
# NOTE(review): this mutates `fe`, which was also used for training above -
# presumably Predictor reads `fe.df`; confirm against ML_Deployment.
fe.df = df.copy()

# Use the shared `label` and `n_steps` instead of repeating "CO2" and 7, so the
# inference setup cannot drift from the configuration the models were trained with.
deployer = Predictor(
    data=df.head(10000),
    feature_engineering_class_object=fe,
    label=label,
    is_forecast=True,
    roll=True,
    steps_to_forecast=n_steps,
)


In [13]:
# Display the (rows, columns) shape of the frame held by the Predictor.
deployer.df.shape

(109, 33)

In [14]:
# Display the shape of the model input array prepared by the Predictor
# (three-dimensional according to the recorded output).
deployer.x.shape

(109, 1, 68)

In [15]:
# A freshly trained model is a wrapper holding the Keras model in `.model`;
# a model loaded from disk is already the Keras model.
inference_model = model.model if train_models else model

forecasted_pred = deployer.predict(x=deployer.x, model=inference_model)

forecasted_pred

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


Unnamed: 0_level_0,prediction_t+0,prediction_t+1,prediction_t+2,prediction_t+3,prediction_t+4,prediction_t+5,prediction_t+6,room_number
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-06-01,555.215149,554.385498,555.717346,552.324036,551.620850,552.809875,550.851685,m102(t-1)
2022-06-02,491.909088,489.172333,492.422150,491.113129,490.186310,490.567749,494.055054,m102(t-1)
2022-06-03,550.781128,550.335693,552.234680,548.631775,547.352661,549.444397,546.895874,m102(t-1)
2022-06-04,523.866516,521.641113,523.778992,522.481995,520.589294,524.216431,524.368164,m102(t-1)
2022-07-14,470.117035,467.415741,470.707764,467.933838,468.811768,467.768524,472.132324,m105(t-1)
...,...,...,...,...,...,...,...,...
2023-01-25,524.338989,522.166199,524.335388,522.949219,521.075134,524.607910,524.740784,m001(t-1)
2023-01-26,534.127747,533.128784,535.708618,532.503113,531.082947,532.800842,532.304321,m002(t-1)
2023-01-27,521.197754,518.722534,521.489075,519.760620,518.038940,520.798706,521.832947,m003(t-1)
2023-01-31,524.001648,521.991089,524.175598,522.658752,520.839844,524.152893,524.205078,m207(t-1)
