In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import os

# Start from the directory the kernel was launched in.
current_dir = os.getcwd()

# Change the current working directory: when the path contains "notebook",
# keep only the part that precedes it, so that the relative paths used by the
# later cells ("data/...", "models/...") are resolved from there.
marker_pos = current_dir.find("notebook")
if marker_pos != -1:
    current_dir = current_dir[:marker_pos]
    os.chdir(current_dir)

# Show which directory is used as the reference for relative paths.
print(current_dir)

/Users/syntychefabien/Documents/Syntyche/Isheero/BootCamp_ATUT2023/PowerForecast_temp/


## Chargement des données

In [3]:
# Project helpers. `training` is added here because the sequence-building and
# model-training cells further down call `training.<function>` although the
# notebook never imported it, which fails on a fresh kernel.
# NOTE(review): assumes `training` is a module of the `src` package like the
# other helpers — confirm.
from src import data_gathering, data_analysis, data_featuring, training

# Load the dataset from the CSV file (comma-separated; column renaming is
# requested from the loader through b_rename_cols=True).
data_filename = "data/PowerConsumptionTetouan/Tetuan_City_power_consumption.csv"
df_dataset = data_gathering.load_dataset(data_filename, col_sep=",", b_rename_cols=True)

NameError: name 'data_gathering' is not defined

## Définition des paramètres

In [None]:
# Column of the dataset used both as model input and as prediction target
# (it is stored as 'feat_names' and 'target_names' in the training parameters).
feat_name = "Consumption_Z3"

# Settings forwarded to the train/test sequence builder
time_sampling_mins = 10   # sampling period of the series, in minutes (per its name)
win_size_hours = 1        # length of the history window, in hours (per its name)
train_ratio = 0.8         # presumably the share of samples kept for training — TODO confirm

# Scaling options
scaler_str = "std"        # scaler identifier forwarded to the sequence builder
b_scaler = True           # NOTE(review): not used by any cell visible in this notebook

## Création des bases d'apprentissage et de test

#### Séparation temporelle des bases d'apprentissage et de test

#### Création des séquences temporelles 
Il faut structurer les données en séquences temporelles afin de prendre en compte un historique.

In [None]:
# Build the train and test sequences for the single feature `feat_name`.
# The helper returns, in this order: train features / targets / target index,
# test features / targets / target index, and the scaler object.
# NOTE(review): `b_scaler` is defined in the parameters cell but is not passed here.
(
    X_train, y_train, idx_train,
    X_test, y_test, idx_test,
    scaler,
) = training.build_train_test_univariate_sequences(
    df_data=df_dataset,
    feat_name=feat_name,
    win_size_hours=win_size_hours,
    time_sampling_mins=time_sampling_mins,
    train_ratio=train_ratio,
    scaler_str=scaler_str,
)

## Prédiction de la consommation

### Entrainement du modèle

In [None]:
import os
import pickle
from datetime import datetime

# Training hyper-parameters and run metadata. The whole dictionary is passed to
# the training helper and pickled next to the results, so that a run can be
# traced back to its settings (it also embeds the train/test sequences).
d_learning_params = {
    'batch_size': 256,
    'epochs': 50,
    'activation_fn': "relu",
    'loss_fn': "mse",
    'optimizer': "adam",
    'val_ratio': 0.2,
    'win_size_hours': win_size_hours,
    'time_sampling_mins': time_sampling_mins,
    'scaler': scaler,
    'feat_names': feat_name,
    'target_names': feat_name,
    'metrics': ['mae'],
    'd_train_test': {},
    'seq_train_test': {
        "train": {"features": X_train, "targets": y_train, "targets_idx": idx_train},
        "test": {"features": X_test, "targets": y_test, "targets_idx": idx_test},
    },
    'layers': [100, 50, 10],
}

# Timestamp used to give each run its own output directory and file names.
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
results_dir = os.path.join("models", "Univariate_LSTM_{}h".format(win_size_hours), feat_name, current_datetime)
params_file = os.path.join(results_dir, "lstm_params_{}.pkl".format(current_datetime))
scores_file = os.path.join(results_dir, "lstm_scores_{}.pkl".format(current_datetime))

# Create the results directory; exist_ok avoids the separate existence check
# (and the race between the check and the creation).
os.makedirs(results_dir, exist_ok=True)

# Save the training parameters before training starts.
with open(params_file, 'wb') as fp:
    pickle.dump(d_learning_params, fp)

# Train the model; the helper returns the fitted model and its scores.
model_lstm, scores = training.train_lstm_univariate(X_train, y_train,
                                                    X_test, y_test,
                                                    d_learning_params,
                                                    results_dir)

# Save the scores together with the parameters that produced them. This is done
# before printing so that a problem while formatting the report below cannot
# lose the result of a completed training run.
d_scores = d_learning_params.copy()
d_scores["scores"] = scores
with open(scores_file, 'wb') as fp:
    pickle.dump(d_scores, fp)

# Report R2 and MAE on the training and test sets.
print("Performance du modèle")
print("\t --> Score R2 (Le modèle parfait a un score de 1)")
print("\t \t --> Ensemble d'apprentissage : {}".format(scores['R2']['train']))
print("\t \t --> Ensemble de test : {}".format(scores['R2']['test']))
print("\n")
print("\t --> MAE (Erreur moyenne absolue) en KW")
print("\t \t --> Ensemble d'apprentissage : {}".format(scores['MAE']['train']))
print("\t \t --> Ensemble de test : {}".format(scores['MAE']['test']))

### Affichage des prédictions 

In [None]:
from src.prediction import display_prediction

# Name of the column that receives the model output (kept as a list because it
# is passed as such to display_prediction).
pred_col_name = ["prediction"]

# The same scaler that was stored with the training parameters is used to map
# targets and predictions back to the original scale.
fitted_scaler = d_learning_params['scaler']

# Ground truth on the test set, back in the original scale, indexed by idx_test.
y_test_unscaled = fitted_scaler.inverse_transform(y_test)
df_test = pd.DataFrame(data=y_test_unscaled, columns=[feat_name], index=idx_test)

# Model predictions on the test set, also brought back to the original scale.
y_pred_scaled = model_lstm.predict(X_test)
df_test[pred_col_name[0]] = fitted_scaler.inverse_transform(y_pred_scaled)

# Save the predictions next to the other artefacts of this run.
predictions_file = os.path.join(
    results_dir,
    "lstm_predictions_{}_{}.pkl".format(feat_name, str(current_datetime)),
)
df_test.to_pickle(predictions_file)

# Plot the measured series against the predicted one.
display_prediction(
    df_test,
    target_col_name=feat_name,
    pred_col_name=pred_col_name,
    figsize=(14, 8),
)


In [None]:
# Zoom on 20-30 November 2017. `.loc` makes the label-based row slice explicit
# instead of relying on DataFrame `[]` falling back to row slicing.
# NOTE(review): assumes df_test has a datetime index covering this period — confirm.
# The trailing semicolon hides the Axes repr in the cell output.
df_test.loc["2017-11-20":"2017-11-30"].plot(
    title="Consommation réelle et prédite du 20 au 30 novembre 2017"
);