In [14]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, LSTM, Dense
from sklearn.metrics import mean_squared_error as mse
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

#We will use this function later to predict to plot the predictions alongside the labels
def plot_predictions1(model, X, y, start=0, end=100):
  predictions = model.predict(X).flatten()
  df = pd.DataFrame(data={'Predictions':predictions, 'Actuals':y})
  plt.plot(df['Predictions'][start:end])
  plt.plot(df['Actuals'][start:end])
  return df, mse(y, predictions)

In [2]:
data_csv = '../data/your_train.csv'

In [3]:
#Leemos el archivo, indicando que la primera columna son datos de fecha y hora
#le decimos que use esos timestamps como indice
df = pd.read_csv(data_csv, parse_dates=[0], index_col=0)
df

Unnamed: 0,HU_B01,HU_B09,HU_B11,HU_B15,HU_B16,HU_B19,HU_Load,IT_B01,IT_B09,IT_B11,...,SE_B12,SE_B16,SE_B19,SE_Load,NE_Load,Sin_Hour,Cos_Hour,Sin_DayOfYear,Cos_DayOfYear,label
2021-12-31 23:00:00,133.0,1.0,12.0,8.0,1.0,127.0,4255.0,678.0,640.0,2116.0,...,7016.0,1.0,4046.0,15332.0,10327.0,-0.258819,9.659258e-01,6.432491e-16,1.000000,3
2022-01-01 00:00:00,524.0,1.0,45.0,31.0,1.0,744.0,16458.0,684.0,641.0,1952.0,...,7087.0,1.0,4022.0,15332.0,40707.0,0.000000,1.000000e+00,1.721336e-02,0.999852,3
2022-01-01 01:00:00,517.0,1.0,45.0,29.0,1.0,903.0,15427.0,671.0,640.0,1832.0,...,7089.0,1.0,3949.0,15271.0,39466.0,0.258819,9.659258e-01,1.721336e-02,0.999852,3
2022-01-01 02:00:00,508.0,1.0,45.0,29.0,1.0,946.0,14782.0,657.0,649.0,1843.0,...,6747.0,1.0,3764.0,15151.0,38924.0,0.500000,8.660254e-01,1.721336e-02,0.999852,3
2022-01-01 03:00:00,507.0,1.0,45.0,29.0,1.0,674.0,14631.0,650.0,649.0,1819.0,...,7174.0,1.0,3598.0,15388.0,38212.0,0.707107,7.071068e-01,1.721336e-02,0.999852,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-01 18:00:00,452.0,1.0,37.0,48.0,1.0,118.0,18252.0,682.0,621.0,2477.0,...,6917.0,1.0,5800.0,16588.0,49163.0,-1.000000,-1.836970e-16,1.721336e-02,0.999852,3
2023-01-01 19:00:00,451.0,1.0,37.0,43.0,1.0,132.0,17874.0,682.0,622.0,1768.0,...,6585.0,1.0,5764.0,16146.0,47636.0,-0.965926,2.588190e-01,1.721336e-02,0.999852,3
2023-01-01 20:00:00,453.0,1.0,37.0,42.0,1.0,110.0,17142.0,685.0,622.0,1559.0,...,6437.0,1.0,5551.0,15870.0,45690.0,-0.866025,5.000000e-01,1.721336e-02,0.999852,3
2023-01-01 21:00:00,451.0,1.0,37.0,43.0,1.0,123.0,16260.0,688.0,622.0,1461.0,...,5829.0,1.0,5400.0,15144.0,43277.0,-0.707107,7.071068e-01,1.721336e-02,0.999852,3


In [4]:
#We input the number of regions we are going to focus on
num_regions=8
#We choose the parameters for our model
window_size=10
num_train = 300
num_train2 = num_train+300
num_val = 150
#num:test would be the last 20% of the year, to comply with the task
num_test = 300

In [5]:
def df_to_X_y(df, window_size=window_size):
    df_as_np = df.to_numpy()
    X = []
    y = []
    for i in range(len(df_as_np) - window_size):
        #We take every column except the last one, because its values are the labels
        row = [r[0:-1] for r in df_as_np[i:i+window_size]]
        X.append(row)
        label = df_as_np[i+window_size][-1]
        y.append(label)
    return np.array(X), np.array(y)

In [6]:
X, y = df_to_X_y(df)
X.shape, y.shape

((8771, 10, 51), (8771,))

In [7]:

#X_train and y_train have a size of "num_train"
X_train, y_train = X[:num_train], y[:num_train]
#X_val and y_val have a size of "num_val"
X_val, y_val = X[num_train:(num_train+num_val)], y[num_train:(num_train+num_val)]
#X_test and y_test have a size of "num_test"
X_test, y_test = X[(num_train+num_val):(num_train+num_val+num_test)], y[num_train:(num_train+num_test)]

X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((300, 10, 51), (300,), (150, 10, 51), (150,), (300, 10, 51), (300,))

In [8]:
#We are going to standardize our dataframes, so that the model can work with them better
df_training_mean = np.mean(X_train[:, :, 0])
df_training_std = np.std(X_train[:, :, 0])
                           
def preprocess(X):
  X[:, :, 0] = (X[:, :, 0] - df_training_mean) / df_training_std
  return X

In [9]:
#We preprocess all the dataframes
preprocess(X_train)
preprocess(X_val)
preprocess(X_test)

array([[[ 2.27479176e+00,  5.00000000e+00,  5.30000000e+01, ...,
         -2.58819045e-01,  3.21269662e-01,  9.46987753e-01],
        [ 2.24849623e+00,  5.00000000e+00,  5.40000000e+01, ...,
         -1.83697020e-16,  3.21269662e-01,  9.46987753e-01],
        [ 2.40626944e+00,  5.00000000e+00,  5.70000000e+01, ...,
          2.58819045e-01,  3.21269662e-01,  9.46987753e-01],
        ...,
        [ 2.66922481e+00,  5.00000000e+00,  5.30000000e+01, ...,
          1.00000000e+00,  3.37522900e-01,  9.41317318e-01],
        [ 2.69552034e+00,  5.00000000e+00,  5.30000000e+01, ...,
          9.65925826e-01,  3.37522900e-01,  9.41317318e-01],
        [ 2.82699802e+00,  5.00000000e+00,  5.30000000e+01, ...,
          8.66025404e-01,  3.37522900e-01,  9.41317318e-01]],

       [[ 2.24849623e+00,  5.00000000e+00,  5.40000000e+01, ...,
         -1.83697020e-16,  3.21269662e-01,  9.46987753e-01],
        [ 2.40626944e+00,  5.00000000e+00,  5.70000000e+01, ...,
          2.58819045e-01,  3.21269662e

In [10]:
#We create our model, adding the necessary layers

model1 = Sequential()
model1.add(InputLayer((window_size, X.shape[2])))
model1.add(LSTM(100))
model1.add(Dense(8, 'relu'))
model1.add(Dense(1, 'linear'))

model1.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100)               60800     
                                                                 
 dense (Dense)               (None, 8)                 808       
                                                                 
 dense_1 (Dense)             (None, 1)                 9         
                                                                 
Total params: 61617 (240.69 KB)
Trainable params: 61617 (240.69 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
cp4 = ModelCheckpoint('model/', save_best_only=True)
model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])

In [12]:
model1.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=12, callbacks=[cp4])

Epoch 1/12



INFO:tensorflow:Assets written to: model\assets


Epoch 2/12


INFO:tensorflow:Assets written to: model\assets


Epoch 3/12


INFO:tensorflow:Assets written to: model\assets


Epoch 4/12


INFO:tensorflow:Assets written to: model\assets


Epoch 5/12


INFO:tensorflow:Assets written to: model\assets


Epoch 6/12


INFO:tensorflow:Assets written to: model\assets


Epoch 7/12


INFO:tensorflow:Assets written to: model\assets


Epoch 8/12


INFO:tensorflow:Assets written to: model\assets


Epoch 9/12


INFO:tensorflow:Assets written to: model\assets


Epoch 10/12


INFO:tensorflow:Assets written to: model\assets


Epoch 11/12


INFO:tensorflow:Assets written to: model\assets


Epoch 12/12


INFO:tensorflow:Assets written to: model\assets




<keras.src.callbacks.History at 0x1bfab117b10>

In [15]:
plot_predictions1(model1, X_test, y_test)



AttributeError: module 'matplotlib' has no attribute 'plot'