# Acondicionamiento de datos

En esta sección se escalan y se acondicionan los datos para ser ingresados al modelo LSTM. Los datos que este modelo acepta a la entrada son de tres dimensiones (X, Y, Z), donde X representa la cantidad de lotes de datos ingresados al modelo; Y, el tamaño del lote, que para este caso es 18 y representa 18 horas; y Z, la cantidad de características del dataset, que para este caso son 9. Por lo anterior, los datos que aceptará el modelo son de la forma (X, 18, 9).

In [None]:
#Libreria necesarias para el tratamiento de los datos

import pickle
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the training dataset

df2 = pd.read_csv("Dataset_Julio28_Septiembre17_5h.csv")
df2['Date'] = pd.to_datetime(df2['Date'], format='%Y-%m-%d %H:%M')
# Time-indexed, chronologically sorted frame used for the date-range cuts below.
df2 = df2.set_index('Date').sort_index()
# Same rows with a positional index and without the timestamp column.
df = df2.reset_index().drop(columns=['Date'])

#########################################################################
# The dataset has stretches where several days of data are missing and
# where the humidity sensor failed. The data is therefore cut into blocks
# that are continuous and free of those errors; the sixteen timestamps
# below are the (start, end) limits of the eight blocks.

_FORMATO_LIMITE = "%Y-%m-%d %H:%M:%S"
_LIMITES = (
    ("2023-07-28 21:32:00", "2023-07-31 08:53:00"),
    ("2023-07-31 20:16:00", "2023-08-04 13:17:00"),
    ("2023-08-04 15:42:00", "2023-08-06 22:13:00"),
    ("2023-08-07 07:52:00", "2023-08-20 17:46:00"),
    ("2023-08-21 09:49:00", "2023-08-31 05:28:00"),
    ("2023-09-02 20:40:00", "2023-09-07 22:48:00"),
    ("2023-09-07 22:59:00", "2023-09-12 17:01:00"),
    ("2023-09-12 17:11:00", "2023-09-17 11:34:00"),
)
(dt1, dt2, dt3, dt4, dt5, dt6, dt7, dt8,
 dt9, dt10, dt11, dt12, dt13, dt14, dt15, dt16) = (
    datetime.strptime(marca, _FORMATO_LIMITE)
    for limites in _LIMITES
    for marca in limites
)


def _bloque_submuestreado(inicio, fin):
    """Return the rows of df2 inside [inicio, fin], keeping one row out of every six."""
    dentro_del_rango = (df2.index >= inicio) & (df2.index <= fin)
    return df2[dentro_del_rango].iloc[::6]


data_rng1 = _bloque_submuestreado(dt1, dt2)
data_rng2 = _bloque_submuestreado(dt3, dt4)
data_rng3 = _bloque_submuestreado(dt5, dt6)
data_rng4 = _bloque_submuestreado(dt7, dt8)
data_rng5 = _bloque_submuestreado(dt9, dt10)
data_rng6 = _bloque_submuestreado(dt11, dt12)
data_rng7 = _bloque_submuestreado(dt13, dt14)
data_rng8 = _bloque_submuestreado(dt15, dt16)

#########################################################################

#########################################################################


In [None]:
# Load the test dataset

df21 = pd.read_csv('/content/Dataset_Septiembre21_Octubre3_10h.csv')
df21['Date'] = pd.to_datetime(df21['Date'], format='%Y-%m-%d %H:%M')
df21 = df21.set_index('Date').sort_index()

#########################################################################
# The data is sampled every 10 minutes; keeping one row out of every six
# changes it to a 1-hour frequency, which reduces the final model size.
# The training data is subsampled the same way.

# Positional copy without the timestamp column, subsampled after the index
# reset so it keeps the 0, 6, 12, ... row labels.
df1 = df21.reset_index().drop(columns=['Date']).iloc[::6]
df21 = df21.iloc[::6]
#########################################################################

In [None]:
# Assemble the dataset for 10 and 15 hours
#
# Run this cell only if the 5-hour file "Dataset_Julio28_Septiembre17_5h.csv"
# was loaded and the training is meant to predict 10 or 15 hours; otherwise
# skip it.
#
# NOTE: this cell is not re-runnable. After one execution the blocks no longer
# carry the 'Date' index, so a second run raises KeyError instead of silently
# shifting the forecasts twice.

CORRIMIENTO = 5  # For 10 hours = 5, for 15 hours = 10

_COLUMNAS_SENSORES = ['S_temperature', 'S_humidity', 'S_uv',
                      'S_soilMoisture', 'S_anRain']
_COLUMNAS_PRONOSTICO = ['Temperature_5h', 'Humidity_5h',
                        'Clouds_5h', 'PoP_5h']


def _desplazar_pronostico(bloque, corrimiento):
    """Pair each sensor row with the forecast found `corrimiento` rows later.

    Args:
        bloque: DataFrame indexed by 'Date' holding the sensor columns and
            the 5-hour forecast columns.
        corrimiento: Number of rows the forecast columns are moved up.

    Returns:
        A DataFrame with a fresh positional index: sensor columns first,
        forecast columns after, `len(bloque) - corrimiento` rows long.
    """
    # Number of sensor rows that still have a forecast `corrimiento` rows
    # ahead. Computed explicitly because `iloc[:-0]` would return an empty
    # frame when corrimiento is 0.
    filas_validas = max(len(bloque) - corrimiento, 0)

    pronostico = (
        bloque.drop(columns=_COLUMNAS_SENSORES)
        .iloc[corrimiento:]
        .reset_index()
        .drop(columns=['Date'])
    )
    sensores = (
        bloque.drop(columns=_COLUMNAS_PRONOSTICO)
        .iloc[:filas_validas]
        .reset_index()
        .drop(columns=['Date'])
    )
    return pd.concat([sensores, pronostico], axis=1)


data_rng1 = _desplazar_pronostico(data_rng1, CORRIMIENTO)
data_rng2 = _desplazar_pronostico(data_rng2, CORRIMIENTO)
data_rng3 = _desplazar_pronostico(data_rng3, CORRIMIENTO)
data_rng4 = _desplazar_pronostico(data_rng4, CORRIMIENTO)
data_rng5 = _desplazar_pronostico(data_rng5, CORRIMIENTO)
data_rng6 = _desplazar_pronostico(data_rng6, CORRIMIENTO)
data_rng7 = _desplazar_pronostico(data_rng7, CORRIMIENTO)
data_rng8 = _desplazar_pronostico(data_rng8, CORRIMIENTO)

In [None]:
# Preparation for datasets of up to 5 hours
#
# Do not run this cell if the previous one, "Ensamble para generar dataset de
# 10 y 15 horas", was executed. It replaces the 'Date' index of every block
# with a positional one and discards the timestamp column.

data_rng1 = data_rng1.reset_index().drop(columns=['Date'])
data_rng2 = data_rng2.reset_index().drop(columns=['Date'])
data_rng3 = data_rng3.reset_index().drop(columns=['Date'])
data_rng4 = data_rng4.reset_index().drop(columns=['Date'])
data_rng5 = data_rng5.reset_index().drop(columns=['Date'])
data_rng6 = data_rng6.reset_index().drop(columns=['Date'])
data_rng7 = data_rng7.reset_index().drop(columns=['Date'])
data_rng8 = data_rng8.reset_index().drop(columns=['Date'])

In [None]:
 #Funciones para crear dataset supervisado y para escalizar los datos

def crear_dataset_supervisado(array, input_length, output_length, corrimiento,
                              target_col=3):
    """Slice a 2-D array into sliding (input, target) windows.

    Used to build the training, validation and test samples from a block of
    consecutive rows.

    Args:
        array: 2-D NumPy array of shape (rows, features).
        input_length: Number of consecutive rows in every input window.
        output_length: Number of consecutive target rows per sample.
        corrimiento: Extra rows skipped between the end of the input window
            and the first target row.
        target_col: Column index of the variable to predict. Defaults to 3,
            the index the function used before it became a parameter.

    Returns:
        A tuple (X, Y) where X has shape (n, input_length, features) and Y
        has shape (n, output_length, 1). When no window fits, n is 0 and both
        arrays keep their remaining dimensions, so the results can still be
        concatenated with those of other blocks.
    """
    fils, cols = array.shape
    inicio_salida = input_length + corrimiento

    # Same window count as the historical loop: start positions are bounded
    # both by range(fils - input_length - output_length) and by the target
    # slice having to end inside the array.
    # NOTE: with corrimiento == 0 the first bound is the tighter one, so the
    # last window that would fit is not generated; kept for compatibility.
    n_ventanas = max(
        0,
        min(fils - input_length - output_length,
            fils - inicio_salida - output_length + 1),
    )

    X = np.empty((n_ventanas, input_length, cols), dtype=array.dtype)
    Y = np.empty((n_ventanas, output_length, 1), dtype=array.dtype)
    for i in range(n_ventanas):
        X[i] = array[i:i + input_length, :]
        salida = array[i + inicio_salida:i + inicio_salida + output_length,
                       target_col]
        Y[i] = salida.reshape(output_length, 1)

    return X, Y

def escalar_dataset(data_input, col_ref):
    """Scale every feature of the splits to the range [-1, 1].

    One MinMaxScaler per feature is fitted on the training inputs only and
    then applied, unchanged, to the validation and both test splits. Fitting
    on the training split alone keeps all splits on the same scale and keeps
    validation/test statistics out of the scalers that are saved and reused
    at inference time.

    The list of scalers is written to "lista_scalers.pkl".

    Args:
        data_input: Dict with the arrays 'x_tr', 'x_vl', 'x_ts', 'x_tsN' of
            shape (samples, steps, features) and 'y_tr', 'y_vl', 'y_ts',
            'y_tsN' of shape (samples, steps, 1).
        col_ref: Column name of the variable of interest. It is resolved to a
            feature index through the module-level DataFrame `df`.

    Returns:
        A tuple (data_scaled, scaler): a dict with the scaled arrays under
        the keys 'x_tr_s', 'y_tr_s', 'x_vl_s', 'y_vl_s', 'x_ts_s', 'y_ts_s',
        'x_ts_sN', 'y_ts_sN', and the scaler of the variable of interest.
    """
    # NOTE(review): assumes the feature order of the arrays matches
    # df.columns — confirm when the columns were re-assembled upstream.
    col_ref = df.columns.get_loc(col_ref)
    NFEATS = data_input['x_tr'].shape[2]

    # One scaler per feature.
    scalers = [MinMaxScaler(feature_range=(-1, 1)) for _ in range(NFEATS)]

    claves_x = (('x_tr', 'x_tr_s'), ('x_vl', 'x_vl_s'),
                ('x_ts', 'x_ts_s'), ('x_tsN', 'x_ts_sN'))
    claves_y = (('y_tr', 'y_tr_s'), ('y_vl', 'y_vl_s'),
                ('y_ts', 'y_ts_s'), ('y_tsN', 'y_ts_sN'))

    # Output arrays, same shapes as the inputs.
    escalados = {
        destino: np.zeros(data_input[origen].shape)
        for origen, destino in claves_x + claves_y
    }

    def _transformar(scaler, valores):
        """Apply an already fitted scaler to a 2-D (samples, steps) slice."""
        return scaler.transform(valores.reshape(-1, 1)).reshape(valores.shape)

    for i, scaler in enumerate(scalers):
        # Fit on the training inputs only, then reuse for every split.
        scaler.fit(data_input['x_tr'][:, :, i].reshape(-1, 1))
        for origen, destino in claves_x:
            escalados[destino][:, :, i] = _transformar(
                scaler, data_input[origen][:, :, i])

    # Targets use the scaler of the variable of interest, so the saved list
    # and the returned scaler describe the same transformation.
    for origen, destino in claves_y:
        escalados[destino][:, :, 0] = _transformar(
            scalers[col_ref], data_input[origen][:, :, 0])

    # Output dictionary
    data_scaled = {
        'x_tr_s': escalados['x_tr_s'], 'y_tr_s': escalados['y_tr_s'],
        'x_vl_s': escalados['x_vl_s'], 'y_vl_s': escalados['y_vl_s'],
        'x_ts_s': escalados['x_ts_s'], 'y_ts_s': escalados['y_ts_s'],
        'x_ts_sN': escalados['x_ts_sN'], 'y_ts_sN': escalados['y_ts_sN'],
    }

    # Save the list of fitted scalers.
    with open('lista_scalers.pkl', 'wb') as file:
        pickle.dump(scalers, file)

    return data_scaled, scalers[col_ref]

In [None]:
# Generate the training, validation and test data

# Build the training, test and validation datasets and check their sizes
INPUT_LENGTH = 18    # Hyperparameter tuned for 18 hours of input to the LSTM model
OUTPUT_LENGTH = 1    # Output of the LSTM model
corrimiento = 9      # 1 h steps:  1h=0 2h=1 3h=2 4h=3 5h=4 10h=9 15h=14

########## Test data built from the separate test dataset ###############
x_tsN, y_tsN = crear_dataset_supervisado(
    df1.values, INPUT_LENGTH, OUTPUT_LENGTH, corrimiento)
############################################################

# Windows are generated per block so that no sample spans two blocks.
_bloques = (data_rng1, data_rng2, data_rng3, data_rng4,
            data_rng5, data_rng6, data_rng7, data_rng8)
_ventanas = [
    crear_dataset_supervisado(
        bloque.values, INPUT_LENGTH, OUTPUT_LENGTH, corrimiento)
    for bloque in _bloques
]
((x_dt1, y_dt1), (x_dt2, y_dt2), (x_dt3, y_dt3), (x_dt4, y_dt4),
 (x_dt5, y_dt5), (x_dt6, y_dt6), (x_dt7, y_dt7), (x_dt8, y_dt8)) = _ventanas

x_data = np.concatenate([entradas for entradas, salidas in _ventanas], axis=0)
y_data = np.concatenate([salidas for entradas, salidas in _ventanas], axis=0)

# 80 % training; the remaining 20 % is halved into validation and test.
x_tr, x_validation, y_tr, y_validation = train_test_split(
    x_data, y_data, test_size=0.2, random_state=42)
x_vl, x_ts, y_vl, y_ts = train_test_split(
    x_validation, y_validation, test_size=0.5, random_state=42)

print('Tamaños entrada (BATCHES x INPUT_LENGTH x FEATURES) y de salida (BATCHES x OUTPUT_LENGTH x FEATURES)')
print(f'Set de entrenamiento - x_tr: {x_tr.shape}, y_tr: {y_tr.shape}')
print(f'Set de validación - x_vl: {x_vl.shape}, y_vl: {y_vl.shape}')
print(f'Set de prueba - x_ts: {x_ts.shape}, y_ts: {y_ts.shape}')

In [None]:
# Scale the data

# Input dictionary with the unscaled splits
data_in = dict(
    x_tr=x_tr, y_tr=y_tr,
    x_vl=x_vl, y_vl=y_vl,
    x_ts=x_ts, y_ts=y_ts,
    x_tsN=x_tsN, y_tsN=y_tsN,
)

# Scale the values, naming the variable of interest
data_s, scaler = escalar_dataset(data_in, col_ref='S_soilMoisture')

# Pull the scaled subsets out of the result
x_tr_s = data_s['x_tr_s']
y_tr_s = data_s['y_tr_s']
x_vl_s = data_s['x_vl_s']
y_vl_s = data_s['y_vl_s']
x_ts_s = data_s['x_ts_s']
y_ts_s = data_s['y_ts_s']
x_ts_sN = data_s['x_ts_sN']
y_ts_sN = data_s['y_ts_sN']

# One violin per split at each feature position, to compare distributions.
fig, ax = plt.subplots(figsize=(12, 4))
for i in range(9):
    for conjunto in (x_tr_s, x_vl_s, x_ts_s):
        ax.violinplot(dataset=conjunto[:, :, i].flatten(), positions=[i])

ax.set_xticks(list(range(9)))
ax.set_xticklabels(df.keys(), rotation=45)
ax.autoscale();

# Save the output scaler
with open('scaler_y.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [None]:
# Convert every array from 64-bit to 32-bit floats

# Unscaled inputs and targets
x_tr, x_vl, x_ts, x_tsN = (
    datos.astype('float32') for datos in (x_tr, x_vl, x_ts, x_tsN)
)
y_tr, y_vl, y_ts, y_tsN = (
    datos.astype('float32') for datos in (y_tr, y_vl, y_ts, y_tsN)
)

# Scaled inputs and targets
x_tr_s, x_vl_s, x_ts_s, x_ts_sN = (
    datos.astype('float32') for datos in (x_tr_s, x_vl_s, x_ts_s, x_ts_sN)
)
y_tr_s, y_vl_s, y_ts_s, y_ts_sN = (
    datos.astype('float32') for datos in (y_tr_s, y_vl_s, y_ts_s, y_ts_sN)
)

# Entrenamiento del modelo LSTM

Esta sección ejecuta el entrenamiento para el modelo LSTM

In [None]:
import tensorflow as tf
from keras.layers import LSTM
from keras.layers import Dense
from keras import backend as K
from keras.models import Sequential
from keras.optimizers import RMSprop, Adam
from sklearn.metrics import mean_squared_error
from __future__ import absolute_import, division, print_function, unicode_literals

In [None]:
# Model creation

# Settings for a reproducible training run: fixed TensorFlow seed and
# deterministic ops.
tf.random.set_seed(123)
tf.config.experimental.enable_op_determinism()
# The model: one LSTM layer followed by a dense output layer.
N_UNITS = 50 # Size of the hidden state (h) and of the memory cell (c); 128 was also tried
INPUT_SHAPE = (x_tr_s.shape[1], x_tr_s.shape[2]) # (time steps, features), read from the scaled training inputs
modelo = Sequential()
modelo.add(LSTM(N_UNITS, input_shape=INPUT_SHAPE))
modelo.add(Dense(OUTPUT_LENGTH, activation='linear')) # linear activation because this is a forecast (regression)

# Loss: RMSE (root mean squared error) is used for training because it
# expresses the error in the same units as the humidity.

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference of the tensors."""
    diferencia = y_pred - y_true
    error_cuadratico_medio = tf.math.reduce_mean(tf.square(diferencia))
    return tf.math.sqrt(error_cuadratico_medio)

# Compilation: RMSprop optimizer with the custom RMSE loss.
optimizador = RMSprop(learning_rate=15e-5) # 5e-4 was a previously tried value
modelo.compile(
    optimizer = optimizador,
    loss = root_mean_squared_error,
)

In [None]:
# Train the LSTM model

EPOCHS = 250
BATCH_SIZE = 256
# The returned history holds the per-epoch training and validation losses.
historia = modelo.fit(
    x_tr_s,
    y_tr_s,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_vl_s, y_vl_s),
    verbose=2,
)

In [None]:
# Plot the training and validation curves

plt.figure(figsize=(5, 3))
for clave, etiqueta in (('loss', 'RMSE train'), ('val_loss', 'RMSE val')):
    plt.plot(historia.history[clave], label=etiqueta)
plt.xlabel('Iteración')
plt.ylabel('RMSE')
plt.legend()
plt.title("15 horas")
plt.show()

# RMSE of the trained model on the train, validation and test splits
rmse_tr, rmse_vl, rmse_ts = (
    modelo.evaluate(x=entradas, y=salidas, verbose=0)
    for entradas, salidas in (
        (x_tr_s, y_tr_s),
        (x_vl_s, y_vl_s),
        (x_ts_s, y_ts_s),
    )
)

print(f'  RMSE train:\t  {rmse_tr:.3f}')
print(f'  RMSE val:\t {rmse_vl:.3f}')
print(f'  RMSE test:\t {rmse_ts:.3f}')

# Inferencias con el modelo LSTM

Esta sección realiza inferencias con el modelo LSTM previamente entrenado

In [None]:
# Inference helper

def predecir(x, model, scaler):
    """Return the model's predictions converted back to the original scale.

    Args:
        x: Scaled inputs handed to `model.predict`.
        model: Object exposing `predict(x, verbose=0)`.
        scaler: Object exposing `inverse_transform`, used to undo the
            scaling of the predictions.

    Returns:
        The unscaled predictions as a flattened 1-D array.
    """
    prediccion_escalada = model.predict(x, verbose=0)
    return scaler.inverse_transform(prediccion_escalada).flatten()

In [None]:
# Soil-moisture estimation on the separate test dataset

expected = predecir(x_ts_sN, modelo, scaler)  # model output for the scaled test inputs
real = y_tsN.flatten()                        # unscaled target values
fig1 = plt.figure("Predicciones de humedad del suelo (18 entradas(18h))")
fig1.suptitle("Predicciones de humedad del suelo (18 entradas(18h))")
fig1.subplots_adjust(hspace=0.5, wspace=0.5)

ax = fig1.add_subplot(1, 1, 1)
# Exactly two series are drawn, in the same order as the legend labels below.
# The real series used to be plotted a second time from y_tsN.flatten(),
# which hid the line the legend referred to under a third, unlabeled one.
ax.plot(real)
ax.plot(expected)
ax.set_xlabel("Muestras")
ax.set_ylabel("% Humedad del suelo")
ax.set_title("10 Horas")
ax.grid(color='gray', linestyle='dashed', linewidth=1, alpha=0.4)
ax.axhline(0, color='black', linewidth=0.5)
fig1.legend(["real","expected"], loc ="lower right")

# Conversion a TinyML

Esta sección convierte el modelo de ML a tinyML para usarse en un microcontrolador

In [None]:
# Convert the trained Keras model to TensorFlow Lite (TinyML)

# Wrap the model call in a tf.function so a concrete function can be traced.
run_model = tf.function(lambda x: modelo(x))
# Fix the input size: the concrete function is traced for a single, fully
# specified input shape.
# NOTE(review): BATCH_SIZE is rebound to 1 here; the training cell assigns
# the same name, so re-running training after this cell needs care.
BATCH_SIZE = 1
# NOTE(review): STEPS and INPUT_SIZE presumably have to match the
# (time steps, features) shape the model was built with — confirm.
STEPS = 18
INPUT_SIZE = 9
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec([BATCH_SIZE, STEPS, INPUT_SIZE], modelo.inputs[0].dtype))

# Directory the model is saved to, with the fixed-shape signature attached.
MODEL_DIR = "keras_lstm"
modelo.save(MODEL_DIR, save_format="tf", signatures=concrete_func)

# Convert the saved model to a TensorFlow Lite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_DIR)
tflite_model = converter.convert()

In [None]:
# Save the converted model to a file.
# The context manager closes the file as soon as the write finishes, so the
# data is on disk before later cells read the file.
with open("LSTM_model.tflite", "wb") as archivo_modelo:
    archivo_modelo.write(tflite_model)

In [None]:
# Run the TensorFlow Lite model
#
# Executes the converted TensorFlow Lite model and compares its output with
# the TensorFlow model on 10 samples to verify the conversion.

interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

TEST_CASES = 10

indice_entrada = input_details[0]["index"]
indice_salida = output_details[0]["index"]

for i in range(TEST_CASES):
    # Single-sample batch, shared by both models.
    muestra = x_ts_sN[i:i + 1]
    expected = modelo.predict(muestra)
    interpreter.set_tensor(indice_entrada, muestra)
    interpreter.invoke()
    result = interpreter.get_tensor(indice_salida)
    # Raises AssertionError if the outputs differ at 5 decimal places.
    np.testing.assert_almost_equal(expected, result, decimal=5)
    print("El resultado de TensorFlow concuerda con el de TensorFlow Lite")
    # Reset the interpreter's variables before the next sample.
    interpreter.reset_all_variables()

In [None]:
# Report the size of the converted model file

import os

basic_model_size = os.stat("LSTM_model.tflite").st_size
print("El tamaño del modelo es de %d bytes" % basic_model_size)

In [None]:
# Install the xxd tool
# xxd is used to turn the model file into an array of hexadecimal values.
# The two lines starting with "!" are IPython shell commands, not Python.
'''
Esta herramienta permite convertir el modelo a un arreglo de valores
hexadecimal
'''
!sudo apt-get update
!sudo apt-get install xxd

In [None]:
# Convert the model and save it as a C source file
# `xxd -i` emits the file contents as a C array; the output is redirected to
# LSTM_model.cc and then printed with cat.

!xxd -i LSTM_model.tflite > LSTM_model.cc
!cat LSTM_model.cc