In [2]:
%run funciones.py
import pandas as pd
from funciones import *

# ============================== dataset parameters ==============================
div_split = 36                        # forwarded to cargar_dataset (split setting; semantics defined in funciones.py)
INPUT_LENGTH, OUTPUT_LENGTH = 36, 1   # window sizes forwarded to cargar_dataset as (input, output)
SCALER = 'Robust'                     # scaler name forwarded to cargar_dataset
CONV = False                          # forwarded to cargar_dataset and logged with the run parameters
name_dataset = "travel_time_series_2col.csv"

# Read the dataset. The path is built from name_dataset so the file that is
# actually loaded always matches the name recorded in the run metadata.
cols = ['id_linkref', 'horas', 'minute', 'travel_time']
origin_dataset = pd.read_csv(f'../data/{name_dataset}', sep=',', usecols=cols)
dataset = origin_dataset[cols]  # enforce the column order declared in `cols`

# Restrict the data to a range of link ids; (0, 82) means "keep every link"
range_links = (14, 49)
if range_links != (0, 82):
    first_link, last_link = range_links
    dataset = dataset[dataset['id_linkref'].between(first_link, last_link)]  # bounds are inclusive
    print(f"Data set delimitado con Links desde {range_links[0]} - {range_links[1]} => {len(dataset['id_linkref'].unique())} links")

# Prepare the dataset. `conv` now follows the CONV constant instead of a
# hard-coded False, so the value stored in the JSON report is the one used here.
data, scaler = cargar_dataset(dataset, div_split, (INPUT_LENGTH, OUTPUT_LENGTH), name_scaler=SCALER, conv=CONV)

Data set delimitado con Links desde 14 - 49 => 36 links


In [3]:
# Pull the scaled train / validation / test arrays out of the dict built by cargar_dataset
x_tr_s, x_vl_s, x_ts_s = (data[key] for key in ('x_tr_s', 'x_vl_s', 'x_ts_s'))
y_tr_s, y_vl_s, y_ts_s = (data[key] for key in ('y_tr_s', 'y_vl_s', 'y_ts_s'))

# Display the shape of each input split as the cell output
tuple(split.shape for split in (x_tr_s, x_vl_s, x_ts_s))

((73655, 36, 4), (9143, 36, 4), (9215, 36, 4))

In [4]:
import json
import os
import subprocess

import keras
import tensorflow as tf
from tensorflow.python.platform import build_info as build
from keras.optimizers import RMSprop, Adam
from keras.models import Sequential
from keras.layers import LSTM, CuDNNLSTM, Dense,BatchNormalization, Dropout, RepeatVector, TimeDistributed, ConvLSTM2D, Flatten, Reshape

# Environment report: library versions and GPU/CUDA details for this run.
print("Tensorflow".ljust(25) + f":{tf.__version__}")
print("Keras".ljust(25) + f":{keras.__version__}")
# Same (non-experimental) device query that is used for the GPU list below
print("GPU Available".ljust(25) + f":{len(tf.config.list_physical_devices('GPU'))}")
print("Cuda Built".ljust(25) + f":{tf.test.is_built_with_cuda()}")
# .get() keeps the report going when the build metadata has no CUDA/cuDNN entry
# (expected on builds without CUDA support) instead of raising KeyError.
print("Cuda Version".ljust(25) + f":{build.build_info.get('cuda_version', 'n/a')}")
print("Cudnn Version".ljust(25) + f":{build.build_info.get('cudnn_version', 'n/a')}")
print("List Tensorflow GPUs".ljust(25) + f":{tf.config.list_physical_devices('GPU')}")
# nvidia-smi prints a CSV header followed by one line per GPU; take the first GPU.
# A missing/failing nvidia-smi must not abort the whole cell.
try:
    gpu_name = subprocess.check_output('nvidia-smi --query-gpu=gpu_name --format=csv', shell=True).decode().splitlines()[1]
except (subprocess.CalledProcessError, OSError, IndexError):
    gpu_name = 'n/a'
print("Name of GPU".ljust(25) + f":{gpu_name}")
print("Numpy Version".ljust(25) + f":{np.__version__}")
print("Pandas Version".ljust(25) + f":{pd.__version__}")
#print("Matplotlib Version".ljust(25) + f":{matplotlib.__version__}")
#print("Seaborn Version".ljust(25) + f":{sns.__version__}")

Tensorflow               :2.10.0
Keras                    :2.10.0
GPU Available            :1
Cuda Built               :True
Cuda Version             :64_112
Cudnn Version            :64_8
List Tensorflow GPUs     :[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Name of GPU              :NVIDIA GeForce MX110
Numpy Version            :1.24.3
Pandas Version           :2.0.1


In [5]:
# Reproducibility settings.
# set_random_seed seeds Python's `random`, NumPy and TensorFlow together;
# tf.random.set_seed alone leaves the first two unseeded. Op determinism then
# makes TensorFlow ops produce the same results for the same seed.
SEED = 123
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# =============================== MODEL PARAMETERS ===============================
INPUT_SHAPE = (x_tr_s.shape[1], x_tr_s.shape[2])  # per-sample input shape (sample axis dropped)
N_UNITS = 128                    # units of every LSTM layer
name_opt = 'RMSprop-lr5e-5'      # label stored in the run report; keep in sync with `opt` below
ACT = 'linear'                   # activation of the output Dense layer
name_function_loss = 'rmse'      # label stored in the run report

# Optimizer
opt = RMSprop(learning_rate=5e-5)

# Loss function
def rmse(y_true, y_pred):
    """Root-mean-square error between predictions and targets.

    The mean is taken over every element of the tensors, so the result is a
    single scalar.
    """
    squared_error = tf.square(y_pred - y_true)
    mean_squared_error = tf.math.reduce_mean(squared_error)
    return tf.math.sqrt(mean_squared_error)

In [5]:
# ================================= MODELO 1 (disabled baseline: single LSTM layer + Dense) =================================
# n_modelo = 'LSTM1'
# modelo = Sequential()
# modelo.add(LSTM(N_UNITS, input_shape=INPUT_SHAPE, return_sequences = False))
# modelo.add(Dense(OUTPUT_LENGTH, activation=ACT))

# # Compilación
# modelo.compile(optimizer = opt, loss = rmse, metrics=['mse', 'mae'])

In [6]:
# ================================= MODEL 2 =================================
# Stacked LSTM network: two LSTM layers, a RepeatVector(1) bridge, then two more
# LSTM layers and a Dense output. Layers are added one by one, in this exact order.
n_modelo = 'LSMT2'  # NOTE(review): spelling kept as-is; this label is embedded in the saved file names
modelo = Sequential()

# First LSTM pair: the first layer returns the full sequence, the second only its last step
modelo.add(BatchNormalization(input_shape=INPUT_SHAPE, name='batch_norm_0'))
modelo.add(LSTM(N_UNITS, return_sequences=True, name='lstm_1'))
modelo.add(Dropout(rate=0.15, name='dropout_1'))
modelo.add(BatchNormalization(name='batch_norm_1'))
modelo.add(LSTM(N_UNITS, return_sequences=False, name='lstm_2'))
modelo.add(Dropout(rate=0.1, name='dropout_2'))
modelo.add(BatchNormalization(name='batch_norm_2'))

# Turn the last-step vector back into a length-1 sequence for the next LSTM pair
modelo.add(RepeatVector(n=1))

# Second LSTM pair followed by the output layer
modelo.add(LSTM(N_UNITS, return_sequences=True, name='lstm_3'))
modelo.add(Dropout(rate=0.15, name='dropout_3'))
modelo.add(BatchNormalization(name='batch_norm_3'))
modelo.add(LSTM(N_UNITS, return_sequences=False, name='lstm_4'))
modelo.add(Dense(OUTPUT_LENGTH, activation=ACT, name='dense_1'))

# Compile: custom RMSE as the loss, MSE and MAE tracked as extra metrics
modelo.compile(loss=rmse, optimizer=opt, metrics=['mse', 'mae'])

In [7]:
# Training parameters
EPOCHS = 100
BATCH_SIZE = 256

# Base name shared by the saved model and its JSON report. It is built before
# training so that any problem with it shows up immediately, not after the fit.
name_files = f"{n_modelo}_e{EPOCHS}b{BATCH_SIZE}_{SCALER}_in{INPUT_SHAPE[0]}-{INPUT_SHAPE[1]}_dt{range_links[0]}-{range_links[1]}"

# Make sure the output folders exist up front: a missing folder would otherwise
# only be discovered when saving, after the whole training run has been spent.
os.makedirs('../models', exist_ok=True)
os.makedirs('../info_models', exist_ok=True)

# Training
historia = modelo.fit(
    x = x_tr_s,
    y = y_tr_s,
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    validation_data = (x_vl_s, y_vl_s)
)

# Save the model
modelo.save(f'../models/{name_files}.h5')
print('Entrenamiento finalizado y modelo guardado')
print('\nEVALUANDO MODELO')
# Evaluate on train, validation and test. The model is compiled with a loss and
# two metrics, so each entry holds the values reported by evaluate() for that split.
rmse_dict = {'train':modelo.evaluate(x=x_tr_s, y=y_tr_s, verbose=1), 
             'val':  modelo.evaluate(x=x_vl_s, y=y_vl_s, verbose=1), 
             'test': modelo.evaluate(x=x_ts_s, y=y_ts_s, verbose=1)}

# Gather the settings of this run so they can be stored next to the results.

# Dataset-related settings
param_dataset = dict(
    range_links=range_links,
    div_split=div_split,
    INPUT_LENGTH=INPUT_LENGTH,
    OUTPUT_LENGTH=OUTPUT_LENGTH,
    scaler=SCALER,
    isCONV=CONV,
    name_dataset=name_dataset,
)

# Model / training settings. The hyphenated key cannot be written as a keyword
# argument, so it is inserted by subscript at its original position.
param_model = dict(INPUT_SHAPE=INPUT_SHAPE)
param_model['n_unist-LSMT'] = N_UNITS
param_model.update(
    name_optimizer=name_opt,
    name_function_activation=ACT,
    name_function_loss=name_function_loss,
    n_modelo=n_modelo,
    epoch=EPOCHS,
    BATCH_SIZE=BATCH_SIZE,
)

# Save the training report: per-epoch history, evaluation results and the
# dataset/model parameters, merged into one flat JSON object.
hist_df = pd.DataFrame(historia.history)
with open(f'../info_models/{name_files}.json', mode='w') as f:
    # Later updates win on duplicate keys, same as the {**a, **b, ...} merge order
    temp = hist_df.to_dict('list')
    temp.update(rmse_dict)
    temp.update(param_dataset)
    temp.update(param_model)
    json.dump(temp, f)
    print('Historial y evalucacion correctamente guardados')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [8]:
#name_files = f"{n_modelo}e{EPOCHS}b{BATCH_SIZE}_{SCALER}_in{INPUT_SHAPE[0]}-{INPUT_SHAPE[1]}"