In [1]:
import numpy as np
import os
import torch
import pandas as pd
from autoencoder import NNAutoencoder
from read_data import read_raw, read_and_perform, train_test_split, scalings
import matplotlib.pyplot as plt

In [2]:
import seaborn as sns

## Carga de datos

In [3]:
# Leemos los archivos raw
folder = os.path.join('..', 'Date')
dataframes1 = read_raw(folder)
folder = os.path.join('..', 'Date2')
dataframes2 = read_raw(folder)

In [4]:
# Convertimos todo en arrays de numpy con series del mismo largo
data_1 = read_and_perform(dataframes1, row_range=300, col_range=(3,12), split= True)
data_2 = read_and_perform(dataframes2, row_range=99, col_range=(2,5), split= False)
# concatenamos todas las series
data_total = np.vstack([data_1.T, data_2.T])

# separamos train y test
train, test = train_test_split(data_total)

[+] Se procesaron 297 series de longitud 99
[+] Se procesaron 32 series de longitud 99


In [5]:
# escalamos
scaler = scalings(train)
train = scaler.fit_transform(train)
print(f'[+] Train shape {train.shape}')
test = scaler.transform(test)
print(f'[+] Test shape {test.shape}')

[+] StandardScaler entrenado
[+] Train shape (310, 99)
[+] Test shape (19, 99)


## Entrenar modelo para un solo espacio latente

In [66]:
epochs = 1000
lr = 1e-3
lat = 2
drop = np.linspace(0.2,0.6,5)

# Crear el directorio para guardar las imágenes si no existe
path_img = os.path.join("..","img","Drop-Optimo",f"Loos-lat{lat}")
os.makedirs(path_img, exist_ok=True)

for dr in drop:
    # Entrenamiento
    hist_train = []
    hist_test = []
    autoencoder = NNAutoencoder(99, lat, dr)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr = lr)
    criterio = torch.nn.MSELoss()

    for e in range(epochs):
        autoencoder.train()
        x = torch.FloatTensor(train)
        y_pred = autoencoder(x)
        loss = criterio(y_pred, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if e%100 == 0:
            print(e, "loss =",loss.item())
        hist_train.append(loss.item())
        with torch.no_grad():
            autoencoder.eval()
            x = torch.FloatTensor(test)
            y_pred = autoencoder(x)
            loss = criterio(y_pred, x)
            hist_test.append(loss.item())

    #guardo las img
    plt.semilogy(hist_train, label = 'train loss')
    plt.semilogy(hist_test, label = 'test loss')
    plt.title(f"Loss train -eval, drop = {dr}")
    plt.legend()
    plt.savefig(os.path.join(path_img,f"lat{lat}-drop{dr}.png"), bbox_inches='tight')
    plt.close()


0 loss = 1.0036629438400269
100 loss = 0.12055174261331558
200 loss = 0.0933930054306984
300 loss = 0.05316300690174103
400 loss = 0.05084472522139549
500 loss = 0.03961068019270897
600 loss = 0.03843935579061508
700 loss = 0.035866428166627884
800 loss = 0.035514894872903824
900 loss = 0.0345127247273922
0 loss = 1.0055004358291626
100 loss = 0.13980978727340698
200 loss = 0.109782874584198
300 loss = 0.1003403514623642
400 loss = 0.07922305911779404
500 loss = 0.05936254560947418
600 loss = 0.048795104026794434
700 loss = 0.047507770359516144
800 loss = 0.036474116146564484
900 loss = 0.04032644256949425
0 loss = 1.0061428546905518
100 loss = 0.16261130571365356
200 loss = 0.1271541267633438
300 loss = 0.12171897292137146
400 loss = 0.10697949677705765
500 loss = 0.08774914592504501
600 loss = 0.0790974423289299
700 loss = 0.0679035633802414
800 loss = 0.058117374777793884
900 loss = 0.05725330859422684
0 loss = 1.003230094909668
100 loss = 0.16563333570957184
200 loss = 0.1336490511

## Early stopping

Es  una técnica que permite detener el entrenamiento cuando el valor de pérdida en el conjunto de validación (test) comienza a aumentar. Esto ayuda a prevenir el sobreajuste y permite guardar el mejor modelo basado en el mínimo valor de pérdida en el conjunto de validación

In [11]:
# Crear el directorio para guardar las imágenes si no existe
path_model = os.path.join("..","Save_Models")
os.makedirs(path_model, exist_ok=True)

In [67]:
epochs = 1000
lr = 1e-3
dr = 0.3
lat = 2

# Entrenamiento
hist_train = []
hist_test = []
best_test_loss = 100
best_epoch = 0
best_model = None
espera = 100
b = 0 #bandera

autoencoder = NNAutoencoder(99, lat, dr)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr = lr)
criterio = torch.nn.MSELoss()

for e in range(epochs):
    autoencoder.train()
    x = torch.FloatTensor(train)
    y_pred = autoencoder(x)
    loss = criterio(y_pred, x)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    hist_train.append(loss.item())
    
    with torch.no_grad():
        autoencoder.eval()
        x = torch.FloatTensor(test)
        y_pred = autoencoder(x)
        test_loss = criterio(y_pred, x)
        hist_test.append(test_loss.item())

    if e%100 == 0:
            print(f'Epoch {e}, train Loss: {loss.item():.4f}, test Loss: {test_loss.item():.4f}')

    if test_loss < loss:
        #guardo el mejor modelo
        if test_loss < best_test_loss: #encuantra el primer minimo??
            best_test_loss = test_loss
            best_epoch = e
            best_model = autoencoder.state_dict().copy() #copia del mejor modelo

        if (e - best_epoch >= espera) and (b < 3):
            m_epoch = best_epoch
            m_test_loss = best_test_loss
            torch.save(best_model, os.path.join(path_model,f"model-lat{lat}.pth"))
            b += 1
            plt.plot(m_epoch,m_test_loss,'x', color = "red")

print(f'Best epoch was {m_epoch} with val loss {m_test_loss:.4f}')
#guardo las img
plt.semilogy(hist_train, label = 'train loss')
plt.semilogy(hist_test, label = 'test loss')
plt.plot(m_epoch,m_test_loss,'x', color = "red", label = "best model")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.title(f"dim_lat: {lat}, drop = {dr}")
plt.legend()
plt.savefig(os.path.join(path_img,f"BestModel-lat{lat}-drop{dr}.png"), bbox_inches='tight')
plt.close()

Epoch 0, train Loss: 1.0047, test Loss: 0.5473
Epoch 100, train Loss: 0.1357, test Loss: 0.0970
Epoch 200, train Loss: 0.1194, test Loss: 0.0933
Epoch 300, train Loss: 0.1108, test Loss: 0.0859
Epoch 400, train Loss: 0.1002, test Loss: 0.0640
Epoch 500, train Loss: 0.0898, test Loss: 0.0457
Epoch 600, train Loss: 0.0772, test Loss: 0.0510
Epoch 700, train Loss: 0.0621, test Loss: 0.0448
Epoch 800, train Loss: 0.0549, test Loss: 0.0333
Epoch 900, train Loss: 0.0468, test Loss: 0.0316
Best epoch was 487 with val loss 0.0414
