In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
#from torch.utils.tensorboard import SummaryWriter # TensorBoard support
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import sklearn as skl
import pandas as pd
import time
#from torchviz import make_dot
import torch.optim as optim
from collections import defaultdict
import pickle
import dill
import json
import datetime
from IPython.display import clear_output
# Wider lines when printing tensors.
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)     # On by default, leave it here for clarity
# Detect whether the notebook runs on Google Colab. Only a failed import means
# "not on Colab"; any other exception (including KeyboardInterrupt) is allowed to
# propagate instead of being silently swallowed by a bare `except:`.
try:
    import google.colab
    from google.colab import files
    COLAB = True
except ImportError:
    COLAB = False

Los mejores resultados para una ANN clasificadora de una capa oculta se obtuvieron con 512 neuronas en la capa oculta y 36 épocas de entrenamiento, mientras que para la CNN se obtuvieron con x épocas de entrenamiento, dropout 0.3, función de activación LeakyReLU y optimizador Adam. Primero entrenamos acá la ANN (36 épocas), después entrenamos la CNN (x épocas) y luego evaluamos ambas con el test loop. Nota: las celdas de entrenamiento de abajo usan `num_epochs = 40` para la ANN y `num_epochs = 50` para la CNN, y guardan el modelo con menor pérdida de validación.

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Usando el dispositivo {}'.format(device))

Usando el dispositivo cpu


In [3]:
# The first run takes a while because the data has to be downloaded.
# Human-readable class names, keyed by the integer label (0-9) in list order.
labels_map = dict(enumerate([
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]))
# Both splits live under ./data and are converted with ToTensor;
# only the `train` flag differs between them.
train_dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_dataset = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

In [25]:
# Keep a handle on the complete 60000-sample training split; the training cells
# below draw their train/validation subsets from it with random_split.
train_dataset_orig = train_dataset
len(train_dataset_orig)

60000

In [26]:
class ANN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened to 28*28 features per sample, mapped to `n` hidden
    units through a ReLU, and then to 10 output scores (one per class). The
    outputs are raw scores: no softmax is applied here.

    Parameters
    ----------
    n : int
        Number of units in the hidden layer (default 512).
    """
    def __init__(self,n=512):
        super().__init__()
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(in_features=28*28,out_features=n)
        self.linear2 = nn.Linear(in_features=n,out_features=10)
    def forward(self,x):
        """Return the 10 class scores for each sample in `x`."""
        hidden = self.relu(self.linear1(self.flatten(x)))
        return self.linear2(hidden)

In [27]:
# Convolutional classifier built on top of nn.Module.
class CNN(nn.Module):
    """Two convolutional stages followed by three fully connected layers.

    For a 1x28x28 input the feature maps evolve as
    conv1 (5x5) -> 6x24x24, max-pool -> 6x12x12,
    conv2 (5x5) -> 12x8x8,  max-pool -> 12x4x4,
    which is flattened to the 12*4*4 features expected by `fc1`.
    The output is 10 raw class scores (logits).

    Parameters
    ----------
    dropout_p : float
        Dropout probability applied after `fc1` and `fc2`. Defaults to 0.3,
        the value that was previously hard-coded.
    """

    def __init__(self, dropout_p=0.3):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

        # Single dropout module shared by both hidden fully connected layers.
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, t):
        """Return the 10 class logits for each sample in `t`."""
        # conv 1
        t = self.conv1(t)
        t = F.leaky_relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # conv 2
        t = self.conv2(t)
        t = F.leaky_relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # fc1: flatten each sample's 12x4x4 feature maps into one row.
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = self.dropout(t)
        t = F.leaky_relu(t)

        # fc2
        t = self.fc2(t)
        t = self.dropout(t)
        t = F.leaky_relu(t)

        # output: no softmax here, because cross-entropy is used as the LOSS
        # (not as an activation) and nn.CrossEntropyLoss expects raw logits.
        t = self.out(t)

        return t

In [28]:
# Training routine: one optimisation pass (epoch) over a dataloader.
def train_loop(dataloader,model,loss_fn,optimizer,verbose_each=32,verbose=False):
    """Train `model` for one epoch over `dataloader`.

    The model is put in training mode (which enables e.g. dropout) and moved to
    the module-level `device`. Every batch is moved to that device as well, then
    goes through a forward pass, backpropagation and one optimizer step.

    Parameters
    ----------
    dataloader : iterable yielding (X, y) batches. Its `.dataset` attribute is
        only read when `verbose` is true, to report the total sample count.
    model : torch.nn.Module to train.
    loss_fn : callable(pred, y) returning a scalar loss tensor.
    optimizer : optimizer built over the parameters of `model`.
    verbose_each : report progress every `verbose_each` batches; 0 or None
        disables reporting instead of raising on the modulo.
    verbose : progress is printed only when true. Off by default, which matches
        the previous behaviour where the print statement was commented out.

    Returns
    -------
    float
        Mean of the per-batch training losses, or nan if the dataloader
        produced no batches. Callers that ignore the return value are unaffected.
    """
    # Training mode activates layers such as dropout.
    model.train()
    # Move the model to the GPU if one is available.
    model = model.to(device)
    samples_seen = 0
    num_batches = 0
    total_loss = 0.0
    for batch,(X,y) in enumerate(dataloader):
        # Move the batch to the same device as the model.
        X = X.to(device)
        y = y.to(device)
        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred,y)
        # Backpropagate and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Bookkeeping. The loss is accumulated as a detached tensor so that
        # no per-batch .item() call is needed.
        samples_seen += len(X)
        num_batches += 1
        total_loss += loss.detach()
        # The original condition `batch % verbose_each*len(X) == 0` parsed as
        # `(batch % verbose_each)*len(X) == 0`; the intent is spelled out here.
        if verbose and verbose_each and batch % verbose_each == 0:
            num_samples = len(dataloader.dataset)
            print(f"batch={batch} loss={loss.item():>7f}  muestras-procesadas:[{samples_seen:>5d}/{num_samples:>5d}]")
    if num_batches == 0:
        return float("nan")
    return float(total_loss/num_batches)
# De manera similar, definimos la función de validación y testeo
def test_loop(dataloader,model,loss_fn):
    num_samples  = 0
    num_batches  = 0
    avrg_loss    = 0
    frac_correct = 0
    # Seteamos el modelo en modo evaluacion. Esto sirve para desactivar, por ejemplo, dropout, etc. cuando no estamos en una fase de entrenamiento.
    model.eval()
    # Pasamos el modelo la GPU si está disponible.    
    model = model.to(device)    
    # Para validar, desactivamos el cálculo de gradientes.
    with torch.no_grad():
        # Iteramos sobre lotes (batches)
        for X,y in dataloader:
            # Pasamos los tensores a la GPU si está disponible.
            X = X.to(device)
            y = y.to(device)           
            # Calculamos las predicciones del modelo...
            pred = model(X)
            # y las correspondientes pérdidas (errores), los cuales vamos acumulando en un valor total.
            num_batches += 1
            avrg_loss += loss_fn(pred,y).item()
            # También calculamos el número de predicciones correctas, y lo acumulamos en un total.
            num_samples += y.size(0)            
            frac_correct += (pred.argmax(1)==y).type(torch.float).sum().item()
    # Calculamos la pérdida total y la fracción de clasificaciones correctas, y las imprimimos.
    avrg_loss    /= num_batches
    frac_correct /= num_samples
    #print(f"Test Error: \n Accuracy: {frac_correct:>0.5f}, Avg. loss: {avrg_loss:>8f} \n")
    return avrg_loss,frac_correct

In [32]:
# Training hyperparameters for the single-hidden-layer ANN.
init_datetime = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
learning_rate = 1e-3
batch_size = 1000
num_epochs = 40
# Number of independent repetitions (fresh model and fresh split each time).
# Runs are done in short stretches because Google Colab disconnects before a
# long sweep finishes.
num_k = 1 #72
# Loss function used for both training and evaluation.
loss_fn = nn.CrossEntropyLoss()
# Output file names are fixed before training so that they are always defined
# for the download step and for later cells, even if no epoch triggers a save.
model_ANN = "best-model-"+init_datetime+".ptm"
json_ANN = "simulation-results-"+init_datetime+".json"
# Per-epoch metrics are collected as plain dicts and converted to a DataFrame
# once at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
records_ANN = []
max_valid_accu = 0.0
for k in range(num_k):
    # Fresh model for this repetition.
    model = ANN()
    # Hold out 10000 of the 60000 training samples for validation.
    train_dataset,valid_dataset = random_split(train_dataset_orig,[50000,10000])
    # NOTE(review): the training loader does not reshuffle between epochs;
    # consider shuffle=True.
    train_dataloader = DataLoader(train_dataset,batch_size=batch_size)
    valid_dataloader = DataLoader(valid_dataset,batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate,eps=1e-08,weight_decay=0,amsgrad=False)
    # The best validation loss is tracked per repetition; every improvement
    # overwrites the single checkpoint file.
    min_valid_loss = float("inf")
    for epoch in range(num_epochs):
        train_loop(train_dataloader,model,loss_fn,optimizer)
        # Both splits are re-evaluated with test_loop (model in eval mode) to
        # obtain the learning curves.
        train_loss,train_accu = test_loop(train_dataloader,model,loss_fn)
        valid_loss,valid_accu = test_loop(valid_dataloader,model,loss_fn)
        print(f"k={k} epoch={epoch} train_loss={train_loss} train_accu={train_accu} valid_loss={valid_loss} valid_accu={valid_accu}")
        records_ANN.append({"k":k,
                            "epoch":epoch,
                            "train_loss":train_loss,
                            "train_accu":train_accu,
                            "valid_loss":valid_loss,
                            "valid_accu":valid_accu})
        # Track the best accuracy on every epoch, not only when the loss improves.
        max_valid_accu = max(max_valid_accu,valid_accu)
        if valid_loss < min_valid_loss:
            min_valid_loss = valid_loss
            # Save the model parameters; "DONE!" is printed only after the
            # checkpoint has actually been written.
            print("   Saving model_ANN =",model_ANN,end="")
            torch.save(model.state_dict(),model_ANN)
            print(" ... DONE!")
# The original called `df.to_json`, but `df` is never defined (NameError).
dfANN = pd.DataFrame(records_ANN)
dfANN.to_json(json_ANN)
if COLAB:
    files.download(model_ANN)
    files.download(json_ANN)

k=0 epoch=0 train_loss=0.5448048406839371 train_accu=0.81442 valid_loss=0.5438126623630524 valid_accu=0.8134
   Saving model_ANN = best-model-2022-02-22-12-41-15.ptm ... DONE!
k=0 epoch=1 train_loss=0.46033042907714844 train_accu=0.84366 valid_loss=0.46605122089385986 valid_accu=0.8393
   Saving model_ANN = best-model-2022-02-22-12-41-15.ptm ... DONE!
k=0 epoch=2 train_loss=0.42169182300567626 train_accu=0.856 valid_loss=0.4303102374076843 valid_accu=0.85
   Saving model_ANN = best-model-2022-02-22-12-41-15.ptm ... DONE!
k=0 epoch=3 train_loss=0.3941757494211197 train_accu=0.86516 valid_loss=0.40575324296951293 valid_accu=0.8585
   Saving model_ANN = best-model-2022-02-22-12-41-15.ptm ... DONE!
k=0 epoch=4 train_loss=0.37308495104312894 train_accu=0.87116 valid_loss=0.3881243646144867 valid_accu=0.8653
   Saving model_ANN = best-model-2022-02-22-12-41-15.ptm ... DONE!
k=0 epoch=5 train_loss=0.355564803481102 train_accu=0.8768 valid_loss=0.3742578595876694 valid_accu=0.8708
   Saving mo

In [33]:
# Display the checkpoint file name set by the ANN training cell.
model_ANN

'best-model-2022-02-22-12-41-15.ptm'

In [34]:
# Training hyperparameters for the CNN.
init_datetime = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
learning_rate = 1e-3
batch_size = 1000
num_epochs = 50
# Number of independent repetitions (fresh model and fresh split each time).
# Runs are done in short stretches because Google Colab disconnects before a
# long sweep finishes.
num_k = 1 #72
# Loss function used for both training and evaluation.
loss_fn = nn.CrossEntropyLoss()
# Output file names are fixed before training so that they are always defined
# for the download step and for later cells, even if no epoch triggers a save.
model_CNN = "best-model-"+init_datetime+".ptm"
json_CNN = "simulation-results-"+init_datetime+".json"
# Per-epoch metrics are collected as plain dicts and converted to a DataFrame
# once at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
records_CNN = []
max_valid_accu = 0.0
for k in range(num_k):
    # Fresh model for this repetition.
    model = CNN()
    # Hold out 10000 of the 60000 training samples for validation.
    train_dataset,valid_dataset = random_split(train_dataset_orig,[50000,10000])
    # NOTE(review): the training loader does not reshuffle between epochs;
    # consider shuffle=True.
    train_dataloader = DataLoader(train_dataset,batch_size=batch_size)
    valid_dataloader = DataLoader(valid_dataset,batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate,eps=1e-08,weight_decay=0,amsgrad=False)
    # The best validation loss is tracked per repetition; every improvement
    # overwrites the single checkpoint file.
    min_valid_loss = float("inf")
    for epoch in range(num_epochs):
        train_loop(train_dataloader,model,loss_fn,optimizer)
        # Both splits are re-evaluated with test_loop (model in eval mode) to
        # obtain the learning curves.
        train_loss,train_accu = test_loop(train_dataloader,model,loss_fn)
        valid_loss,valid_accu = test_loop(valid_dataloader,model,loss_fn)
        print(f"k={k} epoch={epoch} train_loss={train_loss} train_accu={train_accu} valid_loss={valid_loss} valid_accu={valid_accu}")
        records_CNN.append({"k":k,
                            "epoch":epoch,
                            "train_loss":train_loss,
                            "train_accu":train_accu,
                            "valid_loss":valid_loss,
                            "valid_accu":valid_accu})
        # Track the best accuracy on every epoch, not only when the loss improves.
        max_valid_accu = max(max_valid_accu,valid_accu)
        if valid_loss < min_valid_loss:
            min_valid_loss = valid_loss
            # Save the model parameters; "DONE!" is printed only after the
            # checkpoint has actually been written.
            print("   Saving model_CNN =",model_CNN,end="")
            torch.save(model.state_dict(),model_CNN)
            print(" ... DONE!")
# The original called `df.to_json`, but `df` is never defined (NameError).
dfCNN = pd.DataFrame(records_CNN)
dfCNN.to_json(json_CNN)
if COLAB:
    files.download(model_CNN)
    files.download(json_CNN)

k=0 epoch=0 train_loss=0.9371773707866669 train_accu=0.67734 valid_loss=0.9513522267341614 valid_accu=0.6662
   Saving model_CNN = best-model-2022-02-22-12-59-04.ptm ... DONE!
k=0 epoch=1 train_loss=0.7006771576404571 train_accu=0.73828 valid_loss=0.7174831211566925 valid_accu=0.732
   Saving model_CNN = best-model-2022-02-22-12-59-04.ptm ... DONE!
k=0 epoch=2 train_loss=0.6214777946472168 train_accu=0.76024 valid_loss=0.6356602013111115 valid_accu=0.7541
   Saving model_CNN = best-model-2022-02-22-12-59-04.ptm ... DONE!
k=0 epoch=3 train_loss=0.5877949130535126 train_accu=0.77422 valid_loss=0.6012763857841492 valid_accu=0.7681
   Saving model_CNN = best-model-2022-02-22-12-59-04.ptm ... DONE!
k=0 epoch=4 train_loss=0.5556639468669892 train_accu=0.78352 valid_loss=0.5680227816104889 valid_accu=0.7787
   Saving model_CNN = best-model-2022-02-22-12-59-04.ptm ... DONE!
k=0 epoch=5 train_loss=0.5322651493549347 train_accu=0.79144 valid_loss=0.5453404605388641 valid_accu=0.7879
   Saving mo

In [35]:
# Display the checkpoint file name set by the CNN training cell.
model_CNN

'best-model-2022-02-22-12-59-04.ptm'

In [36]:
# Keep only the first whitespace-delimited token of each checkpoint name.
model_ANN = model_ANN.split(maxsplit=1)[0]
model_CNN = model_CNN.split(maxsplit=1)[0]

In [38]:
# Rebuild the ANN architecture and restore the saved parameters. The checkpoint
# is loaded onto the CPU first; the model is then moved to `device` in eval mode.
modelA = ANN()
modelA.load_state_dict(torch.load(model_ANN,map_location="cpu"))
modelA = modelA.eval().to(device)

In [39]:
# Rebuild the CNN architecture and restore the saved parameters. The checkpoint
# is loaded onto the CPU first; the model is then moved to `device` in eval mode.
modelC = CNN()
modelC.load_state_dict(torch.load(model_CNN,map_location="cpu"))
modelC = modelC.eval().to(device)

In [40]:
# Evaluate the restored ANN on the FashionMNIST test split.
batch_size = 1000
loss_fn = nn.CrossEntropyLoss()
test_loader = DataLoader(test_dataset,batch_size=batch_size)
test_loss,test_accu = test_loop(test_loader,modelA,loss_fn)
print("test_loss ANN = ",test_loss)
print("test_accu ANN = ",test_accu)

test_loss ANN =  0.3300893396139145
test_accu ANN =  0.8852


In [42]:
# Evaluate the restored CNN on the FashionMNIST test split.
batch_size = 1000
loss_fn = nn.CrossEntropyLoss()
test_loader = DataLoader(test_dataset,batch_size=batch_size)
test_loss,test_accu = test_loop(test_loader,modelC,loss_fn)
print("test_loss CNN = ",test_loss)
print("test_accu CNN = ",test_accu)

test_loss CNN =  0.31221417188644407
test_accu CNN =  0.8879
