# 2 - Convolutional Network & Fashion MNIST
Red convolucional mínima para clasificar Fashion MNIST

In [6]:
# Import de paquetes
%matplotlib inline
import matplotlib.pyplot as plt
%matplotlib inline
plt.ion()

import sys
import os

# Numpy
import numpy as np
from skimage import color, io

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F

# Torchvision
import torchvision.utils
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# Dataset
from torchvision.datasets import FashionMNIST

## Hiperparámetros de entrenamiento

In [None]:
# Training configuration: every tunable value of the notebook lives here.
use_gpu = True          # run on CUDA when a GPU is available, otherwise on CPU
use_dropout = True      # enable the dropout layers of the network
batch_size = 128        # samples per mini-batch in both data loaders
num_epochs = 20         # number of full passes over the training set
learning_rate = 0.001   # learning rate handed to the optimizer

## Fashion MNIST Dataset

60.000 imágenes de entrenamiento (más 10.000 de test) en 10 categorías de ropa: T-shirt/Top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle Boot. Normalizamos el dataset.

In [None]:
# Preprocessing pipeline: ToTensor followed by Normalize, which applies
# image = (image - mean) / std with mean = std = 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
img_transform = transforms.Compose([to_tensor, normalize])

# Options shared by the train and test splits / loaders.
dataset_options = dict(root='./data/FashionMNIST', download=True, transform=img_transform)
loader_options = dict(batch_size=batch_size, shuffle=True)

train_dataset = FashionMNIST(train=True, **dataset_options)
test_dataset = FashionMNIST(train=False, **dataset_options)

train_dataloader = DataLoader(train_dataset, **loader_options)
test_dataloader = DataLoader(test_dataset, **loader_options)

In [None]:
# Mostramos algunas imágenes random
def imshow(img, mean=0.5, std=0.5):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        img: torch tensor laid out as (channels, height, width) whose values
            were normalized as ``(image - mean) / std``.
        mean: mean used by the normalization; the default matches the
            ``Normalize((0.5,), (0.5,))`` transform of this notebook.
        std: standard deviation used by the normalization (same default).
    """
    # Undo the normalization. detach()/cpu() make the function usable with
    # tensors that live on the GPU or that are attached to the autograd graph.
    img = img.detach().cpu() * std + mean
    # matplotlib expects channel-last images: (H, W, C).
    plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))
    # The original referenced plt.show without calling it, which did nothing.
    plt.show()
    
# Pull the first batch produced by the (shuffled) training loader and show
# it as a single image grid, followed by its labels.
batch_idx, (sample_data, sample_targets) = next(enumerate(train_dataloader))
sample_grid = torchvision.utils.make_grid(sample_data)
imshow(sample_grid)
print(sample_targets)

## Convolutional Network

In [None]:
class ConvNet(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Three strided convolutions are followed by two fully connected layers.
    The layer sizes assume single-channel 28 x 28 inputs, so that the last
    convolution yields 32 x 3 x 3 = 288 features.

    Args:
        dropout: whether to build and apply the dropout layers. ``None``
            (the default) falls back to the notebook-level ``use_dropout``
            flag, so ``ConvNet()`` behaves as before. The decision is taken
            once at construction time and stored in ``self.use_dropout``;
            changing the global flag afterwards no longer affects (or
            breaks) an already-built network.
    """

    def __init__(self, dropout=None):
        super().__init__()

        self.use_dropout = use_dropout if dropout is None else bool(dropout)

        self.conv1 = nn.Conv2d(1, 8, kernel_size=4, stride=2, padding=1)    # out: 8 x 14 x 14
        self.conv2 = nn.Conv2d(8, 16, kernel_size=4, stride=2, padding=1)   # out: 16 x 7 x 7
        self.conv3 = nn.Conv2d(16, 32, kernel_size=4, stride=2, padding=1)  # out: 32 x 3 x 3
        if self.use_dropout:
            self.do1 = nn.Dropout2d(p=0.5)
        self.fc1 = nn.Linear(288, 50)
        if self.use_dropout:
            self.do2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(50, 10)  # 10 outputs: one score per class

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        No softmax is applied here: the notebook trains with
        ``nn.CrossEntropyLoss``, which takes unnormalized scores.
        """
        # Convolutional layers
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        if self.use_dropout:
            x = self.do1(x)

        # Flatten the feature maps into one feature vector per sample
        x = x.flatten(start_dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        if self.use_dropout:
            x = self.do2(x)

        return self.fc2(x)

# Pick the compute device: the first CUDA GPU when requested and available,
# the CPU otherwise.
if use_gpu and torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network and move its parameters to the chosen device.
convnet = ConvNet().to(device)

# Count the trainable parameters.
num_params = 0
for param in convnet.parameters():
    if param.requires_grad:
        num_params += param.numel()
print('Number of parameters: %d' % num_params)

## Loop de entrenamiento

In [None]:
# Loss function: cross entropy between the network outputs and the labels.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all the network parameters.
optimizer = torch.optim.Adam(convnet.parameters(), lr=learning_rate)

# History with one entry per epoch: the mean of the batch losses.
train_loss_avg = []

# Put the network in training mode.
convnet.train()

print('Training ...')
for epoch in range(num_epochs):
    running_loss = 0
    num_batches = 0

    for num_batches, (image_batch, label_batch) in enumerate(train_dataloader, start=1):
        # Move the batch and its labels to the device that holds the model.
        image_batch = image_batch.to(device)
        label_batch = label_batch.to(device)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: the first argument of the criterion is the network
        # output for the batch, the second one the target labels.
        prediction_batch = convnet(image_batch)
        loss = criterion(prediction_batch, label_batch)

        # Backpropagation followed by one optimizer step on the new gradients.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    train_loss_avg.append(running_loss / num_batches)
    print('Epoch [%d / %d] average loss: %f' % (epoch+1, num_epochs, train_loss_avg[-1]))

In [None]:
import matplotlib.pyplot as plt
plt.ion()

# Training curve: one point per epoch, drawn through the Axes object
# instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(train_loss_avg)
ax.set_xlabel('Epochs')
ax.set_ylabel('Cross-entropy loss')
plt.show()

## Evaluación

In [None]:
# Evaluation mode (turns the dropout layers off).
convnet.eval()

num_incorrect = 0
test_loss_sum = 0.0
num_instances = 0

# No gradients are needed for evaluation, so the whole loop runs under no_grad.
with torch.no_grad():
    for image_batch, label_batch in test_dataloader:
        image_batch = image_batch.to(device)
        label_batch = label_batch.to(device)

        # Network outputs for the batch
        prediction_batch = convnet(image_batch)

        # The predicted class is the index of the highest score
        predicted_label = prediction_batch.argmax(dim=1)
        num_incorrect += (predicted_label != label_batch).sum().item()

        # criterion returns the mean loss of the batch. Weight it by the batch
        # length so that the last, smaller batch does not count as much as a
        # full one in the final average.
        batch_len = image_batch.size(0)
        test_loss_sum += criterion(prediction_batch, label_batch).item() * batch_len
        num_instances += batch_len

# Mean loss per test sample
test_loss_avg = test_loss_sum / num_instances
print('average loss: %f' % (test_loss_avg))
print('classification error: %f%%' % ((num_incorrect / num_instances)*100))

In [None]:
# Check the predictions on a random batch of test images
samples = enumerate(test_dataloader)
batch_idx, (sample_data, sample_targets) = next(samples)

# Make sure dropout is off, skip gradient tracking, and run the batch on the
# same device as the model (feeding a CPU tensor to a CUDA model raises a
# RuntimeError). The predictions are brought back to the CPU for printing.
convnet.eval()
with torch.no_grad():
    prediction_batch = convnet(sample_data.to(device))
predicted_label = prediction_batch.argmax(dim=1).cpu()

imshow(torchvision.utils.make_grid(sample_data))
print(sample_targets)
print(predicted_label)