# CNN con PyTorch

Repetimos el ejemplo de Keras, pero ahora utilizando PyTorch. Como veremos, es mucho más complejo, ya que nos permite definir con más detalle todo el proceso.

In [None]:
import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim.lr_scheduler import StepLR

Definimos la arquitectura de la CNN

In [None]:
class Net(nn.Module):
    """Small CNN classifier: two pairs of 3x3 convolutions and a linear head.

    Layout: conv-conv-maxpool, conv-conv, global average pooling, dropout,
    fully connected layer, log-softmax.

    The number of input channels is fixed by ``in_channels``. The spatial
    size of the input is not fixed: the adaptive average pooling collapses
    whatever height/width remains to 1x1 before the linear layer.

    Args:
        num_classes: Number of output classes (width of the final layer).
        in_channels: Number of channels in the input images (1 = grayscale).
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()

        # Convolutional layers. kernel_size=3 with padding=1 keeps the
        # height and width of the feature maps unchanged.
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        # 2x2 max pooling, applied once between the two convolution pairs.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

        # Global average pooling: one value per channel, whatever the
        # spatial size of the incoming feature map.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Dropout applied to the pooled 128-dimensional feature vector.
        self.dropout = nn.Dropout(0.5)

        # Classification head.
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of shape (batch_size, in_channels, height, width).

        Returns:
            Tensor of shape (batch_size, num_classes) holding
            log-probabilities. Call ``.exp()`` on it to get probabilities.
        """
        # First convolution pair, then halve the spatial resolution.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)

        # Second convolution pair.
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))

        # Collapse each channel to a single value and flatten to
        # (batch_size, 128) for the dropout and linear layers.
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)

        x = self.dropout(x)
        x = self.fc(x)

        # F.nll_loss expects log-probabilities, so the network must end in
        # log_softmax rather than softmax. Feeding plain probabilities to
        # nll_loss yields a loss that is not a negative log-likelihood.
        return F.log_softmax(x, dim=1)

Definimos cómo el modelo debe cargar los datos

In [None]:
class CustomMNISTDataset(Dataset):
    """Map-style dataset pairing pre-loaded image arrays with their labels.

    Each item is converted to tensors on access and moved to ``device``.
    ``device`` is not a parameter: it is looked up as a module-level name,
    so it must be defined before any item is requested.

    Args:
        encodings: Indexable collection of image arrays, one per sample.
        labels: Indexable collection of labels, aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label collection defines how many samples exist.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` tensor pair stored at ``idx``."""
        pixels = torch.tensor(self.encodings[idx])
        digit = torch.tensor(self.labels[idx])
        return pixels.to(device), digit.to(device)

Definimos el proceso de entrenamiento paso a paso

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``, logging every 100 batches.

    Args:
        model: Network to train. Its output is passed to ``F.nll_loss``,
            which expects log-probabilities (e.g. from ``log_softmax``).
        device: Device each batch is moved to before the forward pass. It
            should be the device the model's parameters live on.
        train_loader: Iterable of ``(data, target)`` batches exposing
            ``__len__`` and a ``dataset`` attribute (used for logging).
        optimizer: Optimizer bound to ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()

    # Loop-invariant values used only by the progress message.
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Make sure the batch lives on the same device as the model. This is
        # a no-op when the loader already yields tensors on ``device``.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss.item()))

Definimos el proceso de validación paso a paso

In [None]:
def validation(model, device, val_loader):
    """Evaluate ``model`` on ``val_loader`` and print average loss and accuracy.

    Args:
        model: Network to evaluate. Its output is passed to ``F.nll_loss``,
            which expects log-probabilities (e.g. from ``log_softmax``).
        device: Device each batch is moved to before the forward pass.
        val_loader: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute whose length is the number of samples.
    """
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            # Keep the batch on the same device as the model (no-op when it
            # is already there).
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) per batch so the division below gives a true
            # per-sample average even when the last batch is smaller.
            val_loss += F.nll_loss(output, target, reduction='sum').item()
            # Predicted class = index of the highest score in each row.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(val_loader.dataset)
    val_loss /= num_samples

    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, num_samples,
        100. * correct / num_samples))

Cargamos los datos igual que hicimos antes

In [None]:
import zipfile

# Unpack both zipped CSV splits (train first, then test) into the same
# extraction folder.
for archive_path in (
    "./data/mnist/mnist_train.csv.zip",
    "./data/mnist/mnist_test.csv.zip",
):
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall("./data/mnist/extracted/")

In [None]:
# Load the training split. The first column is used as the label and the
# remaining columns as the pixel values.
train_df = pd.read_csv("./data/mnist/extracted/mnist_train.csv")

y_train = train_df.iloc[:, 0].values

# Convert to float32 and divide by 255 so every pixel lies between 0 and 1.
x_train = train_df.iloc[:, 1:].values.astype("float32") / 255

# Reshape each flat row of 784 values into a (28, 28, 1) image.
x_train = x_train.reshape(len(x_train), 28, 28, 1)

El input tensor en Keras tiene que tener la forma **(batch_size, height, width, channels)**

El input tensor en PyTorch tiene que tener la forma **(batch_size, channels, height, width)**

In [None]:
# Current layout after the reshape: (num_samples, 28, 28, 1), i.e. channels last.
x_train.shape

In [None]:
# Preview only (nothing is assigned): moving axis 3 to position 1 gives the
# channels-first layout (num_samples, 1, 28, 28).
x_train.transpose((0, 3, 1, 2)).shape

In [None]:
# Switch the training images to channels-first order:
# (num_samples, height, width, channels) -> (num_samples, channels, height, width).
x_train = x_train.transpose((0, 3, 1, 2))

Separamos manualmente los conjuntos de entrenamiento y validación.
Antes lo hacía automáticamente Keras.

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the training data for validation; the fixed random_state
# makes the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

In [None]:
# Seed PyTorch's random number generator for reproducibility.
torch.manual_seed(42)

# Module-level device name; CustomMNISTDataset.__getitem__ reads this global.
device = 'cpu'

# Wrap the numpy arrays so they can be served item by item as tensors.
train_dataset = CustomMNISTDataset(x_train, y_train)
val_dataset = CustomMNISTDataset(x_val, y_val)

# Batches of 128 samples; no shuffle argument is passed, so both loaders
# iterate in dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128)

In [None]:
# Shape of the first training image tensor (channels first).
train_dataset[0][0].shape

In [None]:
# Same image with the channel axis moved last: (height, width, channels).
train_dataset[0][0].permute(1, 2, 0).shape

In [None]:
from matplotlib import pyplot as plt
# Draw the first training image; the channel axis is moved last before
# handing the tensor to imshow.
plt.imshow(train_dataset[0][0].permute(1, 2, 0), interpolation='nearest')
plt.show()

In [None]:
# Build the network on the chosen device and optimize it with Adam.
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Two epochs (numbered 1 and 2), each followed by a validation pass.
for epoch in range(1, 3):
    train(model, device, train_loader, optimizer, epoch)
    validation(model, device, val_loader)

# Uncomment to save the trained weights to disk.
#torch.save(model.state_dict(), "mnist_cnn.pt")

In [None]:
# Load the test split. The first column is used as the label and the
# remaining columns as the pixel values.
test = pd.read_csv("./data/mnist/extracted/mnist_test.csv")

y_test = test.iloc[:, 0].values

# Convert to float32 and divide by 255 so every pixel lies between 0 and 1.
x_test = test.iloc[:, 1:].values.astype("float32") / 255

# Reshape each flat row of 784 values into a (28, 28, 1) image.
x_test = x_test.reshape(len(x_test), 28, 28, 1)

# Switch to the channels-first layout: (num_samples, 1, 28, 28).
x_test = x_test.transpose((0, 3, 1, 2))

In [None]:
# Slicing with 0:1 (rather than indexing with 0) keeps the batch axis.
x_test[0:1,:].shape # (batch_size, channels, height, width)

In [None]:
# Turn the first test image into a one-sample batch tensor on the chosen device.
tensor = torch.tensor(x_test[0:1,:]).to(device)
tensor.shape

In [None]:
from matplotlib import pyplot as plt
# Draw the selected test image; the channel axis is moved last before
# handing the tensor to imshow.
plt.imshow(tensor[0].permute(1, 2, 0), interpolation='nearest')
plt.show()

In [None]:
# Inference: switch to eval mode (this disables dropout) and skip gradient tracking.
model.eval()
with torch.no_grad():
    predictions = model(tensor)

In [None]:
# Raw model output for the test image: one score per class.
predictions

In [None]:
# Predicted digit: index of the highest score along the class axis.
predictions.argmax(axis=-1).flatten()

## Referencias:

- https://github.com/pytorch/examples/blob/main/mnist/main.py
- https://www.kaggle.com/datasets/oddrationale/mnist-in-csv