In [1]:
# Get general purpose APIs first
import numpy as np
import matplotlib.pyplot as plt

# load the Pytorch APIs
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

import torchvision.models as models

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preparar los datos (CIFAR - 10)

In [4]:
## Preparar los datos (CIFAR - 10)
# Transformaciones para preprocesar las imágenes
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # AlexNet original usa 224x224
    transforms.ToTensor(),    transforms.Normalize((0.5,), (0.5,))  # Normalización
])

# Descargar y cargar CIFAR-10
batch_size = 64

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 30.7MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


#1.- Implementar los modelos AlexNet a mano

## 1.1.- Implementar Alex Net en pytorch

In [2]:

# Definir la arquitectura de AlexNet personalizada
class CustomAlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomAlexNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # 1
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # 2

            nn.Conv2d(64, 192, kernel_size=5, padding=2),  # 3
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # 4

            nn.Conv2d(192, 384, kernel_size=3, padding=1),  # 5
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # 6
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 7
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # 8
        )

        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096),  # 9
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),  # 10
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)  # 11
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)  # Flatten antes de la capa densa
        x = self.classifier(x)
        return x

# Crear el modelo
model = CustomAlexNet(num_classes=10).to(device)

# Mostrar el resumen de la arquitectura
print(model)

CustomAlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in

In [6]:

# Definir función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ✅ Incluir Accuracy en Entrenamiento y Evaluación
def train_and_evaluate(model, trainloader, testloader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)  # Obtener la clase con mayor probabilidad
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_acc = 100 * correct_train / total_train
        avg_loss = running_loss / len(trainloader)

        # Evaluación en test set
        model.eval()
        correct_test = 0
        total_test = 0
        test_loss = 0.0

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item()

                _, predicted = outputs.max(1)
                correct_test += (predicted == labels).sum().item()
                total_test += labels.size(0)

        test_acc = 100 * correct_test / total_test
        avg_test_loss = test_loss / len(testloader)

        print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.2f}% | Test Loss: {avg_test_loss:.4f} | Test Acc: {test_acc:.2f}%")

# Entrenar modelo
train_and_evaluate(model, trainloader, testloader, criterion, optimizer, epochs=10)


Epoch 1/10 | Loss: 1.3690 | Train Acc: 50.58% | Test Loss: 1.3042 | Test Acc: 53.26%
Epoch 2/10 | Loss: 1.2406 | Train Acc: 55.74% | Test Loss: 1.2143 | Test Acc: 56.93%
Epoch 3/10 | Loss: 1.1410 | Train Acc: 59.68% | Test Loss: 1.1314 | Test Acc: 59.54%
Epoch 4/10 | Loss: 1.0640 | Train Acc: 62.25% | Test Loss: 1.1581 | Test Acc: 59.07%
Epoch 5/10 | Loss: 0.9928 | Train Acc: 65.15% | Test Loss: 1.1109 | Test Acc: 61.14%
Epoch 6/10 | Loss: 0.9216 | Train Acc: 67.45% | Test Loss: 1.0667 | Test Acc: 62.10%
Epoch 7/10 | Loss: 0.8517 | Train Acc: 70.13% | Test Loss: 1.0885 | Test Acc: 61.90%
Epoch 8/10 | Loss: 0.7841 | Train Acc: 72.50% | Test Loss: 1.0595 | Test Acc: 62.86%
Epoch 9/10 | Loss: 0.7344 | Train Acc: 74.23% | Test Loss: 1.1064 | Test Acc: 61.81%
Epoch 10/10 | Loss: 0.6803 | Train Acc: 76.08% | Test Loss: 1.1091 | Test Acc: 61.59%



#2.- Ahora vamos a comparar con los modelos preentrenados

1.   Pytorch
2.   Tensor-flow



## 2.1- Usar AlexNet preentrenado en PyTorch

In [7]:
import torchvision.models as models

# Descargar AlexNet preentrenado
model_pretrained = models.alexnet(pretrained=True)
model_pretrained.eval()  # Modo evaluación
model_pretrained.to(device)

# Modificar la última capa para CIFAR-10 (10 clases)
model_pretrained.classifier[6] = nn.Linear(4096, 10)
model_pretrained.to(device)

# Evaluar en CIFAR-10
correct = 0
total = 0

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_pretrained(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Precisión de AlexNet preentrenado en CIFAR-10: {100 * correct / total:.2f}%")


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 176MB/s]


Precisión de AlexNet preentrenado en CIFAR-10: 11.38%


El accuracy se ve muy bajo, parece que algo no estamos adaptando bien, volvamos a ajustar

In [8]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

# Configurar dispositivo (GPU si está disponible)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Transformaciones: Redimensionar imágenes a 224x224 para AlexNet
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Ajustar tamaño
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalización estándar
])


# 2. Descargar AlexNet preentrenado
model_pretrained = models.alexnet(pretrained=True)
model_pretrained.to(device)

# 3. Modificar la última capa para 10 clases en CIFAR-10
model_pretrained.classifier[6] = nn.Linear(4096, 10)
model_pretrained.classifier[6].to(device)

# 4. Fine-Tuning: Descongelar las últimas 2 capas convolucionales
for param in model_pretrained.features[:-2].parameters():
    param.requires_grad = False  # Congelar capas iniciales

# Definir función de pérdida y optimizador
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_pretrained.parameters(), lr=0.0001)  # Learning rate bajo

# 5. Entrenar el modelo
num_epochs = 10
for epoch in range(num_epochs):
    model_pretrained.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model_pretrained(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Época {epoch+1}/{num_epochs} - Pérdida: {running_loss/len(trainloader):.4f} - Accuracy: {accuracy:.2f}%")

# 6. Evaluar el modelo preentrenado
model_pretrained.eval()
correct, total = 0, 0

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_pretrained(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Precisión de AlexNet Preentrenado en CIFAR-10: {100 * correct / total:.2f}%")


Época 1/10 - Pérdida: 0.7518 - Accuracy: 73.73%
Época 2/10 - Pérdida: 0.5335 - Accuracy: 81.08%
Época 3/10 - Pérdida: 0.4512 - Accuracy: 83.93%
Época 4/10 - Pérdida: 0.3882 - Accuracy: 86.24%
Época 5/10 - Pérdida: 0.3377 - Accuracy: 87.62%
Época 6/10 - Pérdida: 0.2906 - Accuracy: 89.66%
Época 7/10 - Pérdida: 0.2587 - Accuracy: 90.79%
Época 8/10 - Pérdida: 0.2216 - Accuracy: 92.00%
Época 9/10 - Pérdida: 0.1953 - Accuracy: 92.95%
Época 10/10 - Pérdida: 0.1790 - Accuracy: 93.58%
Precisión de AlexNet Preentrenado en CIFAR-10: 86.02%
