In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [2]:
# AlexNet-style network defined from scratch (5 conv, 3 max-pool, 3 dense layers)
class AlexNetScratch(nn.Module):
    """AlexNet-like CNN sized for small images such as 32x32 CIFAR-10.

    The convolutional stack halves the spatial resolution three times
    (32 -> 16 -> 8 -> 4 for CIFAR-10). An adaptive average pool then forces
    the feature map to 4x4, so the classifier also accepts inputs whose
    resolution is not exactly 32x32 (each side must be at least 8 pixels so
    that the three max-pool layers have something to pool).

    Args:
        num_classes: Number of output logits produced by the last dense layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            # Stage 1: Conv + ReLU + Pooling
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # conv1
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                 # pool1

            # Stage 2: Conv + ReLU + Pooling
            nn.Conv2d(64, 192, kernel_size=3, padding=1),           # conv2
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                 # pool2

            # Stage 3: Conv + ReLU
            nn.Conv2d(192, 384, kernel_size=3, padding=1),          # conv3
            nn.ReLU(inplace=True),

            # Stage 4: Conv + ReLU
            nn.Conv2d(384, 256, kernel_size=3, padding=1),          # conv4
            nn.ReLU(inplace=True),

            # Stage 5: Conv + ReLU + Pooling
            nn.Conv2d(256, 256, kernel_size=3, padding=1),          # conv5
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)                  # pool3
        )
        # Parameter-free layer, so state_dict keys are unchanged. For 32x32
        # inputs the feature map is already 4x4 and this is an exact identity;
        # for other resolutions it resamples to the 4x4 the classifier expects.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 4 * 4, 4096),  # 256 channels x 4 x 4 after the adaptive pool
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)   # third and final dense layer: class logits
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch.

        Args:
            x: Tensor of shape (batch, 3, H, W).
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the dense layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [3]:
# CIFAR-10 data pipeline for the from-scratch model
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize to 32 px, convert to a tensor, then normalize every channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Train and test splits, downloaded into ./data when not already present.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:04<00:00, 35.1MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
# Model built from scratch, moved to the selected device
model_scratch = AlexNetScratch(num_classes=10)
model_scratch = model_scratch.to(device)

# Cross-entropy loss over the logits; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_scratch.parameters(), lr=1e-3)

In [5]:
# Training loop for the from-scratch model
num_epochs = 10
print("Entrenando modelo AlexNet desde cero (PyTorch)...")
for epoch in range(num_epochs):
    model_scratch.train()  # training mode: the dropout layers are active
    batch_losses = []
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model_scratch(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Sum of the per-batch mean losses; the report divides it by the batch count.
    running_loss = sum(batch_losses, 0.0)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(trainloader):.4f}")

Entrenando modelo AlexNet desde cero (PyTorch)...
Epoch 1/10, Loss: 1.5977
Epoch 2/10, Loss: 1.1730
Epoch 3/10, Loss: 0.9600
Epoch 4/10, Loss: 0.8241
Epoch 5/10, Loss: 0.7169
Epoch 6/10, Loss: 0.6434
Epoch 7/10, Loss: 0.5728
Epoch 8/10, Loss: 0.5228
Epoch 9/10, Loss: 0.4715
Epoch 10/10, Loss: 0.4330


In [6]:
# Evaluation of the from-scratch model on the test split
model_scratch.eval()  # inference mode: dropout is disabled
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed to score predictions
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_scratch(inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
accuracy = 100 * correct / total
print("Precisión del modelo desde cero: {:.2f}%".format(accuracy))


Precisión del modelo desde cero: 78.83%


In [7]:
# --- Comparison: pretrained model in PyTorch ---
# Load AlexNet with its ImageNet weights and adapt the last layer to 10 classes.
# The explicit `weights=` enum replaces `pretrained=True`, which torchvision has
# deprecated since 0.13; IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded.
pretrained_model = torchvision.models.alexnet(
    weights=torchvision.models.AlexNet_Weights.IMAGENET1K_V1
)
# Take the input width from the layer being replaced instead of hard-coding 4096,
# so the new head always matches the preceding layer.
pretrained_model.classifier[6] = nn.Linear(
    pretrained_model.classifier[6].in_features, 10
)  # new 10-class head, randomly initialized
pretrained_model = pretrained_model.to(device)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 141MB/s]


In [8]:
# For the pretrained model it is recommended to resize the images to 224x224
# and to normalize with the ImageNet statistics.
transform_pretrained = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Same CIFAR-10 splits as before, now with the pretrained-model transform.
trainset_pre = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_pretrained,
)
testset_pre = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_pretrained,
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader_pre = torch.utils.data.DataLoader(
    dataset=trainset_pre,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader_pre = torch.utils.data.DataLoader(
    dataset=testset_pre,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

# Loss and optimizer for fine-tuning; Adam updates every parameter of the model.
criterion_pre = nn.CrossEntropyLoss()
optimizer_pre = optim.Adam(params=pretrained_model.parameters(), lr=1e-3)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
# Training loop for the pretrained model (fine-tuning)
print("\nEntrenando modelo preentrenado (PyTorch)...")
for epoch in range(num_epochs):
    pretrained_model.train()  # switch the model to training mode
    batch_losses = []
    for inputs, labels in trainloader_pre:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer_pre.zero_grad()
        outputs = pretrained_model(inputs)
        loss = criterion_pre(outputs, labels)
        loss.backward()
        optimizer_pre.step()

        batch_losses.append(loss.item())

    # Sum of the per-batch mean losses; the report divides it by the batch count.
    running_loss = sum(batch_losses, 0.0)
    print(f"[Preentrenado] Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(trainloader_pre):.4f}")


Entrenando modelo preentrenado (PyTorch)...
[Preentrenado] Epoch 1/10, Loss: 1.4162
[Preentrenado] Epoch 2/10, Loss: 0.8725
[Preentrenado] Epoch 3/10, Loss: 0.7142
[Preentrenado] Epoch 4/10, Loss: 0.6217
[Preentrenado] Epoch 5/10, Loss: 0.5709
[Preentrenado] Epoch 6/10, Loss: 0.5245
[Preentrenado] Epoch 7/10, Loss: 0.4946
[Preentrenado] Epoch 8/10, Loss: 0.4667
[Preentrenado] Epoch 9/10, Loss: 0.4452
[Preentrenado] Epoch 10/10, Loss: 0.4200


In [10]:
# Evaluation of the pretrained model on the test split
pretrained_model.eval()  # switch the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed to score predictions
    for inputs, labels in testloader_pre:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = pretrained_model(inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
accuracy = 100 * correct / total
print("Precisión del modelo preentrenado: {:.2f}%".format(accuracy))

Precisión del modelo preentrenado: 81.84%
