In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

In [None]:
# Preprocessing applied to every CIFAR-10 image:
#   1. upscale to 224 pixels (the input size the network below is built for),
#   2. convert the PIL image to a float tensor,
#   3. normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training and test splits share the same storage root and preprocessing;
# download=True fetches the archive into the root if it is not there yet.
train_dataset = torchvision.datasets.CIFAR10(
    root='/data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='/data',
    train=False,
    download=True,
    transform=transform,
)

100%|██████████| 170M/170M [00:15<00:00, 11.3MB/s]


In [None]:
# Sanity check: fetch the first training example, then show the class names
# followed by that example's tensor shape and its integer label.
img, label = train_dataset[0]
print(train_dataset.classes)
print(img.shape, label)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
torch.Size([3, 224, 224]) 6


In [None]:
# Mini-batch iterators over the two splits.
# Training batches are reshuffled every epoch; evaluation keeps a fixed order
# and uses a larger batch since no gradients are computed there.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
)

In [None]:
class AlexNet(nn.Module):
  """AlexNet-style convolutional classifier.

  The network is a stack of five convolutions (with ReLU activations and
  three max-pooling stages) followed by a three-layer fully connected head
  with dropout in front of the first two linear layers.

  An adaptive average pool sits between the two parts so that the head always
  receives a 256x6x6 feature map. For 224x224 inputs the convolutional stack
  already produces 6x6 maps, so the pool is an identity there; for other
  input sizes it resizes the maps instead of letting the first linear layer
  fail with a shape mismatch. The pool has no parameters, so the state_dict
  layout is unchanged.

  Args:
    num_classes: number of output logits produced per sample.
  """

  def __init__(self, num_classes=10):
    super().__init__()
    # Shape comments below assume a 224x224 RGB input.
    self.features = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=64, kernel_size=11, stride=4, padding=2), #224x224x3 -> 55x55x64
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2), #55x55x64 -> 27x27x64

        nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, padding=2), #27x27x64 -> 27x27x192
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2), #27x27x192 -> 13x13x192

        nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1), #13x13x192 -> 13x13x384
        nn.ReLU(),

        nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1), #13x13x384 -> 13x13x256
        nn.ReLU(),

        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1), #13x13x256 -> 13x13x256
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2) #13x13x256 -> 6x6x256
    )

    # Guarantees the 6x6 spatial size the classifier's first layer expects,
    # whatever the spatial size coming out of `features` is.
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(in_features=256*6*6, out_features=4096), #6x6x256 -> 4096
        nn.ReLU(),

        nn.Dropout(),
        nn.Linear(in_features=4096, out_features=4096), #4096 -> 4096
        nn.ReLU(),

        nn.Linear(in_features=4096, out_features=num_classes) #4096 -> num_classes
    )

  def forward(self, x):
    """Return raw class logits of shape (batch, num_classes) for image batch `x`."""
    x = self.features(x)
    x = self.avgpool(x)
    # flatten (unlike view) also works when the activations are not contiguous.
    x = torch.flatten(x, 1)
    x = self.classifier(x)
    return x

In [None]:
# Network under training: ten output logits, one per CIFAR-10 class.
model = AlexNet(num_classes=10)

# The model's last layer is a plain Linear (no softmax), which is the raw-logit
# input that CrossEntropyLoss expects.
loss_fn = nn.CrossEntropyLoss()

# The previous lr=0.01 is ten times Adam's default step size; in the recorded
# run the accuracy never left 0.1000 (chance level for ten classes).
# A much smaller step is used so the optimisation can actually make progress.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Train for ten epochs, reporting test-set accuracy after each one.

# Run every batch on whatever device the model's parameters live on, so the
# loop keeps working unchanged if the model is moved to an accelerator.
device = next(model.parameters()).device

for epoch in range(10):
  # ---- training pass ----
  model.train()  # enables dropout
  for xb, yb in train_dataloader:
    xb, yb = xb.to(device), yb.to(device)
    # Clear gradients *before* backward: if a previous run of this cell was
    # interrupted between backward() and step(), stale gradients would
    # otherwise be added to the first update.
    optimizer.zero_grad()
    pred = model(xb)
    loss = loss_fn(pred, yb)
    loss.backward()
    optimizer.step()

  # ---- evaluation pass ----
  model.eval()  # disables dropout
  correct = 0
  total = 0
  with torch.no_grad():  # no autograd bookkeeping needed for evaluation
    for xb, yb in test_dataloader:
      xb, yb = xb.to(device), yb.to(device)
      pred = model(xb)
      predicted = pred.argmax(dim=1)  # index of the largest logit per sample
      correct += (predicted == yb).sum().item()
      total += yb.size(0)
  accuracy = correct/total
  print(f'Epoch {epoch+1} accuracy {accuracy:.4f}')

Epoch 1 accuracy 0.1000
Epoch 2 accuracy 0.1000
