<a href="https://colab.research.google.com/github/harryypham/MyMLPractice/blob/main/AlexNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.utils as utils
import torch.optim as optim

%matplotlib inline

In [2]:
train_transform = transforms.Compose([
    transforms.Resize((70, 70)),
    transforms.RandomCrop((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

trainset = torchvision.datasets.CIFAR10(root="/content/data/train", train=True, download=True, transform=train_transform)
validset = torchvision.datasets.CIFAR10(root="/content/data/train", train=True, transform=test_transform)
testset = torchvision.datasets.CIFAR10(root="/content/data/test", train=False, download=True, transform=test_transform)

def split_train_valid(valid_size=0.1):
  train_indices = torch.arange(0, int(50000*(1-valid_size)))
  valid_indices = torch.arange(int(50000*(1-valid_size)), 50000)

  train_sampler = utils.data.SubsetRandomSampler(train_indices)
  valid_sampler = utils.data.SubsetRandomSampler(valid_indices)

  return train_sampler, valid_sampler


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12788243.79it/s]


Extracting /content/data/train/cifar-10-python.tar.gz to /content/data/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13123236.39it/s]


Extracting /content/data/test/cifar-10-python.tar.gz to /content/data/test


In [3]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
batch_size = 128
epochs = 20

In [4]:
train_sampler, valid_sampler = split_train_valid()
trainloader = utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler, num_workers=2)
validloader = utils.data.DataLoader(trainset, batch_size=batch_size, sampler=valid_sampler, num_workers=2)
testloader = utils.data.DataLoader(testset, batch_size=batch_size, num_workers=2)

In [5]:
#Checking dimension
for images, labels in trainloader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break

Image batch dimensions: torch.Size([128, 3, 64, 64])
Image label dimensions: torch.Size([128])


In [6]:
class AlexNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        #Conv1
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),

        #Conv2
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),

        #Conv3
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),

        #Conv4
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),

        #Conv5
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

    self.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, 10)
    )

  def forward(self, x):
    x = self.conv(x)
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

In [7]:
def compute_accuracy(model, dataloader, device):
    """
    Compute accuracy on dataset
    """
    correct, total = 0, 0
    model.to(device)
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        _, predict = outputs.max(1)

        total += targets.size(0)
        correct += predict.eq(targets).sum().item()

    return correct/total * 100




In [8]:
def train(model, trainloader, validloader, criterion, optimizer, epochs, device, compute_accuracy):
  loss_history, acc_history = [], []
  for epoch in range(epochs):
    print(f'\nEpoch {epoch+1}:')
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        model.train()
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predict = outputs.max(1)
        total += targets.size(0)
        correct += predict.eq(targets).sum().item()

        if (not batch_idx % 50) and batch_idx != 0:
              print ('Batch %03d | Cost: %.6f | Train Acc: %.4f'
                    %(batch_idx, train_loss/(batch_idx+1), 100*correct/total))

    loss_history.append(train_loss/(batch_idx+1))
    acc_history.append(100*correct/total)

    model.eval()
    with torch.set_grad_enabled(False):
        valid_acc = compute_accuracy(model, validloader, device)
    print ('Valid Acc: %.4f' % (valid_acc))

  return loss_history, acc_history



def test(model, testloader, device, compute_accuracy):
  model.eval()
  with torch.set_grad_enabled(False):
    test_acc = compute_accuracy(model, testloader, device)
  return test_acc

In [10]:
model = AlexNet()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=5e-4)
loss_history, acc_history = train(model, trainloader, validloader, criterion, optimizer, epochs, device, compute_accuracy)
test_acc = test(model, testloader, device, compute_accuracy)


Epoch 1:
Batch 050 | Cost: 2.184216 | Train Acc: 14.1697
Batch 100 | Cost: 2.069616 | Train Acc: 18.3323
Batch 150 | Cost: 1.992834 | Train Acc: 20.6902
Batch 200 | Cost: 1.933537 | Train Acc: 22.8273
Batch 250 | Cost: 1.890591 | Train Acc: 24.6514
Batch 300 | Cost: 1.845879 | Train Acc: 26.6715
Batch 350 | Cost: 1.807046 | Train Acc: 28.4477
Valid Acc: 41.6200

Epoch 2:
Batch 050 | Cost: 1.489876 | Train Acc: 42.8309
Batch 100 | Cost: 1.478596 | Train Acc: 43.4638
Batch 150 | Cost: 1.458008 | Train Acc: 44.2674
Batch 200 | Cost: 1.441386 | Train Acc: 45.0715
Batch 250 | Cost: 1.426502 | Train Acc: 45.8759
Batch 300 | Cost: 1.413750 | Train Acc: 46.5142
Batch 350 | Cost: 1.398844 | Train Acc: 47.2801
Valid Acc: 52.4400

Epoch 3:
Batch 050 | Cost: 1.284678 | Train Acc: 52.6961
Batch 100 | Cost: 1.265424 | Train Acc: 53.4808
Batch 150 | Cost: 1.252867 | Train Acc: 53.9839
Batch 200 | Cost: 1.248605 | Train Acc: 54.1628
Batch 250 | Cost: 1.242144 | Train Acc: 54.3482
Batch 300 | Cost: 1.

In [11]:
#I was surprised when initially validation accuracy is higher than training accuracy but then realized I have high drop out rate.
print(test_acc)

76.64


In [None]:
"""
To do list:
1. Visualize result
2. Visualize kernels, kernel maps
3. Experiment with hyperparameters tuning, preprocessing
4. Maybe train for more epochs and graph validation and training loss for early stopping
5. Maybe tweaking the architecture a little bit
6. Learn how to do distributed/parallel training like the paper suggest.
"""