In [1]:
import os
%cd datasets
!bash get_datasets.sh
%cd ..

/Users/letunglam/Projects/deep-learning-training/minigrad/datasets
/Users/letunglam/Projects/deep-learning-training/minigrad


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

USE_GPU = True
dtype = torch.float32

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print_every = 100
print('using device:', device)

using device: cpu


In [5]:
NUM_TRAIN = 49000

transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

cifar10_train = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./datasets', train=False, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [6]:
def flatten(x):
    N = x.shape[0] # read
    return x.view(N, -1)

def test_flatten():
    x = torch.arange(12).view(2, 1, 3, 2)
    print('Before flattening: ', x)
    print('after flatenning: ', flatten(x))

test_flatten()

Before flattening:  tensor([[[[ 0,  1],
          [ 2,  3],
          [ 4,  5]]],


        [[[ 6,  7],
          [ 8,  9],
          [10, 11]]]])
after flatenning:  tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])


In [8]:
import torch.nn.functional as F

def two_layer_fc(x, params):
    """
    A fully-connected NN,
    """

    x = flatten(x)

    w1, w2 = params

    x = F.relu(x.mm(w1))
    x = x.mm(w2)
    return x

def two_layer_fc_test():
    hidden_layer_size = 42
    x = torch.zeros((64, 50), dtype=dtype)
    w1 = torch.zeros((50, hidden_layer_size), dtype=dtype)
    w2 = torch.zeros((hidden_layer_size, 10), dtype=dtype)
    scores = two_layer_fc(x, [w1, w2])
    print(scores.size())

two_layer_fc_test()

torch.Size([64, 10])


In [24]:
def three_layer_convnet(x, params):
    """
    Performs the forward pass of a three-layer convolutional network with the
    architecture defined above.

    Inputs:
    - x: A PyTorch Tensor of shape (N, 3, H, W) giving a minibatch of images
    - params: A list of PyTorch Tensors giving the weights and biases for the
      network; should contain the following:
      - conv_w1: PyTorch Tensor of shape (channel_1, 3, KH1, KW1) giving weights
        for the first convolutional layer
      - conv_b1: PyTorch Tensor of shape (channel_1,) giving biases for the first
        convolutional layer
      - conv_w2: PyTorch Tensor of shape (channel_2, channel_1, KH2, KW2) giving
        weights for the second convolutional layer
      - conv_b2: PyTorch Tensor of shape (channel_2,) giving biases for the second
        convolutional layer
      - fc_w: PyTorch Tensor giving weights for the fully-connected layer. Can you
        figure out what the shape should be?
      - fc_b: PyTorch Tensor giving biases for the fully-connected layer. Can you
        figure out what the shape should be?

    Returns:
    - scores: PyTorch Tensor of shape (N, C) giving classification scores for x
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
    x = F.relu(F.conv2d(x, conv_w1, conv_b1, padding=2))
    x = F.relu(F.conv2d(x, conv_w2, conv_b2, padding=1))
    scores = flatten(x).mm(fc_w) + fc_b

    return scores


In [26]:
def three_layer_convnet_test():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)

    conv_w1 = torch.zeros((6, 3, 5, 5),dtype=dtype)
    conv_b1 = torch.zeros((6,))
    conv_w2 = torch.zeros((9, 6, 3, 3))
    conv_b2 = torch.zeros((9,))

    fc_w = torch.zeros((9*32*32, 10))
    fc_b = torch.zeros(10)

    scores = three_layer_convnet(x, [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b])
    print(scores.size())
three_layer_convnet_test()

torch.Size([64, 10])


In [33]:
def random_weight(shape):
    """
    Create random Tensors for weights; setting requires_grad=True means that we
    want to compute gradients for these Tensors during the backward pass.
    We use Kaiming normalization: sqrt(2 / fan_in)
    """
    if len(shape) == 2:
        fan_in = shape[0]
    else:
        fan_in = np.prod(shape[1:])

    w = torch.randn(shape, device=device, dtype=dtype) * np.sqrt(2. / fan_in)
    w.requires_grad= True
    return w

def zero_weight(shape):
    return torch.zeros(shape, device=device, dtype=dtype, requires_grad=True)

random_weight((3, 5))

tensor([[-0.2822,  0.3512,  1.7735, -0.2328, -1.7790],
        [ 0.5571,  1.4962,  0.4925, -0.5720,  1.1564],
        [ 0.8788, -0.0739,  1.3372,  0.6263,  0.5293]], requires_grad=True)

In [34]:
def check_accuracy_part2(loader, model_fn, params):
    """
    Check the accuracy of a classification model.

    Inputs:
    - loader: A DataLoader for the data split we want to check
    - model_fn: A function that performs the forward pass of the model,
      with the signature scores = model_fn(x, params)
    - params: List of PyTorch Tensors giving parameters of the model

    Returns: Nothing, but prints the accuracy of the model
    """
    split = 'val' if loader.dataset.train else 'test'
    print('Checking accuracy on the %s set' % split)
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.int64)
            scores = model_fn(x, params)
            _, preds = scores.max(1)
            num_correct += (preds==y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, acc))

In [35]:
def train_part2(model_fn, params, learning_rate):
    """
    Train a model on CIFAR-10.

    Inputs:
    - model_fn: A Python function that performs the forward pass of the model.
      It should have the signature scores = model_fn(x, params) where x is a
      PyTorch Tensor of image data, params is a list of PyTorch Tensors giving
      model weights, and scores is a PyTorch Tensor of shape (N, C) giving
      scores for the elements in x.
    - params: List of PyTorch Tensors giving weights for the model
    - learning_rate: Python scalar giving the learning rate to use for SGD

    Returns: Nothing
    """
    for t, (x, y) in enumerate(loader_train):
        x = x.to(device=device, dtype=dtype)
        y = y.to(device=device, dtype=torch.long)

        scores = model_fn(x, params)
        loss = F.cross_entropy(scores, y)

        loss.backward()
        
        with torch.no_grad():
            for w in params:
                w -= learning_rate * w.grad

                w.grad.zero_()

        if t % print_every == 0:
            print('Iteration %d, loss = %.4f' % (t, loss.item()))
            check_accuracy_part2(loader_val, model_fn, params)
            print()

In [36]:
hidden_layer_size = 4000
learning_rate = 1e-2

w1 = random_weight((3 * 32* 32, hidden_layer_size))
w2 = random_weight((hidden_layer_size, 10))

train_part2(two_layer_fc, [w1, w2], learning_rate)

Iteration 0, loss = 3.3838
Checking accuracy on the val set
Got 108 / 1000 correct (0.11%)

Iteration 100, loss = 2.3637
Checking accuracy on the val set
Got 360 / 1000 correct (0.36%)

Iteration 200, loss = 1.5350
Checking accuracy on the val set
Got 371 / 1000 correct (0.37%)

Iteration 300, loss = 2.0431
Checking accuracy on the val set
Got 336 / 1000 correct (0.34%)

Iteration 400, loss = 1.7830
Checking accuracy on the val set
Got 406 / 1000 correct (0.41%)

Iteration 500, loss = 1.9439
Checking accuracy on the val set
Got 424 / 1000 correct (0.42%)

Iteration 600, loss = 1.8237
Checking accuracy on the val set
Got 399 / 1000 correct (0.40%)

Iteration 700, loss = 2.1525
Checking accuracy on the val set
Got 433 / 1000 correct (0.43%)



In [38]:
learning_rate = 3e-3

channel_1 = 32
channel_2 = 16

conv_w1 = random_weight((channel_1, 3, 5, 5))
conv_b1 = zero_weight(channel_1)
conv_w2 = random_weight((channel_2, channel_1, 3, 3))
conv_b2 = zero_weight(channel_2)
fc_w = random_weight((channel_2 * 32 * 32, 10))
fc_b = zero_weight(1)

params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
train_part2(three_layer_convnet, params, learning_rate)

Iteration 0, loss = 4.0382
Checking accuracy on the val set
Got 86 / 1000 correct (0.09%)

Iteration 100, loss = 1.8815
Checking accuracy on the val set
Got 373 / 1000 correct (0.37%)

Iteration 200, loss = 1.5817
Checking accuracy on the val set
Got 396 / 1000 correct (0.40%)

Iteration 300, loss = 1.7287
Checking accuracy on the val set
Got 404 / 1000 correct (0.40%)

Iteration 400, loss = 1.7200
Checking accuracy on the val set
Got 445 / 1000 correct (0.45%)

Iteration 500, loss = 1.7015
Checking accuracy on the val set
Got 459 / 1000 correct (0.46%)

Iteration 600, loss = 1.6525
Checking accuracy on the val set
Got 469 / 1000 correct (0.47%)

Iteration 700, loss = 1.7230
Checking accuracy on the val set
Got 472 / 1000 correct (0.47%)



In [41]:
class TwoLayerFC(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        nn.init.kaiming_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        nn.init.kaiming_normal_(self.fc2.weight)

    def forward(self, x):
        x = flatten(x)
        scores = self.fc2(F.relu(self.fc1(x)))
        return scores
    
def test_TwoLayerFC():
    input_size = 50
    x = torch.zeros((64, input_size), dtype=dtype)
    model = TwoLayerFC(input_size, 42, 10)
    scores = model(x)
    print(scores.size())
test_TwoLayerFC()

torch.Size([64, 10])


In [46]:
class ThreeLayerConvNet(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, channel_1, 5, padding=2, bias=True)
        nn.init.kaiming_normal_(self.conv1.weight)
        self.conv2 = nn.Conv2d(channel_1, channel_2, 3, padding=1, bias=True)
        nn.init.kaiming_normal_(self.conv2.weight)
        self.fc = nn.Linear(channel_2 * 32 * 32, num_classes)
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = flatten(x)
        scores = self.fc(x)
        return scores
    
def test_ThreeLayerConvNet():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)
    model = ThreeLayerConvNet(in_channel=3, channel_1=12, channel_2=8, num_classes=10)
    scores = model(x)
    print(scores.size())
test_ThreeLayerConvNet()

torch.Size([64, 10])


In [54]:
def check_accuracy_part34(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds==y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [55]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for

    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [56]:
hidden_layer_size = 4000
learning_rate = 1e-2
model = TwoLayerFC(3 * 32 * 32, hidden_layer_size, 10)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

train_part34(model, optimizer)

Iteration 0, loss = 3.4179
Checking accuracy on validation set
Got 156 / 1000 correct (15.60)

Iteration 100, loss = 2.0242
Checking accuracy on validation set
Got 395 / 1000 correct (39.50)

Iteration 200, loss = 1.6251
Checking accuracy on validation set
Got 390 / 1000 correct (39.00)

Iteration 300, loss = 1.9791
Checking accuracy on validation set
Got 343 / 1000 correct (34.30)

Iteration 400, loss = 1.9126
Checking accuracy on validation set
Got 425 / 1000 correct (42.50)

Iteration 500, loss = 1.9340
Checking accuracy on validation set
Got 414 / 1000 correct (41.40)

Iteration 600, loss = 1.9275
Checking accuracy on validation set
Got 402 / 1000 correct (40.20)

Iteration 700, loss = 2.1010
Checking accuracy on validation set
Got 448 / 1000 correct (44.80)



In [61]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16

model = ThreeLayerConvNet(3, channel_1, channel_2, 10)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

train_part34(model, optimizer)

Iteration 0, loss = 3.2246
Checking accuracy on validation set
Got 76 / 1000 correct (7.60)

Iteration 100, loss = 2.0094
Checking accuracy on validation set
Got 374 / 1000 correct (37.40)

Iteration 200, loss = 1.5607
Checking accuracy on validation set
Got 399 / 1000 correct (39.90)

Iteration 300, loss = 1.6306
Checking accuracy on validation set
Got 419 / 1000 correct (41.90)

Iteration 400, loss = 1.6600
Checking accuracy on validation set
Got 452 / 1000 correct (45.20)

Iteration 500, loss = 1.7936
Checking accuracy on validation set
Got 472 / 1000 correct (47.20)

Iteration 600, loss = 1.6498
Checking accuracy on validation set
Got 477 / 1000 correct (47.70)

Iteration 700, loss = 1.7103
Checking accuracy on validation set
Got 491 / 1000 correct (49.10)



In [62]:
class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)
    
hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
    Flatten(),
    nn.Linear(3 * 32 * 32, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, 10),
)

optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

train_part34(model, optimizer)

Iteration 0, loss = 2.3573
Checking accuracy on validation set
Got 136 / 1000 correct (13.60)

Iteration 100, loss = 1.7764
Checking accuracy on validation set
Got 403 / 1000 correct (40.30)

Iteration 200, loss = 1.3513
Checking accuracy on validation set
Got 435 / 1000 correct (43.50)

Iteration 300, loss = 1.6495
Checking accuracy on validation set
Got 403 / 1000 correct (40.30)

Iteration 400, loss = 1.8281
Checking accuracy on validation set
Got 427 / 1000 correct (42.70)

Iteration 500, loss = 1.7685
Checking accuracy on validation set
Got 429 / 1000 correct (42.90)

Iteration 600, loss = 2.1002
Checking accuracy on validation set
Got 445 / 1000 correct (44.50)

Iteration 700, loss = 1.7316
Checking accuracy on validation set
Got 430 / 1000 correct (43.00)



In [63]:
channel_1 = 32
channel_2 = 16
learning_rate = 1e-2

model = nn.Sequential(
    nn.Conv2d(3, channel_1, 5, padding=2, bias=True),
    nn.ReLU(),
    nn.Conv2d(channel_1, channel_2, 3, padding=1),
    nn.ReLU(),
    Flatten(),
    nn.Linear(channel_2*32*32, 10)
)
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                    momentum=0.9, nesterov=True)

train_part34(model, optimizer)

Iteration 0, loss = 2.3111
Checking accuracy on validation set
Got 81 / 1000 correct (8.10)

Iteration 100, loss = 1.6125
Checking accuracy on validation set
Got 472 / 1000 correct (47.20)

Iteration 200, loss = 1.2376
Checking accuracy on validation set
Got 521 / 1000 correct (52.10)

Iteration 300, loss = 1.4588
Checking accuracy on validation set
Got 497 / 1000 correct (49.70)

Iteration 400, loss = 1.2304
Checking accuracy on validation set
Got 524 / 1000 correct (52.40)

Iteration 500, loss = 1.4531
Checking accuracy on validation set
Got 522 / 1000 correct (52.20)

Iteration 600, loss = 1.5504
Checking accuracy on validation set
Got 560 / 1000 correct (56.00)

Iteration 700, loss = 1.2581
Checking accuracy on validation set
Got 584 / 1000 correct (58.40)

