# ECE 117 Assignment 3: Part 1
Training an MNIST model. Goal is to achieve 90+% accuracy.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import tqdm

import matplotlib.pyplot as plt

In [2]:
# check if the GPUs are available to use
# if you get "Using CUDA device", then you are using GPUs
# you can select "Runtime -> Change runtime type" on Colab to change your GPU instance

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [3]:
# prepare the datasets, in part1, we use MNIST
# the following code will download MNIST dataset

transform = transforms.Compose([transforms.ToTensor()])

train_data = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_data = datasets.MNIST("./data", train=False, download=True, transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 51.4MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.80MB/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 13.8MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 13.7MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [4]:
# load the dataset via PyTorch dataloader, batch_size is set as 32

train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False)

In [5]:
# the baseline model is implemented as a referral
# in this baseline:
# --- CNN ----
# --- layer: [4] ---
# --- 2 convolution layers + 2 fully connected (fc) layers ---
# Dropout(): regulation of outputs to solve overfitting, it randomly selects a portion of values after activations in model training
# relu(): activation function
# nn.Linear(in_features, out_features, bias=True)
# nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...)
# max_pool2d(): maximun pooling layer is for down sampling

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [28]:
model = CNN().to(device)

i_max = 512 # TODO: choose a value here: i_max means total iteration of training

criterion =  torch.nn.CrossEntropyLoss() # TODO: choose a value here: criterion is a loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001) # TODO:  choose a value here: is a optimizer

In [29]:
@torch.no_grad()
def get_accuracy(model, data_loader, device):
    correct = 0
    total = 0

    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, dim=1)

        total += labels.shape[0]
        correct += int((predicted == labels).sum())

    return correct / total

In [30]:
progress = tqdm.tqdm(total=i_max, desc="Training")

i = 0
while i < i_max:
    for inputs, labels in train_loader:
      # TODO: implement the training function, this includes a Forward Pass and a Backward Pass, we divide the tasks into smaller subtasks for you

        # ============ Forward Pass ==========
        # model's training forward pass

        # move the images and labels to the GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # get the output of the model
        outputs = model(inputs)

        # calculate the loss with respect to the output
        loss = criterion(outputs, labels)

        # ============= Backwards Pass ===========
        # zero the optimizer's gradient
        optimizer.zero_grad()

        # perform backpropagation on the loss function
        loss.backward()

        # call .step() on the optimizer
        optimizer.step()

      # End of TODO

        i += 1
        progress.update(1)

        # print log every 100 iterations
        if i % 100 == 0:
            train_acc = get_accuracy(model, train_loader, device)
            test_acc = get_accuracy(model, test_loader, device)
            progress.write(f"Iter {i} Train Acc {train_acc:.4f} Test Acc {test_acc:.4f}")

        if i >= i_max:
            break

torch.save(model.state_dict(), "./model.pth")



Training:   0%|          | 0/512 [00:00<?, ?it/s][A[A

Training: 100%|██████████| 256/256 [01:11<00:00,  3.59it/s]


Training:   7%|▋         | 36/512 [00:00<00:03, 158.14it/s][A[A

Training:  10%|█         | 53/512 [00:00<00:02, 160.74it/s][A[A

Training:  14%|█▎        | 70/512 [00:00<00:02, 160.34it/s][A[A

Training:  17%|█▋        | 87/512 [00:00<00:02, 159.53it/s][A[A

[A[A
[A

Training:  20%|█▉        | 100/512 [00:10<00:02, 159.53it/s][A[A
Training:  47%|████▋     | 300/640 [05:10<00:09, 34.23it/s][A

Training:  20%|██        | 103/512 [00:10<01:25,  4.81it/s] [A[A

Training:  24%|██▎       | 121/512 [00:10<00:54,  7.20it/s][A[A

Iter 100 Train Acc 0.8607 Test Acc 0.8631




Training:  27%|██▋       | 138/512 [00:10<00:36, 10.32it/s][A[A

Training:  30%|███       | 155/512 [00:10<00:24, 14.53it/s][A[A

Training:  34%|███▎      | 172/512 [00:11<00:16, 20.03it/s][A[A

Training:  37%|███▋      | 189/512 [00:11<00:11, 27.41it/s][A[A

[A[A
[A

Training:  39%|███▉      | 200/512 [00:20<00:11, 27.41it/s][A[A
Training:  47%|████▋     | 300/640 [05:20<00:09, 34.23it/s][A

Training:  40%|████      | 205/512 [00:20<01:02,  4.94it/s][A[A

Training:  42%|████▏     | 217/512 [00:20<00:46,  6.40it/s][A[A

Iter 200 Train Acc 0.9139 Test Acc 0.9166




Training:  45%|████▍     | 229/512 [00:21<00:33,  8.40it/s][A[A

Training:  47%|████▋     | 241/512 [00:21<00:24, 11.15it/s][A[A

Training:  50%|████▉     | 254/512 [00:21<00:17, 15.18it/s][A[A

Training:  52%|█████▏    | 266/512 [00:21<00:12, 19.86it/s][A[A

Training:  54%|█████▍    | 278/512 [00:21<00:09, 25.90it/s][A[A

Training:  57%|█████▋    | 290/512 [00:21<00:06, 33.03it/s][A[A

[A[A
[A

Training:  59%|█████▊    | 300/512 [00:31<00:06, 33.03it/s][A[A
Training:  47%|████▋     | 300/640 [05:31<00:09, 34.23it/s][A

Training:  59%|█████▉    | 301/512 [00:31<00:54,  3.86it/s][A[A

Training:  62%|██████▏   | 318/512 [00:31<00:31,  6.11it/s][A[A

Iter 300 Train Acc 0.9307 Test Acc 0.9364




Training:  65%|██████▌   | 334/512 [00:31<00:19,  9.04it/s][A[A

Training:  69%|██████▊   | 351/512 [00:31<00:12, 13.28it/s][A[A

Training:  71%|███████▏  | 366/512 [00:31<00:08, 18.24it/s][A[A

Training:  75%|███████▍  | 382/512 [00:31<00:05, 25.16it/s][A[A

Training:  78%|███████▊  | 399/512 [00:31<00:03, 34.60it/s][A[A

[A[A
[A

Training:  78%|███████▊  | 400/512 [00:41<00:03, 34.60it/s][A[A
Training:  47%|████▋     | 300/640 [05:41<00:09, 34.23it/s][A

Training:  81%|████████  | 414/512 [00:41<00:20,  4.69it/s][A[A

Training:  84%|████████▍ | 431/512 [00:41<00:11,  6.80it/s][A[A

Iter 400 Train Acc 0.9391 Test Acc 0.9459




Training:  87%|████████▋ | 447/512 [00:41<00:06,  9.50it/s][A[A

Training:  90%|█████████ | 463/512 [00:42<00:03, 13.24it/s][A[A

Training:  94%|█████████▎| 479/512 [00:42<00:01, 18.22it/s][A[A

Training:  97%|█████████▋| 495/512 [00:42<00:00, 24.77it/s][A[A

[A[A
[A

Training:  98%|█████████▊| 500/512 [00:52<00:00, 24.77it/s][A[A
Training:  47%|████▋     | 300/640 [05:52<00:09, 34.23it/s][A

Training: 100%|█████████▉| 511/512 [00:52<00:00,  4.65it/s][A[A

Iter 500 Train Acc 0.9466 Test Acc 0.9477


In [31]:
model.load_state_dict(torch.load("./model.pth"))

model.eval()

  model.load_state_dict(torch.load("./model.pth"))


CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [32]:
# Test your model, the goal is to make the accuracy should be greater than 90%

correct = 0
total = 0

with torch.no_grad():
    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)

        pred = model(image)
        _, pred = torch.max(pred, dim=1)

        total += label.shape[0]
        correct += int((pred == label).sum())

    print(f"Accuracy: {correct / total * 100:.2f}%")

Accuracy: 96.52%
