In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import MNIST

In [2]:
def resize_batch(imgs):
    imgs = imgs.reshape((-1, 28, 28, 1))
    resized_imgs = np.zeros((imgs.shape[0], 32, 32, 1))
    
    for i in range(imgs.shape[0]):
        # add 2 pixels of padding to the top, bottom, left, and right
        resized_imgs[i, ..., 0] = np.pad(imgs[i, ..., 0], ((2, 2), (2, 2)), mode='constant')
    
    return resized_imgs

root = 'data'
classes = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

# load data and select the classes of interest
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

# load data
labels = ['train', 'test']
dataset = {'train': MNIST(root=root, train=True, download=True, transform=transform), 
            'test': MNIST(root=root, train=False, download=True, transform=transform)}

# resize images to (32, 32)
for label in labels:
    dataset[label].data = resize_batch(dataset[label].data.numpy())

# create dicts for storing sampled data
X = {'train': [], 'test': []}
y = {'train': [], 'test': []}

# for the training and test datasets
for label in labels:
    # sample 600 points for each class
    for c in classes:
        subset_idx = torch.isin(dataset[label].targets, torch.as_tensor(c))
        # convert to tensor
        X[label].append(torch.tensor(dataset[label].data[subset_idx][:600]).float())
        y[label].append(torch.tensor(dataset[label].targets[subset_idx][:600]).long()) 
    
    # concatenate along the first dimension
    X[label] = torch.cat(X[label], dim=0)
    y[label] = torch.cat(y[label], dim=0)    
    
print(X['train'].shape)
print(X['test'].shape)
print(y['train'].shape)
print(y['test'].shape)

  y[label].append(torch.tensor(dataset[label].targets[subset_idx][:600]).long())


torch.Size([6000, 32, 32, 1])
torch.Size([6000, 32, 32, 1])
torch.Size([6000])
torch.Size([6000])


In [3]:
# VGG11 model
class VGG11(nn.Module):
    def __init__(self):
        super().__init__()
        # 1. Conv(001, 064, 3, 1, 1) - BatchNorm(064) - ReLU - MaxPool(2, 2)
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.BatchNorm1 = nn.BatchNorm2d(64)
        self.act1 = nn.ReLU()
        self.maxPool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 2. Conv(064, 128, 3, 1, 1) - BatchNorm(128) - ReLU - MaxPool(2, 2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.BatchNorm2 = nn.BatchNorm2d(128)
        self.act2 = nn.ReLU()
        self.maxPool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 3. Conv(128, 256, 3, 1, 1) - BatchNorm(256) - ReLU
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.BatchNorm3 = nn.BatchNorm2d(256)
        self.act3 = nn.ReLU()
        
        # 4. Conv(256, 256, 3, 1, 1) - BatchNorm(256) - ReLU - MaxPool(2, 2)
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.BatchNorm4 = nn.BatchNorm2d(256)
        self.act4 = nn.ReLU()
        self.maxPool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 5. Conv(256, 512, 3, 1, 1) - BatchNorm(512) - ReLU
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.BatchNorm5 = nn.BatchNorm2d(512)
        self.act5 = nn.ReLU()
        
        # 6. Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU - MaxPool(2, 2)
        self.conv6 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.BatchNorm6 = nn.BatchNorm2d(512)
        self.act6 = nn.ReLU()
        self.maxPool6 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 7. Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU
        self.conv7 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.BatchNorm7 = nn.BatchNorm2d(512)
        self.act7 = nn.ReLU()
        
        # 8. Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU - MaxPool(2, 2)
        self.conv8 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.BatchNorm8 = nn.BatchNorm2d(512)
        self.act8 = nn.ReLU()
        self.maxPool8 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # 9. Linear(0512, 4096) - ReLU - Dropout(0.5)
        self.hidden9 = nn.Linear(512, 4096)
        self.act9 = nn.ReLU()
        self.drop9 = nn.Dropout(0.5)
        
        # 10. Linear(4096, 4096) - ReLU - Dropout(0.5)
        self.hidden10 = nn.Linear(4096, 4096)
        self.act10 = nn.ReLU()
        self.drop10 = nn.Dropout(0.5)
        
        # 11. Linear(4096, 10)
        self.output = nn.Linear(4096, 10)
        
    def forward(self, x):
        # adjust input shape: reshape using view
        x = x.view(x.size(0), 1, 32, 32)
    
        # convolutional layers
        x = self.maxPool1(self.act1(self.BatchNorm1(self.conv1(x))))
        x = self.maxPool2(self.act2(self.BatchNorm2(self.conv2(x))))
        x = self.act3(self.BatchNorm3(self.conv3(x)))
        x = self.maxPool4(self.act4(self.BatchNorm4(self.conv4(x))))
        x = self.act5(self.BatchNorm5(self.conv5(x)))
        x = self.maxPool6(self.act6(self.BatchNorm6(self.conv6(x))))
        x = self.act7(self.BatchNorm7(self.conv7(x)))
        x = self.maxPool8(self.act8(self.BatchNorm8(self.conv8(x))))
        
        # flatten output of convolutional layers
        x = x.view(x.size(0), -1)
        
        # fully connected layers
        x = self.drop9(self.act9(self.hidden9(x)))
        x = self.drop10(self.act10(self.hidden10(x)))
        x = self.output(x)
        return x

In [None]:
# instantiate the model, loss function, and optimizer
model = VGG11()
# print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

n_epochs = 10 
batch_size = 64

# grabbing training data
x = X['train']
y = y['train']

# gradient descent to train
for epoch in range(n_epochs):
    start = time.time()
    for i in range(0, len(x), batch_size):
        x_batch = x[i:i+batch_size]
        y_batch = y[i:i+batch_size]
        
        y_pred = model(x_batch)
        loss = loss_fn(y_pred, y_batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    end = time.time()
    print(f'Epoch [{epoch + 1}/{n_epochs}], Loss: {loss.item():.4f}, Time elapsed: {(end - start):.4f}s')

# compute accuracy
y_pred = model(x)
_, predicted = torch.max(y_pred, dim=1)
accuracy = (predicted == y).float().mean()
print(f'Accuracy = {accuracy:.4f}')

Epoch [1/10], Loss: 2.4606, TIme elapsed: 57.3468s
Epoch [2/10], Loss: 2.3006, TIme elapsed: 64.1294s
Epoch [3/10], Loss: 2.3402, TIme elapsed: 59.3136s
Epoch [4/10], Loss: 2.3610, TIme elapsed: 70.9664s
Epoch [5/10], Loss: 2.3690, TIme elapsed: 76.8668s
Epoch [6/10], Loss: 2.3719, TIme elapsed: 57.4869s
Epoch [7/10], Loss: 2.3729, TIme elapsed: 60.7021s
Epoch [8/10], Loss: 2.3733, TIme elapsed: 60.0297s
Epoch [9/10], Loss: 2.3734, TIme elapsed: 61.4850s
Epoch [10/10], Loss: 2.3734, TIme elapsed: 64.7037s
