# Q3 Please follow the PyTorch tutorial in the following link and train a simple feed-forward neural network.
(https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [None]:
# the CNN structure

# input layer: 1*28*28 image (MNIST); conv1 zero-pads by 2 on each side, equivalent to a 1*32*32 input with padding = 0
# conv1 layer: input_channel = 1, output_channel = 6, kernel_size = 5*5, stride = 1, padding = 2, parameter_name: C1
# output of conv1: 6*28*28
# activation function: relu
# MaxPool: filter_size = 2*2
# output of MaxPool: 6*14*14
# conv2 layer: input_channel = 6, output_channel = 16, kernel_size = 5*5, stride = 1, padding = 0, parameter_name:C2
# output of conv2: 16*10*10
# activation function: relu
# MaxPool: filter_size = 2*2
# output of MaxPool: 16*5*5
# fc1 layer: num_nodes = 120, parameter_name = W1, b1, size_W1 = (16*5*5, 120), size_b1 = (120,1)
# activation function: relu
# fc2 layer: num_nodes = 84, parameter_name = W2, b2, size_W2 = (120, 84), size_b2 = (84,1)
# activation function: relu
# fc3 layer: num_nodes = 10, parameter_name = W3, b3, size_W3 = (84, 10), size_b3 = (10,1)


# create CNN structure

In [49]:
class CNN(nn.Module):
    """LeNet-style convolutional classifier for 28x28 images.

    Layer stack (shapes for an input of shape ``(N, in_channels, 28, 28)``):

    * ``conv1``: 5x5 convolution, padding 2 -> ``(N, 6, 28, 28)``; ReLU;
      2x2 max-pool -> ``(N, 6, 14, 14)``
    * ``conv2``: 5x5 convolution, no padding -> ``(N, 16, 10, 10)``; ReLU;
      2x2 max-pool -> ``(N, 16, 5, 5)``
    * ``fc1`` (16*5*5 -> 120); ReLU; ``fc2`` (120 -> 84); ReLU;
      ``fc3`` (84 -> ``num_classes``)

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of scores produced per image.
    """

    def __init__(self, in_channels = 1, num_classes = 10):
        super().__init__()
        # Use the constructor arguments rather than literals so that
        # CNN(in_channels=3) or CNN(num_classes=5) actually take effect.
        self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 6, kernel_size = (5,5), stride = (1,1), padding = (2,2))
        # A single pooling module is shared by both convolutional stages (it has no parameters).
        self.pool = nn.MaxPool2d(kernel_size = (2,2), stride = (2,2))
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = (5,5), stride = (1,1), padding = (0,0))
        self.fc1 = nn.Linear(16*5*5, 120, bias = True)
        self.fc2 = nn.Linear(120, 84, bias = True)
        self.fc3 = nn.Linear(84, num_classes, bias = True)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(N, num_classes)`` for a batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension before the linear layers.
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the scores are returned as-is.
        x = self.fc3(x)
        return x

In [50]:
# Smoke test: a random batch of 64 single-channel 28x28 images should
# come out of a fresh network as one row of class scores per image.
model_test = CNN()
x = torch.randn(64, 1, 28, 28)
smoke_scores = model_test(x)
print(smoke_scores.shape)

torch.Size([64, 10])


In [51]:
# Every learnable tensor of the smoke-test network; convolutional layers
# carry a bias tensor in addition to their kernel weights.
params = list(model_test.parameters())
print(len(params))
for param in params:
    print(param.size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


# set device

In [52]:
# Prefer the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# hyperparameters

In [53]:
# Reproducibility: seed PyTorch's global RNG before any network is created
# or any loader is iterated, so a fresh "Restart & Run All" repeats the same
# weight initialisation and shuffling order.
seed = 42
torch.manual_seed(seed)

# Model shape
in_channels = 1        # channels per input image
num_classes = 10       # scores produced per image

# Optimisation
learning_rate = 0.001  # step size handed to the optimizer
batch_size = 64        # samples per DataLoader batch
num_epochs = 2         # full passes over the training set

# load data

In [54]:
# MNIST is downloaded into 'dataset/' on first use; ToTensor turns each image into a tensor.
to_tensor = transforms.ToTensor()

train_dataset = datasets.MNIST(root = 'dataset/', train = True, transform = to_tensor, download = True)
# Shuffle the training batches every epoch.
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

test_dataset = datasets.MNIST(root = 'dataset/', train = False, transform = to_tensor, download = True)
# Accuracy does not depend on sample order, so the test set is left unshuffled
# to keep evaluation deterministic.
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

# initialize network

In [55]:
# Build the network from the hyperparameters declared above (instead of
# silently relying on the constructor defaults) and move it to the chosen device.
model = CNN(in_channels = in_channels, num_classes = num_classes).to(device)

# loss and optimizer

In [56]:
# Loss applied to the network's class scores and the integer target labels.
criterion = nn.CrossEntropyLoss()
# Adam updates every learnable tensor of the model with the configured step size.
optimizer = optim.Adam(params = model.parameters(), lr = learning_rate)

# train network

In [57]:
# Make the training mode explicit so the cell behaves the same when re-run,
# whatever mode the model was left in.
model.train()

for epoch in range(num_epochs):
    loss_sum = 0.0      # sum of per-sample losses seen this epoch
    samples_seen = 0

    for data, targets in train_loader:
        data = data.to(device = device)
        targets = targets.to(device = device)

        # forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # backward pass: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size
        # to get an exact per-sample mean over the epoch.
        loss_sum += loss.item() * data.size(0)
        samples_seen += data.size(0)

    print(f'Epoch {epoch + 1}/{num_epochs}: mean training loss {loss_sum / max(samples_seen, 1):.4f}')
        

# check accuracy on the training & testing data to see how well our model performs

In [58]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over all batches of ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. If it exposes
            ``loader.dataset.train``, that flag selects the "training" or
            "testing" header line; otherwise a generic header is printed.
        model: ``nn.Module`` mapping a batch of inputs to scores of shape
            ``(batch, num_classes)``; the predicted class is the index of the
            largest score in each row.

    Returns:
        None. The header and the ``Got correct/total`` summary are printed.

    The model is put in eval mode while scoring and is returned to whatever
    mode it was in before the call, even if scoring raises.
    """
    # Not every dataset has a ``train`` flag, so look it up defensively.
    split_flag = getattr(getattr(loader, 'dataset', None), 'train', None)
    if split_flag is None:
        print('Checking accuracy on data')
    elif split_flag:
        print('Checking accuracy on training data')
    else:
        print('Checking accuracy on testing data')

    # Score on the device the model's parameters live on rather than on a
    # notebook-level global; a parameter-free model leaves batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(target_device)
                    y = y.to(target_device)
                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train mode.
        model.train(was_training)

    # An empty loader reports 0.00 instead of dividing by zero.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')

In [59]:
# Report accuracy on the training split first, then on the held-out test split.
for eval_loader in (train_loader, test_loader):
    check_accuracy(eval_loader, model)

Checking accuracy on training data
Got 58832/60000 with accuracy 98.05
Checking accuracy on testing data
Got 9815/10000 with accuracy 98.15
