In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import transforms

In [29]:
# Build up the class for Vanilla FFN
class VanillaFFN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(VanillaFFN, self).__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=input_size // 2)
        self.fc2 = nn.Linear(in_features=input_size // 2, out_features=num_classes)
    
    def forward(self, x):
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x

In [30]:
# model sanity_check
sample_net = VanillaFFN(784, 10)
print(sample_net(torch.randn(64, 784)).shape)

torch.Size([64, 10])


In [22]:
# Download Dataset
train_data = datasets.MNIST(root='./data', 
                                   train=True, 
                                   transform=transforms.Compose([
                                       transforms.ToTensor()
                                   ]),
                                   download=True)
# Set up the data loader for our training data.
train_data_loader = DataLoader(dataset=train_data, 
                               shuffle=True,
                               batch_size=512)

In [12]:
# sanity check for train data loader.
for idx, data_tuple in enumerate(train_data_loader):
    print(data_tuple[0].shape, data_tuple[1].shape)
    break

torch.Size([512, 1, 28, 28]) torch.Size([512])


In [33]:
# check and setup device tensor computations should be assigned to
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [39]:
INPUT_SIZE = 28 * 28
NUM_CLASSES = 10

vanilla_fnn_object = VanillaFFN(INPUT_SIZE, NUM_CLASSES)
vanilla_fnn_object.to(device=device)
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vanilla_fnn_object.parameters(), lr=0.1)

for epoch in range(2):
    loss = 0
    for iteration, (data, targets) in enumerate(train_data_loader):
        # assign data and targets to device.
        data = data.to(device=device)
        targets = targets.to(device=device)
        
        # forward pass, compute losses, backpropagate.
        outputs = vanilla_fnn_object(data)
        loss = loss_criterion(outputs, targets)
        print("Epoch {}:, Iteration: {}, Loss: {}".format(epoch + 1, iteration + 1, loss))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
print("Learning Done!")

Epoch 1:, Iteration: 1, Loss: 2.317263603210449
Epoch 1:, Iteration: 2, Loss: 94.96660614013672
Epoch 1:, Iteration: 3, Loss: 213.5109100341797
Epoch 1:, Iteration: 4, Loss: 122.19387817382812
Epoch 1:, Iteration: 5, Loss: 120.39815521240234
Epoch 1:, Iteration: 6, Loss: 94.13008117675781
Epoch 1:, Iteration: 7, Loss: 74.5239486694336
Epoch 1:, Iteration: 8, Loss: 32.79188919067383
Epoch 1:, Iteration: 9, Loss: 15.758865356445312
Epoch 1:, Iteration: 10, Loss: 4.098278999328613
Epoch 1:, Iteration: 11, Loss: 2.6491663455963135
Epoch 1:, Iteration: 12, Loss: 4.110940933227539
Epoch 1:, Iteration: 13, Loss: 3.3856558799743652
Epoch 1:, Iteration: 14, Loss: 2.589475631713867
Epoch 1:, Iteration: 15, Loss: 2.0008091926574707
Epoch 1:, Iteration: 16, Loss: 1.8412837982177734
Epoch 1:, Iteration: 17, Loss: 1.5358116626739502
Epoch 1:, Iteration: 18, Loss: 1.553002119064331
Epoch 1:, Iteration: 19, Loss: 1.5871416330337524
Epoch 1:, Iteration: 20, Loss: 1.66726553440094
Epoch 1:, Iteration: 2

In [38]:
# set up testing data and data loader
test_data = datasets.MNIST(root='./data',
                                  train=False,
                                  download=True,
                                  transform=transforms.Compose([
                                      transforms.ToTensor()
                                  ]))

test_data_loader = DataLoader(dataset=test_data,
                              shuffle=True,
                              batch_size=512)

In [45]:
# Lets check the accuracy on the testing set.
num_correct = 0
num_samples = 0
vanilla_fnn_object.eval()    # to set in eval mode

with torch.no_grad():
    for iteration, (data, targets) in enumerate(test_data_loader):
        # assign data and targets to device.
        data = data.to(device=device)
        targets = targets.to(device=device)
        outputs = vanilla_fnn_object(data)
        
        values, index_of_max_value = outputs.max(1)
        num_correct += (index_of_max_value == targets).sum()
        num_samples += targets.size(0)
    
    print(num_correct, num_samples)
    print("Testing Accuracy of Vanilla FNN model on the FMNIST dataset: {}".format((num_correct.item() / num_samples) * 100))

tensor(7828, device='cuda:0') 10000
Testing Accuracy of Vanilla FNN model on the FMNIST dataset: 78.28
