# PyTorch Step by Step: One-Hidden-Layer Feedforward Networks

A stack of purely linear layers can only represent linear functions. By inserting a non-linear activation between the linear layers, the network can represent both linear and non-linear functions well.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as dsets

In [2]:
# Training split of MNIST. Images are converted to tensors when they are read,
# and download=True asks torchvision to fetch the files into ./data if needed.
train_dataset = dsets.MNIST(
    root="./data",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

In [3]:
# Test split of MNIST, read from the same ./data directory as the training split.
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [4]:
# Mini-batch size and the total number of parameter updates to run.
batch_size = 100
n_iters = 3000

# One epoch takes len(train_dataset) / batch_size iterations, so dividing the
# iteration budget by that figure gives the epoch count (truncated to an int).
num_epochs = int(n_iters / (len(train_dataset) / batch_size))
num_epochs

5

In [31]:
# Training batches are shuffled; test batches are served in dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Number of mini-batches in one pass over the training loader.
len(train_loader)

600

In [17]:
class FFNModel(nn.Module):
    """Feed-forward classifier with a single sigmoid-activated hidden layer.

    The network computes ``fc2(sigmoid(fc1(x)))`` and returns the raw output
    of ``fc2``; no softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the activation between them.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output scores produced per input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the ``fc2`` scores for the input batch ``x``."""
        hidden = self.sigmoid(self.fc1(x))
        return self.fc2(hidden)
    


In [18]:
# Inputs are 28*28-element rows (the training loop flattens every image to
# that size); the hidden layer has 100 units and the model emits 10 scores.
input_size, hidden_size, num_classes = 28 * 28, 100, 10

model = FFNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)

In [19]:
# Print the module tree to confirm the layer names and sizes.
print(model)

FFNModel(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


In [20]:
# Loss used for training; it is applied to the model's raw output scores and
# the batch labels.
criterion = nn.CrossEntropyLoss()

In [21]:
# Step size used by the SGD optimizer below.
learning_rate = 0.1

# SGD over every parameter registered on the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
)

In [22]:
# Train with mini-batch SGD and report test-set accuracy every 500 iterations.
# A smaller interval gives more frequent feedback but spends more time on
# evaluation; a larger one is faster but reports less often.
#
# The counter is called `iteration` (not `iter`) so the built-in iter() stays
# usable in later cells, e.g. next(iter(train_loader)).
iteration = 0
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image in the batch into a 28*28-element row.
        # Tensors track gradients directly, so no Variable wrapper is needed.
        images = images.view(-1, 28 * 28)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0

            # eval() is a no-op for this architecture, but keeps the loop
            # correct if dropout or batch-norm layers are added later.
            model.eval()
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28)
                    test_outputs = model(test_images)

                    # The predicted class is the index of the largest score.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() turns the 0-dim count tensor into a Python int,
                    # so `accuracy` below is a plain float.
                    correct += (predicted == test_labels).sum().item()
            model.train()

            accuracy = 100 * correct / total

            # loss.item() replaces loss.data[0], which raises IndexError on
            # 0-dim tensors in current PyTorch releases.
            print("Iteration: {}. Loss: {}. Accuracy: {}".format(iteration, loss.item(), accuracy))

Iteration: 500. Loss: 0.7699686288833618. Accuracy: 85.67
Iteration: 1000. Loss: 0.39298808574676514. Accuracy: 89.24
Iteration: 1500. Loss: 0.40605971217155457. Accuracy: 90.37
Iteration: 2000. Loss: 0.3932380676269531. Accuracy: 91.07
Iteration: 2500. Loss: 0.23703166842460632. Accuracy: 91.64
Iteration: 3000. Loss: 0.3743129074573517. Accuracy: 91.91


### More on Deep Neural Networks

In this section we build a deeper neural network, i.e. one with more hidden layers, and we switch the activation to ReLU. ReLU is the most common choice in deep learning because it is cheap to compute and suffers less from vanishing gradients than the sigmoid. As we will see, the only part of the code that changes is the definition of the model.

In [24]:
class FFNModel(nn.Module):
    """Feed-forward classifier with three ReLU-activated hidden layers.

    Every hidden layer has ``hidden_size`` units. ``forward`` returns the raw
    output of the last linear layer (``fc4``); no softmax is applied inside
    the model. This definition reuses the name of the sigmoid model defined
    earlier in the notebook and therefore replaces it from this cell onwards.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create four linear layers with a ReLU after each of the first three.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of units in each hidden layer.
            num_classes: Number of output scores produced per input row.
        """
        super().__init__()
        # Attribute names and assignment order determine the names and order
        # shown by print(model), so both are kept as they were.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Pass ``x`` through the layers in order and return the ``fc4`` scores."""
        pipeline = (
            self.fc1, self.relu1,
            self.fc2, self.relu2,
            self.fc3, self.relu3,
            self.fc4,
        )
        out = x
        for layer in pipeline:
            out = layer(out)
        return out

In [25]:
# Inputs are 28*28-element rows (the training loop flattens every image to
# that size); each hidden layer has 100 units and the model emits 10 scores.
input_size, hidden_size, num_classes = 28 * 28, 100, 10

model = FFNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)

In [26]:
# Print the module tree to confirm the layer names and sizes of the deeper model.
print(model)

FFNModel(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=100, out_features=100, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=100, out_features=10, bias=True)
)


In [27]:
# Loss used for training; it is applied to the model's raw output scores and
# the batch labels.
criterion = nn.CrossEntropyLoss()

In [28]:
# Step size used by the SGD optimizer below.
learning_rate = 0.1

# A fresh optimizer is required here because `model` now refers to a new
# network with its own parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
)

In [29]:
# Train with mini-batch SGD and report test-set accuracy every 500 iterations.
# A smaller interval gives more frequent feedback but spends more time on
# evaluation; a larger one is faster but reports less often.
#
# The counter is called `iteration` (not `iter`) so the built-in iter() stays
# usable in later cells, e.g. next(iter(train_loader)).
iteration = 0
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image in the batch into a 28*28-element row.
        # Tensors track gradients directly, so no Variable wrapper is needed.
        images = images.view(-1, 28 * 28)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 500 == 0:
            correct = 0
            total = 0

            # eval() is a no-op for this architecture, but keeps the loop
            # correct if dropout or batch-norm layers are added later.
            model.eval()
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28)
                    test_outputs = model(test_images)

                    # The predicted class is the index of the largest score.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() turns the 0-dim count tensor into a Python int,
                    # so `accuracy` below is a plain float.
                    correct += (predicted == test_labels).sum().item()
            model.train()

            accuracy = 100 * correct / total

            # loss.item() replaces loss.data[0], which raises IndexError on
            # 0-dim tensors in current PyTorch releases.
            print("Iteration: {}. Loss: {}. Accuracy: {}".format(iteration, loss.item(), accuracy))

Iteration: 500. Loss: 0.25188133120536804. Accuracy: 90.64
Iteration: 1000. Loss: 0.23892641067504883. Accuracy: 94.27
Iteration: 1500. Loss: 0.15743722021579742. Accuracy: 95.09
Iteration: 2000. Loss: 0.08909279108047485. Accuracy: 96.03
Iteration: 2500. Loss: 0.05477692559361458. Accuracy: 96.39
Iteration: 3000. Loss: 0.13530467450618744. Accuracy: 96.85
