<a href="https://colab.research.google.com/github/ellyanalinden/MNIST_comparinghiddenlayer/blob/master/MNIST_2_hidden_layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Steps**

1. Load Dataset
2. Make Dataset Iterable
3. ***Create Model Class. Change this part***
4. Instantiate Model Class
5. Instantiate Loss Class
6. Instantiate Optimizer Class
7. Train Model

In [0]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
# Step 1: Loading MNIST Train Dataset
import torch #for tensor
import torch.nn as nn #for model
import torchvision.transforms as transforms #for dataset loading
import torchvision.datasets as dsets #for dataset loading
from torch.autograd import Variable #to instantiate variable

In [3]:
train_dataset = dsets.MNIST(root='./data',
                           train = True,
                           transform=transforms.ToTensor(),
                           download=True)

test_dataset = dsets.MNIST(root='./data',
                          train = False,
                          transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [0]:
# Step 2: Make dataset iterable

batch_size = 100 #every iteration, we feed 100 images to the model at one time
n_iters = 6000 #1 epoch 6000 iterations

#1 epoch is going through training dataset once
num_epochs = n_iters/ (len(train_dataset) / batch_size) 
#num_epochs = 6000/(60000/100) = 10 epochs, meaning we're going through the dataset 10 times
num_epochs = int(num_epochs)

#Make dataset iterable
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          shuffle=True) #after single epoch, we shuffle the image

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

In [0]:
# Step 3: Create Model Class
class TwoHiddenLayerModel(nn.Module):
  #create classes instantiate object
  def __init__(self, input_size, hidden_size, num_classes):
    super(TwoHiddenLayerModel, self).__init__()
    # Linear function 1: 784 --> 100
    self.fc1 = nn.Linear(input_dim, hidden_dim)
    # Non-linearity 1
    self.relu = nn.ReLU()
    
    # Linear function 2: 100 --> 100
    self.fc2 = nn.Linear(input_dim, hidden_dim)
    # Non-linearity 1
    self.relu2 = nn.ReLU()
    
    # Linear function 3 (readout): 100 -->10
    self.fc3 = nn.Linear(hidden_dim, output_dim)
    
  #run classes  
  def forward(self, x):
    # Linear function 1
    out = self.fc1(x)
    # Non-linearity 1
    out = self.relu(out)
    
    # Linear function 2
    out = self.fc2(x)
    # Non-linearity 2
    out = self.relu2(out)
    
    # Linear function 3 (readout)
    out = self.fc3(out)
    return out

**Instantiate Model Class**

1. Input dimension: 784
    Image size = 28 x 28 = 784
2. Output dimension: 10
3. Hidden dimension: 100 (can be any number)

In [0]:
# Step 4: Instantiate Model Class
input_dim = 28*28
hidden_dim = 100
output_dim = 10

model = TwoHiddenLayerModel(input_dim, hidden_dim, output_dim)

In [0]:
# Step 5: Instantiate Loss Class
# Softmax included in CEL
criterion = nn.CrossEntropyLoss()

In [0]:
# Step 6: Instantiate Optimizer Class
# Goal to update model's parameter at every iteration (after 100 images)
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [9]:
# Access the parameters
print(model.parameters())

print(len(list(model.parameters())))

# FC 1 parameters
print(list(model.parameters())[0].size())

# FC 1 Bias Parameters
print(list(model.parameters())[1].size())

# FC 2 Parameters
print (list(model.parameters())[2].size())

# FC 2 Bias Parameters
print (list(model.parameters())[3].size())

# FC 3 Parameters
print (list(model.parameters())[4].size())

# FC 3 Bias Parameters
print (list(model.parameters())[5].size())


<generator object Module.parameters at 0x7f618ab650f8>
6
torch.Size([100, 784])
torch.Size([100])
torch.Size([100, 784])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])


**Training Model Steps**
1. Convert inputs/labels to variables
2. Clear gradient buffers
3. Get output given inputs
4. Get loss
5. Get gradients w.r.t. parameters
6. Update parameters using gradients
7. REPEAT

In [10]:
# Training model
iter = 0
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Load images as Variable
    images = Variable(images.view(-1, 28*28))
    labels = Variable(labels)
    
    # Clear gradients
    optimizer.zero_grad()
    
    # Forward pass to get output/logits
    outputs = model(images)
    
    # Calculate Loss: softmax --> CEL
    loss = criterion(outputs, labels)
    
    # Getting gradients
    loss.backward()
    
    # Updating parameters
    optimizer.step()
    
    iter+=1
    
    if iter % 500 == 0:
      # Calculate Accuracy
      correct = 0
      total = 0
      # Iterate through test dataset
      for images, labels in test_loader:
        # Load images to a Torch Variable
        images = Variable(images.view(-1, 28*28))
        
        # Forward pass only to get logits/output
        outputs = model(images)
        
        # Get prediction from the maximum value
        _, predicted = torch.max(outputs.data, 1)
        
        # Total number of labels
        total += labels.size(0)
        
        # Total correct predictions
        correct += (predicted == labels).sum()
        
      accuracy = 100* correct/ total
      
      # Print Loss
      print('Iteration {}. Loss: {}. Accuracy: {}'.format(iter, loss.data[0],accuracy))



Iteration 500. Loss: 0.3749723732471466. Accuracy: 91
Iteration 1000. Loss: 0.28421130776405334. Accuracy: 93
Iteration 1500. Loss: 0.2760450839996338. Accuracy: 93
Iteration 2000. Loss: 0.1538916975259781. Accuracy: 94
Iteration 2500. Loss: 0.14108964800834656. Accuracy: 95
Iteration 3000. Loss: 0.1378956437110901. Accuracy: 95
Iteration 3500. Loss: 0.113124780356884. Accuracy: 95
Iteration 4000. Loss: 0.16928768157958984. Accuracy: 96
Iteration 4500. Loss: 0.11813310533761978. Accuracy: 96
Iteration 5000. Loss: 0.1161489263176918. Accuracy: 96
Iteration 5500. Loss: 0.2259005755186081. Accuracy: 96
Iteration 6000. Loss: 0.07550930976867676. Accuracy: 96
