In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
#importing necessary packages
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

In [0]:
#importing transforms and datasets from torchvision
from torchvision.transforms import transforms
import torchvision.datasets as datasets

In [0]:
#Training split of the MNIST dataset shipped with torchvision.
#The files are downloaded into ./data when they are not already there, and
#ToTensor is applied to every image.
#`train` is a boolean flag: the string 'True' only behaved correctly because
#any non-empty string is truthy, so pass the real boolean.
train_data = datasets.MNIST(root='./data', 
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

In [0]:
#Held-out MNIST split (train=False) read from the same ./data root,
#with ToTensor as the per-image transform.
test_data = datasets.MNIST(
    './data',
    train=False,
    transform=transforms.ToTensor(),
)

In [0]:
#Sanity check: print the dimensions of the tensor stored on the training dataset.
#NOTE(review): `.train_data` is the attribute name used by the torchvision
#release this notebook installs -- confirm it still exists before upgrading.
print(train_data.train_data.size())

In [0]:
#Sanity check: print the dimensions of the tensor stored on the testing dataset.
#NOTE(review): `.test_data` is the attribute name used by the torchvision
#release this notebook installs -- confirm it still exists before upgrading.
print(test_data.test_data.size())

In [0]:
#Hyperparameters shared by the cells below:
#  batch_size - samples per mini-batch handed out by the data loaders
#  iterations - iteration figure (the training cell resets this name to 0)
#  epochs     - number of full passes over the training data
batch_size, iterations, epochs = 200, 4000, 10

In [0]:
#Mini-batch iterator over the training set; shuffling is switched on.
train_data_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
)

In [0]:
#Mini-batch iterator over the testing set; shuffling is switched off.
test_data_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [0]:
#create CNN model class
class CNN(nn.Module):
  """Small convolutional classifier for 1-channel 28*28 images.

  Pipeline: conv(1->16, 5*5) -> ReLU -> maxpool(2)
            -> conv(16->32, 5*5) -> ReLU -> maxpool(2)
            -> flatten -> linear(32*7*7 -> 10).

  `forward` has to be a method of this class: nn.Module dispatches
  `model(x)` to `self.forward(x)`, so a module-level function is never used.
  """

  def __init__(self):
    super(CNN, self).__init__()

    #Convolution layer 1 (2d convolution)
    #input: 1 channel; output: 16 feature maps
    #each filter is 5*5 (kernel_size); stride=1 and padding=2 keep the
    #spatial size unchanged (28*28 stays 28*28)
    self.cnn_conv1 = nn.Conv2d(in_channels=1,
                               out_channels=16,
                               kernel_size=5,
                               stride=1,
                               padding=2)
    #activation function: keeps x when positive, otherwise 0
    self.relu1 = nn.ReLU()

    #maxpooling layer 1; 2*2 window halves height and width (28*28 -> 14*14)
    self.maxpool1 = nn.MaxPool2d(kernel_size=2)

    #Convolution layer 2 (2d convolution)
    #input: the 16 pooled feature maps; output: 32 feature maps
    #same 5*5 kernel, stride=1, padding=2, so 14*14 stays 14*14
    self.cnn_conv2 = nn.Conv2d(in_channels=16,
                               out_channels=32,
                               kernel_size=5,
                               stride=1,
                               padding=2)
    #activation function: keeps x when positive, otherwise 0
    self.relu2 = nn.ReLU()

    #maxpooling layer 2; 2*2 window halves height and width (14*14 -> 7*7)
    self.maxpool2 = nn.MaxPool2d(kernel_size=2)

    #fully connected layer on the flattened features:
    #32 feature maps of 7*7 each -> 10 outputs (digits 0 to 9)
    self.fc1 = nn.Linear(32*7*7, 10)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: batch laid out as (batch, 1, 28, 28), as implied by the layer sizes
        set up in __init__.

    Returns:
      Batch of 10 scores per image, shape (batch, 10).
    """
    #Convolution1
    out = self.cnn_conv1(x)
    out = self.relu1(out)

    #Maxpool1
    out = self.maxpool1(out)

    #Convolution2: consumes the pooled stage-1 output, not the raw input
    out = self.cnn_conv2(out)
    out = self.relu2(out)

    #Maxpool2
    out = self.maxpool2(out)

    #Flatten every sample to one vector, keeping the batch dimension
    out = out.view(out.size(0), -1)

    #Linear Function
    out = self.fc1(out)

    return out

In [0]:
#Model, loss and optimizer used by the training cell.
#SGD with momentum 0.5 updates the model parameters; the step size is
#kept in `learning_rate`.
learning_rate = 0.01
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.5,
)

In [0]:
#Training the model
#Keep the epoch count under its own name and use `epoch` as the loop
#variable: looping with `for epochs in range(epochs)` overwrote the constant,
#so re-running this cell would train for a different number of epochs.
num_epochs = epochs
#running count of optimizer steps across all epochs
iterations = 0
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_data_loader):
    #wrap the batch of images and its labels as Variables
    images = Variable(images)
    labels = Variable(labels)

    #clearing the gradients left over from the previous step
    optimizer.zero_grad()

    #passing the input into the model
    output = model(images)

    #finding the loss between the model output and the labels
    loss = criterion(output, labels)

    #backward pass, then parameter update
    loss.backward()
    optimizer.step()
    iterations = iterations + 1

    #report progress every 100 batches
    if (i+1) % 100 == 0:
      #loss.data[0] reads the loss value under the torch 0.3.0.post4 wheel
      #installed by this notebook
      print('Epoch [%d/%d], Iterations [%d/%d] Loss: %.6f'
            %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, 
              loss.data[0]))

In [0]:
#Testing the model
#Switch the model to evaluation mode, then count how many predicted labels
#match the true labels over the whole test loader.
model.eval()
total = 0
correct = 0
for images, labels in test_data_loader:
    #wrap the batch as a Variable and run it through the model
    images = Variable(images)
    outputs = model(images)
    #predicted label = index of the largest score along dimension 1
    _, predicted = outputs.data.max(1)
    total = total + labels.size(0)
    correct = correct + (predicted == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (
    100 * correct / total))