In [0]:
# Environment setup: install the PyTorch 0.4.1 wheel that matches this
# interpreter and accelerator, plus torchvision. See http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel tag assembled as '<impl><version>-<abi>' from the running interpreter.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# Shell capture (IPython `!`): list the cudart library entries known to ldconfig;
# the sed expression rewrites a line ending in '.<digits>.<digits>' to 'cu<digits><digits>'.
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# The captured CUDA tag is only used when the NVIDIA device node exists; otherwise 'cpu'.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# NOTE(review): torch is pinned to 0.4.1 but torchvision is unpinned, and
# wheel.pep425tags may be missing from newer `wheel` releases - confirm before re-running.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

# Downloading Datasets

In [0]:
# MNIST train/test splits stored under the current directory; download=True
# asks torchvision to fetch the files, and ToTensor converts each image to a tensor.
train_dataset = dsets.MNIST(
  root='.',
  train=True,
  transform=transforms.ToTensor(),
  download=True,
)
test_dataset = dsets.MNIST(
  root='.',
  train=False,
  transform=transforms.ToTensor(),
  download=True,
)

# Defining hyperparameters


In [0]:
# Training schedule.
batch_size = 100        # samples per mini-batch
num_iters = 3000        # number of parameter updates to aim for
# Passes over the training set that yield num_iters mini-batch updates.
epochs = int(num_iters * batch_size / len(train_dataset))

# Network dimensions and optimizer step size.
input_dim = 28 * 28     # one input per pixel of a flattened 28x28 image
hidden_dim = 100
output_dim = 10
learning_rate = 0.1


# Making datasets iterable

In [0]:
# Mini-batch iterators over both splits; only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
  shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
  dataset=test_dataset,
  batch_size=batch_size,
  shuffle=False,
)

# Creating model class

In [0]:
class FeedforwardNeuralNetwork(nn.Module):
  """Fully connected network with one hidden layer: Linear -> Sigmoid -> Linear.

  The forward pass returns the raw output of the second linear layer; no
  softmax is applied inside the model.
  """

  def __init__(self,input_dim,hidden_dim,output_dim):
    """Create the three sub-modules.

    input_dim:  number of features in each input row
    hidden_dim: number of units in the hidden layer
    output_dim: number of values produced for each input row
    """
    super().__init__()

    # Registered in the same order the data flows through them.
    self.linear1 = nn.Linear(input_dim, hidden_dim)
    self.sigmoid = nn.Sigmoid()
    self.linear2 = nn.Linear(hidden_dim, output_dim)

  def forward(self,x):
    """Return the second linear layer's output for the batch `x`."""
    hidden = self.sigmoid(self.linear1(x))
    return self.linear2(hidden)

# Instantiating Model Class

In [0]:
# Build the network from the configured layer sizes.
model = FeedforwardNeuralNetwork(
  input_dim=input_dim,
  hidden_dim=hidden_dim,
  output_dim=output_dim,
)

# Loss: cross entropy between the model outputs and the target labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent over every model parameter.
optimizer = torch.optim.SGD(
  model.parameters(),
  lr=learning_rate,
)


# Training the model

In [82]:
def count_correct(model, data_loader, input_dim):
  """Count correct predictions of `model` over every batch in `data_loader`.

  Each image batch is flattened to rows of `input_dim` values before it is
  passed to the model; the predicted class is the index of the largest output.
  Gradient tracking is disabled for the whole pass, and the model's
  train/eval mode is restored before returning.

  Returns a `(correct, total)` tuple of Python ints.
  """
  correct = 0
  total = 0
  was_training = model.training
  model.eval()
  with torch.no_grad():                     # inference only: no autograd graph
    for test_images, test_labels in data_loader:
      scores = model(test_images.view(-1, input_dim))
      _, predicted = torch.max(scores, 1)
      correct += (predicted == test_labels).sum().item()
      total += test_labels.size(0)
  model.train(was_training)
  return correct, total


eval_every = 500                            # report test accuracy every N updates
iters = 0

for epoch in range(epochs):
  for images, labels in train_loader:

    # Flatten each image to one row. Tensors track gradients themselves in
    # torch >= 0.4, so no Variable wrapper is needed.
    images = images.view(-1, input_dim)

    optimizer.zero_grad()                   # clear gradient buffers
    outputs = model(images)                 # forward pass
    loss = criterion(outputs, labels)       # loss of this training batch
    loss.backward()                         # compute gradients
    optimizer.step()                        # update parameters

    iters += 1

    if iters % eval_every == 0:
      # The helper uses its own batch variables, so the training batch and
      # its loss above are not overwritten by the evaluation pass.
      correct, total = count_correct(model, test_loader, input_dim)
      print('Iteration:{}  Loss:{}   Accuracy:{}'.format(iters, loss.item(), 100*correct/total))

Iteration:500  Loss:0.5005639791488647   Accuracy:86.43
Iteration:1000  Loss:0.5518031716346741   Accuracy:89.63
Iteration:1500  Loss:0.4898417592048645   Accuracy:90.57
Iteration:2000  Loss:0.20130138099193573   Accuracy:91.22
Iteration:2500  Loss:0.27877938747406006   Accuracy:91.75
Iteration:3000  Loss:0.21195927262306213   Accuracy:92.15
