In [0]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
import numpy as np
import math
from torch.nn import functional as F

In [0]:
'''
STEP 1 - LOADING DATASET

'''

train_dataset = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download= True)
test_dataset = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download= True)

In [13]:
'''
STEP 2 - MAKING DATASET ITERABLE

'''

batch_size= 100
n_iters= 6000
num_epochs = n_iters/(len(train_dataset)/batch_size)
num_epochs= int(num_epochs)

cuda = torch.cuda.is_available()
print(cuda)

train_loader= torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size, shuffle=True)
test_loader= torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size, shuffle= False)

True


LSTM class for nn.lstm implementation

In [0]:
class LSTM(nn.Module):
  def __init__(self, input_size, hidden_size, bias=True):
    super(LSTM, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    if cuda:
      self.weight_ih = nn.Parameter(torch.Tensor(4 * hidden_size, input_size)).cuda()
      self.weight_hh = nn.Parameter(torch.Tensor(4 * hidden_size, hidden_size)).cuda()
    else:
      self.weight_ih = nn.Parameter(torch.Tensor(4 * hidden_size, input_size))
      self.weight_hh = nn.Parameter(torch.Tensor(4 * hidden_size, hidden_size))
    if bias:
      if cuda:
        self.bias_ih = nn.Parameter(torch.Tensor(4 * hidden_size)).cuda()
        self.bias_hh = nn.Parameter(torch.Tensor(4 * hidden_size)).cuda()
      else:
        self.bias_ih = nn.Parameter(torch.Tensor(4 * hidden_size))
        self.bias_hh = nn.Parameter(torch.Tensor(4 * hidden_size))
    else:
      self.register_parameter('bias_ih', None)
      self.register_parameter('bias_hh', None)
    self.reset_parameters()

  def reset_parameters(self):
    stdv = 1.0 / math.sqrt(self.hidden_size)
    for weight in self.parameters():
      weight.data.uniform_(-stdv, stdv)
    
  def forward(self,input, hidden):
    if input.cuda:
        igates = F.linear(input, self.weight_ih)
        hgates = F.linear(hidden[0], self.weight_hh)

    hx, cx = hidden
    gates_1 = F.linear(input, self.weight_ih, self.bias_ih) 
    gates_2= F.linear(hx, self.weight_hh, self.bias_hh)
    print(gates_1.shape, gates_2.shape)
    #gates_1 = torch.reshape(gates_1,(28,100,400))
    gates = gates_1+gates_2

    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    ingate = torch.sigmoid(ingate)
    forgetgate = torch.sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)
    outgate = torch.sigmoid(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * torch.tanh(cy)

    return out,(hy, cy)
  
  
  

GRU class for nn.Gru implemenation

In [0]:
class GRU(nn.Module):
  def __init__(self, input_size, hidden_size, bias=True):
    super(GRU, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    if cuda:
      self.weight_ih = nn.Parameter(torch.Tensor(3 * hidden_size, input_size)).cuda()
      self.weight_hh = nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size)).cuda()
    else:
      self.weight_ih = nn.Parameter(torch.Tensor(3 * hidden_size, input_size))
      self.weight_hh = nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
    if bias:
      if cuda:
        self.bias_ih = nn.Parameter(torch.Tensor(3 * hidden_size)).cuda()
        self.bias_hh = nn.Parameter(torch.Tensor(3 * hidden_size)).cuda()
      else:
        self.bias_ih = nn.Parameter(torch.Tensor(3 * hidden_size))
        self.bias_hh = nn.Parameter(torch.Tensor(3 * hidden_size))
    else:
      self.register_parameter('bias_ih', None)
      self.register_parameter('bias_hh', None)
    self.reset_parameters()

  def reset_parameters(self):
    stdv = 1.0 / math.sqrt(self.hidden_size)
    for weight in self.parameters():
      weight.data.uniform_(-stdv, stdv)
    
  def forward(self,input, hidden):
    if input.is_cuda:
        gi = F.linear(input, weight_ih)
        gh = F.linear(hidden, weight_hh)

    gi = F.linear(input, weight_ih, bias_ih)
    gh = F.linear(hidden, weight_hh, bias_hh)
    i_r, i_i, i_n = gi.chunk(3, 1)
    h_r, h_i, h_n = gh.chunk(3, 1)

    resetgate = F.sigmoid(i_r + h_r)
    inputgate = F.sigmoid(i_i + h_i)
    newgate = F.tanh(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)

    return hy
  

In [0]:
'''
STEP 3 - CREATE MODEL CLASS

'''

class LSTMModel(nn.Module):
  def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
    super(LSTMModel, self).__init__()
    #hidden dimensions/neurons
    self.hidden_dim=hidden_dim
    
    #no. of hidden layers
    self.layer_dim = layer_dim
    
    #output dimensions/neurons
    self.output_dim=output_dim
    
    # Building your LSTM
    # batch_first=True causes input/output tensors to be of shape
    # (batch_dim, seq_dim, feature_dim)
    #self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
    self.lstm = LSTM(input_dim, hidden_dim, bias=True)
    #Readout layer
    self.fc = nn.Linear(hidden_dim, output_dim)
    
    
  def forward(self,x):
    # Initialize hidden state with zeros
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        #print(x.shape,"x.shape")100, 28, 28
        if torch.cuda.is_available():
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
         
        # Initialize cell state
        if torch.cuda.is_available():
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        # One time step
        out, (hn, cn) = self.lstm.forward(x, (h0,c0))#or None!
        
        #[b_ig | b_fg | b_gg | b_og]
        #print(self.lstm._all_weights) [['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'], ['weight_ih_l1', 'weight_hh_l1', 'bias_ih_l1', 'bias_hh_l1'], ['weight_ih_l2', 'weight_hh_l2', 'bias_ih_l2', 'bias_hh_l2']]
        for names in self.lstm._all_weights:
            for name in filter(lambda n: "bias" in n,  names):
                bias = getattr(self.lstm, name)
                n = bias.size(0)
                start, end = n//4, n//2
                bias.data[start:end].fill_(1.)

        # Index hidden state of last time step
        # out.size() --> 100, 28, 100
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        out = self.fc(out[:, -1, :]) 
        # out.size() --> 100, 10
        return out

In [71]:
'''
STEP 4 - INSTANTIATE MODEL CLASS

'''

input_dim = 28
hidden_dim = 100
output_dim = 10
layer_dim = 3

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

#######################
#  USE GPU FOR MODEL  #
#######################

#if cuda:
 # model.cuda()

'''
STEP 5 - INSTANTIATE LOSS CLASS

'''
criterion = nn.CrossEntropyLoss()

'''
STEP 6 -INSTANTIATE OPTIMIZER CLASS

'''
learning_rate= 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

'''
STEP 7 - TRAIN THE MODEL

'''
#no. f steps to unroll
seq_dim = 28

iter = 0
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Load images as Variable
    #######################
    #  USE GPU FOR MODEL  #
    #######################
    
    if cuda:
      images=Variable(images.view(-1, seq_dim, input_dim).cuda())
      labels=Variable(labels.cuda())    
    else:
      images=Variable(images.view(-1, seq_dim, input_dim))
      labels=Variable(labels)
      
    # clear gradients wrt parameters
    optimizer.zero_grad()
    
    # Forward pass to get outputs/logits
    # outputs.size() --> 100, 10
    outputs=model(images)
    
    # Calculate Loss: softmax --> cross entropy loss
    loss=criterion(outputs,labels)
    
    # Getting gradients w.r.t. parameters
    loss.backward()
    
    # Updating parameters
    optimizer.step()
    
    iter+=1
    if iter%500==0:
      #Calculate Accuracy
      correct = 0
      total = 0
      #Iterate through test dataset
      for images,labels in test_loader:
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        if cuda:
          images=Variable(images.view(-1, seq_dim, input_dim).cuda())
        else:
          images=Variable(images.view(-1, seq_dim, input_dim))
        
        # Forward pass only to get logits/output
        outputs = model(images)
        
        # Get predictions from the maximum value
        _,predicted = torch.max(outputs.data,1)
        
        # Total no. of labels
        total += labels.size(0)
        
        # Total correct predictions
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        
        if cuda:
          correct += (predicted.cpu()==labels.cpu()).sum()
        else:
          correct += (predicted.cpu()==labels.cpu())
       
      accuracy = 100 * correct / total
        
      # Print Loss
      print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data[0], accuracy))
        
          
      
    

  

torch.Size([100, 28, 400]) torch.Size([3, 100, 400])


RuntimeError: ignored