# First: Load the dataset
### we will use MNIST 

In [2]:
!pip install torchvision

Collecting torchvision
  Using cached https://files.pythonhosted.org/packages/fb/01/03fd7e503c16b3dc262483e5555ad40974ab5da8b9879e164b56c1f4ef6f/torchvision-0.2.2.post3-py2.py3-none-any.whl
Installing collected packages: torchvision
Successfully installed torchvision-0.2.2.post3


In [9]:
import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as mnistdt
# from Model.Models import LSTMModel

In [3]:
# setup the trainning and test data 

traindt = mnistdt.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

testdt = mnistdt.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

In [5]:
print('Trainning dataset size', traindt.train_data.size())
print('Trainning dataset labels', traindt.train_labels.size())
print('Testing dataset size', testdt.test_data.size())
print('Testning dataset labels', testdt.test_labels.size())

Trainning dataset size torch.Size([60000, 28, 28])
Trainning dataset labels torch.Size([60000])
Testing dataset size torch.Size([10000, 28, 28])
Testning dataset labels torch.Size([10000])


# Now we use pytorch dataloader to iterate, set number of batch and apply transformation


In [19]:
batch_size = 100
n_iters = 3000

num_epochs = n_iters / (len(traindt) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=traindt, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=testdt, 
                                          batch_size=batch_size, 
                                          shuffle=False)

# Create a LSTM class 
#### or import if using python script

In [20]:
# create LSTM model class
class LSTMModel(nn.Module):
    # Initialize class
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        
        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        
        # 28 time steps
        out, (hn, cn) = self.lstm(x, (h0,c0))

        out = self.fc(out[:, -1, :]) 
        # out.size() --> 100, 10
        return out

In [21]:
input_dim = 28
hidden_dim = 100
layer_dim = 1
output_dim = 10

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

In [22]:
# define loss function and setup optimizer
criterion = nn.CrossEntropyLoss()

learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

In [23]:
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

torch.Size([400, 28])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([10, 100])
torch.Size([10])


# Start Training

In [24]:
# Number of steps to unroll
seq_dim = 28  

iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        images = images.view(-1, seq_dim, input_dim).requires_grad_()
        
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        outputs = model(images)
        
        # calculate Loss
        loss = criterion(outputs, labels)
        
        # apply backpropagate
        loss.backward()
        
        # update
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                
                images = images.view(-1, seq_dim, input_dim).requires_grad_()
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                
                #  correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 500. Loss: 2.2540340423583984. Accuracy: 22
Iteration: 1000. Loss: 0.9567198753356934. Accuracy: 69
Iteration: 1500. Loss: 0.4429047703742981. Accuracy: 87
Iteration: 2000. Loss: 0.3745232820510864. Accuracy: 90
Iteration: 2500. Loss: 0.24429967999458313. Accuracy: 95
Iteration: 3000. Loss: 0.18545639514923096. Accuracy: 95
