If you have any problems, contact me at:

1) goldshakil (kakao)

2) omarshakil100@gmail.com (e-mail)

### Seq2Seq modeling using LSTM
### (1000 Sequence to 2000 Sequence)

### Importing Header Files:

In [None]:
#Importing header files
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_sequence, pack_padded_sequence, pad_packed_sequence
import numpy as np
import unicodedata

### Preprocess the data here:

In [1]:
# add the code for preprocessing data here
# the data should be represented as (x, y), where x is the input and y is the target
# x: current address
# y: next address

# load the data as follows:
### train_loader: for training set
### valid_loader: for validation set
### test_loader: for test set

### LSTM Model Class:

In [None]:
batch_size = 8      # number of sequences per mini-batch
num_layers = 4      # number of stacked LSTM layers (fine tuning needed)
# NOTE(review): the notebook text describes this as "the input sequence is
# 1000 integers", but the code below uses it as the number of rows of the
# embedding table, so every input id must lie in [0, input_size) -- confirm.
input_size = 1000
hidden_size = 128   # embedding width and LSTM hidden-state width (our choice)
output_size = 2000  # size of the vector produced by the final linear layer


class Model(nn.Module):
    """Embed integer sequences, run a stacked LSTM and classify the result.

    Pipeline: ids -> embedding -> (optional packing) -> LSTM -> final hidden
    state of the top LSTM layer -> fully connected layer -> log-softmax.
    """

    def __init__(self):
        super(Model, self).__init__()

        # Look-up table mapping each integer id to a hidden_size vector.
        self.embedding = nn.Embedding(input_size, hidden_size)

        # Stacked LSTM. num_layers must be passed explicitly: initHidden()
        # builds states with num_layers layers, and nn.LSTM otherwise
        # defaults to a single layer and rejects those states.
        #   input : (seq_len, batch, hidden_size)
        #   output: (seq_len, batch, hidden_size)
        #   h_n   : (num_layers, batch, hidden_size)
        #   c_n   : (num_layers, batch, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers)

        # Maps the final hidden state to output_size scores.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lengths=None):
        """Return log-probabilities of shape (batch, output_size).

        Args:
            x: integer tensor of shape (batch, seq_len) holding ids that are
                valid indices into the embedding table.
            lengths: optional per-sequence valid lengths (list of ints or
                1-D tensor, any order). When given, the padded positions are
                skipped by the LSTM; when omitted, every sequence is
                processed over its full seq_len.
        """
        batches = x.size(0)

        # (batch, seq) -> (seq, batch), the layout nn.LSTM expects by default.
        x = x.permute(1, 0)

        # (seq, batch) -> (seq, batch, hidden_size)
        lstm_input = self.embedding(x)

        if lengths is not None:
            # pack_padded_sequence needs the lengths on the CPU;
            # enforce_sorted=False lets callers pass batches in any order.
            lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
            lstm_input = pack_padded_sequence(
                lstm_input, lengths, enforce_sorted=False
            )

        h_0 = self.initHidden(batches)
        c_0 = self.initHidden(batches)

        # The per-step outputs are not needed, only the final hidden state.
        _, (h_n, _) = self.lstm(lstm_input, (h_0, c_0))

        # h_n[-1] is the top layer's final state, shape (batch, hidden_size),
        # which is exactly the input width self.fc was built for.
        out = self.fc(h_n[-1])
        return nn.functional.log_softmax(out, dim=1)

    def initHidden(self, batches):
        """Return a zero state of shape (num_layers, batches, hidden_size).

        The tensor is created with the same device and dtype as the model's
        parameters so it can be fed to the LSTM after `.to(device)`.
        """
        reference = self.fc.weight
        return torch.zeros(
            num_layers,
            batches,
            hidden_size,
            device=reference.device,
            dtype=reference.dtype,
        )

### Driver Code:

In [None]:
# `optim` is used below but is not among the notebook's imports, so bring it
# into scope here to keep this cell runnable.
from torch import optim

# Select GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# torch.cuda.get_device_name() raises when no CUDA device is present, so only
# query it when the GPU was actually selected.
if device.type == "cuda":
    print("This model is running on", torch.cuda.get_device_name(device))
else:
    print("This model is running on", "CPU")

# Model
net = Model().to(device)

# Optimise every trainable parameter of the model. weight_decay adds an L2
# penalty that keeps the weights from growing too large.
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0001)

# Error function
criterion = nn.CrossEntropyLoss()

# Learning-rate scheduler: multiplies the learning rate by gamma every
# step_size epochs.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# How many times we pass over the full training data
total_epoch = 50

### Training and Validation: 
Make sure your data is represented as described above

In [None]:
# Best validation accuracy seen so far. Starting below any reachable accuracy
# guarantees that a checkpoint is written after the first epoch.
best_valid_acc = float("-inf")

# Report roughly ten times over the whole run. Integer arithmetic avoids the
# float-modulo pitfall of `% (total_epoch * 0.1)`, which never equals 0 for
# some epoch counts (e.g. 30 * 0.1 == 3.0000000000000004).
print_every = max(1, total_epoch // 10)

for cur_epoch in range(total_epoch):
    train_correct = 0
    train_total = 0
    train_loss = 0  # summed batch losses for this epoch

    valid_correct = 0
    valid_total = 0
    valid_loss = 0  # summed batch losses for this epoch

    net.train()  # put the model in training mode
    for data in train_loader:
        # Each item is indexed as (inputs, labels), batch dimension first.
        X, y = data[0].to(device), data[1].to(device)
        # The loader yields no per-sample lengths, so every row is treated as
        # a full-length sequence.
        lengths = torch.full((X.size(0),), X.size(1), dtype=torch.int64)

        optimizer.zero_grad()
        output = net(X, lengths)
        loss = criterion(output, y)  # loss for this batch

        loss.backward()   # compute dloss/dw for every parameter w
        optimizer.step()  # update the weights
        train_loss += loss.item()

        # Count rows whose highest-scoring index equals the label.
        train_correct += (output.argmax(dim=1) == y).sum().item()
        train_total += output.size(0)

    exp_lr_scheduler.step()  # learning-rate adjustment, once per epoch

    net.eval()  # put the model in evaluation mode
    with torch.no_grad():  # no gradients needed for validation
        for data in valid_loader:
            X, y = data[0].to(device), data[1].to(device)
            lengths = torch.full((X.size(0),), X.size(1), dtype=torch.int64)
            output = net(X, lengths)
            loss = criterion(output, y)

            valid_loss += loss.item()
            valid_correct += (output.argmax(dim=1) == y).sum().item()
            valid_total += output.size(0)

    # Guard the ratios so an empty loader reports 0 instead of raising
    # ZeroDivisionError.
    train_acc = train_correct / train_total if train_total else 0.0
    valid_acc = valid_correct / valid_total if valid_total else 0.0

    # If the model is better than the previous best, store it.
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(net.state_dict(), "./save_best.pth")  # best checkpoint so far

    if (cur_epoch + 1) % print_every == 0:
        print(' Epoch {}/{}: Training Accuracy {} |  Training Loss {} || Validation Accuracy {} |  Validation Loss {}'.format(
            cur_epoch + 1,
            total_epoch,
            train_acc,
            train_loss / max(len(train_loader), 1),
            valid_acc,
            valid_loss / max(len(valid_loader), 1),
        ))
        print(' Best validation so far {}'.format(best_valid_acc))
        print('-------------------------------------------------------------------------------------------------------------------------------')

### Testing

In [None]:
# Load the checkpoint with the best validation accuracy. map_location lets a
# checkpoint that was saved from a GPU run be restored on whichever device is
# selected now (including CPU-only machines).
load_model = Model().to(device)
load_model.load_state_dict(torch.load("./save_best.pth", map_location=device))

load_model.eval()

correct = 0
total = 0
with torch.no_grad():  # no gradients needed for evaluation
    for data in test_loader:
        # Each item is indexed as (inputs, labels), batch dimension first.
        X, y = data[0].to(device), data[1].to(device)
        # The loader yields no per-sample lengths, so every row is treated as
        # a full-length sequence.
        lengths = torch.full((X.size(0),), X.size(1), dtype=torch.int64)
        output = load_model(X, lengths)
        # Count rows whose highest-scoring index equals the label.
        correct += (output.argmax(dim=1) == y).sum().item()
        total += output.size(0)

# Report 0.0 rather than raising ZeroDivisionError when the test set is empty.
print("Test Accuracy: ", correct / total if total else 0.0)