<a href="https://colab.research.google.com/github/arunm917/CS6910_Assignment_3/blob/main/CS6910_Assignment_3_PartA_V5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading necessary packages and files

In [1]:
import csv
import gdown
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import random
from torch.optim.lr_scheduler import StepLR

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# downloading file from gdrive
output = 'tam_train'
file_id = '1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH
To: /content/tam_train
100%|██████████| 2.69M/2.69M [00:00<00:00, 118MB/s]

DONE.





In [4]:
# downloading file from gdrive
output = 'tam_valid'
file_id = '1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV
To: /content/tam_valid
100%|██████████| 164k/164k [00:00<00:00, 81.2MB/s]

DONE.





In [5]:
# downloading file from gdrive
output = 'tam_test'
file_id = '1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ
To: /content/tam_test
100%|██████████| 157k/157k [00:00<00:00, 42.2MB/s]

DONE.





#Preprocessing

In [6]:
train_data_df = pd.read_csv('tam_train')
valid_data_df = pd.read_csv('tam_valid')
test_data_df = pd.read_csv('tam_test')

In [7]:
train_data_df.columns = ['English','Tamil']
# valid_data_df.columns = ['English','Tamil']
# test_data_df.columns = ['English','Tamil']

# Creating vocabulary and padding

In [8]:
# Creating vocabulary

char_list_eng = []
for i in range(len(train_data_df['English'])):
  char = [*train_data_df.loc[i, 'English']]
  char_list_eng.extend(char)

char_list_tam = []
for i in range(len(train_data_df['Tamil'])):
  char = [*train_data_df.loc[i, 'Tamil']]
  char_list_tam.extend(char)

print(len(char_list_eng))
print(len(char_list_tam))

721198
621903


In [9]:
# Indexing

SOS_token = '<SOS>'
EOS_token = '<EOS>'
PAD_token = '<PAD>'
UNK_token = '<UNK>'

vocabulary_eng = list(set(char_list_eng))
vocabulary_eng = [PAD_token] + [UNK_token] + [SOS_token] + [EOS_token] + vocabulary_eng 

vocabulary_tam = list(set(char_list_tam))
vocabulary_tam = [PAD_token] + [UNK_token] + [SOS_token] + [EOS_token] + vocabulary_tam

In [10]:
print(len(vocabulary_eng))
print(vocabulary_eng)
print(len(vocabulary_tam))
print(vocabulary_tam)

30
['<PAD>', '<UNK>', '<SOS>', '<EOS>', 'n', 'm', 'e', 'r', 'y', 'u', 'c', 'a', 'x', 'z', 'v', 'f', 'w', 'l', 'd', 'p', 'o', 'g', 'k', 'b', 'h', 't', 'j', 's', 'i', 'q']
50
['<PAD>', '<UNK>', '<SOS>', '<EOS>', 'ு', 'ௌ', 'ள', 'ய', 'ே', 'ஐ', 'ண', 'உ', 'அ', 'ோ', 'ஸ', 'ஜ', 'ச', 'ஏ', 'ொ', 'ற', 'ழ', 'இ', '்', 'ம', 'ஃ', 'ப', 'ட', 'த', 'ஆ', 'ீ', 'ஷ', 'ங', 'ந', 'எ', 'ா', 'ெ', 'ஈ', 'ஹ', 'ல', 'ை', 'ஒ', 'ஓ', 'ஊ', 'ி', 'வ', 'ஞ', 'க', 'ூ', 'ன', 'ர']


In [11]:
char_index_eng = {value: index for index, value in enumerate(vocabulary_eng)}
char_index_tam = {value: index for index, value in enumerate(vocabulary_tam)}
# num_list = [char_index[char] for char in vocabulary]

print(char_index_eng)
print(char_index_tam)
# print(num_list)

{'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3, 'n': 4, 'm': 5, 'e': 6, 'r': 7, 'y': 8, 'u': 9, 'c': 10, 'a': 11, 'x': 12, 'z': 13, 'v': 14, 'f': 15, 'w': 16, 'l': 17, 'd': 18, 'p': 19, 'o': 20, 'g': 21, 'k': 22, 'b': 23, 'h': 24, 't': 25, 'j': 26, 's': 27, 'i': 28, 'q': 29}
{'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3, 'ு': 4, 'ௌ': 5, 'ள': 6, 'ய': 7, 'ே': 8, 'ஐ': 9, 'ண': 10, 'உ': 11, 'அ': 12, 'ோ': 13, 'ஸ': 14, 'ஜ': 15, 'ச': 16, 'ஏ': 17, 'ொ': 18, 'ற': 19, 'ழ': 20, 'இ': 21, '்': 22, 'ம': 23, 'ஃ': 24, 'ப': 25, 'ட': 26, 'த': 27, 'ஆ': 28, 'ீ': 29, 'ஷ': 30, 'ங': 31, 'ந': 32, 'எ': 33, 'ா': 34, 'ெ': 35, 'ஈ': 36, 'ஹ': 37, 'ல': 38, 'ை': 39, 'ஒ': 40, 'ஓ': 41, 'ஊ': 42, 'ி': 43, 'வ': 44, 'ஞ': 45, 'க': 46, 'ூ': 47, 'ன': 48, 'ர': 49}


In [12]:
idx2char_eng = {value: key for key, value in char_index_eng.items()}
idx2char_tam = {value: key for key, value in char_index_tam.items()}
print(idx2char_eng)
print(idx2char_tam)

{0: '<PAD>', 1: '<UNK>', 2: '<SOS>', 3: '<EOS>', 4: 'n', 5: 'm', 6: 'e', 7: 'r', 8: 'y', 9: 'u', 10: 'c', 11: 'a', 12: 'x', 13: 'z', 14: 'v', 15: 'f', 16: 'w', 17: 'l', 18: 'd', 19: 'p', 20: 'o', 21: 'g', 22: 'k', 23: 'b', 24: 'h', 25: 't', 26: 'j', 27: 's', 28: 'i', 29: 'q'}
{0: '<PAD>', 1: '<UNK>', 2: '<SOS>', 3: '<EOS>', 4: 'ு', 5: 'ௌ', 6: 'ள', 7: 'ய', 8: 'ே', 9: 'ஐ', 10: 'ண', 11: 'உ', 12: 'அ', 13: 'ோ', 14: 'ஸ', 15: 'ஜ', 16: 'ச', 17: 'ஏ', 18: 'ொ', 19: 'ற', 20: 'ழ', 21: 'இ', 22: '்', 23: 'ம', 24: 'ஃ', 25: 'ப', 26: 'ட', 27: 'த', 28: 'ஆ', 29: 'ீ', 30: 'ஷ', 31: 'ங', 32: 'ந', 33: 'எ', 34: 'ா', 35: 'ெ', 36: 'ஈ', 37: 'ஹ', 38: 'ல', 39: 'ை', 40: 'ஒ', 41: 'ஓ', 42: 'ஊ', 43: 'ி', 44: 'வ', 45: 'ஞ', 46: 'க', 47: 'ூ', 48: 'ன', 49: 'ர'}


In [13]:
# Define the tokenizer
# max_length = 10
def tokenize_eng(word):
    chars = [*word]
    tokens_eng = [char_index_eng[char] if char in char_index_eng else 0 for char in chars]
    
    return tokens_eng

def tokenize_tam(word):
    chars = [*word]
    tokens_tam = [char_index_tam[char] if char in char_index_tam else 0 for char in chars]
    
    return tokens_tam

In [14]:
# Define the training pairs
training_pairs = train_data_df.values.tolist()
val_pairs = valid_data_df.values.tolist()
test_pairs = test_data_df.values.tolist()

In [15]:
len(val_pairs)

4095

In [16]:
# testing the tokenize function
tokenize_eng('arun')

[11, 7, 9, 4]

In [17]:
eng_words = [tokenize_eng(pair[0]) for pair in training_pairs]
tam_words = [tokenize_tam(pair[1]) for pair in training_pairs]

In [18]:
# Determining max length english

lengths_eng = []
# max_length_eng = max([len(words) for words in eng_words])
for word in eng_words:

    word_length = len(word)
    lengths_eng.append(word_length)
print(lengths_eng)
# lengths_eng = np.array(lengths_eng)


[11, 9, 11, 15, 22, 23, 16, 13, 13, 8, 8, 11, 17, 13, 18, 19, 9, 18, 18, 14, 18, 11, 9, 12, 10, 16, 16, 12, 14, 17, 16, 12, 5, 11, 16, 18, 11, 12, 14, 14, 7, 17, 18, 16, 11, 16, 23, 11, 22, 9, 15, 17, 16, 10, 15, 11, 9, 6, 12, 15, 6, 23, 13, 14, 13, 22, 17, 16, 14, 13, 13, 10, 17, 7, 17, 13, 14, 12, 9, 18, 18, 13, 21, 21, 14, 14, 9, 13, 10, 7, 20, 10, 17, 12, 16, 23, 8, 19, 9, 17, 14, 12, 12, 14, 19, 5, 15, 11, 9, 12, 6, 18, 15, 17, 16, 20, 14, 11, 19, 11, 14, 14, 17, 20, 12, 9, 21, 8, 12, 18, 9, 15, 9, 13, 15, 11, 10, 15, 15, 13, 14, 19, 13, 23, 7, 15, 11, 20, 17, 11, 7, 7, 12, 21, 16, 13, 13, 16, 5, 8, 19, 21, 21, 8, 15, 17, 21, 12, 17, 15, 16, 22, 19, 9, 7, 13, 18, 11, 11, 10, 12, 13, 13, 10, 16, 13, 11, 14, 13, 14, 18, 16, 15, 14, 7, 9, 18, 15, 18, 14, 10, 5, 14, 15, 14, 20, 11, 16, 20, 8, 15, 9, 9, 13, 18, 11, 18, 7, 11, 7, 14, 12, 14, 12, 21, 15, 16, 10, 16, 9, 19, 13, 10, 13, 12, 13, 18, 20, 23, 10, 17, 15, 20, 18, 19, 21, 13, 16, 10, 16, 12, 19, 19, 11, 16, 10, 22, 14, 18, 15, 

In [19]:
# Determining max length tamil
max_length_tam = max([len(words) for words in tam_words])
print(max_length_tam)

# Determining max length english and tamil
max_length = max([len(words) for words in eng_words + tam_words])
print(max_length)

23
30


In [20]:
def padding(word_pairs):
  ''' Function to pad the input and target sequences. Padding is done to ensure that
      all the training, validation and test samples are of equal size.'''
  # padded_input_sequences = []
  # padded_target_sequences = []
  
  eng_words = [tokenize_eng(pair[0]) for pair in word_pairs]
  tam_words = [tokenize_tam(pair[1]) for pair in word_pairs]

  
  padded_input_sequences = [torch.tensor([char_index_eng['<SOS>']] + eng_words + [char_index_eng['<EOS>']] + [(char_index_eng['<PAD>'])]*(max_length - len(eng_words))) for eng_words in eng_words]
  padded_target_sequences = [torch.tensor([char_index_eng['<SOS>']] + tam_words + [char_index_tam['<EOS>']] + [(char_index_tam['<PAD>'])]*(max_length - len(tam_words))) for tam_words in tam_words]
  tensor = torch.tensor([char_index_eng['<PAD>']]*(max_length+2))
  padded_input_sequences.append(tensor)
  padded_target_sequences.append(tensor)
  padded_input_sequences = torch.stack(padded_input_sequences)
  padded_target_sequences = torch.stack(padded_target_sequences)
  
  return(padded_input_sequences,padded_target_sequences)


In [21]:
# Creating datasets
training_input_sequences, training_target_sequences = padding(training_pairs)
train_dataset = torch.utils.data.TensorDataset(training_input_sequences, training_target_sequences)

val_input_sequences, val_target_sequences = padding(val_pairs)
val_dataset = torch.utils.data.TensorDataset(val_input_sequences, val_target_sequences)

test_input_sequences, test_target_sequences = padding(test_pairs)
test_dataset = torch.utils.data.TensorDataset(test_input_sequences, test_target_sequences)

# Architecture

In [22]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, bidirectionality, cell_type_encoder):
        super(Encoder, self).__init__()
        # print('encoder initialized')
        self.bidirectionality = bidirectionality

        if self.bidirectionality == 'YES':
          bidirectional = True
          self.directions = 2
        else:
          bidirectional = False
          self.directions = 1

        self.cell_type_encoder = cell_type_encoder
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)

        if self.cell_type_encoder == 'RNN':
          self.rnn = nn.RNN(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)
        if self.cell_type_encoder == 'LSTM':
          self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)
        if self.cell_type_encoder == 'GRU':
          self.rnn = nn.GRU(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)

        self.fc_hidden = nn.Linear(hidden_size*self.directions, hidden_size)
        self.fc_cell = nn.Linear(hidden_size*self.directions, hidden_size)
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        # x shape: (seq_length, N) where N is batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, N, embedding_size)

        if self.cell_type_encoder == 'LSTM':
          outputs, (hidden, cell) = self.rnn(embedding)
          # print('cell:', cell.shape)
        else:
          outputs, hidden = self.rnn(embedding)
        # print('\nhidden in encoder after going through cell:', hidden.shape)
        # outputs shape: (seq_length, N, hidden_size)
        if self.cell_type_encoder == 'LSTM':
          if self.bidirectionality == 'YES':
            row = 1
            hidden_list = []
            cell_list = []
            for i in range(hidden.shape[0]//2):
              # print('i',i)
              hidden_concatenated = self.fc_hidden(torch.cat((hidden[row-1:row], hidden[row:row+1]), dim=2))
              # print('hidden in for loop', hidden_bid.shape)
              cell_concatenated = self.fc_cell(torch.cat((cell[row-1:row], cell[row:row+1]), dim=2))
              # print('cell in for loop', cell_bid.shape)
              hidden_list.append(hidden_concatenated)
              cell_list.append(cell_concatenated)
              row += 2

            hidden_tensor = torch.stack(hidden_list)
            cell_tensor = torch.stack(cell_list)
            hidden_squeezed = hidden_tensor.squeeze()
            cell_squeezed = cell_tensor.squeeze()
          else:
            hidden_squeezed = hidden
            cell_squeezed = cell
        else:
          if self.bidirectionality == 'YES':
            row = 1
            hidden_list = []
            for i in range(hidden.shape[0]//2):
              # print('i',i)
              hidden_concatenated = self.fc_hidden(torch.cat((hidden[row-1:row], hidden[row:row+1]), dim=2))
              # print('hidden in for loop', hidden_bid.shape)
              hidden_list.append(hidden_concatenated)
              row += 2

            hidden_tensor = torch.stack(hidden_list)
            hidden_squeezed = hidden_tensor.squeeze()
            # print('hidden_squeezed:', hidden_squeezed.shape)
          else:
            hidden_squeezed = hidden

        if self.cell_type_encoder == 'LSTM':
          return hidden_squeezed, cell_squeezed
        else:
          return hidden_squeezed

In [23]:
class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p, cell_type_decoder):
        super(Decoder, self).__init__()
        # print('decoder intialized')

        # if bidirectionality == 'YES':
        #   bidirectional = True
        #   self.directions = 2
        # else:
        #   bidirectional = False
        #   self.directions = 1

        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type_decoder = cell_type_decoder

        self.embedding = nn.Embedding(input_size, embedding_size)
        if self.cell_type_decoder == 'RNN':
          self.rnn = nn.RNN(embedding_size, hidden_size, num_layers, dropout=p)
        if self.cell_type_decoder == 'LSTM':
          self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
        if self.cell_type_decoder == 'GRU':
          self.rnn = nn.GRU(embedding_size, hidden_size, num_layers, dropout=p)
        
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell = None):
        # x shape: (N) where N is for batch size, we want it to be (1, N), seq_length
        # is 1 here because we are sending in a single word and not a sentence
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (1, N, embedding_size)
        if self.cell_type_decoder == 'LSTM':
          outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        else:
          outputs, hidden = self.rnn(embedding, hidden)
        
        # outputs shape: (1, N, hidden_size)

        predictions = self.fc(outputs)
        predictions = predictions.squeeze(0)

        if self.cell_type_decoder == 'LSTM':
          return predictions, hidden, cell
        else:
          return predictions, hidden

In [24]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, cell_type_encoder, cell_type_decoder, bidirectionality):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.cell_type_encoder = cell_type_encoder
        self.cell_type_decoder = cell_type_decoder
        if bidirectionality == 'YES':
          bidirectional = True
          self.directions = 2
        else:
          bidirectional = False
          self.directions = 1

    def forward(self, source, target, teacher_force_ratio=0.5):
        # cell = None
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(vocabulary_tam)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        if self.cell_type_encoder != 'LSTM':
          cell = torch.zeros(num_layers, batch_size, hidden_size).to(device)

        if self.cell_type_encoder == 'LSTM':
          hidden, cell = self.encoder(source)
        else:
          hidden = self.encoder(source)
          # print('shape of hidden in forward in seq2seq after encoder:', hidden.shape)

        # first input to the Decoder will be <SOS> token
        x = target[0]
        predicted_sequences = torch.zeros([32, batch_size]).to(device)

        for t in range(1, target_len):
            # Use previous hidden, cell as context from encoder at start
            # output, hidden = self.decoder(x, hidden)
            if self.cell_type_decoder == 'LSTM':
              output, hidden, cell = self.decoder(x, hidden, cell)
            else:
              output, hidden = self.decoder(x, hidden)

            # Store next output prediction
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            predicted_token = output.argmax(1)
            # print('predicted token:', predicted_token.shape)
            x = target[t] if random.random() < teacher_force_ratio else predicted_token

            predicted_sequences[t] = predicted_token.squeeze()
        
        predicted_sequences_copy = predicted_sequences[1:].t()
    # print('predicted_sequence', predicted_sequences.shape)
    # print('target sensor shape', target_tensor.shape)
        target_copy = target[1:].t()
        correct_predictions_batch = correct_sequences_count(predicted_sequences_copy, target_copy)
        return outputs, correct_predictions_batch

In [25]:
def correct_sequences_count(predicted_sequences, target_sequences):
  
    correct_predictions_batch = 0
    for i in range(batch_size):
        # print('predicted sequence:', predicted_sequences[i].shape)
        # print('target sequence:', target_tensor[i].shape)
        target_word_list = []
        target_word_length = 0
        predicted_word_length = 0
        flag_target = 1
        flag_predicted = 1

        for element in target_sequences[i]:
            idx = element.item()
            target_char =  idx2char_tam[idx]
            target_word_list.append(target_char)
            if flag_target == 1:
              target_word_length += 1
              if idx == char_index_tam['<EOS>']:
                flag_target = 0
                break

        target_word_length = target_word_length - 1
        # print('target word:', target_word_list)
        # print('target word length:', target_word_length)
    
        predicted_word_list = []
        for element in predicted_sequences[i]:
            idx = element.item()
            predicted_char =  idx2char_tam[idx]
            predicted_word_list.append(predicted_char)
            if flag_predicted == 1:
              predicted_word_length += 1
              if idx == char_index_tam['<EOS>']:
                flag_predicted = 0
                break
        
        predicted_word_length = predicted_word_length - 1
        # print('predicted word:', predicted_word_list)
        # print('predicted word length:', predicted_word_length)
        
        if target_word_length == predicted_word_length:
          if all(x == y for x, y in zip(target_word_list, predicted_word_list)):
              correct_predictions_batch += 1
    # print('correct_predictions_batch:', correct_predictions_batch)
    return correct_predictions_batch

In [26]:
def accuracy(dataloader):
  model.eval()

  with torch.no_grad():
    total_loss = 0
    correct_predictions_total = 0
    correct_predictions_batch = 0

    for batch_idx, (input_seq, target_seq) in enumerate(dataloader):
        batch_idx += 1
        # Get input and targets and get to cuda
        inp_data = input_seq.t().to(device)
        target = target_seq.t().to(device)

        # Forward prop
        output, correct_predictions_batch = model(inp_data, target)
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)
        total_loss += loss.item()
        correct_predictions_total += correct_predictions_batch
    eval_loss = total_loss/batch_idx
    accuracy = (correct_predictions_total/((batch_idx*batch_size) - 1))*100
    model.train()
  return eval_loss, accuracy

# Hyperparameters

In [64]:
# Training hyperparameters
num_epochs = 2
learning_rate = 0.001
batch_size = 1024

# Model hyperparameters
# load_model = False
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
input_size_encoder = len(vocabulary_eng)
input_size_decoder = len(vocabulary_tam)
output_size = len(vocabulary_tam)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 512
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5
bidirectionality = 'YES'
cell_type_encoder = 'GRU'
cell_type_decoder = 'LSTM'

In [65]:
encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout, bidirectionality, cell_type_encoder).to(device)
decoder_net = Decoder(input_size_decoder,decoder_embedding_size, hidden_size, output_size, num_layers, dec_dropout, cell_type_decoder).to(device)

In [66]:
model = Seq2Seq(encoder_net, decoder_net, cell_type_encoder, cell_type_decoder, bidirectionality).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
pad_idx = char_index_eng['<PAD>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
scheduler = StepLR(optimizer, step_size = 6, gamma = 0.5)
# criterion = nn.CrossEntropyLoss()

# Training

In [62]:
# Creating Dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers = 2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, num_workers = 2)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=True, num_workers = 2)

In [67]:
for epoch in tqdm(range(num_epochs)):

    total_loss = 0
    correct_predictions_epoch = 0
    correct_predictions_batch = 0

    for batch_idx, (input_seq, target_seq) in enumerate(train_loader):
        batch_idx += 1
        # Get input and targets and get to cuda
        inp_data = input_seq.t().to(device)
        target = target_seq.t().to(device)

        # Forward prop
        output, correct_predictions_batch = model(inp_data, target)
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)
        total_loss += loss.item()

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()
        correct_predictions_epoch += correct_predictions_batch
    scheduler.step()
    # print('\nbatch_idx:', batch_idx)
    loss_epoch = total_loss/batch_idx
    train_accuracy = (correct_predictions_epoch/((batch_idx*batch_size) - 1))*100
    val_loss, val_accuracy = accuracy(val_loader)
    print('\nEpoc loss: %.4f' % loss_epoch, '\nCorrect predictions per epoch:',correct_predictions_epoch,
          '\nTraining accuracy: %.2f'% train_accuracy)
    print('\nValidation loss: %.4f'% val_loss, '\nValidation accuracy: %.2f'% val_accuracy)

test_loss, test_accuracy = accuracy(test_loader)
print('Test loss: %.4f'% test_loss, '\nTest accuracy: %.2f'% test_accuracy)


 50%|█████     | 1/2 [00:58<00:58, 58.89s/it]


Epoc loss: 2.8882 
Correct predictions per epoch: 0 
Training accuracy: 0.00

Validation loss: 2.6645 
Validation accuracy: 0.00


100%|██████████| 2/2 [01:57<00:00, 58.55s/it]


Epoc loss: 2.5040 
Correct predictions per epoch: 0 
Training accuracy: 0.00

Validation loss: 2.3881 
Validation accuracy: 0.00





Test loss: 2.4313 
Test accuracy: 0.00
