<a href="https://colab.research.google.com/github/arunm917/CS6910_Assignment_3/blob/main/CS6910_Assignment_3_PartA_V5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading necessary packages and files

In [None]:
import csv
import gdown
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import random
from torch.optim.lr_scheduler import StepLR

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Fetch the training split from Google Drive and store it locally as 'tam_train'
output = 'tam_train'
file_id = '1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH'  # Google Drive file ID
gdown.download(url='https://drive.google.com/uc?id=' + file_id, output=output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH
To: /content/tam_train
100%|██████████| 2.69M/2.69M [00:00<00:00, 91.5MB/s]

DONE.





In [None]:
# Fetch the validation split from Google Drive and store it locally as 'tam_valid'
output = 'tam_valid'
file_id = '1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV'  # Google Drive file ID
gdown.download(url='https://drive.google.com/uc?id=' + file_id, output=output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV
To: /content/tam_valid
100%|██████████| 164k/164k [00:00<00:00, 13.2MB/s]

DONE.





In [None]:
# Fetch the test split from Google Drive and store it locally as 'tam_test'
output = 'tam_test'
file_id = '1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ'  # Google Drive file ID
gdown.download(url='https://drive.google.com/uc?id=' + file_id, output=output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ
To: /content/tam_test
100%|██████████| 157k/157k [00:00<00:00, 128MB/s]

DONE.





# Preprocessing

In [None]:
# Load the three downloaded splits into DataFrames.
# NOTE(review): read_csv is called with its default header handling, so the first
# line of every file is consumed as column names instead of data. If the files have
# no header row (the manual column rename in the next cell suggests so — TODO confirm),
# one sample per split is silently dropped. Passing header=None would keep it, but the
# batch/accuracy bookkeeping further down assumes the current row counts and would
# have to be revisited at the same time.
train_data_df = pd.read_csv('tam_train')
valid_data_df = pd.read_csv('tam_valid')
test_data_df = pd.read_csv('tam_test')

In [None]:
# Name the two columns of the training frame; the vocabulary cell below accesses
# them by these labels.
train_data_df.columns = ['English','Tamil']
# The validation and test frames are only consumed positionally (via .values.tolist()),
# so renaming their columns is left disabled.
# valid_data_df.columns = ['English','Tamil']
# test_data_df.columns = ['English','Tamil']

# Creating vocabulary and padding

In [None]:
# Creating vocabulary

# Collect every character that occurs in the training words, one list per language.
# Iterating the columns directly avoids one label-based .loc lookup per row and
# does not rely on the frame having a default 0..n-1 index.
char_list_eng = [char for word in train_data_df['English'] for char in word]
char_list_tam = [char for word in train_data_df['Tamil'] for char in word]

In [None]:
# Indexing

# Special tokens; they occupy indices 0-3 of both vocabularies, in this order.
SOS_token = '<SOS>'
EOS_token = '<EOS>'
PAD_token = '<PAD>'
UNK_token = '<UNK>'

# sorted() gives a deterministic character order. The iteration order of a set of
# strings can differ between interpreter sessions (string hash randomisation), which
# would change the char -> index mapping from run to run and make a checkpoint saved
# in one session meaningless when loaded in another.
vocabulary_eng = [PAD_token, UNK_token, SOS_token, EOS_token] + sorted(set(char_list_eng))
vocabulary_tam = [PAD_token, UNK_token, SOS_token, EOS_token] + sorted(set(char_list_tam))

In [None]:
# Character -> integer index lookup tables for the source and target vocabularies
char_index_eng = dict(zip(vocabulary_eng, range(len(vocabulary_eng))))
char_index_tam = dict(zip(vocabulary_tam, range(len(vocabulary_tam))))

In [None]:
# Inverse lookup tables (integer index -> character), printed for inspection
idx2char_eng = dict(zip(char_index_eng.values(), char_index_eng.keys()))
idx2char_tam = dict(zip(char_index_tam.values(), char_index_tam.keys()))
print(idx2char_eng)
print(idx2char_tam)

{0: '<PAD>', 1: '<UNK>', 2: '<SOS>', 3: '<EOS>', 4: 't', 5: 'c', 6: 'u', 7: 's', 8: 'f', 9: 'k', 10: 'x', 11: 'a', 12: 'b', 13: 'o', 14: 'i', 15: 'p', 16: 'l', 17: 'n', 18: 'd', 19: 'j', 20: 'r', 21: 'v', 22: 'e', 23: 'q', 24: 'm', 25: 'g', 26: 'y', 27: 'w', 28: 'z', 29: 'h'}
{0: '<PAD>', 1: '<UNK>', 2: '<SOS>', 3: '<EOS>', 4: 'க', 5: 'எ', 6: 'உ', 7: 'த', 8: 'ள', 9: 'ை', 10: 'ண', 11: 'ட', 12: 'ஒ', 13: 'ெ', 14: 'ே', 15: 'ஊ', 16: 'ந', 17: 'இ', 18: 'ஹ', 19: 'ஸ', 20: 'ர', 21: 'ஓ', 22: 'ற', 23: 'ஃ', 24: 'ா', 25: 'ு', 26: 'ஐ', 27: 'ல', 28: 'வ', 29: 'ஆ', 30: 'ீ', 31: 'ஜ', 32: '்', 33: 'ழ', 34: 'ோ', 35: 'ய', 36: 'ி', 37: 'ொ', 38: 'ச', 39: 'ௌ', 40: 'ப', 41: 'ம', 42: 'ஈ', 43: 'அ', 44: 'ஏ', 45: 'ன', 46: 'ஞ', 47: 'ங', 48: 'ூ', 49: 'ஷ'}


In [None]:
# Defining the tokenizer
def tokenize_eng(word):
    """Convert an English (source) word into a list of vocabulary indices.

    Args:
        word: string to tokenize; it is processed character by character.

    Returns:
        list[int]: one index per character, looked up in ``char_index_eng``.
        Characters that are not in the vocabulary map to the ``<UNK>`` index.
    """
    # Unknown characters used to fall back to 0, which is <PAD>; the dedicated
    # <UNK> entry keeps them distinguishable from padding.
    unk_index = char_index_eng['<UNK>']
    return [char_index_eng.get(char, unk_index) for char in word]

def tokenize_tam(word):
    """Convert a Tamil (target) word into a list of vocabulary indices.

    Args:
        word: string to tokenize; it is processed character by character.

    Returns:
        list[int]: one index per character, looked up in ``char_index_tam``.
        Characters that are not in the vocabulary map to the ``<UNK>`` index.
    """
    # Unknown characters used to fall back to 0, which is <PAD>; the dedicated
    # <UNK> entry keeps them distinguishable from padding.
    unk_index = char_index_tam['<UNK>']
    return [char_index_tam.get(char, unk_index) for char in word]

In [None]:
# Turn each split into a plain Python list of [source word, target word] rows
training_pairs = train_data_df.to_numpy().tolist()
val_pairs = valid_data_df.to_numpy().tolist()
test_pairs = test_data_df.to_numpy().tolist()

In [None]:
# Tokenize both sides of every training pair (first column: English, second: Tamil)
eng_words = [tokenize_eng(source_word) for source_word, _ in training_pairs]
tam_words = [tokenize_tam(target_word) for _, target_word in training_pairs]

In [None]:
# Determining max length english

# Number of tokens in every tokenized English training word
lengths_eng = [len(word) for word in eng_words]
# max_length_eng = max([len(words) for words in eng_words])

In [None]:
# Longest tokenized Tamil word in the training data
max_length_tam = max(map(len, tam_words))

# Longest tokenized word across both languages; padding() uses it as the common
# word length (before <SOS> and <EOS> are added)
max_length = max(map(len, eng_words + tam_words))


In [None]:
def padding(word_pairs):
  ''' Tokenize the word pairs and pad them to one common length.

      Every word becomes <SOS> + tokens + <EOS>, followed by <PAD> entries up to a
      total of max_length + 2. One extra row consisting only of <PAD> is appended to
      both outputs; the accuracy computations in this notebook subtract that row
      from their sample counts.

      Args:
        word_pairs: sequence of [english_word, tamil_word] pairs.

      Returns:
        (padded_input_sequences, padded_target_sequences): two integer tensors,
        each with len(word_pairs) + 1 rows and max_length + 2 columns.

      Raises:
        ValueError: if a tokenized word is longer than max_length. Such a row could
          not be stacked with the others.
  '''

  def build_rows(token_lists, char_index):
    # Each language uses the special-token indices of its own vocabulary.
    sos, eos, pad = char_index['<SOS>'], char_index['<EOS>'], char_index['<PAD>']
    rows = []
    for tokens in token_lists:
      if len(tokens) > max_length:
        raise ValueError(
            'Word with %d characters exceeds max_length=%d' % (len(tokens), max_length))
      rows.append(torch.tensor([sos] + tokens + [eos] + [pad] * (max_length - len(tokens))))
    # Trailing all-<PAD> row (+2 accounts for <SOS> and <EOS>)
    rows.append(torch.tensor([pad] * (max_length + 2)))
    return torch.stack(rows)

  padded_input_sequences = build_rows(
      [tokenize_eng(pair[0]) for pair in word_pairs], char_index_eng)
  padded_target_sequences = build_rows(
      [tokenize_tam(pair[1]) for pair in word_pairs], char_index_tam)

  return (padded_input_sequences, padded_target_sequences)


In [None]:
# Creating datasets
# padding() returns an (inputs, targets) pair of tensors for a split; each
# TensorDataset then serves matching rows of the two tensors together.
training_input_sequences, training_target_sequences = padding(training_pairs)
train_dataset = torch.utils.data.TensorDataset(training_input_sequences, training_target_sequences)

val_input_sequences, val_target_sequences = padding(val_pairs)
val_dataset = torch.utils.data.TensorDataset(val_input_sequences, val_target_sequences)

test_input_sequences, test_target_sequences = padding(test_pairs)
test_dataset = torch.utils.data.TensorDataset(test_input_sequences, test_target_sequences)

# Architecture

In [None]:
class Encoder(nn.Module):
    ''' Encodes a batch of input character sequences into the final hidden state
        (and, for LSTM, cell state) of a recurrent network.

        Args:
            input_size: number of entries in the source embedding table.
            embedding_size: size of each character embedding.
            hidden_size: size of the recurrent hidden state.
            num_layers: number of stacked recurrent layers.
            p: dropout probability (applied to the embeddings and passed to the RNN).
            bidirectionality: 'YES' enables a bidirectional RNN; any other value
                keeps it unidirectional.
            cell_type_encoder: one of 'RNN', 'LSTM' or 'GRU'.

        Raises:
            ValueError: if cell_type_encoder is not one of the supported values.
    '''

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, bidirectionality, cell_type_encoder):
        super(Encoder, self).__init__()

        # bidirectionality adds a second pass that reads the characters in reverse
        self.bidirectionality = bidirectionality
        bidirectional = self.bidirectionality == 'YES'
        self.directions = 2 if bidirectional else 1

        self.cell_type_encoder = cell_type_encoder
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)

        # Select the recurrent unit; fail early on an unsupported name instead of
        # leaving self.rnn undefined until the first forward pass.
        rnn_classes = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}
        if self.cell_type_encoder not in rnn_classes:
            raise ValueError("cell_type_encoder must be 'RNN', 'LSTM' or 'GRU', got %r"
                             % (self.cell_type_encoder,))
        self.rnn = rnn_classes[self.cell_type_encoder](
            embedding_size, hidden_size, num_layers, bidirectional=bidirectional, dropout=p)

        # Linear layers that project the concatenated forward/backward states
        # (hidden_size * directions) back to hidden_size.
        self.fc_hidden = nn.Linear(hidden_size*self.directions, hidden_size)
        self.fc_cell = nn.Linear(hidden_size*self.directions, hidden_size)
        self.dropout = nn.Dropout(p)

    def _merge_directions(self, state, projection):
        ''' Reduce a bidirectional state of shape (num_layers*2, N, hidden_size) to
            (num_layers, N, hidden_size).

            A bidirectional RNN returns the forward and backward state of each layer
            as consecutive rows. Each such pair is concatenated along the feature
            dimension and passed through `projection`.
        '''
        merged_layers = []
        for layer in range(state.shape[0] // 2):
            forward_state = state[2*layer:2*layer + 1]
            backward_state = state[2*layer + 1:2*layer + 2]
            merged_layers.append(projection(torch.cat((forward_state, backward_state), dim=2)))
        # Concatenating along dim 0 keeps the layer and batch dimensions even when
        # either has size 1 (an argument-less squeeze() would drop them).
        return torch.cat(merged_layers, dim=0)

    def forward(self, x):
        ''' Encode x, the input sequence of shape (seq_length, N) where N is the batch size.

            Returns:
                (hidden, cell) for an LSTM encoder, otherwise hidden alone. Each state
                has shape (num_layers, N, hidden_size).
        '''
        # embeddings created using nn.Embedding, shape: (seq_length, N, embedding_size)
        embedding = self.dropout(self.embedding(x))

        if self.cell_type_encoder == 'LSTM':
            outputs, (hidden, cell) = self.rnn(embedding)
        else:
            outputs, hidden = self.rnn(embedding)
            cell = None

        # With bidirectionality switched off there is nothing to concatenate.
        if self.bidirectionality == 'YES':
            hidden = self._merge_directions(hidden, self.fc_hidden)
            if cell is not None:
                cell = self._merge_directions(cell, self.fc_cell)

        if self.cell_type_encoder == 'LSTM':
            return hidden, cell
        return hidden

In [None]:
class Decoder(nn.Module):
    ''' The decoder consumes one target token per call, together with the recurrent
        state, and produces scores over the output vocabulary for that timestep.

        Args:
            input_size: number of entries in the target embedding table.
            embedding_size: size of each character embedding.
            hidden_size: size of the recurrent hidden state.
            output_size: number of output classes (target vocabulary size).
            num_layers: number of stacked recurrent layers.
            p: dropout probability (applied to the embeddings and passed to the RNN).
            cell_type_decoder: one of 'RNN', 'LSTM' or 'GRU'.

        Raises:
            ValueError: if cell_type_decoder is not one of the supported values.
    '''

    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p, cell_type_decoder):
        super(Decoder, self).__init__()

        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type_decoder = cell_type_decoder

        self.embedding = nn.Embedding(input_size, embedding_size)

        # Select the recurrent unit; fail early on an unsupported name instead of
        # leaving self.rnn undefined until the first forward pass.
        rnn_classes = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}
        if self.cell_type_decoder not in rnn_classes:
            raise ValueError("cell_type_decoder must be 'RNN', 'LSTM' or 'GRU', got %r"
                             % (self.cell_type_decoder,))
        self.rnn = rnn_classes[self.cell_type_decoder](
            embedding_size, hidden_size, num_layers, dropout=p)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell = None):
        ''' Run a single decoding step.

            Args:
                x: token indices for the current timestep, shape (N,).
                hidden: recurrent hidden state passed to the RNN.
                cell: LSTM cell state; only used when cell_type_decoder is 'LSTM'.

            Returns:
                (predictions, hidden, cell) for LSTM, otherwise (predictions, hidden).
                predictions has shape (N, output_size).
        '''
        # Add a sequence dimension of length 1: (N,) -> (1, N)
        x = x.unsqueeze(0)
        embedding = self.dropout(self.embedding(x))
        if self.cell_type_decoder == 'LSTM':
            outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        else:
            outputs, hidden = self.rnn(embedding, hidden)
        predictions = self.fc(outputs)

        # Drop the length-1 sequence dimension so the loss function receives
        # (N, length_target_vocabulary)
        predictions = predictions.squeeze(0)
        if self.cell_type_decoder == 'LSTM':
            return predictions, hidden, cell
        return predictions, hidden

In [None]:
class Seq2Seq(nn.Module):
    ''' This is the model. It sends the input to the encoder, receives the final
        recurrent state from it and passes that state to the decoder, which then
        generates the target sequence one token at a time.

        Args:
            encoder: encoder module; returns (hidden, cell) when cell_type_encoder
                is 'LSTM', otherwise hidden alone.
            decoder: decoder module exposing `num_layers`, `hidden_size` and an
                output layer `fc`.
            cell_type_encoder: 'RNN', 'LSTM' or 'GRU'.
            cell_type_decoder: 'RNN', 'LSTM' or 'GRU'.
            bidirectionality: 'YES' if the encoder is bidirectional.
    '''

    def __init__(self, encoder, decoder, cell_type_encoder, cell_type_decoder, bidirectionality):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.cell_type_encoder = cell_type_encoder
        self.cell_type_decoder = cell_type_decoder
        self.directions = 2 if bidirectionality == 'YES' else 1

    def forward(self, source, target, teacher_force_ratio=0.5):
        ''' Encode `source` and decode step by step against `target`.

            Args:
                source: input token indices, shape (source_length, N).
                target: target token indices, shape (target_length, N); row 0 is
                    used as the first decoder input.
                teacher_force_ratio: probability, per timestep, of feeding the
                    ground-truth token instead of the model's own prediction as the
                    next decoder input. It is applied whenever forward() is called,
                    irrespective of train/eval mode.

            Returns:
                outputs: scores of shape (target_length, N, target_vocab_size);
                    row 0 is never written and stays zero.
                correct_predictions_batch: number of sequences in the batch counted
                    as correct by correct_sequences_count.
        '''
        batch_size = source.shape[1]
        target_len = target.shape[0]
        # Sizes are read from the decoder and the input rather than from notebook
        # globals, so the module works for any configuration it was built with.
        target_vocab_size = self.decoder.fc.out_features
        run_device = source.device

        outputs = torch.zeros(target_len, batch_size, target_vocab_size, device=run_device)

        if self.cell_type_encoder == 'LSTM':
            hidden, cell = self.encoder(source)
        else:
            hidden = self.encoder(source)
            # No cell state comes from a non-LSTM encoder; start from zeros in case
            # the decoder is an LSTM.
            cell = torch.zeros(self.decoder.num_layers, batch_size, self.decoder.hidden_size,
                               device=run_device)

        # first input to the Decoder will be <SOS> token
        x = target[0]
        # One row per timestep; sized from the target instead of a fixed 32 so that
        # any sequence length fits.
        predicted_sequences = torch.zeros(target_len, batch_size, dtype=torch.long, device=run_device)

        for t in range(1, target_len):
            # Use previous hidden, cell as context from encoder at start
            if self.cell_type_decoder == 'LSTM':
                output, hidden, cell = self.decoder(x, hidden, cell)
            else:
                output, hidden = self.decoder(x, hidden)

            # Store next output prediction
            outputs[t] = output

            # Get the best token the Decoder predicted (index in the vocabulary)
            predicted_token = output.argmax(1)
            # Teacher forcing is used to ensure the model is not making too many
            # mistakes during initial training
            x = target[t] if random.random() < teacher_force_ratio else predicted_token
            predicted_sequences[t] = predicted_token

        # Drop timestep 0 and switch to (N, length) for the word-level comparison
        predicted_sequences_copy = predicted_sequences[1:].t()
        target_copy = target[1:].t()
        correct_predictions_batch = correct_sequences_count(predicted_sequences_copy, target_copy)
        return outputs, correct_predictions_batch

In [None]:
def correct_sequences_count(predicted_sequences, target_sequences):
    ''' Count how many predicted sequences in a batch match their targets exactly.

        Each row is decoded to characters up to and including the first <EOS>
        (or to the end of the row if there is none). A prediction counts as correct
        when its decoded character list equals the target's.

        Args:
            predicted_sequences: 2-D tensor of predicted token indices, one row per sample.
            target_sequences: 2-D tensor of target token indices, one row per sample.

        Returns:
            int: the number of correct predictions in the batch.
    '''
    eos_index = char_index_tam['<EOS>']

    def decode_until_eos(sequence):
        # Characters of one row, stopping right after the first <EOS>
        chars = []
        for element in sequence:
            idx = element.item()
            chars.append(idx2char_tam[idx])
            if idx == eos_index:
                break
        return chars

    correct_predictions_batch = 0
    words = []

    # Iterate over the rows actually present instead of a global batch size, so a
    # smaller (e.g. final) batch does not index out of range.
    for i in range(len(target_sequences)):
        target_word_list = decode_until_eos(target_sequences[i])
        predicted_word_list = decode_until_eos(predicted_sequences[i])

        # The last decoded entry (normally <EOS>) is not part of the word itself
        target_word = ''.join(target_word_list[:-1])
        predicted_word = ''.join(predicted_word_list[:-1])
        words.append([target_word, predicted_word])

        # Same length and same characters at every position
        if target_word_list == predicted_word_list:
            correct_predictions_batch += 1

    # Use the below lines only when the predictions need to be written to a text file
    # with open('predictions_vanilla.txt', 'w') as file:
    #     # Write each predicted word to the file
    #     for word in words[:-1]:
    #         line = str(word) + '\n'
    #         file.write(line)
    # End of code for writing into text file
    return correct_predictions_batch

In [None]:
def accuracy(dataloader):
    ''' Evaluate the global `model` on `dataloader`.

        The number of correctly predicted words is obtained from the model's forward
        pass (which uses correct_sequences_count). Decoding runs without teacher
        forcing, so the score reflects the model's own predictions only.

        Args:
            dataloader: DataLoader yielding (input_seq, target_seq) batches with one
                sequence per row.

        Returns:
            (eval_loss, accuracy): mean loss per batch and word-level accuracy in percent.

        Raises:
            ValueError: if the dataloader yields no batch or holds no real sample.
    '''
    model.eval()
    try:
        total_loss = 0
        correct_predictions_total = 0
        num_batches = 0

        with torch.no_grad():
            for input_seq, target_seq in dataloader:
                num_batches += 1
                # Transpose to (seq_length, batch) and move to the selected device
                inp_data = input_seq.t().to(device)
                target = target_seq.t().to(device)

                # Forward prop; teacher forcing is disabled for evaluation
                output, correct_predictions_batch = model(inp_data, target, teacher_force_ratio=0.0)
                # Timestep 0 is never predicted, so drop it and flatten for the loss
                output = output[1:].reshape(-1, output.shape[2])
                target = target[1:].reshape(-1)

                loss = criterion(output, target)
                total_loss += loss.item()
                correct_predictions_total += correct_predictions_batch

        # padding() appends one all-<PAD> row to every dataset; it is not a real sample
        num_samples = len(dataloader.dataset) - 1
        if num_batches == 0 or num_samples <= 0:
            raise ValueError('accuracy() needs a dataloader with at least one real sample')

        eval_loss = total_loss/num_batches
        accuracy_percent = (correct_predictions_total/num_samples)*100
    finally:
        # Always hand the model back in training mode, as the training loop expects
        model.train()
    return eval_loss, accuracy_percent

# Hyperparameters

In [None]:
# Vocabulary sizes: they determine the size of the embedding tables and of the
# decoder's output layer
input_size_encoder = len(vocabulary_eng)
input_size_decoder = len(vocabulary_tam)
output_size = len(vocabulary_tam)

# Hyperparameters
num_epochs = 18
learning_rate = 0.001
batch_size = 256  # also read as a global by the training loop's accuracy computation
embedding_size = 200
hidden_size = 1024
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5
bidirectionality = 'YES'  # 'YES' makes the encoder bidirectional; any other value disables it
cell_type_encoder = 'LSTM'  # the encoder checks for 'RNN', 'LSTM' or 'GRU'
cell_type_decoder = 'LSTM'  # the decoder checks for 'RNN', 'LSTM' or 'GRU'

In [None]:
# Instantiate the encoder and decoder from the hyperparameters above and move them
# to the selected device
encoder_net = Encoder(input_size_encoder, embedding_size, hidden_size, num_layers, enc_dropout, bidirectionality, cell_type_encoder).to(device)
decoder_net = Decoder(input_size_decoder,embedding_size, hidden_size, output_size, num_layers, dec_dropout, cell_type_decoder).to(device)

In [None]:
# Wrap encoder and decoder into the full sequence-to-sequence model
model = Seq2Seq(encoder_net, decoder_net, cell_type_encoder, cell_type_decoder, bidirectionality).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# The loss is computed on Tamil target sequences, so the padding index to ignore is
# taken from the target vocabulary
pad_idx = char_index_tam['<PAD>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
# Halve the learning rate every 5 scheduler steps (the training loop steps it once per epoch)
scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)

# Training

In [None]:
# Creating Dataloaders
# Both loaders use the global batch_size and reshuffle on every pass; shuffling the
# validation loader does not change which samples are evaluated, only their batching.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers = 2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, num_workers = 2)

In [None]:
# Training loop
# One pass over train_loader per epoch, followed by an evaluation on val_loader.
for epoch in tqdm(range(num_epochs)):

    # Per-epoch accumulators
    total_loss = 0
    correct_predictions_epoch = 0
    correct_predictions_batch = 0

    for batch_idx, (input_seq, target_seq) in enumerate(train_loader):
        # Make batch_idx 1-based so that after the loop it equals the number of batches
        batch_idx += 1
        # Transpose to (seq_length, batch) and move inputs and targets to the selected device
        inp_data = input_seq.t().to(device)
        target = target_seq.t().to(device)

        # Forward prop (uses the model's default teacher forcing ratio)
        output, correct_predictions_batch = model(inp_data, target)
        # Timestep 0 is never predicted by the model, so drop it and flatten
        # predictions and targets for the loss
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)
        total_loss += loss.item()

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()
        correct_predictions_epoch += correct_predictions_batch
    # Advance the learning-rate schedule once per epoch
    scheduler.step()
    loss_epoch = total_loss/batch_idx
    # The denominator assumes every batch is full; the -1 discounts the all-<PAD>
    # row that padding() appends to the dataset
    train_accuracy = (correct_predictions_epoch/((batch_idx*batch_size) - 1))*100
    val_loss, val_accuracy = accuracy(val_loader)
    print('\nEpoc loss: %.4f' % loss_epoch, '\nCorrect predictions per epoch:',correct_predictions_epoch,
          '\nTraining accuracy: %.2f'% train_accuracy)
    print('\nValidation loss: %.4f'% val_loss, '\nValidation accuracy: %.2f'% val_accuracy)


  6%|▌         | 1/18 [03:05<52:32, 185.46s/it]


Epoc loss: 1.6366 
Correct predictions per epoch: 2160 
Training accuracy: 4.22

Validation loss: 0.6730 
Validation accuracy: 33.58


 11%|█         | 2/18 [06:09<49:18, 184.91s/it]


Epoc loss: 0.4256 
Correct predictions per epoch: 21218 
Training accuracy: 41.44

Validation loss: 0.4701 
Validation accuracy: 52.94


 17%|█▋        | 3/18 [09:14<46:10, 184.67s/it]


Epoc loss: 0.2189 
Correct predictions per epoch: 31972 
Training accuracy: 62.45

Validation loss: 0.4461 
Validation accuracy: 57.58


 22%|██▏       | 4/18 [12:18<43:01, 184.40s/it]


Epoc loss: 0.1579 
Correct predictions per epoch: 35940 
Training accuracy: 70.20

Validation loss: 0.4490 
Validation accuracy: 58.78


 28%|██▊       | 5/18 [15:22<39:54, 184.19s/it]


Epoc loss: 0.1218 
Correct predictions per epoch: 38723 
Training accuracy: 75.63

Validation loss: 0.4450 
Validation accuracy: 60.12


 33%|███▎      | 6/18 [18:26<36:49, 184.13s/it]


Epoc loss: 0.0699 
Correct predictions per epoch: 43493 
Training accuracy: 84.95

Validation loss: 0.4783 
Validation accuracy: 61.90


 39%|███▉      | 7/18 [21:31<33:48, 184.39s/it]


Epoc loss: 0.0456 
Correct predictions per epoch: 45832 
Training accuracy: 89.52

Validation loss: 0.5009 
Validation accuracy: 61.98


 44%|████▍     | 8/18 [24:34<30:41, 184.16s/it]


Epoc loss: 0.0350 
Correct predictions per epoch: 47305 
Training accuracy: 92.39

Validation loss: 0.4926 
Validation accuracy: 62.30


 50%|█████     | 9/18 [27:38<27:37, 184.12s/it]


Epoc loss: 0.0276 
Correct predictions per epoch: 48317 
Training accuracy: 94.37

Validation loss: 0.5230 
Validation accuracy: 62.91


 56%|█████▌    | 10/18 [30:42<24:32, 184.02s/it]


Epoc loss: 0.0246 
Correct predictions per epoch: 48783 
Training accuracy: 95.28

Validation loss: 0.5313 
Validation accuracy: 62.32


 61%|██████    | 11/18 [33:46<21:28, 184.03s/it]


Epoc loss: 0.0146 
Correct predictions per epoch: 50055 
Training accuracy: 97.77

Validation loss: 0.5566 
Validation accuracy: 62.59


 67%|██████▋   | 12/18 [36:50<18:23, 183.98s/it]


Epoc loss: 0.0086 
Correct predictions per epoch: 50690 
Training accuracy: 99.01

Validation loss: 0.6299 
Validation accuracy: 62.98


 72%|███████▏  | 13/18 [39:54<15:20, 184.00s/it]


Epoc loss: 0.0066 
Correct predictions per epoch: 50856 
Training accuracy: 99.33

Validation loss: 0.5790 
Validation accuracy: 63.49


 78%|███████▊  | 14/18 [42:58<12:15, 183.95s/it]


Epoc loss: 0.0052 
Correct predictions per epoch: 50949 
Training accuracy: 99.51

Validation loss: 0.5790 
Validation accuracy: 63.17


 83%|████████▎ | 15/18 [46:02<09:11, 183.99s/it]


Epoc loss: 0.0047 
Correct predictions per epoch: 50986 
Training accuracy: 99.58

Validation loss: 0.6190 
Validation accuracy: 63.35


 89%|████████▉ | 16/18 [49:06<06:08, 184.07s/it]


Epoc loss: 0.0038 
Correct predictions per epoch: 51037 
Training accuracy: 99.68

Validation loss: 0.6501 
Validation accuracy: 63.52


 94%|█████████▍| 17/18 [52:10<03:03, 183.96s/it]


Epoc loss: 0.0030 
Correct predictions per epoch: 51105 
Training accuracy: 99.82

Validation loss: 0.6376 
Validation accuracy: 63.25


100%|██████████| 18/18 [55:14<00:00, 184.15s/it]


Epoc loss: 0.0026 
Correct predictions per epoch: 51122 
Training accuracy: 99.85

Validation loss: 0.6420 
Validation accuracy: 63.25





# Saving and loading model parameters

In [None]:
# Persist only the learned parameters (the state_dict), not the module object itself
torch.save(model.state_dict(), './final_model_vanilla.pt')

In [None]:
# Rebuild the model wrapper and restore the saved parameters.
# NOTE: encoder_net and decoder_net are the module instances created earlier in the
# notebook, so within one session they already hold the trained weights.
model = Seq2Seq(encoder_net, decoder_net, cell_type_encoder, cell_type_decoder, bidirectionality).to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on whatever
# device is in use now (e.g. a CPU-only machine)
model.load_state_dict(torch.load('./final_model_vanilla.pt', map_location=device))

<All keys matched successfully>

In [None]:
# Evaluate the whole test set as a single batch. This rebinds the module-level
# batch_size, which code defined earlier in the notebook may read as a global.
batch_size = len(test_dataset)

In [None]:
# Test loader: fixed order (no shuffling), batched with the batch_size set just above
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, num_workers = 2)

In [None]:
# Report loss and word-level accuracy on the held-out test split
test_loss, test_accuracy = accuracy(test_loader)
print('Test loss: %.4f'% test_loss, '\nTest accuracy: %.2f'% test_accuracy)

Test loss: 1.1347 
Test accuracy: 53.38
