<a href="https://colab.research.google.com/github/arunm917/CS6910_Assignment_3/blob/main/CS6910_Assignment_3_PartB_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading necessary packages and files

In [1]:
import csv
import gdown
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import random
from torch.optim.lr_scheduler import StepLR

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# downloading file from gdrive
output = 'tam_train'
file_id = '1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdJVD8P71fpqGRnvFfOp_6TbVft9NlnH
To: /content/tam_train
100%|██████████| 2.69M/2.69M [00:00<00:00, 123MB/s]

DONE.





In [4]:
# downloading file from gdrive
output = 'tam_valid'
file_id = '1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdp6ojHltRRNLXsmoQbGRc2Qn8X1EUJV
To: /content/tam_valid
100%|██████████| 164k/164k [00:00<00:00, 79.2MB/s]

DONE.





In [5]:
# downloading file from gdrive
output = 'tam_test'
file_id = '1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ' # Google drive ID
#Download the file
gdown.download('https://drive.google.com/uc?id=' + file_id, output, quiet=False)
print('DONE.')

Downloading...
From: https://drive.google.com/uc?id=1pdaTq-g2ZKhRKv6fRrSbEsJkOH5gdrEQ
To: /content/tam_test
100%|██████████| 157k/157k [00:00<00:00, 78.2MB/s]

DONE.





#Preprocessing

In [6]:
train_data_df = pd.read_csv('tam_train')
valid_data_df = pd.read_csv('tam_valid')
test_data_df = pd.read_csv('tam_test')

In [7]:
train_data_df.columns = ['English','Tamil']
valid_data_df.columns = ['English','Tamil']
test_data_df.columns = ['English','Tamil']

# Creating vocabulary and padding

In [8]:
# Checkign unique chars
############################################## Train data #########################################################
char_list_eng_train = []
for i in range(len(train_data_df['English'])):
  char = [*train_data_df.loc[i, 'English']]
  char_list_eng_train.extend(char)

char_list_tam_train = []
for i in range(len(train_data_df['Tamil'])):
  char = [*train_data_df.loc[i, 'Tamil']]
  char_list_tam_train.extend(char)

############################################## Validation data #########################################################
char_list_eng_val = []
for i in range(len(valid_data_df['English'])):
  char = [*valid_data_df.loc[i, 'English']]
  char_list_eng_val.extend(char)

char_list_tam_val = []
for i in range(len(valid_data_df['Tamil'])):
  char = [*valid_data_df.loc[i, 'Tamil']]
  char_list_tam_val.extend(char)

############################################## Test data #########################################################
char_list_eng_test = []
for i in range(len(test_data_df['English'])):
  char = [*test_data_df.loc[i, 'English']]
  char_list_eng_test.extend(char)

char_list_tam_test = []
for i in range(len(test_data_df['Tamil'])):
  char = [*test_data_df.loc[i, 'Tamil']]
  char_list_tam_test.extend(char)

In [9]:
unique_tam_char_train = list(set(char_list_tam_train))
unique_tam_char_val = list(set(char_list_tam_val))
unique_tam_char_test = list(set(char_list_tam_test))

In [10]:
i = 0; j=0
for char in unique_tam_char_train:
  # print(char)
  if char in unique_tam_char_val:
    j += 1
  else:
    i = i + 1
print('No. of char in test but not in train:', i)

No. of char in test but not in train: 0


In [11]:
i = 0
for char in unique_tam_char_test:
  if char in unique_tam_char_train:
    i = 0
  else:
    i += 1
print('No. of char in test but not in train:', i)

No. of char in test but not in train: 0


In [12]:
# Indexing
SOS_token = '<SOS>'
EOS_token = '<EOS>'
PAD_token = '<PAD>'
UNK_token = '<UNK>'

vocabulary_eng = list(set(char_list_eng_train))
vocabulary_eng = [PAD_token] + [UNK_token] + [SOS_token] + [EOS_token] + vocabulary_eng 

vocabulary_tam = list(set(char_list_tam_train))
vocabulary_tam = [PAD_token] + [UNK_token] + [SOS_token] + [EOS_token] + vocabulary_tam

In [13]:
char_index_eng = {value: index for index, value in enumerate(vocabulary_eng)}
char_index_tam = {value: index for index, value in enumerate(vocabulary_tam)}

In [14]:
idx2char_eng = {value: key for key, value in char_index_eng.items()}
idx2char_tam = {value: key for key, value in char_index_tam.items()}

In [15]:
# Define the tokenizer

def tokenize_eng(word):
    chars = [*word]
    tokens_eng = [char_index_eng[char] if char in char_index_eng else 0 for char in chars]
    
    return tokens_eng

def tokenize_tam(word):
    chars = [*word]
    tokens_tam = [char_index_tam[char] if char in char_index_tam else 0 for char in chars]
    
    return tokens_tam

In [16]:
# Define the training pairs
training_pairs = train_data_df.values.tolist()
val_pairs = valid_data_df.values.tolist()
test_pairs = test_data_df.values.tolist()

In [17]:
eng_words = [tokenize_eng(pair[0]) for pair in training_pairs]
tam_words = [tokenize_tam(pair[1]) for pair in training_pairs]

In [18]:
# Determining max length english

lengths_eng = []

for word in eng_words:

    word_length = len(word)
    lengths_eng.append(word_length)

In [19]:
# Determining max length tamil
max_length_tam = max([len(words) for words in tam_words])

# Determining max length english and tamil
max_length = max([len(words) for words in eng_words + tam_words])

In [20]:
def padding(word_pairs):
  ''' Function to pad the input and target sequences. Padding is done to ensure that
      all the training, validation and test samples are of equal size.'''
  
  eng_words = [tokenize_eng(pair[0]) for pair in word_pairs]
  tam_words = [tokenize_tam(pair[1]) for pair in word_pairs]

  
  padded_input_sequences = [torch.tensor([char_index_eng['<SOS>']] + eng_words + [char_index_eng['<EOS>']] + [(char_index_eng['<PAD>'])]*(max_length - len(eng_words))) for eng_words in eng_words]
  padded_target_sequences = [torch.tensor([char_index_eng['<SOS>']] + tam_words + [char_index_tam['<EOS>']] + [(char_index_tam['<PAD>'])]*(max_length - len(tam_words))) for tam_words in tam_words]
  tensor = torch.tensor([char_index_eng['<PAD>']]*(max_length+2))
  padded_input_sequences.append(tensor)
  padded_target_sequences.append(tensor)
  padded_input_sequences = torch.stack(padded_input_sequences)
  padded_target_sequences = torch.stack(padded_target_sequences)
  
  return(padded_input_sequences,padded_target_sequences)

In [21]:
# Creating datasets
training_input_sequences, training_target_sequences = padding(training_pairs)
train_dataset = torch.utils.data.TensorDataset(training_input_sequences, training_target_sequences)

val_input_sequences, val_target_sequences = padding(val_pairs)
val_dataset = torch.utils.data.TensorDataset(val_input_sequences, val_target_sequences)

test_input_sequences, test_target_sequences = padding(test_pairs)
test_dataset = torch.utils.data.TensorDataset(test_input_sequences, test_target_sequences)

# Architecture

In [22]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, bidirectionality, cell_type_encoder):
        super(Encoder, self).__init__()
        ''' The encoder encodes the input characters and converts it into a hidden representation'''
        self.bidirectionality = bidirectionality

        if self.bidirectionality == 'YES':
          bidirectional = True
          self.directions = 2
        else:
          bidirectional = False
          self.directions = 1

        self.cell_type_encoder = cell_type_encoder
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)

        if self.cell_type_encoder == 'RNN':
          self.rnn = nn.RNN(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)
        if self.cell_type_encoder == 'LSTM':
          self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)
        if self.cell_type_encoder == 'GRU':
          self.rnn = nn.GRU(embedding_size, hidden_size, num_layers,bidirectional = bidirectional, dropout=p)

        self.fc_hidden = nn.Linear(hidden_size*self.directions, hidden_size)
        self.fc_cell = nn.Linear(hidden_size*self.directions, hidden_size)
        self.dropout = nn.Dropout(p)

    def forward(self, x):

        embedding = self.dropout(self.embedding(x))

        if self.cell_type_encoder == 'LSTM':
          encoder_outputs, (hidden, cell) = self.rnn(embedding)
        else:
          encoder_outputs, hidden = self.rnn(embedding)

############### Applying Bidirectionality ###########################

        if self.cell_type_encoder == 'LSTM':
          if self.bidirectionality == 'YES':
            row = 1
            hidden_list = []
            cell_list = []
            for i in range(hidden.shape[0]//2):
              hidden_concatenated = self.fc_hidden(torch.cat((hidden[row-1:row], hidden[row:row+1]), dim=2))
              cell_concatenated = self.fc_cell(torch.cat((cell[row-1:row], cell[row:row+1]), dim=2))
              hidden_list.append(hidden_concatenated)
              cell_list.append(cell_concatenated)
              row += 2

            hidden_tensor = torch.stack(hidden_list)
            cell_tensor = torch.stack(cell_list)
            hidden_squeezed = hidden_tensor.squeeze()
            cell_squeezed = cell_tensor.squeeze()
          else:
            hidden_squeezed = hidden
            cell_squeezed = cell
        else:
          if self.bidirectionality == 'YES':
            row = 1
            hidden_list = []
            for i in range(hidden.shape[0]//2):
              hidden_concatenated = self.fc_hidden(torch.cat((hidden[row-1:row], hidden[row:row+1]), dim=2))
              hidden_list.append(hidden_concatenated)
              row += 2

            hidden_tensor = torch.stack(hidden_list)
            hidden_squeezed = hidden_tensor.squeeze()
          else:
            hidden_squeezed = hidden

        if self.cell_type_encoder == 'LSTM':
          return encoder_outputs, hidden_squeezed, cell_squeezed
        else:
          return encoder_outputs, hidden_squeezed


In [23]:
class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p, cell_type_decoder, bidirectionality):
        super(Decoder, self).__init__()
        ''' The decoder decodes the input from the encoder and produces an output at each timestep '''
        self.bidirectionality = bidirectionality
        if self.bidirectionality == 'YES':
          bidirectional = True
          self.directions = 2
        else:
          bidirectional = False
          self.directions = 1
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type_decoder = cell_type_decoder

        self.embedding = nn.Embedding(input_size, embedding_size)
        if self.cell_type_decoder == 'RNN':
          self.rnn = nn.RNN((hidden_size*self.directions + embedding_size), hidden_size, num_layers, dropout=p)
        if self.cell_type_decoder == 'LSTM':
          self.rnn = nn.LSTM((hidden_size*self.directions + embedding_size), hidden_size, num_layers, dropout=p)
        if self.cell_type_decoder == 'GRU':
          self.rnn = nn.GRU((hidden_size*self.directions + embedding_size), hidden_size, num_layers, dropout=p)
        
        self.energy = nn.Linear(hidden_size*(self.directions + 1), 1)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p)
        self.softmax = nn.Softmax(dim = 0)
        self.relu = nn.ReLU()

    def forward(self, x, encoder_outputs, hidden, cell = None):
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))

        input_length = encoder_outputs.shape[0] # Decoder input is encoder output

        encoder_outputs_reshaped = encoder_outputs.repeat(num_layers,1,1)
        hidden_reshaped = hidden.repeat(input_length, 1, 1 ) # Reshaping decoder hidden so that it can be concatenated

        energy = self.relu(self.energy(torch.cat((hidden_reshaped, encoder_outputs_reshaped), dim = 2)))
        self.attention_scores = self.softmax(energy)
        ## Using einsum to get the respective element wise products
        context_vector = torch.einsum("snk,snl->knl", self.attention_scores, encoder_outputs_reshaped)
        rnn_input = torch.cat((context_vector, embedding), dim = 2)

        if self.cell_type_decoder == 'LSTM':
          outputs, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        else:
          outputs, hidden = self.rnn(rnn_input, hidden)

        predictions = self.fc(outputs)
        predictions = predictions.squeeze(0)

        if self.cell_type_decoder == 'LSTM':
          return predictions, hidden, cell, self.attention_scores
        else:
          return predictions, hidden, self.attention_scores

In [24]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, cell_type_encoder, cell_type_decoder, bidirectionality):
        super(Seq2Seq, self).__init__()
        ''' This is the model. This function recieves the input and sends it to the encoder and receives the output 
            and the hidden states from the encoder and sends it to the decoder for decoding'''
        self.encoder = encoder
        self.decoder = decoder
        self.cell_type_encoder = cell_type_encoder
        self.cell_type_decoder = cell_type_decoder
        if bidirectionality == 'YES':
          bidirectional = True
          self.directions = 2
        else:
          bidirectional = False
          self.directions = 1

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(vocabulary_tam)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        if self.cell_type_encoder != 'LSTM':
          cell = torch.zeros(num_layers, batch_size, hidden_size).to(device)

        if self.cell_type_encoder == 'LSTM':
          encoder_outputs, hidden, cell = self.encoder(source)
        else:
          encoder_outputs, hidden = self.encoder(source)

        # first input to the Decoder will be <SOS> token
        x = target[0]
        predicted_sequences = torch.zeros([32, batch_size]).to(device)

        for t in range(1, target_len):
            if self.cell_type_decoder == 'LSTM':
              output, hidden, cell, attention_scores_batch = self.decoder(x, encoder_outputs, hidden, cell)
            else:
              output, hidden, attention_scores_batch = self.decoder(x, encoder_outputs, hidden)

            # Store next output prediction
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            predicted_token = output.argmax(1)
            x = target[t] if random.random() < teacher_force_ratio else predicted_token

            predicted_sequences[t] = predicted_token.squeeze()
        
        predicted_sequences_copy = predicted_sequences[1:].t()
        target_copy = target[1:].t()
        correct_predictions_batch = correct_sequences_count(predicted_sequences_copy, target_copy)
        return outputs, correct_predictions_batch, attention_scores_batch

In [43]:
''' This function is used to aggregate the predicted vectors and check if the predictions match the
    target words. The function returns the number of correct predictions per batch'''
def correct_sequences_count(predicted_sequences, target_sequences):
  
    correct_predictions_batch = 0
    words = []
    for i in range(batch_size):
        # print('predicted sequence:', predicted_sequences[i].shape)
        # print('target sequence:', target_tensor[i].shape)
        target_word_list = []
        target_word_length = 0
        predicted_word_length = 0
        flag_target = 1
        flag_predicted = 1

        for element in target_sequences[i]:
            idx = element.item()
            target_char =  idx2char_tam[idx]
            target_word_list.append(target_char)
            if flag_target == 1:
              target_word_length += 1
              if idx == char_index_tam['<EOS>']:
                flag_target = 0
                break

        target_word_length = target_word_length - 1
        target_word = ''.join(target_word_list[:-1])
    
        predicted_word_list = []
        for element in predicted_sequences[i]:
            idx = element.item()
            predicted_char =  idx2char_tam[idx]
            predicted_word_list.append(predicted_char)
            if flag_predicted == 1:
              predicted_word_length += 1
              if idx == char_index_tam['<EOS>']:
                flag_predicted = 0
                break
        
        predicted_word_length = predicted_word_length - 1
        predicted_word = ''.join(predicted_word_list[:-1])
        words.append([target_word, predicted_word])
        
        if target_word_length == predicted_word_length:
          if all(x == y for x, y in zip(target_word_list, predicted_word_list)):
              correct_predictions_batch += 1
              
    '''Use the below lines only when the predictions need to be written to a text file'''
    # with open('predictions_attention.txt', 'w') as file:
    #     # Write each predicted word to the file
    #     for word in words[:-1]:
    #         line = str(word) + '\n'
    #         file.write(line)
    ''' End of code for writing into text file '''
    return correct_predictions_batch

In [26]:
''' This function predicts the accuracy by getting the correct words from correct_sequences_count function'''
def accuracy(dataloader):
  model.eval()

  with torch.no_grad():
    total_loss = 0
    correct_predictions_total = 0
    correct_predictions_batch = 0

    for batch_idx, (input_seq, target_seq) in enumerate(dataloader):
        batch_idx += 1
        # Get input and targets and get to cuda
        inp_data = input_seq.t().to(device)
        target = target_seq.t().to(device)

        # Forward prop
        output, correct_predictions_batch, attention_scores_batch = model(inp_data, target)
        # Output is of shape (trg_len, batch_size, output_dim) but Cross Entropy Loss
        # doesn't take input in that form. For example if we have MNIST we want to have
        # output to be: (N, 10) and targets just (N). Here we can view it in a similar
        # way that we have output_words * batch_size that we want to send in into
        # our cost function, so we need to do some reshapin. While we're at it
        # Let's also remove the start token while we're at it
        
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)
        total_loss += loss.item()
        correct_predictions_total += correct_predictions_batch
    eval_loss = total_loss/batch_idx
    accuracy = (correct_predictions_total/((batch_idx*batch_size) - 1))*100
    model.train()
  return eval_loss, accuracy, attention_scores_batch

# Hyperparameters

In [44]:
# Hyperparameters
num_epochs = 16
learning_rate = 0.001
batch_size = 256

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
input_size_encoder = len(vocabulary_eng)
input_size_decoder = len(vocabulary_tam)
output_size = len(vocabulary_tam)
encoder_embedding_size = 200
decoder_embedding_size = 200
hidden_size = 1024
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5
bidirectionality = 'NO'
cell_type_encoder = 'GRU'
cell_type_decoder = 'LSTM'

In [45]:
encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout, bidirectionality, cell_type_encoder).to(device)
decoder_net = Decoder(input_size_decoder,decoder_embedding_size, hidden_size, output_size, num_layers, dec_dropout, cell_type_decoder, bidirectionality).to(device)

In [46]:
model = Seq2Seq(encoder_net, decoder_net, cell_type_encoder, cell_type_decoder, bidirectionality).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
pad_idx = char_index_eng['<PAD>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)

# Training

In [30]:
# Creating Dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers = 2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, num_workers = 2)

In [31]:
# Training loop
for epoch in tqdm(range(num_epochs)):

    total_loss = 0
    correct_predictions_epoch = 0
    correct_predictions_batch = 0

    for batch_idx, (input_seq, target_seq) in enumerate(train_loader):
        batch_idx += 1
        # Get input and targets and get to cuda
        inp_data = input_seq.t().to(device)
        target = target_seq.t().to(device)

        # Forward prop
        output, correct_predictions_batch, attention_scores_batch = model(inp_data, target)
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)
        total_loss += loss.item()

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()
        correct_predictions_epoch += correct_predictions_batch
    scheduler.step()
    loss_epoch = total_loss/batch_idx
    train_accuracy = (correct_predictions_epoch/((batch_idx*batch_size) - 1))*100
    val_loss, val_accuracy, attention_scores_batch = accuracy(val_loader)
    print('\nEpoc loss: %.4f' % loss_epoch, '\nCorrect predictions during epoch:',correct_predictions_epoch,
          '\nTraining accuracy: %.2f'% train_accuracy)
    print('\nValidation loss: %.4f'% val_loss, '\nValidation accuracy: %.2f'% val_accuracy)


  6%|▋         | 1/16 [02:22<35:31, 142.07s/it]


Epoc loss: 2.3099 
Correct predictions during epoch: 7 
Training accuracy: 0.01

Validation loss: 1.4719 
Validation accuracy: 0.76


 12%|█▎        | 2/16 [04:47<33:37, 144.09s/it]


Epoc loss: 0.9104 
Correct predictions during epoch: 4978 
Training accuracy: 9.72

Validation loss: 0.6109 
Validation accuracy: 37.34


 19%|█▉        | 3/16 [07:12<31:18, 144.52s/it]


Epoc loss: 0.3888 
Correct predictions during epoch: 20846 
Training accuracy: 40.72

Validation loss: 0.4980 
Validation accuracy: 53.63


 25%|██▌       | 4/16 [09:37<28:57, 144.81s/it]


Epoc loss: 0.2583 
Correct predictions during epoch: 28137 
Training accuracy: 54.96

Validation loss: 0.4647 
Validation accuracy: 56.63


 31%|███▏      | 5/16 [12:03<26:35, 145.07s/it]


Epoc loss: 0.2045 
Correct predictions during epoch: 31547 
Training accuracy: 61.62

Validation loss: 0.4620 
Validation accuracy: 58.24


 38%|███▊      | 6/16 [14:28<24:11, 145.17s/it]


Epoc loss: 0.1421 
Correct predictions during epoch: 36101 
Training accuracy: 70.51

Validation loss: 0.4726 
Validation accuracy: 60.44


 44%|████▍     | 7/16 [16:54<21:47, 145.24s/it]


Epoc loss: 0.1176 
Correct predictions during epoch: 37975 
Training accuracy: 74.17

Validation loss: 0.4502 
Validation accuracy: 61.00


 50%|█████     | 8/16 [19:19<19:22, 145.26s/it]


Epoc loss: 0.1026 
Correct predictions during epoch: 39053 
Training accuracy: 76.28

Validation loss: 0.4853 
Validation accuracy: 61.15


 56%|█████▋    | 9/16 [21:44<16:56, 145.25s/it]


Epoc loss: 0.0937 
Correct predictions during epoch: 40076 
Training accuracy: 78.27

Validation loss: 0.4835 
Validation accuracy: 61.61


 62%|██████▎   | 10/16 [24:09<14:31, 145.23s/it]


Epoc loss: 0.0869 
Correct predictions during epoch: 40648 
Training accuracy: 79.39

Validation loss: 0.4944 
Validation accuracy: 62.20


 69%|██████▉   | 11/16 [26:35<12:06, 145.23s/it]


Epoc loss: 0.0660 
Correct predictions during epoch: 42788 
Training accuracy: 83.57

Validation loss: 0.5147 
Validation accuracy: 62.86


 75%|███████▌  | 12/16 [29:00<09:40, 145.19s/it]


Epoc loss: 0.0566 
Correct predictions during epoch: 43705 
Training accuracy: 85.36

Validation loss: 0.5484 
Validation accuracy: 63.15


 81%|████████▏ | 13/16 [31:25<07:15, 145.20s/it]


Epoc loss: 0.0518 
Correct predictions during epoch: 44327 
Training accuracy: 86.58

Validation loss: 0.5706 
Validation accuracy: 63.42


 88%|████████▊ | 14/16 [33:50<04:50, 145.24s/it]


Epoc loss: 0.0472 
Correct predictions during epoch: 44843 
Training accuracy: 87.59

Validation loss: 0.5275 
Validation accuracy: 63.27


 94%|█████████▍| 15/16 [36:16<02:25, 145.28s/it]


Epoc loss: 0.0438 
Correct predictions during epoch: 45379 
Training accuracy: 88.63

Validation loss: 0.5293 
Validation accuracy: 62.98


100%|██████████| 16/16 [38:41<00:00, 145.09s/it]


Epoc loss: 0.0366 
Correct predictions during epoch: 46312 
Training accuracy: 90.45

Validation loss: 0.5660 
Validation accuracy: 63.47





# Saving model and loading model parameters

In [32]:
torch.save(model.state_dict(), './final_model_attention.pt')

In [47]:
model = Seq2Seq(encoder_net, decoder_net, cell_type_encoder, cell_type_decoder, bidirectionality).to(device)
model.load_state_dict(torch.load('./final_model_attention.pt'))

<All keys matched successfully>

In [48]:
batch_size = len(test_dataset)

In [49]:
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, num_workers = 2)

In [50]:
test_loss, test_accuracy, attention_scores = accuracy(test_loader)
print('Test loss: %.4f'% test_loss, '\nTest accuracy: %.2f'% test_accuracy)

Test loss: 0.7663 
Test accuracy: 52.50


In [37]:
print(attention_scores.shape)

torch.Size([64, 4096, 1])
