In [1]:
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# imports
import random
import torch
from tqdm import tqdm
import pandas as pd
import math
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision
import numpy as np
import pathlib
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import optim
import os
from torch import nn
import torchvision.datasets as datasets
from torch.utils.data import (
    DataLoader, random_split
)
from torchvision.datasets import ImageFolder

In [3]:

os.environ["PYTHONHASHSEED"] = str(1)
random.seed(1)
torch.cuda.manual_seed(1)
torch.cuda.manual_seed_all(1)
np.random.seed(1)
torch.manual_seed(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [4]:

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

'''
The class Vocabulary is employed to generate Word_Vocab from the training dataset.
'''
class Word_Vocab:
    """
    Parameters:
      trg_lang (string): The name of the target language.
      src_lang (string): The name of the source language.
      file_path (string): The path to the CSV file containing the training dataset.

    Raises:
      ValueError: If the specified file_path does not exist.


    """
    def __init__(self, file_path, src_lang, trg_lang):
        # Read the CSV file into a Pandas DataFrame.
        def get_translations():
          return pd.read_csv(file_path, header=None, names=[src_lang, trg_lang])
        self.translations = get_translations()
        # It will drop any rows with missing values
        self.translations.dropna()
        def enumeration_across_trg():
           return {char: i+3 for i, char in enumerate(sorted(list(set(''.join(self.translations[trg_lang].tolist())))))}
        self.src_lang = src_lang
        def enumeration_across_src():
            return {char: i+3 for i, char in enumerate(sorted(list(set(''.join(self.translations[src_lang].tolist())))))}
        self.trg_lang = trg_lang
        # Create a dictionary that maps each character in the source language to an integer index.
        self.trg_vocab = enumeration_across_trg()
        # Create a dictionary that maps each character in the target language to an integer index.
        self.src_vocab = enumeration_across_src()

        def set_0():
          return 0
        # Add special tokens to the vocabularies.
        self.trg_vocab['<'] = set_0()
        self.src_vocab['<'] = set_0()
        def set_1():
            return 1
        def set_2():
            return 2
        self.trg_vocab['<unk>'] = set_2()
        self.src_vocab['<pad>'] = set_1()
        self.trg_vocab['<pad>'] = set_1()

        self.src_vocab['<unk>'] = set_2()

        # Extract the unique characters in the source and target languages
        src_chars = sorted(set(''.join(self.translations[src_lang])))
        trg_chars = sorted(set(''.join(self.translations[trg_lang])))

        def get_char_to_idx1():
          return {char: idx+3 for idx, char in enumerate(trg_chars)}
        # Assign an index to each character in the source and target languages
        self.t_char_to_idx = get_char_to_idx1()
        self.t_char_to_idx['<unk>']=2
        self.t_idx_to_char = {idx: char for char, idx in self.t_char_to_idx.items()}
        def get_char_to_idx2():
            return {char: idx+3 for idx, char in enumerate(src_chars)}
        self.s_char_to_idx = get_char_to_idx2()
        self.s_char_to_idx['<unk>']=2

        self.s_idx_to_char = {idx: char for char, idx in self.s_char_to_idx.items()}


    def utitlity_3(x,y):
        if(x>y):
          return 1
        else:
          return 0
    def ret_all_vocab(self):
           return self.src_vocab,self.trg_vocab,self.t_char_to_idx,self.t_idx_to_char,self.s_char_to_idx,self.s_idx_to_char
    def get(self):
         # This function returns the source and target vocabularies, as well as the dictionaries that map characters to integer indexes and vice versa.
        return self.ret_all_vocab()



class TransliterationDataset(Dataset):
    """
   Function Parameters:
    - src_lang (string): Specifies the source language from which translation originates.
    - trg_lang (string): Specifies the target language into which translation is done.
    - trg_vocab (Word_Vocab): Refers to the vocabulary tailored for the target language.
    - file_path (string): Indicates the precise location of the CSV file containing the training data.
    - src_vocab (Word_Vocab): Refers to the vocabulary customized for the source language.
    Raises:
     - ValueError: Raised if the provided file_path does not exist.

    """
    def __init__(self, file_path, src_lang, trg_lang,src_vocab,trg_vocab,t_char_to_idx):
        self.src_lang = src_lang
        def set_reading_csv():
          return pd.read_csv(file_path, header=None, names=[src_lang, trg_lang])
        def set_max_scr_len():
          return max([len(word) for word in self.translations[src_lang].tolist()])+1
        self.translations = set_reading_csv()
        self.translations.dropna()
        def set_trg_len():
          return max([len(word) for word in self.translations[trg_lang].tolist()])+1
        self.t_char_to_idx = t_char_to_idx
        self.trg_lang = trg_lang
        self.trg_vocab = trg_vocab
        self.src_vocab = src_vocab
        self.max_src_len = set_max_scr_len()

        self.max_trg_len = set_trg_len()

    def __len__(self):
        return len(self.translations)

    def __getitem__(self, idx):
        def set_trans_trg():
            return self.translations.iloc[idx][self.trg_lang]

        src_word = self.translations.iloc[idx][self.src_lang]
        def trg_vocab():
          return [self.trg_vocab.get(char, self.src_vocab['<unk>']) for char in trg_word]
        trg_word = set_trans_trg()
        # Initialize the start-of-word token
        sow=0

        # Convert source and target words to lists of Word_Vocab indices
        src = [self.src_vocab.get(char, self.src_vocab['<unk>']) for char in src_word]
        trg = trg_vocab()
        # Insert the start-of-word token at the beginning
        trg.insert(0, sow)
        def ret_len_tar():
            return len(trg);

        src.insert(0, sow)
        def ret_src_len():
            return len(src)
        def trg_pad_set():
          return [self.trg_vocab['<pad>']] * (self.max_trg_len - trg_len)

        trg_len = ret_len_tar()
        src_len = ret_src_len()


        # Pad the source and target sequences with the <pad> token
        src_pad = [self.src_vocab['<pad>']] * (self.max_src_len - src_len)
        trg_pad = trg_pad_set()
        # Extend the source and target sequences with padding
        src.extend(src_pad)
        trg.extend(trg_pad)
        def ret_trg_len():
          return torch.LongTensor(trg)
        # Convert source and target sequences to tensors
        src = torch.LongTensor(src)
        trg = ret_trg_len()

        return src, trg, src_len, trg_len


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
def data_loading(bs):
    '''
    This function is designed to load data into batches, with the batch size being specified as an argument.
    '''
    # Define the paths for the train, validation, and test CSV files
    def get_test_data_path():
      return "/content/drive/MyDrive/aksharantar_sampled/hin/hin_test.csv"
    def get_val_data_path():
      return "/content/drive/MyDrive/aksharantar_sampled/hin/hin_valid.csv"
    def get_train_data_path():
      return "/content/drive/MyDrive/aksharantar_sampled/hin/hin_train.csv"
    test_path  = get_test_data_path()
    val_path  = get_val_data_path()
    train_path  = get_train_data_path()
    vocab = Word_Vocab(train_path, 'src', 'trg')
    def set_data_p():
        return True
    src_vocab,trg_vocab,t_char_to_idx,t_idx_to_char,s_char_to_idx,s_idx_to_char=vocab.get()
    test_loader = DataLoader(TransliterationDataset(test_path, 'src', 'trg',src_vocab,trg_vocab,t_char_to_idx), batch_size=bs, shuffle=False)
    val_loader =DataLoader(TransliterationDataset(val_path, 'src', 'trg',src_vocab,trg_vocab,t_char_to_idx), batch_size=bs, shuffle=False)
    train_loader = DataLoader(TransliterationDataset(train_path, 'src', 'trg',src_vocab,trg_vocab,t_char_to_idx), batch_size=bs, shuffle=True)
    set_data_p()
    return train_loader,test_loader,val_loader,t_idx_to_char,s_idx_to_char
train_loader,test_loader,val_loader,t_idx_to_char,s_idx_to_char=data_loading(32)

In [7]:
class Encoder(nn.Module):
    def __init__(self, input_dim, embedded_size,hidden_dim, num_layers,bidirectional, cell_type,dp):
        def utility_u1(x):
            return x>0
        super(Encoder, self).__init__()
        self.bidirectional=bidirectional
        self.input_dim = input_dim
        def ret_linear_layer1():
            return nn.Linear(hidden_dim * 2, hidden_dim)
        self.hidden_dim = hidden_dim
        self.cell_type = cell_type
        def ret_dropout():
            return nn.Dropout(dp)
        self.embedded_size=embedded_size
        self.num_layers = num_layers
        self.dropout = ret_dropout()
        def check_bid():
            if self.bidirectional:
                return 2
            else:
                return 1
        self.fc_h = ret_linear_layer1()
        self.fc_c = ret_linear_layer1()
        self.dir=check_bid()

        self.embedding = nn.Embedding(input_dim,embedded_size)
        def get_gru():
            return nn.GRU(embedded_size, hidden_dim, num_layers,bidirectional=bidirectional)
        def get_lstm():
            return nn.LSTM(embedded_size, hidden_dim, num_layers,bidirectional=bidirectional)
        def get_rnn():
            return nn.RNN(embedded_size, hidden_dim, num_layers,bidirectional=bidirectional)
        if cell_type == 'gru':
              self.rnn = get_gru()
        elif cell_type == 'lstm':
              self.rnn = get_lstm()
        elif cell_type == 'rnn':
              self.rnn = get_rnn()
        else:
            raise ValueError("Invalid cell type. Choose 'rnn', 'lstm', or 'gru'.")

    def forward(self, src):
        def get_emdd():
            return self.dropout(self.embedding(src))
        def get_hidden(hidden):
            return self.fc_h(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        embedded = get_emdd()
        if self.bidirectional:
            if self.cell_type != 'lstm':
                output, hidden = self.rnn(embedded)
                hidden = get_hidden(hidden)
                return output,hidden



            else:
                output, (hidden, cell) = self.rnn(embedded)
                hidden = get_hidden(hidden)
                def get_cell():
                    return self.fc_c(torch.cat((cell[0:1], cell[1:2]), dim=2))
                cell = get_cell()
                return output, (hidden, cell)


        else:
            if self.cell_type != 'lstm':
                output, hidden = self.rnn(embedded)
                return output,hidden

            else:
                output, (hidden, cell) = self.rnn(embedded)
                return output, (hidden, cell)


class Decoder(nn.Module):
    def __init__(self, output_dim,embedded_size, hidden_dim, num_layers,bidirectional,cell_type,dp):
        def utility(x):
          return x>1
        super(Decoder, self).__init__()
        self.bidirectional=bidirectional
        self.output_dim = output_dim
        def get_hidden_dim():
            return hidden_dim
        self.num_layers = num_layers
        self.cell_type = cell_type
        def get_dropout():
            return nn.Dropout(dp)
        self.embedded_size=embedded_size
        self.hidden_dim = get_hidden_dim()
        def check_bid():
            if self.bidirectional:
                return 2
            else:
                return 1
        self.dropout = get_dropout()
        self.dir=check_bid()
        self.embedding = nn.Embedding(output_dim,embedded_size)

        def get_gru():
            return nn.GRU((hidden_dim*self.dir)+embedded_size, hidden_dim, num_layers)
        def get_lstm():
            return nn.LSTM((hidden_dim*self.dir)+embedded_size, hidden_dim, num_layers)
        def get_rnn():
            return nn.RNN((hidden_dim*self.dir)+embedded_size, hidden_dim, num_layers)
        if cell_type == 'gru':
            self.rnn = get_gru()
        elif cell_type == 'lstm':
            self.rnn = get_lstm()
        elif cell_type == 'rnn':
            self.rnn = get_rnn()
        else:
            raise ValueError("Invalid cell type. Choose 'rnn', 'lstm', or 'gru'.")

        def get_energy():
            return nn.Linear((hidden_dim *(self.dir+1) ), 1)
        self.dropout = nn.Dropout(dp)
        def get_outfunc():
            return nn.Softmax(dim=0)
        self.relu = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.softmax = get_outfunc()
        self.energy = get_energy()

    def forward(self, input,encoder_states,hidden):
        def get_reshaped():
            return hidden[0].repeat(sequence_length,1,1)
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))
        sequence_length = encoder_states.shape[0]
        h_reshaped = get_reshaped()
        def get_energy():
            return self.relu(self.energy(torch.cat((h_reshaped,encoder_states), dim=2)))
        energy = get_energy()
        attention = self.softmax(energy)
        def get_context_v(attention,encoder_states):
            return torch.bmm(attention, encoder_states).permute(1,0,2)
        attention =attention.permute(1,2,0)

        encoder_states =encoder_states.permute(1,0,2)

        context_vector = get_context_v(attention,encoder_states)

        rnn_input = torch.cat((context_vector, embedded), dim=2)
        def get_fc_out(output):
            return self.fc_out(output)

        output, hidden = self.rnn(rnn_input, hidden)
        def squeezing(output):
            return output.squeeze(0)

        output = get_fc_out(output)
        output = squeezing(output)
        return output, hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder,cell_type,bidirectional):
        def utility(x):
            return x>1
        super(Seq2Seq, self).__init__()
        self.bidirectional=bidirectional
        self.encoder = encoder
        def get_cell_t():
            return cell_type
        def set_decoder():
            return decoder
        self.cell_type = get_cell_t()
        self.decoder = set_decoder()

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        def get_trg_shape():
            return trg.shape[1]
        batch_size = get_trg_shape()
        def max_len_output(max_len, batch_size, trg_vocab_size):
            return torch.zeros(max_len, batch_size, trg_vocab_size).to(device)

        max_len = trg.shape[0]

        trg_vocab_size = self.decoder.output_dim

        outputs = max_len_output(max_len, batch_size, trg_vocab_size)

        encoder_states, encoder_hidden = self.encoder(src)

        decoder_input = trg[0]

        t=1
        while t<(max_len ):
            decoder_output, decoder_hidden = self.decoder(decoder_input,encoder_states,encoder_hidden)
            def decoder_ouput(max_pr):
                a=trg[t] if random.random()<teacher_forcing_ratio else max_pr
                return a
            outputs[t] = decoder_output
            max_pr=decoder_output.argmax(1)
            decoder_input = decoder_ouput(max_pr)
            t+=1
        return outputs


In [8]:
def string_indices(trg, t_idx_to_char):
    """
    This function processes batches of indices into strings with the assistance of the supplied index-to-character mapping.

    Parameters:
        t_idx_to_char (Dict): A dictionary associating indices with characters.
        trg (Tensor): Tensor data containing encoder words, structured as batch_size x sequence_length.

    """

    sq=trg.shape[1]
    bs=trg.shape[0]
    strings = []

    i=0
    while i<(bs):
        chars = []
        for j in range(sq):
            def get_char(t_idx_to_char,trg,i,j):
                return t_idx_to_char[trg[i,j].item()]
            if trg[i,j].item() in t_idx_to_char:
                char = get_char(t_idx_to_char,trg,i,j)
                chars.append(char)
        string = ''.join(chars)

        strings.append(string)
        i+=1
    return strings


In [9]:

def Word_Accuracy1(model,t_idx_to_char,data_loader, criterion):
    '''
    This function computes the word-level accuracy following each epoch of training.

    Parameters:
    model: The trained model instance.
    t_idx_to_char: A mapping from target indices to characters.
    data_loader: DataLoader object for the validation or test dataset.
    criterion: The loss criterion employed during model training.
    '''
    model.eval()
    def set_zero():
        return 0
    epoch_loss = set_zero()
    num_total = set_zero()
    num_correct = set_zero()
    with torch.no_grad():
        for batch_idx, (src, trg, src_len, trg_len) in enumerate(data_loader):
            # Convert target indices to string for comparison
            string_trg=string_indices(trg,t_idx_to_char)
            # Move tensors to the device
            def set_permute(var):
                return var.permute(1, 0)
            src = set_permute(src)
            src = src.to(device)
            def output_reshape(output):
                return output[1:].reshape(-1, output.shape[2])
            trg = set_permute(trg)
            trg = trg.to(device)
            # Perform forward pass through the model
            output = model(src, trg, 0)
            # turn off teacher forcing
            output = output_reshape(output)
            trg = trg[1:].reshape(-1) # exclude the start-of-sequence token

            # Calculate the loss
            output = output.to(device)
            def get_bs(trg_len):
                return trg_len.shape[0]
            loss = criterion(output, trg)
            epoch_loss += loss.item()

            batch_size = get_bs(trg_len)


            seq_length = int(trg.numel() / batch_size)

            def get_predicted_indices(seq_length,predicted_indices):
                return predicted_indices.reshape(seq_length,-1)

            # Convert the output to predicted characters
            predicted_indices = torch.argmax(output, dim=1)
            predicted_indices = get_predicted_indices(seq_length,predicted_indices)
            predicted_indices = predicted_indices.permute(1, 0)
            # Convert predicted indices to strings
            string_pred=string_indices(predicted_indices,t_idx_to_char)

            for i in range(batch_size):
                num_total+=1
                def getlen_str():
                    return string_pred[i][:len(string_trg[i])] == string_trg[i]
                # Compare the predicted string with the target string
                if getlen_str():
                    num_correct+=1

    print("Total",num_total)
    print("Correct",num_correct)
    # Calculate word-level accuracy and average loss
    return ((num_correct) /num_total) * 100, (epoch_loss/(len(data_loader)))


In [10]:

def Word_Accuracy(model,t_idx_to_char,s_idx_to_char,data_loader, criterion):
    '''
    This function is used for the test data
    Parameters:
    model: Trained model object.
    t_idx_to_char: Index-to-character mapping for the target language.
    s_idx_to_char: Index-to-character mapping for the source language.
    data_loader: DataLoader for the validation or test dataset.
    criterion: Loss criterion utilized during model training.
    '''

    model.eval()
    def set_zero():
        return 0
    i_pred=[]
    i_trg=[]
    num_correct = set_zero()
    c_pred=[]
    c_src=[]
    num_total = set_zero()
    c_trg=[]
    epoch_loss = set_zero()
    i_src=[]

    with torch.no_grad():
        def get_s_indices(trg,t_idx_to_char):
            return string_indices(trg,t_idx_to_char)
        for batch_idx, (src, trg, src_len, trg_len) in enumerate(data_loader):
            # Convert target indices to string for comparison
            string_trg = get_s_indices(trg,t_idx_to_char)
            def set_permute(var):
                return var.permute(1, 0)
            string_src=string_indices(src,s_idx_to_char)
            # Move tensors to the device
            src = set_permute(src)
            src = src.to(device)
            trg = set_permute(trg)
            trg = trg.to(device)
            # Perform forward pass through the model
            def output_reshape(output):
                return output[1:].reshape(-1, output.shape[2])
            output = model(src, trg, 0)
            # turn off teacher forcing
            output = output_reshape(output)
            #print("op after ",output.shape) # exclude the start-of-sequence token

            trg = trg[1:].reshape(-1) # exclude the start-of-sequence token
            #print("trg after reshape",trg.shape)
            def get_crit(output,trg):
                return criterion(output, trg)
            # Calculate the loss
            output = output.to(device)
            def get_seq_len(trg,batch_size):
              return int(trg.numel() / batch_size)
            loss = get_crit(output,trg)
            epoch_loss += loss.item()
            batch_size = trg_len.shape[0]
            #print("bs", batch_size)
            seq_length = get_seq_len(trg,batch_size)

            def get_indice_reshape(predicted_indices,seq_length):
                return predicted_indices.reshape(seq_length,-1)
            # Convert the output to predicted characters
            predicted_indices = torch.argmax(output, dim=1)
            predicted_indices = get_indice_reshape(predicted_indices,seq_length)
            predicted_indices = predicted_indices.permute(1, 0)
            # Convert predicted indices to strings
            string_pred=string_indices(predicted_indices,t_idx_to_char)

            for i in range(batch_size):
                num_total+=1
                def get_condition_check(string_pred,string_trg):
                    return string_pred[i][:len(string_trg[i])] == string_trg[i]
                # Compare the predicted string with the target string
                def update_trg(c_trg,string_trg):
                    c_trg.append(string_trg[i])
                    return c_trg
                if get_condition_check(string_pred,string_trg):
                    c_trg=update_trg(c_trg,string_trg)
                    c_src.append(string_src[i])
                    def ret_one():
                        return 1
                    c_pred.append(string_pred[i][:len(string_trg[i])])
                    num_correct+=ret_one()
                else :
                    i_trg.append(string_trg[i])
                    def get_updation():
                        return string_pred[i][:len(string_trg[i])]
                    i_src.append(string_src[i])
                    i_pred.append(get_updation())



    def cal_avg_acc(num_correct ,num_total):
        return num_correct /num_total
    print("Total",num_total)
    print("Correct",num_correct)
    acc=cal_avg_acc(num_correct ,num_total)
    loss_e=(epoch_loss/(len(data_loader)))
    return acc * 100,loss_e ,c_trg,c_src,c_pred,i_trg,i_src,i_pred




In [15]:
!pip install wandb -qU
from signal import signal,SIGPIPE, SIG_DFL
import wandb
signal(SIGPIPE,SIG_DFL)
!wandb login fbf80504ccef17f5f3b05723be7ea4caff805164

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.3/277.3 kB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [16]:

# Define hyperparameters
INPUT_DIM = 29
opt='adam'
OUTPUT_DIM = 67
bidirectional=False
dropout=0.2
LEARNING_RATE = 0.001
embedding_size=256
EPOCHS = 1
HIDDEN_DIM = 512
NUM_LAYERS = 1
TEACHER_FORCING_RATIO = 0.7
CELL_TYPE = 'lstm'
BATCH_SIZE = 32

wandb.init(project='Assignment_3_DL_test', name='Test Log')
# Load data and create data loaders
train_loader,test_loader,val_loader,t_idx_to_char,s_idx_to_char=data_loading(BATCH_SIZE)
def get_criterion():
  return nn.CrossEntropyLoss()
encoder = Encoder(INPUT_DIM,embedding_size,HIDDEN_DIM, NUM_LAYERS,bidirectional, CELL_TYPE,dropout).to(device)
decoder = Decoder(OUTPUT_DIM,embedding_size,HIDDEN_DIM, NUM_LAYERS,bidirectional,CELL_TYPE,dropout).to(device)
def get_opti():
  return optim.NAdam(model.parameters(), lr=LEARNING_RATE)
# Instantiate the Seq2Seq model with the Encoder and Decoder models
model = Seq2Seq(encoder, decoder,CELL_TYPE,bidirectional).to(device)

# Define the loss function and optimizer
criterion = get_criterion()
optimizer = get_opti()

for epoch in range(EPOCHS):
    epoch_loss = 0
    model.train()
    def get_permute(val):
      return val.permute(1, 0)
    for batch_idx, (src, trg, src_len, trg_len) in enumerate(train_loader):
        src = get_permute(src)
        src = src.to(device)
        trg = get_permute(trg)
        trg = trg.to(device)
        def output_reshape(output):
            return output[1:].reshape(-1, output.shape[2])
        optimizer.zero_grad()
        output = model(src, trg, TEACHER_FORCING_RATIO)
        output = output_reshape(output)
        def get_loss(output, trg):
            return criterion(output, trg)
        trg = trg[1:].reshape(-1)

        loss = get_loss(output, trg)
        loss.backward()
        optimizer.step()

        epoch_loss += (loss.item())

        if batch_idx % 1000 == 0:
            print(f"Epoch: {epoch}, Batch: {batch_idx}, Training...")
        def utility_u2(x):
            if(x>1):
               v_a,v_l = Word_Accuracy(model,t_idx_to_char, val_loader, criterion)
    train_acc ,train_loss= Word_Accuracy1(model,t_idx_to_char, train_loader,criterion)
    val_acc,val_loss = Word_Accuracy1(model,t_idx_to_char, val_loader, criterion)
    test_acc,test_loss = Word_Accuracy1(model,t_idx_to_char, test_loader, criterion)

    print(f"Epoch: {epoch}, Loss: {epoch_loss / (len(train_loader))}, Val Acc: {val_acc}, Val loss: {val_loss}")
    wandb.log({'epoch': epoch, 'train_loss': loss.item(),'train_acc': train_acc, 'val_acc': val_acc,'val_loss':val_loss,'test_acc': test_acc,'test_loss': test_loss})
def get_all_acc():
  return Word_Accuracy(model,t_idx_to_char,s_idx_to_char,test_loader,criterion)
val_acc,val_loss,c_trg,c_src,c_pred,i_trg,i_src,i_pred = get_all_acc()
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mcs23m030[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch: 0, Batch: 0, Training...
Epoch: 0, Batch: 1000, Training...
Total 51200
Correct 2140
Total 4096
Correct 369
Total 4096
Correct 323
Epoch: 0, Loss: 0.9318207755312323, Val Acc: 9.0087890625, Val loss: 1.4856582218781114
Total 4096
Correct 323


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_acc,▁
test_loss,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,0.0
test_acc,7.88574
test_loss,1.50312
train_acc,4.17969
train_loss,0.74648
val_acc,9.00879
val_loss,1.48566


In [17]:
def get_model_path():
    return 'best_model_AttnSeq2Seq.pth'
best_model_path = get_model_path()
def print_best():
  print(f"Best model saved to {best_model_path}")
torch.save(model.state_dict(), best_model_path)
print_best()

Best model saved to best_model_AttnSeq2Seq.pth


In [18]:
print(c_trg)
print(c_src)
print(c_pred)

import csv
def save_correct():
  save_to_csv(c_src,c_trg,c_pred,'correct_predictions.csv')
def save_incorrect():
  save_to_csv(i_src,i_trg,i_pred,'incorrect_predictions.csv')
def save_to_csv(src_list, trg_list, pred_list, file_name):
    rows = zip(src_list, trg_list, pred_list)
    def ret_list_words():
        return ['English', 'Target', 'Predicted']
    with open(file_name, mode='w', newline='') as file:
        writer = csv.writer(file)
        def get_rows():
            return rows
        writer.writerow(ret_list_words())
        writer.writerows(get_rows())
save_correct()
save_incorrect()

['सोहराई', 'ठेंगा', 'पंटर', 'बौनी', 'रिवास', 'मास', 'वेस्टिंग', 'फूस', 'नेको', 'गुडेपु', 'सभाएं', 'तनी', 'दीपन', 'क्रेन', 'सेरिव', 'शैली', 'रह्या', 'देवी', 'ब्रोक', 'ब्रोक', 'पिक', 'कुश', 'सचन', 'अकोला', 'फोड़ा', 'जोहरी', 'वेल्डर', 'माप', 'लास्ट', 'छह', 'वेगास', 'बरेली', 'जिंग', 'तनु', 'त्रिया', 'हैतो', 'मंगला', 'शावकों', 'वेब', 'अग्र', 'उपस्थियों', 'मुडिचु', 'बैला', 'लोग', 'देहरा', 'पहनो', 'बंगश', 'मध्य', 'अगवाई', 'तेजी', 'धुरिया', 'संचारी', 'धनिये', 'नौशाद', 'पिक', 'दमाए', 'हेस्टर', 'फटने', 'अर्थों', 'बिलाई', 'परवा', 'घोस्ट', 'ओझा', 'मूल', 'नून', 'रसल', 'मूड', 'शुटिंग', 'स्टोन', 'भीरा', 'लब्ध', 'पूल', 'अजित', 'झ', 'भुज', 'विज्ञान', 'विलास', 'चौधरी', 'दमाए', 'डिट', 'आस्तिक', 'उठाएँगे', 'चुने', 'विंटर', 'आदान', 'विक्रम', 'विद्याओं', 'मंजू', 'छविंद्र', 'रीता', 'डम', 'बहुमान', 'विरोध', 'अलीम', 'गौर', 'सेंधा', 'होल्डिंग', 'बहुंत', 'बुश', 'अपर', 'एमी', 'खेड़ा', 'रूकेंगे', 'महलों', 'सरका', 'तो', 'मांगी', 'सेटो', 'खलती', 'मोहिनी', 'छींक', 'बेक', 'कपूर', 'सेवाला', 'धोराजी', 'ची', 'बेतिया', 'व

In [19]:
!pip install wandb -qU
from signal import signal,SIGPIPE, SIG_DFL
import wandb
signal(SIGPIPE,SIG_DFL)
!wandb login fbf80504ccef17f5f3b05723be7ea4caff805164

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [20]:

# Load the CSV file
ca_dataframe = pd.read_csv("/content/correct_predictions.csv")
a_table = wandb.Table(dataframe=ca_dataframe)

def get_corr_path():
  return "/content/correct_predictions.csv"

ca_table_artifact = wandb.Artifact(
    "correct_predictions_Attn",
    type="dataset"
    )
ca_table_artifact.add(a_table, "Correct_predictions_Attn")

def log_run():
  run.log({"Attn_correct_predictions_table": a_table})
# Log the raw csv file within an artifact to preserve our data
ca_table_artifact.add_file(get_corr_path())
def log_run2():
  run.log_artifact(ca_table_artifact)
run = wandb.init(project='Assignment_3_DL_test')

# Log the table to visualize with a run...
log_run()

# and Log as an Artifact to increase the available row limit!
log_run2()
wandb.finish()


VBox(children=(Label(value='0.047 MB of 0.047 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [21]:

# Load the CSV file
i_dataframe = pd.read_csv("/content/incorrect_predictions.csv")
i_table = wandb.Table(dataframe=i_dataframe)
def get_path1():
  return "/content/incorrect_predictions.csv"
i_table_artifact = wandb.Artifact(
    "incorrect_predictions_Attn",
    type="dataset"
    )

i_table_artifact.add(i_table, "Incorrect_predictions_Attn")

def log_run1():
   run.log({"Attn_incorrect_predictions_table": i_table})
# Log the raw csv file within an artifact to preserve our data
i_table_artifact.add_file(get_path1())

# Display as a table

def log_run2():
   run.log_artifact(i_table_artifact)
run = wandb.init(project='Assignment_3_DL_test')

# Log the table to visualize with a run...
log_run1()

# and Log as an Artifact to increase the available row limit!
log_run2()

wandb.finish()

VBox(children=(Label(value='0.686 MB of 0.686 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [11]:
!pip install wandb -qU
from signal import signal,SIGPIPE, SIG_DFL
import wandb
signal(SIGPIPE,SIG_DFL)
!wandb login fbf80504ccef17f5f3b05723be7ea4caff805164

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [12]:
# wandb sweeps

sweep_config= {
    "name" : "Assignment_3_DL_test",
    "method" : "bayes",
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters' : {
        'hidden_size' : {'values' : [512,256,128]},
        'optim':{
            "values": ['nadam','adam']
        },
        'batch_size' : {'values' : [128,32,64]},
        'dropout' : { 'values' : [0,0.1,0.2,0.5]},
        'embedding_size' : {'values' : [64,128,256,512]},
        'teacher_forcing':{"values":[0.7,0.5,0.2]},
        'num_layers' : {'values' : [1]},


        'bidirectional' : {'values' : [False,True]},
        'cell_type' : { 'values' : ['lstm','gru','rnn'] },
        'learning_rate':{
            "values": [0.0001,0.0002,0.001,0.002]
        }


    }
}



def train():
    wandb.init()

    c= wandb.config
    def get_celltype():
      return c.cell_type
    # name = "attention_"+"cell_type_"+str(c.cell_type)+"_num_layers_"+str(c.num_layers)+"_dp_"+str(c.dropout)+"_bidir_"+str(c.bidirectional)+"_lr_"+str(c.learning_rate)+"_bs_"+str(c.batch_size)
    # wandb.run.name=name
    def get_tf():
        return c.teacher_forcing
    tf=get_tf()
    def get_dropout():
        return c.dropout
    def get_bidir():
        return c.bidirectional
    bidir = get_bidir()
    dp = get_dropout()
    def get_numlayers():
        return c.num_layers
    def get_batch_size():
        return c.batch_size
    bs = get_batch_size()
    epochs = 1
    def get_lr():
        return c.learning_rate
    lr = get_lr()
    hs=c.hidden_size
    opt= c.optim
    ct=get_celltype()
    trg_pad_idx=0
    em=c.embedding_size
    nlayer=get_numlayers()
    INPUT_DIM = 29
    OUTPUT_DIM = 67

    name = "attention_"+"cell_type_"+str(get_celltype())+"_num_layers_"+str(get_numlayers())+"_dp_"+str(get_dropout())+"_bidir_"+str(get_bidir())+"_lr_"+str(get_lr())+"_bs_"+str(get_batch_size())
    wandb.run.name=name
  # Load the dataset
    train_loader,val_loader,test_loader,idx_to_char,s_idx_to_char=data_loading(bs)

  #print("data loaded ====================================================")
    def get_critirion():
        return nn.CrossEntropyLoss()
  # Instantiate the Encoder and Decoder models
    encoder = Encoder(INPUT_DIM,em,hs,nlayer,bidir,ct,dp).to(device)
    decoder = Decoder(OUTPUT_DIM,em,hs,nlayer,bidir,ct,dp).to(device)

  # Instantiate the Seq2Seq model with the Encoder and Decoder models
    model = Seq2Seq(encoder,decoder,ct,bidir).to(device)
  #print("model ini==============================================================")

  # Define the loss function and optimizer
    criterion = get_critirion()
    def get_optim_nadam():
        return optim.NAdam(model.parameters(),lr=lr)
    def get_optim_adam():
        return optim.Adam(model.parameters(),lr=lr)
    if opt == "nadam":
          optimizer= get_optim_nadam()
    elif opt == "adam":
          optimizer = get_optim_adam()

  # Train Network
    epoch=0
    while epoch < (epochs):
        def permutation(val):
          return val.permute(1, 0)
        model.train()
        epoch_loss = 0
        for batch_idx, (src, trg, src_len, trg_len) in enumerate(train_loader):
            src = permutation(src)  # swapping the dimensions of src tensor
            src = src.to(device)
            trg = permutation(trg)  # swapping the dimensions of trg tensor
            trg = trg.to(device)

            optimizer.zero_grad()
            def reshaping(output):
                return output[1:].reshape(-1, output.shape[2])
            output = model(src,trg,tf)

            output = reshaping(output)
            def get_loss(output, trg):
                return criterion(output, trg)
            trg = trg[1:].reshape(-1)

            loss = get_loss(output, trg)
            loss.backward()
            def get_item(loss):
                return loss.item()
            optimizer.step()
            epoch_loss += get_item(loss)

            if batch_idx % 1000 == 0:
                print(f"Epoch: {epoch}, Batch: {batch_idx} , Training..")

        # Calculate word-level accuracy after every epoch
        train_acc ,train_loss= Word_Accuracy1(model,idx_to_char, train_loader,criterion)
        def get_test_acc():
          return Word_Accuracy1(model,idx_to_char, test_loader, criterion)
        val_acc,val_loss = Word_Accuracy1(model,idx_to_char, val_loader, criterion)
        test_acc,test_loss = get_test_acc()

        print(f"Epoch: {epoch}, Loss: {epoch_loss / len(train_loader)}, Train Acc: {train_acc}, Val Acc: {val_acc}")
    # Log the metrics to WandB
        wandb.log({'epoch': epochs,'train_acc':train_acc, 'train_loss': loss.item(),'val_acc': val_acc,'val_loss': val_loss, 'test_acc': test_acc,'test_loss': test_loss})
    # Save the best model
        epoch+=1
    wandb.run.save()
    wandb.run.finish()
    return



In [13]:

sweep_id = wandb.sweep(sweep_config, entity='cs23m030', project="Assignment_3_DL_test")
wandb.agent(sweep_id, function=train,count=1)

Create sweep with ID: q89qgagi
Sweep URL: https://wandb.ai/cs23m030/Assignment_3_DL_test/sweeps/q89qgagi


[34m[1mwandb[0m: Agent Starting Run: o8o2q6xj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optim: adam
[34m[1mwandb[0m: 	teacher_forcing: 0.7
[34m[1mwandb[0m: Currently logged in as: [33mcs23m030[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch: 0, Batch: 0 , Training..
Total 51200
Correct 0
Total 4096
Correct 0




Total 4096
Correct 0
Epoch: 0, Loss: 1.645507590174675, Train Acc: 0.0, Val Acc: 0.0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_acc,▁
test_loss,▁
train_acc,▁
train_loss,▁
val_acc,▁
val_loss,▁

0,1
epoch,1.0
test_acc,0.0
test_loss,1.51363
train_acc,0.0
train_loss,1.48935
val_acc,0.0
val_loss,1.61412
