In [1]:
import pandas as pd
import random
import wandb
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import seaborn as sns
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
wandb.login()
# 3dc8367198d0460ba99efb94e713de7e299e685d

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'inp_embed_size':{
            'values': [32, 64, 128, 256]
        },
        'dropout': {
            'values': [0.2, 0.3, 0.4]
        },
        'lr': {
            'values': [0.01, 0.001, 0.003]
        },
        'hidden_size': {
            'values': [64, 128, 256]
        },
        'bidirectional': {
            'values': ['Yes','No']
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'cell_type':{
            'values': ['rnn', 'gru', 'lstm']
        }
    }
}

algorithms = {
    'rnn': nn.RNN,
    'gru': nn.GRU,
    'lstm': nn.LSTM
}

sweep_id = wandb.sweep(sweep=sweep_config, project='DL_Assignment3')

Create sweep with ID: iieguai8
Sweep URL: https://wandb.ai/arun_cs23m017/DL_Assignment3/sweeps/iieguai8


In [4]:
SOW_token = 0
EOW_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.letter2index = {}
        self.letter2count = {}
        self.index2letter = {0: "0", 1: "1"}
        self.n_letters = 2 # Count SOW and EOW

    def addWord(self, word):
        for ch in word:
            self.addLetter(ch)

    def addLetter(self, ch):
        if ch not in self.letter2index:
            self.letter2index[ch] = self.n_letters
            self.letter2count[ch] = 1
            self.index2letter[self.n_letters] = ch
            self.n_letters += 1
        else:
            self.letter2count[ch] += 1

In [5]:
input_lang = Lang('eng')
output_lang = Lang('hin')


x_train = pd.read_csv('/kaggle/input/aksharantar_sampled/hin/hin_train.csv', header=None) #, nrows=1000)
x_val = pd.read_csv('/kaggle/input/aksharantar_sampled/hin/hin_valid.csv', header=None)
x_test = pd.read_csv('/kaggle/input/aksharantar_sampled/hin/hin_test.csv', header=None)
sz = x_train[0]

In [6]:
MAX_LENGTH = 50

def indexesFromWord(lang, word):
    return [lang.letter2index[ch] for ch in word]

def tensorFromWord(lang, word):
    indexes = indexesFromWord(lang, word)
    indexes.append(EOW_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(1, -1)

def wordFromTensor(lang, tensor):
    s = ""
    for i in tensor:
        if(i.item()==1):
            break
        s += lang.index2letter[i.item()]
    return s

def get_dataloader(x, input_lang, output_lang, batch_size):
    n = len(x[0])
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)

    for i in range(n):
        input_lang.addWord(x[0][i])
        output_lang.addWord(x[1][i])
        inp_ids = indexesFromWord(input_lang, x[0][i])
        tgt_ids = indexesFromWord(output_lang, x[1][i])
        inp_ids.append(EOW_token)
        tgt_ids.append(EOW_token)
        input_ids[i, :len(inp_ids)] = inp_ids
        target_ids[i, :len(tgt_ids)] = tgt_ids

    data = TensorDataset(torch.LongTensor(input_ids).to(device),
                               torch.LongTensor(target_ids).to(device))

    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

In [7]:
class EncoderRNN(nn.Module):
    def __init__(self, config, input_size):
        super(EncoderRNN, self).__init__()

        self.embedding = nn.Embedding(input_size, config.inp_embed_size)
        self.algo = algorithms[config.cell_type](config.inp_embed_size, config.hidden_size, batch_first=True)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, input):
        embedded = self.dropout(self.embedding(input))
        output, hidden = self.algo(embedded)
        return output, hidden

In [8]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
        scores = scores.squeeze(2).unsqueeze(1)
        weights = F.softmax(scores, dim=-1)
        context = torch.bmm(weights, keys)
        return context, weights

class AttnDecoderRNN(nn.Module):
    def __init__(self, config, output_size):
        super(AttnDecoderRNN, self).__init__()
        self.dropout_p = config.dropout
        hidden_size = config.hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attention = Attention(hidden_size)
        self.algo = algorithms[config.cell_type](hidden_size + hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(self.dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOW_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []
        attentions = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden, attn_weights = self.forward_step(
                decoder_input, decoder_hidden, encoder_outputs
            )
            decoder_outputs.append(decoder_output)
            attentions.append(attn_weights)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        attentions = torch.cat(attentions, dim=1)

        return decoder_outputs, decoder_hidden, attentions


    def forward_step(self, input, hidden, encoder_outputs):
        embedded =  self.dropout(self.embedding(input))

        query = hidden.permute(1, 0, 2)
        context, attn_weights = self.attention(query, encoder_outputs)
        input_gru = torch.cat((embedded, context), dim=2)

        output, hidden = self.algo(input_gru, hidden)
        output = self.out(output)

        return output, hidden, attn_weights

In [9]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, batch_size, teacher_forcing = True):

    total_loss = 0
    correct = 0
    all_preds=[]
    all_labels=[]
    k = 0

    for data in dataloader:
        input_tensor, target_tensor = data

        target_tensor2 = None
        if (teacher_forcing):
            target_tensor2 = target_tensor

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        encoder_outputs, encoder_hidden = encoder(input_tensor)

        decoder_outputs, _, attentions = decoder(encoder_outputs, encoder_hidden, target_tensor2)

        outputs = decoder_outputs.view(-1, decoder_outputs.size(-1))
        labels = target_tensor.view(-1)

        loss = criterion(outputs, labels)
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)

        i = 0
        while (i < batch_size * MAX_LENGTH):
            j = 0
            while (j < MAX_LENGTH):
                if(predicted[i+j] != labels[i+j]):
                    break
                j+=1
            if(j==MAX_LENGTH):
                correct += 1
            i += MAX_LENGTH
        k += batch_size

        if(k%6400==0):
            print(k, loss.item(), correct)
            print(wordFromTensor(input_lang, input_tensor[0]), wordFromTensor(output_lang, target_tensor[0]), wordFromTensor(output_lang, predicted[:45]))
            
    return total_loss / len(dataloader), correct / k

In [10]:
def show_attention(input_sentence, output_words, attentions):
    # Convert list of attention weights to a 2D array
    attentions = np.array(attentions)
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(attentions[:len(output_words), :len(input_sentence)],
                xticklabels=input_sentence, yticklabels=output_words,
                cmap='viridis', ax=ax)
    plt.xlabel('Input Sentence')
    plt.ylabel('Output Sentence')
    plt.show()

In [11]:
def train(train_dataloader, val_dataloader, test_dataloader, encoder, decoder, n_epochs, config):
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=config.lr)
    criterion = nn.NLLLoss()

    for epoch in range(1, n_epochs + 1):
        print(epoch)
        loss, acc = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size)
        print("Train: accuracy:", acc, "loss:", loss)
        if(acc<0.01 and epoch>=15):
            break
        wandb.log({'train_accuracy': acc})
        wandb.log({'train_loss': loss})
        val_loss, val_acc = train_epoch(val_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size, teacher_forcing=False)
        print("Validation: accuracy:", val_acc, "Loss:", val_loss, "\n")
        wandb.log({'val_accuracy': val_acc})
        wandb.log({'val_loss': val_loss})
    
    test_loss, test_acc = train_epoch(test_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, config.batch_size, teacher_forcing=False)
    print("Test: accuracy:", test_acc, "Loss: ", test_loss, "\n")

In [12]:
num_epochs = 25

best_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'inp_embed_size':{
            'values': [32]
        },
        'dropout': {
            'values': [0.2]
        },
        'lr': {
            'values': [0.001]
        },
        'hidden_size': {
            'values': [256]
        },
        'bidirectional': {
            'values': ['No']
        },
        'batch_size': {
            'values': [128]
        },
        'cell_type':{
            'values': ['gru']
        }
    }
}

sweep_id = wandb.sweep(sweep=best_config, project='DL_Ass3')

def main():
    with wandb.init() as run:
#         wandb.run.name =
        train_dataloader = get_dataloader(x_train, input_lang, output_lang, wandb.config.batch_size)
        val_dataloader = get_dataloader(x_val, input_lang, output_lang, wandb.config.batch_size)
        test_dataloader = get_dataloader(x_test, input_lang, output_lang, wandb.config.batch_size)
        encoder = EncoderRNN(wandb.config, input_lang.n_letters).to(device)
        decoder = AttnDecoderRNN(wandb.config, output_lang.n_letters).to(device)
        print(input_lang.n_letters, output_lang.n_letters)
        train(train_dataloader, val_dataloader, test_dataloader, encoder, decoder, num_epochs, wandb.config)
        encoder.eval()
        decoder.eval()
        evaluate(encoder, decoder)

wandb.agent(sweep_id, function=main, count=1) # calls main function for count number of times.
wandb.finish()

Create sweep with ID: 05pksl1i
Sweep URL: https://wandb.ai/arun_cs23m017/DL_Ass3/sweeps/05pksl1i


[34m[1mwandb[0m: Agent Starting Run: brafl68b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: No
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	inp_embed_size: 32
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: Currently logged in as: [33mcs23m017[0m ([33marun_cs23m017[0m). Use [1m`wandb login --relogin`[0m to force relogin


28 67
1
6400 0.5879917144775391 0
bamhrauli बमरौली साााा
12800 0.4968850016593933 0
chhatrarajaniti छात्रराजनीति सार्रा्
19200 0.49629971385002136 0
namki नमकी सिा्
25600 0.4747169613838196 0
giltiyaan गिल्टियाँ सुल्यिया
32000 0.42543068528175354 0
hibernation हाइबर्नेशन सिरंर्यिं्
38400 0.4174657464027405 0
pedamma पेदाम्मा साल्लाय
44800 0.3811451196670532 0
krenman क्रेनमैन रररान्ान
51200 0.38597580790519714 1
daanye दाँये नानगां000000000000000000000000000000000000000
Train: accuracy: 1.953125e-05 loss: 0.5008761431276798
Validation: accuracy: 0.001220703125 Loss: 0.49657717254012823 

2
6400 0.2875199615955353 33
yavishesh याविशेष अासिसे
12800 0.22325775027275085 217
mahilavargachi महिलावर्गाची महिलववर्वीची
19200 0.18409422039985657 599
margatahi मार्गातही मररगतततिि
25600 0.1704285442829132 1195
israrul इसरारुल इसरारुल
32000 0.15305383503437042 1906
kaydon कायदों कायोों
38400 0.1379275768995285 2779
kachiguda काचीगुड़ा काचिगुड़ा
44800 0.1391189694404602 3728
manita मनिता मािता
51200

Traceback (most recent call last):
  File "/tmp/ipykernel_34/3224865044.py", line 48, in main
    evaluate(encoder, decoder)
NameError: name 'evaluate' is not defined


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁▂▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
val_loss,█▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁

0,1
train_accuracy,0.49607
train_loss,0.04123
val_accuracy,0.44653
val_loss,0.16627


Run brafl68b errored:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/tmp/ipykernel_34/3224865044.py", line 48, in main
    evaluate(encoder, decoder)
NameError: name 'evaluate' is not defined

[34m[1mwandb[0m: [32m[41mERROR[0m Run brafl68b errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipykernel_34/3224865044.py", line 48, in main
[34m[1mwandb[0m: [32m[41mERROR[0m     evaluate(encoder, decoder)
[34m[1mwandb[0m: [32m[41mERROR[0m NameError: name 'evaluate' is not defined
[34m[1mwandb[0m: [32m[41mERROR[0m 
