In [1]:
import torch
from torch import nn
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import copy
from torch.utils.data import Dataset, DataLoader
import gc
import random
import wandb
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.font_manager import FontProperties

In [2]:
wandb.login(key="986fd96a25245251243e3084fc375526692b03b6")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdhamu2908[0m ([33mm_dhamu2908[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Checking if CUDA is available, else use CPU
print(device)  # Printing the device being used (CUDA or CPU)
END_TOKEN = '>'  # Defining the end token for sequences
START_TOKEN = '<'  # Defining the start token for sequences
PAD_TOKEN = '_'  # Defining the padding token for sequences
TEACHER_FORCING_RATIO = 0.5  # Ratio of teacher forcing during training

# Paths to the train, test, and validation CSV files
train_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_train.csv"
test_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_test.csv"
val_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_valid.csv"

# Reading the train, test, and validation CSV files into pandas dataframes
train_df = pd.read_csv(train_csv, header=None)
test_df = pd.read_csv(test_csv, header=None)
val_df = pd.read_csv(val_csv, header=None)

# Extracting source and target sequences from train, test, and validation dataframes
train_source, train_target = train_df[0].to_numpy(), train_df[1].to_numpy()
val_source, val_target = val_df[0].to_numpy(), val_df[1].to_numpy()
test_source, test_target = test_df[0].to_numpy(), test_df[1].to_numpy()

cuda


In [4]:
# Function to add padding to source sequences
def add_padding(source_data, MAX_LENGTH):
    """
    Add padding to source sequences and truncate if necessary.
    
    Args:
    - source_data: List of source sequences
    - MAX_LENGTH: Maximum length of source sequences
    
    Returns:
    - padded_source_strings: List of padded source sequences
    """
    padded_source_strings = []
    for i in range(len(source_data)):
        source_str = START_TOKEN + source_data[i] + END_TOKEN  # Add start and end tokens
        source_str = source_str[:MAX_LENGTH]  # Truncate if longer than MAX_LENGTH
        source_str += PAD_TOKEN * (MAX_LENGTH - len(source_str))  # Pad with PAD_TOKEN

        padded_source_strings.append(source_str)
        
    return padded_source_strings


# Function to convert source strings to sequences of indices
def generate_string_to_sequence(source_data, source_char_index_dict):
    """
    Convert source strings to sequences of indices using char_index_dict.
    
    Args:
    - source_data: List of padded source strings
    - source_char_index_dict: Dictionary mapping characters to their indices
    
    Returns:
    - source_sequences: Padded sequence of character indices
    """
    source_sequences = []
    for i in range(len(source_data)):
        source_sequences.append(get_chars(source_data[i], source_char_index_dict))
    source_sequences = pad_sequence(source_sequences, batch_first=True, padding_value=2)
    return source_sequences


# Function to convert characters to their corresponding indices
def get_chars(string, char_index_dict):
    """
    Convert characters in a string to their corresponding indices using char_index_dict.
    
    Args:
    - string: Input string
    - char_index_dict: Dictionary mapping characters to their indices
    
    Returns:
    - chars_indexes: List of character indices
    """
    chars_indexes = []
    for char in string:
        chars_indexes.append(char_index_dict[char])
    return torch.tensor(chars_indexes, device=device)


# Preprocess the data, including adding padding, generating sequences, and updating dictionaries
def preprocess_data(source_data, target_data):
    """
    Preprocess source and target data.
    
    Args:
    - source_data: List of source strings
    - target_data: List of target strings
    
    Returns:
    - data: Preprocessed data dictionary
    """
    data = {
        "source_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "target_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "source_char_index": {START_TOKEN: 0, END_TOKEN: 1, PAD_TOKEN: 2},
        "source_index_char": {0: START_TOKEN, 1: END_TOKEN, 2: PAD_TOKEN},
        "target_char_index": {START_TOKEN: 0, END_TOKEN: 1, PAD_TOKEN: 2},
        "target_index_char": {0: START_TOKEN, 1: END_TOKEN, 2: PAD_TOKEN},
        "source_len": 3,
        "target_len": 3,
        "source_data": source_data,
        "target_data": target_data,
        "source_data_seq": [],
        "target_data_seq": []
    }
    
    # Calculate the maximum length of input and output sequences
    data["INPUT_MAX_LENGTH"] = max(len(string) for string in source_data) + 2
    data["OUTPUT_MAX_LENGTH"] = max(len(string) for string in target_data) + 2

    # Pad the source and target sequences and update character dictionaries
    padded_source_strings = add_padding(source_data, data["INPUT_MAX_LENGTH"])
    padded_target_strings = add_padding(target_data, data["OUTPUT_MAX_LENGTH"])
    
    for i in range(len(padded_source_strings)):
        for char in padded_source_strings[i]:
            if data["source_char_index"].get(char) is None:
                data["source_chars"].append(char)
                idx = len(data["source_chars"]) - 1
                data["source_char_index"][char] = idx
                data["source_index_char"][idx] = char
        for char in padded_target_strings[i]:
            if data["target_char_index"].get(char) is None:
                data["target_chars"].append(char)
                idx = len(data["target_chars"]) - 1
                data["target_char_index"][char] = idx
                data["target_index_char"][idx] = char

    # Generate sequences of indexes for source and target data
    data['source_data_seq'] = generate_string_to_sequence(padded_source_strings, data['source_char_index'])
    data['target_data_seq'] = generate_string_to_sequence(padded_target_strings, data['target_char_index'])
    
    # Update lengths of source and target character lists
    data["source_len"] = len(data["source_chars"])
    data["target_len"] = len(data["target_chars"])
    
    return data


In [5]:
def get_cell_type(cell_type):
    # Function to return the appropriate RNN cell based on the specified type
    if(cell_type == "RNN"):
        return nn.RNN
    elif(cell_type == "LSTM"):
        return nn.LSTM
    elif(cell_type == "GRU"):
        return nn.GRU
    else:
        print("Specify correct cell type")

class Attention(nn.Module):
    def __init__(self, hidden_size):
        # Initialize the attention mechanism module
        super(Attention, self).__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        # Forward pass of the attention mechanism
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
        scores = scores.squeeze().unsqueeze(1)
        weights = F.softmax(scores, dim=0)
        weights = weights.permute(2,1,0)
        keys = keys.permute(1,0,2)
        context = torch.bmm(weights, keys)
        return context, weights

class Encoder(nn.Module):
    def __init__(self, h_params, data, device ):
        # Initialize the Encoder module
        super(Encoder, self).__init__()
        # Embedding layer for input characters
        self.embedding = nn.Embedding(data["source_len"], h_params["char_embd_dim"])
        # RNN cell for encoding
        self.cell = get_cell_type(h_params["cell_type"])(h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        self.device=device
        self.h_params = h_params
        self.data = data
        
    def forward(self, input , encoder_curr_state):
        # Forward pass of the Encoder module
        input_length = self.data["INPUT_MAX_LENGTH"]
        batch_size = self.h_params["batch_size"]
        hidden_neurons = self.h_params["hidden_layer_neurons"]
        layers = self.h_params["number_of_layers"]
        encoder_states  = torch.zeros(input_length, layers, batch_size, hidden_neurons, device=self.device )
        for i in range(input_length):
            current_input = input[:, i].view(batch_size,1)
            _, encoder_curr_state = self.forward_step(current_input, encoder_curr_state)
            if self.h_params["cell_type"] == "LSTM":
                encoder_states[i] = encoder_curr_state[1]
            else:
                encoder_states[i] = encoder_curr_state
        return encoder_states, encoder_curr_state
    
    def forward_step(self, current_input, prev_state):
        # Perform forward pass for one time step
        embd_input = self.embedding(current_input)
        output, prev_state = self.cell(embd_input, prev_state)
        return output, prev_state
        
    def getInitialState(self):
        # Initialize initial hidden state for encoder
        return torch.zeros(self.h_params["number_of_layers"],self.h_params["batch_size"],self.h_params["hidden_layer_neurons"], device=self.device)

class Decoder(nn.Module):
    def __init__(self, h_params, data,device):
        # Initialize the Decoder module
        super(Decoder, self).__init__()
        # Attention mechanism
        self.attention = Attention(h_params["hidden_layer_neurons"]).to(device)
        # Embedding layer for target characters
        self.embedding = nn.Embedding(data["target_len"], h_params["char_embd_dim"])
        # RNN cell for decoding
        self.cell = get_cell_type(h_params["cell_type"])(h_params["hidden_layer_neurons"] +h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        # Fully connected layer for output
        self.fc = nn.Linear(h_params["hidden_layer_neurons"], data["target_len"])
        # Softmax activation for output probabilities
        self.softmax = nn.LogSoftmax(dim=2)
        self.h_params = h_params
        self.data = data
        self.device = device

    def forward(self, decoder_current_state, encoder_final_layers, target_batch, loss_fn, teacher_forcing_enabled=True):
        # Forward pass of the Decoder module
        batch_size = self.h_params["batch_size"]
        decoder_current_input = torch.full((batch_size,1),self.data["target_char_index"][START_TOKEN], device=self.device)
        embd_input = self.embedding(decoder_current_input)
        curr_embd = F.relu(embd_input)
        decoder_actual_output = []
        attentions = []
        loss = 0
        
        use_teacher_forcing = False
        if(teacher_forcing_enabled):
            use_teacher_forcing = True if random.random() < TEACHER_FORCING_RATIO else False
        for i in range(self.data["OUTPUT_MAX_LENGTH"]):
            # Perform one step of decoding
            decoder_output, decoder_current_state, attn_weights = self.forward_step(decoder_current_input, decoder_current_state, encoder_final_layers)
            attentions.append(attn_weights)
            topv, topi = decoder_output.topk(1)
            decoder_current_input = topi.squeeze().detach()
            decoder_actual_output.append(decoder_current_input)

            if(target_batch==None):
                decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
            else:
                curr_target_chars = target_batch[:, i]
                if(i<self.data["OUTPUT_MAX_LENGTH"]-1):
                    if use_teacher_forcing:
                        decoder_current_input = target_batch[:, i+1].view(self.h_params["batch_size"], 1)
                    else:
                        decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
                decoder_output = decoder_output[:, -1, :]
                loss+=(loss_fn(decoder_output, curr_target_chars))

        decoder_actual_output = torch.cat(decoder_actual_output,dim=0).view(self.data["OUTPUT_MAX_LENGTH"], self.h_params["batch_size"]).transpose(0,1)

        correct = (decoder_actual_output == target_batch).all(dim=1).sum().item()
        return decoder_actual_output, attentions, loss, correct
    
    def forward_step(self, current_input, prev_state, encoder_final_layers):
        # Perform one step of decoding
        embd_input = self.embedding(current_input)
        if self.h_params["cell_type"] == "LSTM":
            context , attn_weights = self.attention(prev_state[1][-1,:,:], encoder_final_layers)
        else:
            context , attn_weights = self.attention(prev_state[-1,:,:], encoder_final_layers)
        curr_embd = F.relu(embd_input)
        input_gru = torch.cat((curr_embd, context), dim=2)
        output, prev_state = self.cell(input_gru, prev_state)
        output = self.softmax(self.fc(output))
        return output, prev_state, attn_weights


In [6]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.source_data_seq = data[0]
        self.target_data_seq = data[1]
    
    def __len__(self):
        return len(self.source_data_seq)
    
    def __getitem__(self, idx):
        source_data = self.source_data_seq[idx]
        target_data = self.target_data_seq[idx]
        return source_data, target_data


In [7]:
def evaluate(encoder, decoder, data, dataloader, device, h_params, loss_fn, use_teacher_forcing = False):
    # Function to evaluate the performance of the model on a dataset
    correct_predictions = 0
    total_loss = 0
    total_predictions = len(dataloader.dataset)
    number_of_batches = len(dataloader)
    encoder.eval()
    decoder.eval()
    
    with torch.no_grad():
        for batch_num, (source_batch, target_batch) in enumerate(dataloader):

            encoder_initial_state = encoder.getInitialState()
            if h_params["cell_type"] == "LSTM":
                encoder_initial_state = (encoder_initial_state, encoder.getInitialState())
            encoder_states, encoder_final_state = encoder(source_batch,encoder_initial_state)

            decoder_current_state = encoder_final_state
            encoder_final_layer_states = encoder_states[:, -1, :, :]

            loss = 0
            correct = 0

            decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)

            correct_predictions+=correct
            total_loss +=loss

        accuracy = correct_predictions / total_predictions
        total_loss /= number_of_batches

        return accuracy, total_loss


In [8]:
def make_strings(data, source, target, output):
    # Function to convert indices to strings for source, target, and output sequences
    source_string = ""
    target_string = ""
    output_string = ""
    for i in source:
        source_string+=(data['source_index_char'][i.item()])
    for i in target:
        target_string+=(data['target_index_char'][i.item()])
    for i in output:
        output_string+=(data['target_index_char'][i.item()])
    return source_string, target_string, output_string


def train_loop(encoder, decoder,h_params, data, data_loader, device, val_dataloader, use_teacher_forcing=True):
    # Function to train the encoder-decoder model
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=h_params["learning_rate"])
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=h_params["learning_rate"])
    
    loss_fn = nn.NLLLoss()
    
    total_predictions = len(data_loader.dataset)
    total_batches = len(data_loader)
    
    for ep in range(h_params["epochs"]):
        total_correct = 0
        total_loss = 0
        encoder.train()
        decoder.train()
        for batch_num, (source_batch, target_batch) in enumerate(data_loader):
            encoder_initial_state = encoder.getInitialState()
            
            if h_params["cell_type"] == "LSTM":
                encoder_initial_state = (encoder_initial_state, encoder.getInitialState())
            encoder_states, encoder_final_state = encoder(source_batch,encoder_initial_state)
            
            decoder_current_state = encoder_final_state
            encoder_final_layer_states = encoder_states[:, -1, :, :]
            
            
            loss = 0
            correct = 0
            
            decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)
            total_correct +=correct
            total_loss += loss.item()/data["OUTPUT_MAX_LENGTH"]
            
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            
            
        train_acc = total_correct/total_predictions
        train_loss = total_loss/total_batches
        val_acc, val_loss = evaluate(encoder, decoder, data, val_dataloader,device, h_params, loss_fn, False)
        print("ep: ", ep, " train acc:", train_acc, " train loss:", train_loss, " val acc:", val_acc, " val loss:", val_loss.item()/data["OUTPUT_MAX_LENGTH"])
        wandb.log({"train_accuracy":train_acc, "train_loss":train_loss, "val_accuracy":val_acc, "val_loss":val_loss, "epoch":ep})
    return loss_fn

In [9]:
h_params={
    "char_embd_dim" : 256,  
    "hidden_layer_neurons": 512,  
    "batch_size": 64, 
    "number_of_layers": 3,  
    "learning_rate": 0.0001,  
    "epochs": 20, 
    "cell_type": "LSTM", 
    "dropout": 0, 
    "optimizer": "adam" 
}

def prepare_dataloaders(train_source, train_target, val_source, val_target,test_source, test_target, h_params):
    # Preparing data loaders for training and validation
    data = preprocess_data(copy.copy(train_source), copy.copy(train_target))
    
    # Training data
    training_data = [data["source_data_seq"], data['target_data_seq']]
    train_dataset = MyDataset(training_data)
    train_dataloader = DataLoader(train_dataset, batch_size=h_params["batch_size"], shuffle=False)

    # Validation data
    val_padded_source_strings = add_padding(val_source, data["INPUT_MAX_LENGTH"])
    val_padded_target_strings = add_padding(val_target, data["OUTPUT_MAX_LENGTH"])
    val_source_sequences = generate_string_to_sequence(val_padded_source_strings, data['source_char_index'])
    val_target_sequences = generate_string_to_sequence(val_padded_target_strings, data['target_char_index'])
    validation_data = [val_source_sequences, val_target_sequences]
    val_dataset = MyDataset(validation_data)
    val_dataloader = DataLoader(val_dataset, batch_size=h_params["batch_size"], shuffle=False)
    
    # test data
    test_padded_source_strings = add_padding(test_source, data["INPUT_MAX_LENGTH"])
    test_padded_target_strings = add_padding(test_target, data["OUTPUT_MAX_LENGTH"])
    test_source_sequences = generate_string_to_sequence(test_padded_source_strings, data['source_char_index'])
    test_target_sequences = generate_string_to_sequence(test_padded_target_strings, data['target_char_index'])
    test_data = [test_source_sequences, test_target_sequences]
    test_dataset = MyDataset(test_data)
    test_dataloader = DataLoader(test_dataset, batch_size=h_params["batch_size"], shuffle=False)
    
    return train_dataloader, val_dataloader, test_dataloader, data


In [10]:
def train(h_params, data, device, data_loader, val_dataloader, use_teacher_forcing=True):
    encoder = Encoder(h_params, data, device).to(device)
    decoder = Decoder(h_params, data, device).to(device)
    loss_fn = train_loop(encoder, decoder,h_params, data, data_loader,device, val_dataloader, use_teacher_forcing)
    return encoder, decoder, loss_fn

In [11]:
#It will print the test accura
acc, loss = evaluate(encoder,decoder, data, test_dataloader, device, config, loss_fn)
print(acc, loss/data["OUTPUT_MAX_LENGTH"])
def remove_padding(str):
    padding_removed_string = ""
    for ch in str:
        if ch =="<" or ch == ">" or ch =="_":
            continue
        padding_removed_string+=ch
    return padding_removed_string


#It will generate the attention heatmap
def plot_attention_heatmap(attention_matrix, input_sequence, output_sequence , id):

    plt.figure(figsize=(15, 10))

    ax = sns.heatmap(attention_matrix, cmap='viridis', annot=False, xticklabels=input_sequence, yticklabels=output_sequence)

    # Set font properties for Telugu characters
    font_path = '/kaggle/input/fonts-bro-1/NotoSansTelugu-VariableFont_wdth,wght.ttf'  # Replace with the path to a Telugu font file
    telugu_font = FontProperties(fname=font_path)

    ax.set_xticklabels(input_sequence, fontproperties=telugu_font)
    ax.set_yticklabels(output_sequence, fontproperties=telugu_font)

    ax.set_xlabel('Input Sequence')
    ax.set_ylabel('Output Sequence')
    plt.title('Attention Heatmap')
    wandb.log({"Attention_Heatmap"+str(id)+ "temp": wandb.Image(plt)})

    plt.close()
#     plt.show()

def generate_predictions_report(encoder, decoder, data, dataloader, device, config, loss_fn):
    encoder.eval()
    decoder.eval()
    
    zeroth_batch_attention = ""
    zeroth_source_batch = ""
    zeroth_target_batch = ""
    
    # The CSV file will be saved in the current working directory
    with open('predictions_report.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Source String', 'Target String', 'Predicted String'])
        
        with torch.no_grad():
            for batch_num, (source_batch, target_batch) in enumerate(dataloader):

                encoder_initial_state = encoder.getInitialState()
                if config["cell_type"] == "LSTM":
                    encoder_initial_state = (encoder_initial_state, encoder.getInitialState())
                encoder_states, encoder_final_state = encoder(source_batch, encoder_initial_state)

                decoder_current_state = encoder_final_state
                encoder_final_layer_states = encoder_states[:, -1, :, :]

                # Generate decoder output
                decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, False)
                if batch_num == 0:
                    zeroth_batch_attention = attentions
                    zeroth_source_batch = source_batch
                    zeroth_target_batch = target_batch

                # Generate the list of true and predicted words
                for j in range(config["batch_size"]):
                    src_str, target_str, pred_str = make_strings(data, source_batch[j], target_batch[j], decoder_output[j])
                    writer.writerow([remove_padding(src_str), remove_padding(target_str), remove_padding(pred_str)])

            
            processed_atten = torch.zeros(config["batch_size"], data["OUTPUT_MAX_LENGTH"], data["INPUT_MAX_LENGTH"])
            
            for i in range(data["OUTPUT_MAX_LENGTH"]):
                temp = zeroth_batch_attention[i][:, 0, :]
                for j in range(config["batch_size"]):
                    processed_atten[j][i] = temp[j]
               
            for i in range(10):
                curr_src_seq = zeroth_source_batch[i]
                curr_trg_seq = zeroth_target_batch[i]
                curr_src_seq = [data["source_index_char"][k.item()] for k in curr_src_seq]
                curr_trg_seq = [data["target_index_char"][k.item()] for k in curr_trg_seq]
                plot_attention_heatmap(processed_atten[i], curr_src_seq, curr_trg_seq, i)


generate_predictions_report(encoder, decoder, data, test_dataloader, device, config, loss_fn)

NameError: name 'encoder' is not defined

In [12]:
#Run this cell to run a sweep with appropriate parameters
sweep_params = {
    'method' : 'bayes',
    'name'   : 'DL Assignment 3 With Attention',
    'metric' : {
        'goal' : 'maximize',
        'name' : 'val_accuracy',
    },
    'parameters' : {
        'epochs':{'values' : [15, 20]},
        'learning_rate':{'values' : [0.001, 0.0001]},
        'batch_size':{'values':[32,64, 128]},
        'char_embd_dim':{'values' : [64, 128, 256] } ,
        'number_of_layers':{'values' : [1,2,3,4]},
        'optimizer':{'values':['nadam','adam']},
        'cell_type':{'values' : ["RNN","LSTM", "GRU"]},
        'hidden_layer_neurons':{'values': [ 128, 256, 512]},
        'dropout':{'values': [0,0.2, 0.3]}
    }
}

sweep_id = wandb.sweep(sweep=sweep_params, project="Deep_Learning_Assignment3a")
def main():
    wandb.init(project="Deep_Learning_Assignment3a" )
    config = wandb.config
    with wandb.init(project="Deep_Learning_Assignment3a", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config):
        train_dataloader, val_dataloader,test_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target,test_source, test_target, config)
        train(config, data, device, train_dataloader, val_dataloader, True)

Create sweep with ID: n15yi8bx
Sweep URL: https://wandb.ai/m_dhamu2908/Deep_Learning_Assignment3a/sweeps/n15yi8bx


In [None]:
wandb.agent("n15yi8bx", function=main, count=100)

In [15]:
config = h_params
run = wandb.init(project="Deep_Learning_Assignment3a", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config)
train_dataloader, val_dataloader, test_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target,test_source, test_target, h_params)
encoder, decoder, loss_fn = train(h_params, data, device, train_dataloader, val_dataloader, True)

[34m[1mwandb[0m: Currently logged in as: [33mjaswanth431[0m. Use [1m`wandb login --relogin`[0m to force relogin


ep:  0  train acc: 0.05705078125  train loss: 1.3567165669150976  val acc: 0.0  val loss: 1.2356311963952107
ep:  1  train acc: 0.35916015625  train loss: 0.7328936720931022  val acc: 0.0068359375  val loss: 0.7459257374639097
ep:  2  train acc: 0.4886328125  train loss: 0.38329930001343426  val acc: 0.13232421875  val loss: 0.47324748661207117
ep:  3  train acc: 0.567421875  train loss: 0.25723866042559584  val acc: 0.307373046875  val loss: 0.3494106790293818
ep:  4  train acc: 0.64888671875  train loss: 0.19122661505066532  val acc: 0.353759765625  val loss: 0.3150609057882558
ep:  5  train acc: 0.68744140625  train loss: 0.16484403335261558  val acc: 0.38818359375  val loss: 0.2963656342547873
ep:  6  train acc: 0.70115234375  train loss: 0.14890617834454717  val acc: 0.403076171875  val loss: 0.2789236358974291
ep:  7  train acc: 0.715703125  train loss: 0.13896221505174322  val acc: 0.408447265625  val loss: 0.2738995137421981
ep:  8  train acc: 0.73595703125  train loss: 0.12299