<a href="https://colab.research.google.com/github/akshaygrao77/DeepLearning-Assignment3/blob/main/Assignment_3_part_B_with_attn_and_Question6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CS6910 Assignment 3 - Part B: Seq2Seq with Attention

In [1]:
!pip install wandb
!pip install wordcloud
!pip install colour



In [2]:
import os
import random
import time
import wandb
import re, string
import numpy as np
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
from colour import Color
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from google.colab import files
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
# Authenticate with Weights & Biases, then start a run whose config is
# seeded with the default hyperparameters below.
wandb.login()
defaults = {"embedding_dim": 512, 
                       "enc_dec_layers": 2,
                       "define_layer": "LSTM",
                       "units": 512,
                       "dropout": 0,
                       "attention": False,
                       "beam_width": 3,
                       "teacher_forcing_ratio":0.5
                       }
wandb.init(config=defaults, project='Assignment3-new_outputs', entity='cs21s002-ee21s113-dlassignment-1')

[34m[1mwandb[0m: Currently logged in as: [33mmanu_data_analyst[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mmanu_data_analyst[0m ([33mcs21s002-ee21s113-dlassignment-1[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Loading the Data

In [5]:
## Download the dataset ##
import requests
import tarfile

def download_data(save_path):
    """Download the Dakshina dataset archive and extract it into ``save_path``.

    The archive is streamed to ``data_assignment3.tar`` in the current working
    directory and then unpacked.

    Args:
        save_path: Directory the tar archive is extracted into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """

    data_url = r"https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar"
    tar_path = "data_assignment3.tar"

    # Stream the body in chunks instead of holding the whole archive in memory.
    with requests.get(data_url, allow_redirects=True, stream=True) as r:
        # Fail loudly here: otherwise a failed download only surfaces later as
        # a confusing error while opening a missing or stale tar file.
        r.raise_for_status()
        with open(tar_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tar_path) as tar_file:
        tar_file.extractall(save_path)


# downloading and extracting the data to drive 
# uncomment the line below if downloading data for the 1st time
#download_data("/content/drive/MyDrive/DakshinaDataset")

In [6]:
# Locate the Dakshina lexicon files (train / dev / test) for a given language
def get_data(language):
    """Build the train/dev/test lexicon file paths for one Dakshina language.

    Args:
        language: Language code used both as directory name and file prefix.

    Returns:
        Tuple ``(train_path, dev_path, test_path)`` of path strings.
    """

    ## REPLACE THIS PATH UPTO dakshina_dataset_v1.0 with your own dataset path ##
    path_pattern = "/content/drive/MyDrive/DakshinaDataset/dakshina_dataset_v1.0/{}/lexicons/{}.translit.sampled.{}.tsv"

    train_path, dev_path, test_path = (
        path_pattern.format(language, language, split)
        for split in ("train", "dev", "test")
    )

    return train_path, dev_path, test_path

## Utility functions for preprocessing data ##

def add_Tokens(data_Frame, cols, sos="\t", eos="\n"):
    """Wrap every value of the given columns in start/end markers, in place.

    Each value is converted with ``str`` and replaced by ``sos + value + eos``.
    Nothing is returned; ``data_Frame`` itself is modified.
    """
    for column in cols:
        # By default: \t = starting token, \n = ending token
        data_Frame[column] = data_Frame[column].apply(
            lambda value: sos + str(value) + eos
        )
    
def tokenize(lang, tokenizer=None):
    """Convert texts to post-padded integer sequences with a Keras tokenizer.

    When ``tokenizer`` is None a new character-level ``Tokenizer`` is fitted on
    ``lang``; otherwise the supplied tokenizer is used as it is.

    Returns:
        Tuple ``(padded_sequences, tokenizer)``.
    """

    if tokenizer is None:
        tokenizer = Tokenizer(char_level=True)
        tokenizer.fit_on_texts(lang)

    # Encoding and padding are the same whether or not the tokenizer was just fitted.
    sequences = tokenizer.texts_to_sequences(lang)
    padded = tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                           padding='post')

    return padded, tokenizer

def data_Preprocess(fpath, input_lang_tokenizer=None, targ_lang_tokenizer=None):
    """Load a tab-separated lexicon file into a shuffled ``tf.data`` dataset.

    Column 1 of the file is used as model input and column 0 as target. Both
    get start/end markers before being tokenized. Tokenizers that are passed
    in are reused; missing ones are fitted on this file.

    Returns:
        Tuple ``(dataset, input_tokenizer, targ_tokenizer)`` where ``dataset``
        yields ``(input_sequence, target_sequence)`` pairs.
    """

    frame = pd.read_csv(fpath, sep="\t", header=None)

    # Start and end markers let the decoder know when to stop predicting.
    add_Tokens(frame, [0, 1])

    source_words = frame[1].astype(str).tolist()
    target_words = frame[0].astype(str).tolist()

    input_token, input_tokenizer = tokenize(source_words, tokenizer=input_lang_tokenizer)
    targ_tensor, targ_tokenizer = tokenize(target_words, tokenizer=targ_lang_tokenizer)

    pairs = tf.data.Dataset.from_tensor_slices((input_token, targ_tensor))
    shuffled = pairs.shuffle(len(pairs))

    return shuffled, input_tokenizer, targ_tokenizer

# Model 

In [7]:
## functions ##
def define_Layers(name, units, dropout, return_state=False, return_sequences=False):
    """Create a recurrent Keras layer of the requested kind.

    Args:
        name: Layer kind, one of "RNN", "GRU" or "LSTM".
        units: Number of units passed to the layer.
        dropout: Dropout value passed to the layer.
        return_state: Forwarded to the layer's ``return_state`` argument.
        return_sequences: Forwarded to the layer's ``return_sequences`` argument.

    Returns:
        A ``layers.SimpleRNN``, ``layers.GRU`` or ``layers.LSTM`` instance.

    Raises:
        ValueError: If ``name`` is not one of the supported kinds.
    """
    layer_class_names = {"RNN": "SimpleRNN", "GRU": "GRU", "LSTM": "LSTM"}

    if name not in layer_class_names:
        # An unknown name used to fall through and return None, which only
        # failed much later when the "layer" was called.
        raise ValueError(
            f"Unknown layer type {name!r}; expected one of {sorted(layer_class_names)}"
        )

    layer_class = getattr(layers, layer_class_names[name])
    return layer_class(units=units, dropout=dropout,
                       return_state=return_state,
                       return_sequences=return_sequences)

class attention_Layer_RNN(tf.keras.layers.Layer):
  """Attention layer scoring encoder outputs against a decoder state.

  The score is ``V(tanh(W1(state) + W2(encoder_output)))``; it is normalised
  with a softmax over axis 1 and used to form a weighted sum of the encoder
  outputs.
  """
  def __init__(self, units):
    super(attention_Layer_RNN, self).__init__()
    # W1 projects the state, W2 projects the encoder outputs, V maps the
    # combined projection to a single score.
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, encoder_state, encoder_output):
    """Return ``(attn_vector, attn_weights)`` for one decoding step.

    ``encoder_state`` may be a list of state tensors; they are concatenated
    along axis 1 before use.
    # presumably encoder_output is (batch, time, units) — TODO confirm
    """
    
    encoder_state = tf.concat(encoder_state, 1)
    # Insert an axis at position 1 so the state broadcasts against encoder_output.
    encoder_state = tf.expand_dims(encoder_state, 1)

    score = self.V(tf.nn.tanh(self.W1(encoder_state) + self.W2(encoder_output)))

    attn_weights = tf.nn.softmax(score, axis=1)

    # Weighted sum of the encoder outputs over axis 1.
    attn_vector = attn_weights * encoder_output
    attn_vector = tf.reduce_sum(attn_vector, axis=1)

    return attn_vector, attn_weights


class Encoder(tf.keras.Model):
    """Embedding followed by a stack of recurrent layers.

    Every recurrent layer is created with ``return_sequences=True`` and
    ``return_state=True``.
    """
    def __init__(self, define_layer, number_layers, units, encoder_vocab_size, embedding_dim, dropout):
        super(Encoder, self).__init__()
        self.define_layer = define_layer
        self.number_layers = number_layers
        self.units = units
        self.dropout = dropout
        self.embedding = tf.keras.layers.Embedding(encoder_vocab_size, embedding_dim)
        self.final_RNN_layers()

    def call(self, x, hidden):
        """Embed ``x`` and run it through the recurrent stack.

        Returns ``(out, state)`` where ``out`` is the first element of the last
        layer's result and ``state`` is the list of the remaining elements.
        """
        x = self.embedding(x)
        x = self.RNN_layers[0](x, initial_state=hidden)

        # Each layer returns [output, *states]; that whole list is handed to
        # the next layer.
        # NOTE(review): Keras RNN layers appear to treat list elements after
        # the first as the initial state — confirm this is intended.
        # NOTE(review): no `training` flag is forwarded here, so whether
        # `dropout` is active depends on Keras defaults — confirm.
        for layer in self.RNN_layers[1:]:
            x = layer(x)

        out,state = x[0], x[1:]

        return out,state
    
    def final_RNN_layers(self):
        """Create ``number_layers`` recurrent layers and store them in ``self.RNN_layers``."""
        self.RNN_layers = []

        for i in range(self.number_layers):
            self.RNN_layers.append(define_Layers(self.define_layer, self.units, self.dropout,
                                                return_sequences=True,
                                                return_state=True))


    def hidden_states(self, batch_size):
        """Return an all-zero initial state: one tensor, or two for "LSTM"."""

        if self.define_layer != "LSTM":
            return [tf.zeros((batch_size, self.units))]
        else:
            # The list holds the same zero tensor twice.
            return [tf.zeros((batch_size, self.units))]*2

class Decoder(tf.keras.Model):
    """Single-step decoder: embedding, optional attention, recurrent stack, softmax output."""
    def __init__(self, define_layer, number_layers, units, dec_size, embedding_dim, dropout, attention=False):
        super(Decoder, self).__init__()

        self.define_layer = define_layer
        self.number_layers = number_layers
        self.units = units
        self.dropout = dropout
        self.attention = attention
        self.embedding_layer = layers.Embedding(input_dim=dec_size, 
                                                output_dim=embedding_dim)
        
        # Output layer: a probability distribution over the dec_size tokens.
        self.dense = layers.Dense(dec_size, activation="softmax")
        self.flatten = layers.Flatten()
        # The attention layer only exists when attention is enabled.
        if self.attention:
            self.attention_layer = attention_Layer_RNN(self.units)
        self.final_RNN_layers()

    def call(self, x, hidden, encoder_output=None):
        """Run one decoding step.

        Returns ``(out, state, attn_weights)``; ``attn_weights`` is None when
        attention is disabled.
        """
        
        x = self.embedding_layer(x)

        if self.attention:
            # The attention vector is concatenated in front of the embedded
            # input along the last axis.
            attn_vector, attn_weights = self.attention_layer(hidden, encoder_output)
            x = tf.concat([tf.expand_dims(attn_vector, 1), x], -1)
        else:
            attn_weights = None

        x = self.RNN_layers[0](x, initial_state=hidden)

        # Each layer returns [output, *states]; that whole list is handed to
        # the next layer.
        # NOTE(review): Keras RNN layers appear to treat list elements after
        # the first as the initial state — confirm this is intended.
        for layer in self.RNN_layers[1:]:
            x = layer(x)

        out,state = x[0], x[1:]

        out= self.dense(self.flatten(out))
        
        return out,state, attn_weights

    def final_RNN_layers(self):
        """Create the recurrent stack; only the last layer uses ``return_sequences=False``."""
        self.RNN_layers = []    

        for i in range(self.number_layers - 1):
            self.RNN_layers.append(define_Layers(self.define_layer, self.units, self.dropout,
                                                return_sequences=True,
                                                return_state=True))
        
        self.RNN_layers.append(define_Layers(self.define_layer, self.units, self.dropout,
                                            return_sequences=False,
                                            return_state=True))

In [8]:
class Seq2Seq_RNN_Model():
    """Encoder-decoder transliteration model with a hand-written training loop.

    Intended call order: construct, ``set_vocabulary`` (creates encoder and
    decoder), ``build`` (loss / optimizer / metric), then ``fit``.
    ``translate`` and ``plot_attention_heatmap`` rely on the sequence lengths
    recorded by ``fit``.
    """

    def __init__(self, embedding_dim, encoder_layers, dec_layers, define_layer, units, dropout, attention=False):
        self.embedding_dim = embedding_dim
        self.encoder_layers = encoder_layers
        self.dec_layers = dec_layers
        self.define_layer = define_layer
        self.units = units
        self.dropout = dropout
        self.attention = attention
        self.stats = []
        self.batch_size = 128
        self.use_beam_search = False
        # Overwritten by fit(); set here so Training() works before fit() runs.
        self.teacher_forcing_ratio = 1.0

    def build(self, loss, optimizer, metric):
        """Store the loss function, optimizer and metric used by the loops."""
        self.loss = loss
        self.optimizer = optimizer
        self.metric = metric

    def set_vocabulary(self, input_tokenizer, targ_tokenizer):
        """Store both tokenizers and create the encoder/decoder for their vocabularies."""
        self.input_tokenizer = input_tokenizer
        self.targ_tokenizer = targ_tokenizer
        self.model_create()

    def model_create(self):
        """Instantiate the encoder and decoder; vocabulary sizes are ``len(word_index) + 1``."""

        encoder_vocab_size = len(self.input_tokenizer.word_index) + 1
        dec_size = len(self.targ_tokenizer.word_index) + 1

        self.encoder = Encoder(self.define_layer, self.encoder_layers, self.units, encoder_vocab_size,
                               self.embedding_dim, self.dropout)

        self.dec = Decoder(self.define_layer, self.dec_layers, self.units, dec_size,
                               self.embedding_dim,  self.dropout, self.attention)

    @tf.function
    def Training(self, input, target, encoder_state, use_teacher_forcing=None):
        """Run one optimisation step on a batch.

        Args:
            input: Batch of input token sequences.
            target: Batch of target token sequences (position 0 is the start marker).
            encoder_state: Initial encoder state.
            use_teacher_forcing: Python bool. True feeds the ground-truth token
                as the next decoder input, False feeds the model's own
                prediction. Being a Python value, each distinct value gets its
                own traced graph. None keeps the legacy behaviour of drawing
                the decision here, which happens only while tracing.

        Returns:
            Tuple ``(batch_loss, metric_result)``.
        """

        if use_teacher_forcing is None:
            # Legacy fallback: inside a tf.function this Python call runs only
            # during tracing, so the outcome is baked into the graph. Callers
            # should pass an explicit bool instead (fit() does).
            use_teacher_forcing = random.random() < self.teacher_forcing_ratio

        loss = 0

        with tf.GradientTape() as tape:

            encoder_output, encoder_state = self.encoder(input, encoder_state)

            dec_state = encoder_state
            dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*self.batch_size ,1)

            for t in range(1, target.shape[1]):

                preds, dec_state, _ = self.dec(dec_input, dec_state, encoder_output)
                loss += self.loss(target[:,t], preds)
                self.metric.update_state(target[:,t], preds)

                if use_teacher_forcing:
                    # Teacher forcing: the target at step t is the next input.
                    dec_input = tf.expand_dims(target[:,t], 1)
                else:
                    # Otherwise feed back the model's own prediction.
                    dec_input = tf.expand_dims(tf.argmax(preds, 1), 1)

        batch_loss = loss / target.shape[1]

        # Gradients are taken after leaving the tape context so the backward
        # pass itself is not recorded.
        variables = self.encoder.variables + self.dec.variables
        gradients = tape.gradient(loss, variables)

        self.optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss, self.metric.result()

    @tf.function
    def validation_step(self, input, target, encoder_state):
        """Compute loss and metric for one batch without updating weights.

        The decoder is always fed its own previous prediction.
        """

        loss = 0

        encoder_output, encoder_state = self.encoder(input, encoder_state)

        dec_state = encoder_state
        dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*self.batch_size ,1)

        for t in range(1, target.shape[1]):

            preds, dec_state, _ = self.dec(dec_input, dec_state, encoder_output)
            loss += self.loss(target[:,t], preds)
            self.metric.update_state(target[:,t], preds)

            preds = tf.argmax(preds, 1)
            dec_input = tf.expand_dims(preds, 1)

        batch_loss = loss / target.shape[1]

        return batch_loss, self.metric.result()


    def fit(self, dataset, val_dataset, batch_size=128, epochs=10, use_wandb=False, teacher_forcing_ratio=1.0):
        """Train for ``epochs`` epochs, validating after each one.

        Args:
            dataset: Unbatched training dataset of (input, target) pairs.
            val_dataset: Unbatched validation dataset of (input, target) pairs.
            batch_size: Batch size; incomplete final batches are dropped.
            epochs: Number of epochs.
            use_wandb: When True, the per-epoch statistics are also sent to
                Weights & Biases with ``wandb.log``.
            teacher_forcing_ratio: Probability, drawn independently for every
                batch, of training that batch with teacher forcing.

        Per-epoch statistics are appended to ``self.stats``.
        """

        self.batch_size = batch_size
        self.teacher_forcing_ratio = teacher_forcing_ratio

        steps_per_epoch = len(dataset) // self.batch_size
        steps_per_epoch_val = len(val_dataset) // self.batch_size

        dataset = dataset.batch(self.batch_size, drop_remainder=True)
        val_dataset = val_dataset.batch(self.batch_size, drop_remainder=True)

        # Remember the padded lengths; translate() needs them later.
        sample_inp, sample_targ = next(iter(dataset))
        self.max_target_len = sample_targ.shape[1]
        self.max_input_len = sample_inp.shape[1]

        template = "\nTrain Loss: {0:.4f} Train Accuracy: {1:.4f} Validation Loss: {2:.4f} Validation Accuracy: {3:.4f}"

        print("-"*100)
        for epoch in range(1, epochs+1):
            print(f"EPOCH {epoch}\n")

            ## Training loop ##
            total_loss = 0
            total_acc = 0
            self.metric.reset_states()

            starting_time = time.time()
            encoder_state = self.encoder.hidden_states(self.batch_size)

            print("Training ...")
            for batch, (input, target) in enumerate(dataset.take(steps_per_epoch)):
                # Drawn eagerly for every batch; doing this inside the
                # tf.function would freeze the first outcome into the graph.
                use_teacher_forcing = random.random() < self.teacher_forcing_ratio
                batch_loss, acc = self.Training(input, target, encoder_state,
                                                use_teacher_forcing=use_teacher_forcing)
                total_loss += batch_loss
                total_acc += acc


                if batch==0 or ((batch + 1) % 100 == 0):
                    print(f"Batch {batch+1} Loss {batch_loss:.4f}")

            avg_acc = total_acc / steps_per_epoch
            avg_loss = total_loss / steps_per_epoch

            # Validation loop ##
            total_val_loss = 0
            total_val_acc = 0
            self.metric.reset_states()

            encoder_state = self.encoder.hidden_states(self.batch_size)

            print("\nValidating ...")
            for batch, (input, target) in enumerate(val_dataset.take(steps_per_epoch_val)):
                batch_loss, acc = self.validation_step(input, target, encoder_state)
                total_val_loss += batch_loss
                total_val_acc += acc

            validation_accuracy = total_val_acc / steps_per_epoch_val
            validation_loss = total_val_loss / steps_per_epoch_val

            print(template.format(avg_loss, avg_acc*100, validation_loss, validation_accuracy*100))

            time_taken = time.time() - starting_time
            self.stats.append({"epoch": epoch,
                            "train loss": avg_loss,
                            "val loss": validation_loss,
                            "train acc": avg_acc*100,
                            "val acc": validation_accuracy*100,
                            "training time": time_taken})

            if use_wandb:
                # The flag used to be accepted but ignored, so runs and sweeps
                # received no per-epoch metrics.
                wandb.log({"epoch": epoch,
                           "train loss": float(avg_loss),
                           "val loss": float(validation_loss),
                           "train acc": float(avg_acc*100),
                           "val acc": float(validation_accuracy*100),
                           "training time": time_taken})


    def evaluate(self, test_dataset, batch_size=None):
        """Compute loss and accuracy on a test dataset and log both to W&B.

        Args:
            test_dataset: Unbatched dataset of (input, target) pairs.
            batch_size: Batch size to use; None keeps ``self.batch_size``.

        Returns:
            Tuple ``(test_loss, test_accuracy)``.
        """

        if batch_size is not None:
            self.batch_size = batch_size
        # Fall back to the stored size; dividing by None used to raise.
        batch_size = self.batch_size

        steps_per_epoch_test = len(test_dataset) // batch_size
        test_dataset = test_dataset.batch(batch_size, drop_remainder=True)

        total_test_loss = 0
        total_test_acc = 0
        self.metric.reset_states()

        encoder_state = self.encoder.hidden_states(self.batch_size)

        print("\nRunning test dataset through the model...\n")
        for batch, (input, target) in enumerate(test_dataset.take(steps_per_epoch_test)):
            batch_loss, acc = self.validation_step(input, target, encoder_state)
            total_test_loss += batch_loss
            total_test_acc += acc

        test_Accuracy = total_test_acc / steps_per_epoch_test
        test_Loss = total_test_loss / steps_per_epoch_test

        print(f"Test Loss: {test_Loss:.4f} Test Accuracy: {test_Accuracy:.4f}")
        wandb.log({"Test_Loss": test_Loss})
        # This key used to be given the loss value by mistake.
        wandb.log({"Test Accuracy": test_Accuracy})

        return test_Loss, test_Accuracy


    def translate(self, word, get_heatmap=True):
        """Greedily decode the transliteration of a single word.

        Args:
            word: Input word without start/end markers.
            get_heatmap: When True, collect the attention weights of every step.

        Returns:
            Tuple ``(result, att_wts)``: the decoded string without the end
            marker, and the collected attention weights for its characters
            (an empty list when ``get_heatmap`` is False).
        """

        word = "\t" + word + "\n"

        inputs = self.input_tokenizer.texts_to_sequences([word])
        inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs,
                                                               maxlen=self.max_input_len,
                                                               padding="post")

        result = ""
        att_wts = []

        encoder_state = self.encoder.hidden_states(1)
        encoder_output, encoder_state = self.encoder(inputs, encoder_state)

        dec_state = encoder_state
        dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*1, 1)

        for t in range(1, self.max_target_len):

            preds, dec_state, attn_weights = self.dec(dec_input, dec_state, encoder_output)

            if get_heatmap:
                att_wts.append(attn_weights)

            preds = tf.argmax(preds, 1)
            # .get() because id 0 (padding) has no entry in index_word; such a
            # prediction is treated like the end marker instead of raising.
            next_char = self.targ_tokenizer.index_word.get(preds.numpy().item())

            if next_char is None or next_char == "\n":
                # Drop the weights of the terminating step itself.
                return result, att_wts[:-1]

            result += next_char
            dec_input = tf.expand_dims(preds, 1)

        # Length limit reached without an end marker: every decoded character
        # is real output, so nothing is trimmed.
        return result, att_wts

    def plot_attention_heatmap(self, word):
        """Translate ``word`` and log its attention weights to W&B as a heatmap.

        Raises:
            ValueError: If the model was created without attention.
        """

        if not self.attention:
            raise ValueError("plot_attention_heatmap requires a model built with attention=True")

        translated_word, attn_wts = self.translate(word, get_heatmap=True)

        if not attn_wts:
            # Nothing was decoded, so there is no matrix to plot.
            print(f"No output characters were produced for {word!r}; skipping heatmap")
            return

        attn_heatmap = tf.squeeze(tf.concat(attn_wts, 0), -1).numpy()

        input_word_len = len(word)
        # Column 0 belongs to the "\t" start marker that translate() prepends,
        # so the characters of `word` occupy columns 1..len(word).
        wandb.log({'heatmap_with_attn': wandb.plots.HeatMap(list(word), list(translated_word), attn_heatmap[:, 1:input_word_len + 1], show_text=False)})
       

# Visualizing Model Outputs

In [9]:
def randomly_evaluate(model, test_file=get_data("kn")[2], n=10):
    """Translate ``n`` randomly sampled test words, print them and log a W&B table.

    Args:
        model: Object exposing ``translate(word)`` whose first return element
            is the translated string.
        test_file: Tab-separated file; column 1 holds the input word and
            column 0 the reference translation.
        n: Number of rows to sample.
    """

    data_Frame = pd.read_csv(test_file, sep="\t", header=None)
    data_Frame = data_Frame.sample(n=n).reset_index(drop=True)

    print(f"Randomly evaluating the model on {n} words\n")
    my_data=[]
    for i in range(n):
        word = str(data_Frame[1][i])
        actual = str(data_Frame[0][i])
        # Translate once and reuse the result for printing and for the table
        # (the model used to be run twice for every word).
        predicted = model.translate(word)[0]

        print(f"Input word: {word}")
        print(f"Actual translation: {actual}")
        print(f"Model translation: {predicted}\n")
        my_data.append([i, word, actual, predicted])
    columns=["id","Input word", "Actual translation","Model translation"]
    my_table = wandb.Table(data=my_data, columns=columns)
    wandb.log({"table_key": my_table})


    


def train_best_model(language, embedding_dim, encoder_layers, dec_layers, define_layer, units, dropout, attention, teacher_forcing_ratio=1.0, save_outputs=None):
    """Train a model with the given hyperparameters and evaluate it on the test split.

    Steps: build the model, train for 30 epochs, report character-level test
    metrics, compute word-level accuracy by translating every test word, then
    write the predictions to ``predictions_attention.csv`` and trigger a
    download of it.

    ``save_outputs`` is accepted but not used.

    Returns:
        The trained ``Seq2Seq_RNN_Model``.
    """

    train_path, dev_path, test_path = get_data(language)

    model = Seq2Seq_RNN_Model(embedding_dim, encoder_layers, dec_layers,
                              define_layer, units, dropout, attention)

    # The tokenizers are fitted on the training split and reused everywhere else.
    train_set, input_tokenizer, targ_tokenizer = data_Preprocess(train_path)
    dev_set, _, _ = data_Preprocess(dev_path, input_tokenizer, targ_tokenizer)

    model.set_vocabulary(input_tokenizer, targ_tokenizer)
    model.build(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metric=tf.keras.metrics.SparseCategoricalAccuracy())

    model.fit(train_set, dev_set, epochs=30, use_wandb=True, teacher_forcing_ratio=teacher_forcing_ratio)

    ## Character level accuracy ##
    test_set, _, _ = data_Preprocess(test_path, model.input_tokenizer, model.targ_tokenizer)
    test_loss, test_acc = model.evaluate(test_set, batch_size=100)

    ##  Word level accuracy ##
    test_frame = pd.read_csv(test_path, sep="\t", header=None)
    inputs = test_frame[1].astype(str).tolist()
    targets = test_frame[0].astype(str).tolist()

    outputs = [model.translate(word)[0] for word in inputs]

    # A word counts as correct only when the whole string matches.
    exact_matches = np.sum(np.asarray(outputs) == np.array(targets))
    print(f"Word level accuracy: {exact_matches / len(outputs)}")

    predictions = pd.DataFrame({"inputs": inputs,
                                "targets": targets,
                                "outputs": outputs})
    predictions.to_csv('predictions_attention.csv', encoding = 'utf-8-sig')
    files.download('predictions_attention.csv')

    return model



## Run the best model

In [10]:
# Train and evaluate the attention model on the "kn" lexicon with the chosen
# hyperparameters; `model` is reused by the visualisation cells further down.
model = train_best_model(language="kn",
                        embedding_dim=64,
                        encoder_layers=2,
                        dec_layers=2,
                        define_layer="LSTM",
                        units=256,
                        dropout=0.2,
                        attention=True)

----------------------------------------------------------------------------------------------------
EPOCH 1

validating_model....
Batch 1 Loss 3.9938
Batch 100 Loss 1.1259
Batch 200 Loss 0.9969
Batch 300 Loss 0.9122

Validating ...

Train Loss: 1.1046 Train Accuracy: 65.6259 Validation Loss: 2.1946 Validation Accuracy: 57.3341
EPOCH 2

validating_model....
Batch 1 Loss 0.8512
Batch 100 Loss 0.8133
Batch 200 Loss 0.8009
Batch 300 Loss 0.6969

Validating ...

Train Loss: 0.7622 Train Accuracy: 75.9704 Validation Loss: 2.0829 Validation Accuracy: 61.5177
EPOCH 3

validating_model....
Batch 1 Loss 0.6679
Batch 100 Loss 0.6277
Batch 200 Loss 0.5608
Batch 300 Loss 0.4710

Validating ...

Train Loss: 0.5386 Train Accuracy: 81.8841 Validation Loss: 1.8261 Validation Accuracy: 68.6165
EPOCH 4

validating_model....
Batch 1 Loss 0.4151
Batch 100 Loss 0.3075
Batch 200 Loss 0.2985
Batch 300 Loss 0.2474

Validating ...

Train Loss: 0.3050 Train Accuracy: 89.0632 Validation Loss: 1.5367 Validation A

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Visualizing Model Connectivity (Q6)

In [11]:
# model connectivity between input and output characters
def LSTM_estimate(dec, x, hidden, encoder_output=None):
    """Run one decoder step and return the recurrent output before the dense layer.

    Uses the layers of ``dec`` (embedding, optional attention, recurrent stack)
    but does not apply ``dec.dense``; the caller does that itself.

    Returns:
        Tuple ``(out, state, attn_weights)``; ``attn_weights`` is None when
        ``dec.attention`` is falsy.
    """
    
    x = dec.embedding_layer(x)

    if dec.attention:
        attn_vector, attn_weights = dec.attention_layer(hidden, encoder_output)
        x = tf.concat([tf.expand_dims(attn_vector, 1), x], -1)
    else:
        attn_weights = None

    x = dec.RNN_layers[0](x, initial_state=hidden)

    # Each layer's full return value (output plus states) is passed to the next layer.
    for layer in dec.RNN_layers[1:]:
        x = layer(x)

    out,state = x[0], x[1:]

    # The dense projection is intentionally left to the caller:
    #output = dec.dense(dec.flatten(output))
    
    return out,state, attn_weights

def output_embeded(encoder, x, hidden):
    """Run already-embedded input ``x`` through the encoder's recurrent layers.

    The encoder's embedding layer is not applied here, so the caller can keep
    a handle on the embedded tensor.

    Returns:
        Tuple ``(out, state)``: first element of the last layer's result and
        the list of the remaining elements.
    """

    x = encoder.RNN_layers[0](x, initial_state=hidden)

    # Each layer's full return value (output plus states) is passed to the next layer.
    for layer in encoder.RNN_layers[1:]:
        x = layer(x)

    out,state = x[0], x[1:]

    return out,state


def get_connectivity(model, word):
    """Greedily translate ``word`` and collect input gradients for each output step.

    For every decoded character the gradient of the decoder's recurrent output
    with respect to the embedded input sequence is recorded.

    Args:
        model: Trained model exposing ``encoder``, ``dec``, both tokenizers,
            ``max_input_len`` and ``max_target_len``.
        word: Input word without start/end markers.

    Returns:
        Tuple ``(result, gradient_list)``: the decoded string without the end
        marker, and one gradient tensor per decoded character. Row 0 of each
        gradient belongs to the prepended "\t" start marker.
    """

    word = "\t" + word + "\n"

    inputs = model.input_tokenizer.texts_to_sequences([word])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs,
                                                            maxlen=model.max_input_len,
                                                            padding="post")

    result = ""

    gradient_list = []

    encoder_state = model.encoder.hidden_states(1)
    embedded_in = model.encoder.embedding(inputs)

    # The tape is persistent because a gradient is requested at every step.
    with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape:
        tape.watch(embedded_in)

        encoder_output, encoder_state = output_embeded(model.encoder, embedded_in, encoder_state)

        dec_state = encoder_state
        dec_input = tf.expand_dims([model.targ_tokenizer.word_index["\t"]]*1, 1)

        for t in range(1, model.max_target_len):

            LSTM_output, dec_state, _ = LSTM_estimate(model.dec, dec_input, dec_state, encoder_output)

            preds = model.dec.dense(model.dec.flatten(LSTM_output))
            preds = tf.argmax(preds, 1)
            # .get() because id 0 (padding) has no entry in index_word; such a
            # prediction is treated like the end marker instead of raising.
            next_char = model.targ_tokenizer.index_word.get(preds.numpy().item())

            if next_char is None or next_char == "\n":
                # The terminating step contributes neither a character nor a gradient.
                return result, gradient_list

            # Pause recording so the backward pass is not itself added to the
            # persistent tape.
            with tape.stop_recording():
                gradient_list.append(tape.gradient(LSTM_output, embedded_in)[0])

            result += next_char
            dec_input = tf.expand_dims(preds, 1)

    # Length limit reached without an end marker: every decoded character is
    # real output, so nothing is trimmed.
    return result, gradient_list

In [12]:
# Imports for visualising the model connectivity
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

from IPython.display import HTML as html_print
from IPython.display import display
import tensorflow.keras.backend as K

# get html element
def cstr(s, color='black'):
    """Return an HTML ``<text>`` snippet that shows ``s`` on a ``color`` background.

    A lone space is rendered with extra left padding; any other value is
    followed by a trailing space.
    """
    if s != ' ':
        return "<text style=color:#000;background-color:{}>{} </text>".format(color, s)
    return "<text style=color:#000;padding-left:10px;background-color:{}> </text>".format(color)
	
# print html
def print_color(t):
    """Display a sequence of ``(text, colour)`` pairs as one coloured HTML line."""
    pieces = []
    for text, colour in t:
        pieces.append(cstr(text, color=colour))
    display(html_print(''.join(pieces)))

# get appropriate color for value
def get_clr(value):
    """Map a score in [0, 1] to a hex colour from a 20-step blue-to-red palette.

    Args:
        value: Score; 0 maps to the first (blue) entry and 1 to the last (red)
            one. Values outside [0, 1] are clamped to the nearest end.

    Returns:
        Colour string of the form ``#rrggbb``.
    """
    # A comma was missing after '#a1d0e8', which fused two entries into one
    # invalid colour and left the palette one entry short.
    colors = ['#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
      '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
      '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
      '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e']
    last_index = len(colors) - 1
    index = int(value * last_index)
    # Clamp so out-of-range scores cannot index past either end of the palette.
    index = max(0, min(index, last_index))
    return colors[index]

# sigmoid function
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, computed with NumPy."""
    return 1 / (1 + np.exp(-x))

def softmax(x):
    """Normalise ``x`` into weights that sum to 1 using the exponential function.

    The maximum is subtracted before exponentiating. This leaves the result
    unchanged mathematically but keeps ``exp`` from overflowing on large inputs.

    Args:
        x: Array-like of scores.

    Returns:
        NumPy array with the same shape as ``x``; empty input gives an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max would raise on empty input; there is nothing to normalise.
        return np.exp(x)
    v = np.exp(x - np.max(x))
    return v / np.sum(v)

def get_gradient_norms(grad_list, word, activation="sigmoid"):
    """Turn per-step gradient tensors into scaled per-character magnitudes.

    Args:
        grad_list: One gradient tensor per output character, as returned by
            ``get_connectivity``.
        word: The original input word (without start/end markers).
        activation: "softmax", "scaler" (min-max scaling) or anything else for
            the sigmoid.

    Returns:
        List with one array of scaled magnitudes per output character, aligned
        with the characters of ``word``.
    """
    grad_norms = []
    for grad_tensor in grad_list:
        grad_mags = tf.norm(grad_tensor, axis=1)
        # Row 0 belongs to the "\t" start marker that get_connectivity
        # prepends, so the characters of `word` are rows 1..len(word).
        # Slicing from 0 shifted every magnitude by one character.
        grad_mags = grad_mags[1:len(word) + 1]
        if activation == "softmax":
            grad_mags_scaled = softmax(grad_mags)
        elif activation == "scaler":
            scaler = MinMaxScaler()
            # MinMaxScaler expects a 2-D input: one row per character.
            grad_mags = tf.reshape(grad_mags, (-1,1))
            grad_mags_scaled = scaler.fit_transform(grad_mags)
        else:
            grad_mags_scaled = sigmoid(grad_mags)
        grad_norms.append(grad_mags_scaled)
    return grad_norms

def visualize(grad_norms, word, translated_word):
    """Show, for each output character, the input characters coloured by score.

    Every coloured line is displayed in the notebook and also logged to W&B as
    a table plot.
    """
    print("Original Word:", word)
    print("Transliterated Word:", translated_word)
    for i, out_char in enumerate(translated_word):
        print("Connectivity Visualization for", out_char, ":")
        norms = grad_norms[i]
        # Pair each input character with the colour of its score.
        text_colours = [(word[j], get_clr(norms[j])) for j in range(len(norms))]
        print_color(text_colours)
        data_table = wandb.Table(data=text_colours, columns=["s_ind", "t_ind"])
        fields = {"s_index": "s_ind", "t_index": "t_ind"}
        plot = wandb.plot_table(vega_spec_name="spec-visualization",
                                data_table=data_table,
                                fields=fields)
        wandb.log({"heatmap": plot})

def visualise_connectivity(model, word, activation="sigmoid"):
    """Translate ``word`` and visualise how each output character depends on the input."""
    translated_word, grad_list = get_connectivity(model, word)
    visualize(get_gradient_norms(grad_list, word, activation), word, translated_word)

In [13]:
def test_words_sample(n):
    """Return ``n`` randomly sampled input words from the "kn" test lexicon.

    The file is read as a header-less, tab-separated table, the same way the
    rest of the notebook reads it, and column 1 holds the input words.
    Previously the first data row was consumed as a header and its text was
    hard-coded as the column name.
    """
    test_df = pd.read_csv(get_data("kn")[2], sep="\t", header=None)
    test_sample = test_df.sample(n).reset_index(drop=True)
    return test_sample[1].astype(str).tolist()

# Visualise input/output connectivity for five random test words, using
# min-max scaling ("scaler") for the gradient magnitudes.
test_words = test_words_sample(5)
print(test_words)
for word in test_words:
    visualise_connectivity(model, word, activation="scaler")

['anumatiyillade', 'bhinnavaagiruva', 'ranarangada', 'hanuma', 'kedet']
Original Word: anumatiyillade
Transliterated Word: ಅನುಮತಿಯಿಲ್ಲದೆ
Connectivity Visualization for ಅ :


Connectivity Visualization for ನ :


Connectivity Visualization for ು :


Connectivity Visualization for ಮ :


Connectivity Visualization for ತ :


Connectivity Visualization for ಿ :


Connectivity Visualization for ಯ :


Connectivity Visualization for ಿ :


Connectivity Visualization for ಲ :


Connectivity Visualization for ್ :


Connectivity Visualization for ಲ :


Connectivity Visualization for ದ :


Connectivity Visualization for ೆ :


Original Word: bhinnavaagiruva
Transliterated Word: ಭಿನ್ನವಾಗಿರುವ
Connectivity Visualization for ಭ :


Connectivity Visualization for ಿ :


Connectivity Visualization for ನ :


Connectivity Visualization for ್ :


Connectivity Visualization for ನ :


Connectivity Visualization for ವ :


Connectivity Visualization for ಾ :


Connectivity Visualization for ಗ :


Connectivity Visualization for ಿ :


Connectivity Visualization for ರ :


Connectivity Visualization for ು :


Connectivity Visualization for ವ :


Original Word: ranarangada
Transliterated Word: ರಣರಂಗದ
Connectivity Visualization for ರ :


Connectivity Visualization for ಣ :


Connectivity Visualization for ರ :


Connectivity Visualization for ಂ :


Connectivity Visualization for ಗ :


Connectivity Visualization for ದ :


Original Word: hanuma
Transliterated Word: ಹನುಮ
Connectivity Visualization for ಹ :


Connectivity Visualization for ನ :


Connectivity Visualization for ು :


Connectivity Visualization for ಮ :


Original Word: kedet
Transliterated Word: ಕೆದೆತ್
Connectivity Visualization for ಕ :


Connectivity Visualization for ೆ :


Connectivity Visualization for ದ :


Connectivity Visualization for ೆ :


Connectivity Visualization for ತ :


Connectivity Visualization for ್ :


## To get prediction CSV file (execute only after getting best model test accuracy)

In [None]:
# Translate every word of the "kn" test lexicon with the trained `model` and
# save inputs, reference targets and model outputs to a CSV file.
test_input = pd.read_csv(get_data("kn")[2], sep="\t", header=None)
inputs = test_input[1].astype(str).tolist()
targets = test_input[0].astype(str).tolist()
outputs = []
for word in inputs:
    outputs.append(model.translate(word)[0])
data_Frame = pd.DataFrame()
data_Frame["inputs"] = inputs
data_Frame["targets"] = targets
data_Frame["outputs"] = outputs
data_Frame.to_csv('predictions_attention.csv', encoding = 'utf-8-sig') 
# Trigger a download of the CSV through google.colab's `files` helper.
files.download('predictions_attention.csv')

In [None]:
# Spot-check the trained model on five random test words.
randomly_evaluate(model, n=5)

# Hyperparameter tuning with attention

In [None]:
def train_with_wandb(language):
    """Train one model whose hyperparameters come from the W&B run config.

    Meant to be the function run by a sweep agent. A run is started with
    ``config_defaults``, and every hyperparameter is then read back from
    ``wandb.config``.

    Args:
        language: Language code passed to ``get_data``.
    """

    config_defaults = {"embedding_dim": 64,
                   "enc_dec_layers": 1,
                   "define_layer": "LSTM",
                   "units": 128,
                   "dropout": 0,
                   "attention": False,
                   "beam_width": 3,
                   "teacher_forcing_ratio": 1.0
                   }

    # Start the run here so the defaults above actually reach wandb.config.
    # With this call commented out the defaults were never applied.
    wandb.init(config=config_defaults, project='Assignment3-partB_attn', entity='cs21s002-ee21s113-dlassignment-1')
    config = wandb.config

    ## 1. SELECT LANGUAGE ##
    TRAIN_INPUT, val_input, test_input = get_data(language)

    ## 2. DATA PREPROCESSING ##
    dataset, input_tokenizer, targ_tokenizer = data_Preprocess(TRAIN_INPUT)
    val_dataset, _, _ = data_Preprocess(val_input, input_tokenizer, targ_tokenizer)

    ## 3. CREATING THE MODEL ##
    # The constructor's parameter is `units`; the former `latent_dim` keyword
    # raised a TypeError and did not match the "units" key of the sweep config.
    model = Seq2Seq_RNN_Model(embedding_dim=config.embedding_dim,
                     encoder_layers=config.enc_dec_layers,
                     dec_layers=config.enc_dec_layers,
                     define_layer=config.define_layer,
                     units=config.units,
                     dropout=config.dropout,
                     attention=config.attention)

    ## 4. COMPILING THE MODEL
    model.set_vocabulary(input_tokenizer, targ_tokenizer)
    model.build(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            optimizer = tf.keras.optimizers.Adam(),
            metric = tf.keras.metrics.SparseCategoricalAccuracy())

    ## 5. FITTING AND VALIDATING THE MODEL
    model.fit(dataset, val_dataset, epochs=30, use_wandb=True, teacher_forcing_ratio=config.teacher_forcing_ratio)


# Sweeps with Attention

In [None]:
# Grid sweep over the recurrent cell type and the teacher-forcing ratio; the
# remaining listed hyperparameters each have a single value.
sweep_config5 = {
                 "name": "Attention Sweep - Assignment3",
                 "description": "Hyperparameter sweep for Seq2Seq Model with Attention",
                 "method": "grid",
                 "parameters": {
                 "define_layer": {
                 "values": ["GRU", "RNN", "LSTM"]
                                 },
                 "enc_dec_layers": {
                            "values": [2]
                                   },
                 "units": {
                            "values": [256]
                           },
                 "dropout": {
                            "values": [0.2]
                            },
                 "attention": {
                            "values": [True]
                            },
                 "teacher_forcing_ratio": {
                            "values":[0.3,0.5,0.7,1.0]}
                            }
}

In [None]:
# Register the sweep configuration with W&B and keep the returned sweep id.
sweep_id5 = wandb.sweep(sweep_config5, project='Assignment3-partB_attn', entity='cs21s002-ee21s113-dlassignment-1')

In [None]:
#wandb.agent(sweep_id5, function=lambda: train_with_wandb("kn"), project='Assignment3-partB_attn', entity='cs21s002-ee21s113-dlassignment-1')

In [None]:
#wandb.init(config=config_defaults, project='Assignment3-partB_attn', entity='cs21s002-ee21s113-dlassignment-1')