#### CS20M059 Shibobrota Das | CS20M007 Abhishek Kumar

## Setup

In [1]:
!pip install tensorflow-addons -qqq

In [2]:
!pip install wandb -qqq

In [3]:
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import GradientTape
from tensorflow import keras
import pandas as pd
import datetime
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Embedding, LSTM, GRU, SimpleRNN, SimpleRNNCell, LSTMCell, GRUCell
from keras.models import Sequential
from keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
import time
import sys
import datetime
from sklearn.utils import shuffle
import wandb
# import nltk
import csv
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.font_manager import FontProperties

print("Using numpy:",np.__version__)
print("Using tensorflow:",tf.__version__)
print("Using tensorflow Addons:",tfa.__version__)
print("Using keras:",keras.__version__)
print("Using pandas:",pd.__version__)

Using numpy: 1.19.5
Using tensorflow: 2.4.1
Using tensorflow Addons: 0.13.0
Using keras: 2.4.0
Using pandas: 1.1.5


In [4]:
# wandb.init(project='Assignment 3', entity='iitm-cs6910-jan-may-2021-cs20m059-cs20m007')

In [5]:
from google.colab import drive
drive.mount('/content/drive')
%cd '/content/drive/My Drive/DL-A3 Dataset/dakshina_dataset_v1.0/hi/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/DL-A3 Dataset/dakshina_dataset_v1.0/hi


#### Load Data

In [6]:
val_df = pd.read_csv("./lexicons/hi.translit.sampled.dev.tsv", sep='\t', header=None)
train_df = pd.read_csv("./lexicons/hi.translit.sampled.train.tsv", sep='\t', header=None)
test_df = pd.read_csv("./lexicons/hi.translit.sampled.test.tsv", sep='\t', header=None)
print("Data Loaded to Dataframes!")

Data Loaded to Dataframes!


In [7]:
%cd '/content/drive/My Drive/A3-checkpoints/'

/content/drive/My Drive/A3-checkpoints


#### Dataset Samples

In [8]:
train_df.sample(n=3)

Unnamed: 0,0,1,2
11951,चिटफंड,chitfund,3
21380,पंखा,pankhaa,1
43110,स्वीकृति,swikriti,1


## Preparing Dataset

In [9]:
sos = "@"
eos = "#"

In [10]:
class LexDataset:
    def __init__(self, input_tensor, target_tensor, batch_size):
        self.input_tensor = input_tensor
        self.target_tensor = target_tensor
        self.batch = tf.data.Dataset.from_tensor_slices((self.input_tensor, self.target_tensor)).shuffle(len(self.input_tensor)).batch(batch_size, drop_remainder=True)

In [11]:
class TransliterationDatatset:
    def __init__(self, df_list, batch_size = 64):
        
        self.input_tokenizer = None
        self.target_tokenizer = None
        self.train = None
        self.val = None
        self.test = None
        self.batch_size = batch_size
        # Load Data
        self.load_dataset(df_list)
        # Other parameters
        self.num_input_tokens = len(self.input_tokenizer.index_word)+1
        self.num_target_tokens = len(self.target_tokenizer.index_word)+1
        self.max_input_seq_length = np.max([self.train.input_tensor.shape[1], self.val.input_tensor.shape[1], self.test.input_tensor.shape[1]])
        self.max_target_seq_length = np.max([self.train.target_tensor.shape[1], self.val.target_tensor.shape[1], self.test.target_tensor.shape[1]])
        
    def preprocess_word(self, w):
        return sos + str(w) + eos
    
    def print_input(self, tensor):
        for t in tensor:
            if t != 0:
                print(f'{t} ----> {self.input_tokenizer.index_word[t]}')

    def get_target_word(self, tensor):
        word = []
        for t in tensor:
            if t != 0:
                word.append(self.input_tokenizer.index_word[t])
        return "".join([ch for ch in word])
                
    def print_target(self, tensor):
        for t in tensor:
            if t != 0:
                print(f'{t} ----> {self.target_tokenizer.index_word[t]}')
    
    def create_dataset(self, data_frame):
        input_words = []
        target_words = []
        # Shuffle the data_frame before creating dataset
        df = data_frame
        for i in range(5):
            df = shuffle(df)
        for x, y in zip(df[1], df[0]):
            input_words.append(self.preprocess_word(x))
            target_words.append(self.preprocess_word(y))
        return (input_words, target_words)
    
    def load_dataset(self, df_list):
        # df_list should have train -> val -> test in sequence
        
        self.input_tokenizer = Tokenizer(num_words = None, char_level = True)
        self.target_tokenizer = Tokenizer(num_words = None, char_level = True)
        
        ds_list = []
        
        for df in df_list:
            # Get the words list
            (input_words, target_words) = self.create_dataset(df)
            # Fit on the set of words
            self.input_tokenizer.fit_on_texts(input_words)
            self.target_tokenizer.fit_on_texts(target_words)
            ds_list.append((input_words, target_words))
                    
        self.target_tokenizer.index_word.update({0:" "})
        self.input_tokenizer.index_word.update({0:" "})
        
        input_word_len = []
        target_word_len = []
        
        tensor_list = []
        
        for i, (input_words, target_words) in enumerate(ds_list):
            input_tensor = self.input_tokenizer.texts_to_sequences(input_words)
            target_tensor = self.target_tokenizer.texts_to_sequences(target_words)
            tensor_list.append((input_tensor, target_tensor))
            input_word_len.append(np.max([len(x) for x in input_tensor]))
            target_word_len.append(np.max([len(x) for x in target_tensor]))
        
        for i, (input_tensor, target_tensor) in enumerate(tensor_list):
            
            input_tensor = pad_sequences(input_tensor, padding='post', maxlen = np.max(input_word_len))
            target_tensor = pad_sequences(target_tensor, padding='post', maxlen = np.max(target_word_len))
            
            if i == 0:
                self.train = LexDataset(input_tensor, target_tensor, self.batch_size)
            elif i == 1:
                self.val = LexDataset(input_tensor, target_tensor, self.batch_size)
            else:
                self.test = LexDataset(input_tensor, target_tensor, self.batch_size)

In [12]:
dataset = TransliterationDatatset([train_df, val_df, test_df], 128)

#### Training Data

In [13]:
# Training data
dataset.train.input_tensor.shape, dataset.train.target_tensor.shape

((44204, 22), (44204, 21))

#### Validation Data

In [14]:
# Validation data
dataset.val.input_tensor.shape, dataset.val.target_tensor.shape

((4358, 22), (4358, 21))

#### Test Data

In [15]:
# Test data
dataset.test.input_tensor.shape, dataset.test.target_tensor.shape

((4502, 22), (4502, 21))

#### Number of Tokens

In [16]:
# Number of tokens
dataset.num_input_tokens, dataset.num_target_tokens

(30, 67)

#### Maximum Sequence Lengths

In [17]:
# max seq length
dataset.max_input_seq_length, dataset.max_target_seq_length

(22, 21)

#### Example batch - dataset

In [18]:
# example_input_batch, example_target_batch = next(iter(dataset.train.batch))
# example_input_batch.shape, example_target_batch.shape

In [19]:
# dataset.print_input(example_input_batch[2].numpy())

In [20]:
# dataset.print_target(example_target_batch[2].numpy())

## Encoder Model

In [21]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz, dropout=0.2, layer_type="GRU", num_layers=1):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.layer_type = layer_type
        self.num_layers = num_layers
        self.dropout = dropout
        self.rnn_layers = []

        if self.layer_type == "LSTM":
            for i in range(self.num_layers):
                self.rnn_layers.append(LSTM(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))
        
        elif self.layer_type == "GRU":
            for i in range(self.num_layers):
                self.rnn_layers.append(GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))
        
        else:
            for i in range(self.num_layers):
                self.rnn_layers.append(SimpleRNN(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))        


    def call(self, inputs, hidden):
        inputs = self.embedding(inputs)
        state_h, state_c = [], []

        if self.layer_type == "LSTM":
            output, h, c = self.rnn_layers[0](inputs, initial_state = hidden)
            state_h.append(h)
            state_c.append(c)
            for i in range(1, self.num_layers):
                output, h, c = self.rnn_layers[i](output, initial_state = hidden)
                state_h.append(h)
                state_c.append(c)
            return output, state_h, state_c
        
        elif self.layer_type == "GRU":
            output, h = self.rnn_layers[0](inputs, initial_state = hidden)
            state_h.append(h)
            for i in range(1, self.num_layers):
                output, h = self.rnn_layers[i](output, initial_state = hidden)
                state_h.append(h)
            return output, state_h, None
           
        else:
            output, h = self.rnn_layers[0](inputs, initial_state = hidden)
            state_h.append(h)
            for i in range(1, self.num_layers):
                output, h = self.rnn_layers[i](output, initial_state = hidden)
                state_h.append(h)
            return output, state_h, None

    def initialize_hidden_state(self):
        if self.layer_type == "LSTM":
            return [tf.zeros((self.batch_sz, self.enc_units)), tf.zeros((self.batch_sz, self.enc_units))]
        else:
            return tf.zeros((self.batch_sz, self.enc_units))

### Test Encoder

In [22]:
# vocab_inp_size = dataset.num_input_tokens
# embedding_dim = 64
# units = 256
# BATCH_SIZE = dataset.batch_size

In [23]:
# encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE, 0.2, "GRU", 3)

# # sample input
# sample_hidden = encoder.initialize_hidden_state()
# sample_output, sample_hidden, sample_cell = encoder(example_input_batch, sample_hidden)
# print('Encoder output shape: (batch size, sequence length, units)', np.shape(sample_output))
# print('Encoder Hidden state shape: (batch size, units)', np.shape(sample_hidden))
# if encoder.layer_type == "LSTM":
#     print ('Encoder c vector shape: (batch size, units) {}'.format(np.shape(sample_cell)))

## Decoder Model 

In [24]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz, dropout=0.2, layer_type="GRU", num_layers=1):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.layer_type = layer_type
        self.num_layers = num_layers
        self.dropout = dropout
        self.rnn_layers = []
        
        if self.layer_type == "LSTM":
            for i in range(self.num_layers):
                self.rnn_layers.append(LSTM(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))
        
        elif self.layer_type == "GRU":
            for i in range(self.num_layers):
                self.rnn_layers.append(GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))
           
        else:
            for i in range(self.num_layers):
                self.rnn_layers.append(SimpleRNN(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       dropout = self.dropout,
                                       recurrent_initializer='glorot_uniform'))
        
        self.fc = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, state_h, state_c=None):
        inputs = self.embedding(inputs)

        if self.layer_type == "LSTM":
            output, h, c = self.rnn_layers[0](inputs, initial_state = [state_h[0], state_c[0]])
            for i in range(1, self.num_layers):
                output, h, c = self.rnn_layers[i](output, initial_state = [state_h[i], state_c[i]])
        
        elif self.layer_type == "GRU":
            output, h = self.rnn_layers[0](inputs, initial_state = state_h[0])
            for i in range(1, self.num_layers):
                output, h = self.rnn_layers[i](output, initial_state = state_h[i])
           
        else:
            output, h = self.rnn_layers[0](inputs, initial_state = state_h[0])
            for i in range(1, self.num_layers):
                output, h = self.rnn_layers[i](output, initial_state = state_h[i])

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        # return x, state
        if self.layer_type != "LSTM":
            return x, h, None
        else:
            return x, h, c

### Test Decoder

In [25]:
# vocab_tar_size = dataset.num_target_tokens

In [26]:
# decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, 0.2, "GRU", 3)

# if decoder.layer_type != "LSTM":
#     sample_decoder_output, sample_decoder_hidden, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)), sample_hidden, sample_cell)
# else:
#     sample_decoder_output, sample_decoder_hidden, sample_decoder_cell = decoder(tf.random.uniform((BATCH_SIZE, 1)), sample_hidden, sample_cell)

# print('Decoder output shape: (batch_size, vocab size)', sample_decoder_output.shape)
# print('Decoder Hidden state shape: (batch size, units)', sample_decoder_hidden.shape)
# if encoder.layer_type == "LSTM":
#     print ('Encoder c vector shape: (batch size, units) {}'.format(sample_decoder_cell.shape))

## loss function

In [27]:
def loss_function(real, pred, loss_object):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)

## Accuracy (Word level)

In [28]:
def accuracy(real, pred):
    real = tf.cast(real, tf.int32)
    pred = tf.cast(pred, tf.int32)
    return tf.reduce_mean(tf.cast(tf.equal(real, pred), tf.float32))

## Training

In [29]:
def train_one_step():
    @tf.function
    def train_step(encoder, decoder, inp, targ, enc_hidden, optimizer, loss_object, is_val = False):
        loss = 0
        spc_loss = 0
        
        with tf.GradientTape() as tape:
            enc_output, enc_hidden, enc_cell = encoder(inp, enc_hidden)
            if decoder.layer_type != "LSTM":
                dec_hidden = enc_hidden
            else:
                dec_hidden, dec_cell = enc_hidden, enc_cell

            dec_input = tf.expand_dims([dataset.target_tokenizer.word_index[sos]] * dataset.batch_size, 1)

            pred = None

            # Teacher forcing - feeding the target as the next input
            for t in range(1, targ.shape[1]):
                
                # passing enc_output to the decoder
                if decoder.layer_type != "LSTM":
                    predictions, _, _ = decoder(dec_input, dec_hidden)
                else:
                    predictions, _, _ = decoder(dec_input, dec_hidden, dec_cell)
                            
                loss += loss_function(targ[:, t], predictions, loss_object)

                # using teacher forcing
                dec_input = tf.expand_dims(targ[:, t], 1)

                if t == 1:
                    pred = tf.expand_dims(tf.argmax(predictions, axis=-1), 1)
                else:
                    pred = tf.concat([pred, tf.expand_dims(tf.argmax(predictions, axis=-1), 1)], 1)
        
        batch_accuracy = accuracy(targ[:, 1:], pred)

        batch_loss = (loss / int(targ.shape[1]))

        if not is_val:
            variables = encoder.trainable_variables + decoder.trainable_variables

            gradients = tape.gradient(loss, variables)

            optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss, batch_accuracy
    return train_step

In [40]:
default_config = {
    "layer_type": "LSTM",
    "units": 256,
    "embedding_dim": 16,
    "optimiser": "nadam",
    "epochs": 20,
    "dropout": 0.0,
    "batch_size": dataset.batch_size,
    "num_layers": 1
}

In [41]:
def log_wandb(data):
    wandb.log(data)

In [42]:
sweep_config = {
    "name": "Assignment 3 - Without Attention " + str(datetime.datetime.now().replace(microsecond=0).isoformat()),
    "method": "random",
    "metric":{
        "name": "loss",
        "goal": "minimize"
    },
    "project": 'Assignment 3',
    "parameters": {
        "layer_type": {
            "values": ["GRU", "LSTM", "SimpleRNN"]
        },
        "dropout": {
            "values": [0.0, 0.2]
        },
        "units": {
            "values": [64, 256]
        },
        "embedding_dim": {
            "values": [16, 64]
        },
        "optimiser": {
            "values": ["nadam"]
        },
        "epochs": {
            "values": [20]
        },
        "batch_size": {
            "values": [dataset.batch_size]
        },
        "num_layers": {
            "values": [1, 2]
        }
    }
}

In [50]:
def train(dataset, config, callback=None):

    run_name = "".join(f"{a}:{b} " for (a, b) in config.items())
    print(run_name)
    wandb.run.name = run_name

    train_dataset = dataset.train
    val_dataset = dataset.val

    optimizer = tf.keras.optimizers.Nadam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    EPOCHS = config["epochs"]
    BATCH_SIZE = config["batch_size"]
    steps_per_epoch = len(train_dataset.input_tensor)//BATCH_SIZE
    val_steps_per_epoch = len(val_dataset.input_tensor)//BATCH_SIZE
    embedding_dim = config["embedding_dim"]     
    units = config["units"]
    layer_type = config["layer_type"]
    num_layers = config["num_layers"]
    dropout = config["dropout"]

    # Encoder
    encoder = Encoder(dataset.num_input_tokens, embedding_dim, units, BATCH_SIZE, dropout, layer_type, num_layers)
    # Decoder
    decoder = Decoder(dataset.num_target_tokens, embedding_dim, units, BATCH_SIZE, dropout, layer_type, num_layers)

    train_step = train_one_step()

    for epoch in range(EPOCHS):
        start = time.time()

        enc_hidden = encoder.initialize_hidden_state()
        total_loss = 0
        total_accuracy = 0
        val_total_loss = 0
        val_total_accuracy = 0

        train_dataset.batch.shuffle(BATCH_SIZE*10)

        for (batch, (inp, targ)) in enumerate(train_dataset.batch.take(steps_per_epoch)):
            # Step Train
            batch_loss, batch_accuracy = train_step(encoder, decoder, inp, targ, enc_hidden, optimizer, loss_object, False)
            total_loss += batch_loss
            total_accuracy += batch_accuracy
            if batch % 100 == 0 or batch == steps_per_epoch-1:
                print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f} Accuracy {batch_accuracy:.4f}')
            
            if callback != None:
                callback({"epoch":epoch+1, "loss": batch_loss.numpy(), "accuracy":batch_accuracy})

        print(f'Epoch {epoch+1} Loss {total_loss/steps_per_epoch:.4f} Acc {total_accuracy/steps_per_epoch:.4f}')
        print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')
        if callback != None:
            callback({"ep_training_loss": total_loss/steps_per_epoch, "ep_training_accuracy": total_accuracy/steps_per_epoch})

        val_dataset.batch.shuffle(BATCH_SIZE*10)

        for (batch, (inp, targ)) in enumerate(val_dataset.batch.take(val_steps_per_epoch)):
            val_batch_loss, val_batch_accuracy = train_step(encoder, decoder, inp, targ, enc_hidden, optimizer, loss_object, True)
            val_total_loss += val_batch_loss
            val_total_accuracy += val_batch_accuracy

            if batch % 100 == 0 or batch == val_steps_per_epoch-1:
                print(f'Epoch {epoch+1} Batch {batch} Val Loss {val_batch_loss.numpy():.4f} Val Accuracy {val_batch_accuracy:.4f}')

            if callback != None:
                callback({"epoch":epoch+1, "val loss": batch_loss.numpy(), "val accuracy":batch_accuracy})

        print(f'Epoch {epoch+1} Val Loss {val_total_loss/val_steps_per_epoch:.4f} Val Acc {val_total_accuracy/val_steps_per_epoch:.4f}')
        print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

        if callback != None:
            callback({"ep_val_loss": val_total_loss/val_steps_per_epoch, "ep_val_accuracy": val_total_accuracy/val_steps_per_epoch})

    return encoder, decoder

In [51]:
# enc, dec = train(dataset, default_config)

In [52]:
def sweep():

    wandb.init(config=default_config, magic=True, project='Assignment 3', entity='iitm-cs6910-jan-may-2021-cs20m059-cs20m007')
    config = wandb.config
    
    encoder, decoder = train(dataset, config, log_wandb)

In [54]:
# sweep_id = wandb.sweep(sweep_config, project='Assignment 3', entity='iitm-cs6910-jan-may-2021-cs20m059-cs20m007')

Create sweep with ID: ybc5yonl
Sweep URL: https://wandb.ai/iitm-cs6910-jan-may-2021-cs20m059-cs20m007/Assignment%203/sweeps/ybc5yonl


In [None]:
wandb.agent("ybc5yonl", function=sweep, project='Assignment 3', entity='iitm-cs6910-jan-may-2021-cs20m059-cs20m007')

[34m[1mwandb[0m: Agent Starting Run: ijq9tksi with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	layer_type: LSTM
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimiser: nadam
[34m[1mwandb[0m: 	units: 256


batch_size:128 dropout:0 embedding_dim:16 epochs:20 layer_type:LSTM num_layers:1 optimiser:nadam units:256 
Epoch 1 Batch 0 Loss 1.4784 Accuracy 0.0035
Epoch 1 Batch 100 Loss 1.0871 Accuracy 0.0594
Epoch 1 Batch 200 Loss 0.9768 Accuracy 0.0766
Epoch 1 Batch 300 Loss 1.0164 Accuracy 0.0836
Epoch 1 Batch 344 Loss 1.0070 Accuracy 0.0855
Epoch 1 Loss 1.0583 Acc 0.0700
Time taken for 1 epoch 146.26 sec

Epoch 1 Batch 0 Val Loss 0.9516 Val Accuracy 0.0750
Epoch 1 Batch 33 Val Loss 0.9307 Val Accuracy 0.0781
Epoch 1 Val Loss 0.9458 Val Acc 0.0786
Time taken for 1 epoch 159.42 sec

Epoch 2 Batch 0 Loss 1.0499 Accuracy 0.0828
Epoch 2 Batch 100 Loss 0.9834 Accuracy 0.0957
Epoch 2 Batch 200 Loss 0.9769 Accuracy 0.0910
Epoch 2 Batch 300 Loss 0.8613 Accuracy 0.0949
Epoch 2 Batch 344 Loss 0.9172 Accuracy 0.0996
Epoch 2 Loss 0.9308 Acc 0.0939
Time taken for 1 epoch 114.87 sec

Epoch 2 Batch 0 Val Loss 0.8646 Val Accuracy 0.0973
Epoch 2 Batch 33 Val Loss 0.8663 Val Accuracy 0.0898
Epoch 2 Val Loss 0.8

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,20.0
loss,0.24678
accuracy,0.29102
_runtime,2451.0
_timestamp,1621864590.0
_step,7619.0
ep_training_loss,0.23711
ep_training_accuracy,0.27949
val loss,0.24678
val accuracy,0.29102


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,█▇▆▆▆▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁
accuracy,▁▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
ep_training_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁
ep_training_accuracy,▁▂▂▃▄▄▄▅▅▆▆▆▇▇▇▇████
val loss,██▇▇▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val accuracy,▁▁▁▁▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▆▆▇▇▇▇██


[34m[1mwandb[0m: Agent Starting Run: 0kdteojo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	layer_type: LSTM
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimiser: nadam
[34m[1mwandb[0m: 	units: 64


batch_size:128 dropout:0 embedding_dim:16 epochs:20 layer_type:LSTM num_layers:1 optimiser:nadam units:64 
Epoch 1 Batch 0 Loss 1.4374 Accuracy 0.0223
Epoch 1 Batch 100 Loss 1.1776 Accuracy 0.0500
Epoch 1 Batch 200 Loss 1.0650 Accuracy 0.0629
Epoch 1 Batch 300 Loss 1.0308 Accuracy 0.0785
Epoch 1 Batch 344 Loss 0.9947 Accuracy 0.0781
Epoch 1 Loss 1.1034 Acc 0.0621
Time taken for 1 epoch 66.52 sec

Epoch 1 Batch 0 Val Loss 1.0062 Val Accuracy 0.0723
Epoch 1 Batch 33 Val Loss 0.9838 Val Accuracy 0.0742
Epoch 1 Val Loss 0.9818 Val Acc 0.0713
Time taken for 1 epoch 79.91 sec

Epoch 2 Batch 0 Loss 1.0072 Accuracy 0.0703
Epoch 2 Batch 100 Loss 0.9795 Accuracy 0.0910
Epoch 2 Batch 200 Loss 0.9775 Accuracy 0.0859
Epoch 2 Batch 300 Loss 0.9655 Accuracy 0.0848
Epoch 2 Batch 344 Loss 0.9685 Accuracy 0.0844
Epoch 2 Loss 0.9821 Acc 0.0835
Time taken for 1 epoch 21.34 sec

Epoch 2 Batch 0 Val Loss 0.9111 Val Accuracy 0.0812
Epoch 2 Batch 33 Val Loss 0.9357 Val Accuracy 0.0832
Epoch 2 Val Loss 0.9207 

## Translate

In [34]:
def evaluate(sentence):
    attention_plot = np.zeros((dataset.max_target_seq_length, dataset.max_input_seq_length))

    sentence = dataset.preprocess_word(sentence)

    inputs = [dataset.input_tokenizer.word_index[i] for i in sentence]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=dataset.max_input_seq_length,
                                                         padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''

    if encoder.layer_type != "LSTM":
        hidden = [tf.zeros((1, units))]
    else:
        hidden = [tf.zeros((1, units)), tf.zeros((1, units))]
    
    enc_out, enc_hidden, enc_cell = encoder(inputs, hidden)
    
    if decoder.layer_type != "LSTM":
        dec_hidden = enc_hidden
    else:
        dec_hidden, dec_cell = enc_hidden, enc_cell

    dec_input = tf.expand_dims([dataset.target_tokenizer.word_index[sos]], 0)

    for t in range(dataset.max_target_seq_length):
        # passing enc_output to the decoder
        if decoder.layer_type != "LSTM":
            predictions, _, _ = decoder(dec_input, dec_hidden)
        else:
            predictions, _, _ = decoder(dec_input, dec_hidden, dec_cell)
                
        
        predicted_id = tf.argmax(predictions[0]).numpy()
        
        result += dataset.target_tokenizer.index_word[predicted_id]

        if dataset.target_tokenizer.index_word[predicted_id] == eos:
            return result, sentence

        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result, sentence

In [35]:
def translate(sentence):
    result, sentence = evaluate(sentence)

    print('Input:', sentence)
    print('Predicted translation:', result)

In [39]:
translate("false")

Input: @false#
Predicted translation: फालस#


In [None]:
sequence = dataset.input_tokenizer.texts_to_sequences("shibobrota")
np.reshape(sequence, len(sequence))

array([ 9,  6,  5, 19, 11, 19,  7, 11,  8,  1])

In [None]:
text = dataset.input_tokenizer.sequences_to_texts(sequence)
text

['s', 'h', 'i', 'b', 'o', 'b', 'r', 'o', 't', 'a']

In [None]:
def save_predictions(data_frame, name):
    accuracy_count = 0;
    with open(name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["INPUT", "PREDICTION", "TRUE"])
        for i, (inp, trg) in enumerate(zip(data_frame[1], data_frame[0])): 
            result, sentence, attention_plot = evaluate(inp)
            writer.writerow([inp, result[:-1], trg])
            print(inp, result[:-1], trg)
            if result[:-1] == trg:
                accuracy_count += 1
            if (i+1) % 100 == 0 or i+1 == data_frame.size:
                print("Accuracy", (accuracy_count / (i+1)))

    return accuracy_count/data_frame.size

In [None]:
save_predictions(test_df, "new_without_attn_predictions.csv")

In [None]:
dataset.target_tokenizer.word_index[sos], dataset.target_tokenizer.word_index[eos]

(1, 2)