<a href="https://colab.research.google.com/github/anandhc6/Assignment-3/blob/main/seq2seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Required packages

import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend
from random import randrange 
from tensorflow import keras
from google.colab import files
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from tensorflow.python.keras.models import load_model
from tensorflow.python.keras.callbacks import EarlyStopping

In [None]:
# Downloading dataset
# Fetches the Dakshina v1.0 archive into the working directory (about 1.9 GB according to the
# recorded wget output) and unpacks it. The lexicon paths used further down are relative to the
# extracted `dakshina_dataset_v1.0/` folder.

!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf 'dakshina_dataset_v1.0.tar'

--2022-05-07 12:42:49--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.45.112, 172.217.15.80, 172.253.62.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.45.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-05-07 12:43:02 (142 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [None]:
 # Paths of train and valid datasets.
train_data_path = "dakshina_dataset_v1.0/ta/lexicons/ta.translit.sampled.train.tsv"
val_data_path = "dakshina_dataset_v1.0/ta/lexicons/ta.translit.sampled.dev.tsv"


def _read_lexicon_lines(path):
    """Return the rows of the UTF-8 text file at `path`, one string per line.

    Line terminators are removed. For a file that ends with a newline this
    yields exactly what `read().split("\n")` followed by `pop()` produced, but
    it no longer throws away the final record when the file has no trailing
    newline.
    """
    with open(path, "r", encoding="utf-8") as file:
        return [line.rstrip("\n") for line in file]


# Saving the files in list (one tab-separated record per element).
train_data_lines = _read_lexicon_lines(train_data_path)
val_data_lines = _read_lexicon_lines(val_data_path)

# Fixed parameter
batch_size = 64


In [None]:
# embedding train data

def embed_train_data(train_data_lines):
    """Build the character vocabularies and the padded training arrays.

    Parameters
    ----------
    train_data_lines : list of str
        Rows of the form ``target<TAB>input<TAB>extra``; the third field is
        ignored. Blank rows are skipped.

    Returns
    -------
    tuple
        ``(encoder_input_data, decoder_input_data, decoder_target_data,
        num_encoder_tokens, num_decoder_tokens, input_token_idx,
        target_token_idx, encoder_max_length, decoder_max_length)`` where

        * ``encoder_input_data`` is ``(samples, encoder_max_length)`` float32
          holding input character indices, padded with the index of ``" "``;
        * ``decoder_input_data`` is ``(samples, decoder_max_length)`` float32
          holding indices of ``"\\t" + target + "\\n"``, padded the same way;
        * ``decoder_target_data`` is the one-hot encoding of the decoder input
          shifted one step to the left (the start character is dropped) with
          shape ``(samples, decoder_max_length, num_decoder_tokens)``;
        * the two ``*_token_idx`` dicts map character -> index in sorted
          character order.

    Raises
    ------
    ValueError
        If a non-blank row does not have exactly three tab-separated fields,
        or if there are no usable rows at all.
    """
    train_input_data = []
    train_target_data = []
    input_data_characters = set()
    target_data_characters = set()

    for line in train_data_lines:
        # Skip blank rows (e.g. the empty string left behind by splitting a
        # file on "\n") instead of unconditionally discarding the last
        # element, which silently dropped a real sample.
        if not line.strip():
            continue
        target_data, input_data, _ = line.split("\t")

        # We are using "tab" as the "start sequence" and "\n" as "end sequence".
        target_data = "\t" + target_data + "\n"
        train_input_data.append(input_data)
        train_target_data.append(target_data)

        # Finding unique characters.
        input_data_characters.update(input_data)
        target_data_characters.update(target_data)

    if not train_input_data:
        raise ValueError("train_data_lines contains no samples")

    print("Number of samples:", len(train_input_data))

    # Space doubles as the padding character in both vocabularies.
    input_data_characters.add(" ")
    target_data_characters.add(" ")

    input_data_characters = sorted(input_data_characters)
    target_data_characters = sorted(target_data_characters)

    # maximum length of the words
    encoder_max_length = max(len(txt) for txt in train_input_data)
    decoder_max_length = max(len(txt) for txt in train_target_data)

    print("Max sequence length for inputs:", encoder_max_length)
    print("Max sequence length for outputs:", decoder_max_length)

    # number of input and target characters
    num_encoder_tokens = len(input_data_characters)
    num_decoder_tokens = len(target_data_characters)

    print("Number of unique input tokens:", num_encoder_tokens)
    print("Number of unique output tokens:", num_decoder_tokens)

    # character -> index lookups
    input_token_idx = {char: i for i, char in enumerate(input_data_characters)}
    target_token_idx = {char: i for i, char in enumerate(target_data_characters)}

    n_samples = len(train_input_data)
    input_pad = input_token_idx[" "]
    target_pad = target_token_idx[" "]

    # The index arrays start out fully padded, so the padding of a row no
    # longer depends on a loop variable that is stale for an empty word.
    encoder_input_data = np.full((n_samples, encoder_max_length), input_pad, dtype="float32")
    decoder_input_data = np.full((n_samples, decoder_max_length), target_pad, dtype="float32")
    decoder_target_data = np.zeros((n_samples, decoder_max_length, num_decoder_tokens), dtype="float32")

    for i, (input_data, target_data) in enumerate(zip(train_input_data, train_target_data)):
        for t, char in enumerate(input_data):
            encoder_input_data[i, t] = input_token_idx[char]

        for t, char in enumerate(target_data):
            decoder_input_data[i, t] = target_token_idx[char]
            if t > 0:
                # decoder_target_data is one timestep ahead of decoder_input_data,
                # so the start character never appears in it.
                decoder_target_data[i, t - 1, target_token_idx[char]] = 1.0

        # Every position after the shifted target is one-hot "space".
        decoder_target_data[i, len(target_data) - 1:, target_pad] = 1.0

    return encoder_input_data,decoder_input_data,decoder_target_data,num_encoder_tokens,num_decoder_tokens,input_token_idx,target_token_idx,encoder_max_length,decoder_max_length


In [None]:
# embedding validation data

def embed_val_data(val_data_lines,num_decoder_tokens,input_token_idx,target_token_idx):
    """Encode validation rows with the vocabularies built from the training data.

    Parameters
    ----------
    val_data_lines : list of str
        Rows of the form ``target<TAB>input<TAB>extra``; the third field is
        ignored. Blank rows are skipped.
    num_decoder_tokens : int
        Size of the last axis of the one-hot target array.
    input_token_idx, target_token_idx : dict
        Character -> index lookups. Both must contain ``" "``, which is used
        as the padding character.

    Returns
    -------
    tuple
        ``(val_encoder_input_data, val_decoder_input_data,
        val_decoder_target_data, target_token_idx, val_target_data)``. The
        arrays are padded to the longest sequence found in ``val_data_lines``
        itself; ``val_target_data`` is the list of target strings wrapped in
        ``"\\t"`` / ``"\\n"``; ``target_token_idx`` is returned unchanged.

    Raises
    ------
    KeyError
        If a row contains a character missing from the supplied lookups.
    ValueError
        If a non-blank row does not have exactly three tab-separated fields,
        or if there are no usable rows at all.
    """
    val_input_data = []
    val_target_data = []

    for line in val_data_lines:
        # Skip blank rows rather than unconditionally discarding the last
        # element, which silently dropped a real sample.
        if not line.strip():
            continue
        target_data, input_data, _ = line.split("\t")

        # We use "tab" as the "start sequence" character and "\n" as "end sequence" character.
        val_input_data.append(input_data)
        val_target_data.append("\t" + target_data + "\n")

    if not val_input_data:
        raise ValueError("val_data_lines contains no samples")

    val_encoder_max_length = max(len(txt) for txt in val_input_data)
    val_decoder_max_length = max(len(txt) for txt in val_target_data)

    n_samples = len(val_input_data)
    input_pad = input_token_idx[" "]
    target_pad = target_token_idx[" "]

    # The index arrays start out fully padded, so the padding of a row no
    # longer depends on a loop variable that is stale for an empty word.
    val_encoder_input_data = np.full((n_samples, val_encoder_max_length), input_pad, dtype="float32")
    val_decoder_input_data = np.full((n_samples, val_decoder_max_length), target_pad, dtype="float32")
    val_decoder_target_data = np.zeros((n_samples, val_decoder_max_length, num_decoder_tokens), dtype="float32")

    for i, (input_data, target_data) in enumerate(zip(val_input_data, val_target_data)):
        for t, ch in enumerate(input_data):
            val_encoder_input_data[i, t] = input_token_idx[ch]

        for t, ch in enumerate(target_data):
            val_decoder_input_data[i, t] = target_token_idx[ch]
            if t > 0:
                # decoder_target_data is one timestep ahead of decoder_input_data,
                # so the start character never appears in it.
                val_decoder_target_data[i, t - 1, target_token_idx[ch]] = 1.0

        # Every position after the shifted target is one-hot "space".
        val_decoder_target_data[i, len(target_data) - 1:, target_pad] = 1.0

    return val_encoder_input_data,val_decoder_input_data,val_decoder_target_data,target_token_idx,val_target_data


In [None]:
# Embedding data
# Training set: also yields the vocabularies and the maximum sequence lengths.
(
    encoder_input_data,
    decoder_input_data,
    decoder_target_data,
    num_encoder_tokens,
    num_decoder_tokens,
    input_token_idx,
    target_token_idx,
    encoder_max_length,
    decoder_max_length,
) = embed_train_data(train_data_lines)

# Validation set: encoded with the lookups produced from the training set.
(
    val_encoder_input_data,
    val_decoder_input_data,
    val_decoder_target_data,
    target_token_idx,
    val_target_data,
) = embed_val_data(val_data_lines, num_decoder_tokens, input_token_idx, target_token_idx)

# Inverse lookups (token index -> character), used when decoding predictions.
reverse_input_char_index = {index: symbol for symbol, index in input_token_idx.items()}
reverse_target_char_index = {index: symbol for symbol, index in target_token_idx.items()}


Number of samples: 68217
Max sequence length for inputs: 30
Max sequence length for outputs: 28
Number of unique input tokens: 27
Number of unique output tokens: 49


In [None]:
# RNN model

def seq2seq(embedding_size, n_encoder_tokens, n_decoder_tokens, n_encoder_layers,n_decoder_layers, latent_dimension, cell_type,
            target_token_idx, decoder_max_length, reverse_target_char_index,dropout,encoder_input_data, decoder_input_data,
            decoder_target_data,batch_size,epochs):
  """Train a character-level encoder-decoder and return its inference models.

  The training graph is: embedding -> `n_encoder_layers` stacked recurrent
  layers (each layer after the first is initialised with the previous layer's
  final state) -> `n_decoder_layers` stacked recurrent decoder layers, every
  one initialised with the final state of the last encoder layer -> softmax
  over `n_decoder_tokens`. It is compiled with rmsprop / categorical
  cross-entropy and fitted on the supplied arrays with a `WandbCallback`
  (that name must be imported before this function is called).

  Parameters
  ----------
  embedding_size : int
      Output dimension of both embedding layers.
  n_encoder_tokens, n_decoder_tokens : int
      Vocabulary sizes of the encoder and decoder sides.
  n_encoder_layers, n_decoder_layers : int
      Number of stacked recurrent layers on each side (at least 1).
  latent_dimension : int
      Number of units in every recurrent layer.
  cell_type : str
      "RNN", "GRU" or "LSTM" (matched case-insensitively).
  target_token_idx, decoder_max_length, reverse_target_char_index
      Not used by this function; kept so existing callers keep working.
  dropout : float
      Passed as the `dropout` argument of every recurrent layer.
  encoder_input_data, decoder_input_data, decoder_target_data
      Training arrays handed to `model.fit`.
  batch_size, epochs : int
      Passed to `model.fit`.

  Returns
  -------
  (encoder_model, decoder_model)
      `encoder_model` maps an encoder input to the final state(s) of the last
      encoder layer. `decoder_model` takes `[decoder_input] + states` and
      returns `[token_probabilities] + new_states`, where the states are
      listed layer by layer (hidden state only for RNN/GRU; hidden then cell
      state for LSTM).

  Raises
  ------
  ValueError
      If `cell_type` is not supported or a layer count is smaller than 1.
  """
  # Validate first so that a bad configuration fails immediately instead of
  # falling through every branch and returning None.
  layer_class_names = {"RNN": "SimpleRNN", "GRU": "GRU", "LSTM": "LSTM"}
  cell_key = str(cell_type).upper()
  if cell_key not in layer_class_names:
    raise ValueError("Unsupported cell_type %r; expected one of 'RNN', 'GRU' or 'LSTM'." % (cell_type,))
  if n_encoder_layers < 1 or n_decoder_layers < 1:
    raise ValueError("n_encoder_layers and n_decoder_layers must both be at least 1.")

  recurrent_layer = getattr(keras.layers, layer_class_names[cell_key])
  # An LSTM layer returns (hidden, cell) state; SimpleRNN and GRU return only a hidden state.
  n_states = 2 if cell_key == "LSTM" else 1

  def new_recurrent_layer(name):
    # Every recurrent layer in the model shares this configuration.
    return recurrent_layer(latent_dimension, return_state=True, return_sequences=True, name=name, dropout=dropout)

  # ---- Training model ----
  encoder_inputs = keras.Input(shape=(None,), name='encoder_input')
  encoder_outputs = tf.keras.layers.Embedding(input_dim=n_encoder_tokens, output_dim=embedding_size,
                                              name='encoder_embedding')(encoder_inputs)

  # The first layer starts from its default state (initial_state=None); each
  # later layer starts from the final state of the layer below it.
  encoder_states = None
  for i in range(1, n_encoder_layers + 1):
    encoder_outputs, *encoder_states = new_recurrent_layer('encoder_hidden_%d' % i)(
        encoder_outputs, initial_state=encoder_states)

  decoder_inputs = keras.Input(shape=(None,), name='decoder_input')
  decoder_outputs = tf.keras.layers.Embedding(n_decoder_tokens, embedding_size, name='decoder_embedding')(decoder_inputs)

  # initial state of every decoder layer is encoder's last state of last layer
  for i in range(1, n_decoder_layers + 1):
    decoder_outputs, *_ = new_recurrent_layer('decoder_hidden_%d' % i)(decoder_outputs, initial_state=encoder_states)

  decoder_outputs = keras.layers.Dense(n_decoder_tokens, activation="softmax", name='decoder_output')(decoder_outputs)
  model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

  model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=['accuracy'])

  model.fit(
      [encoder_input_data, decoder_input_data],
      decoder_target_data,
      batch_size=batch_size,
      epochs=epochs,
      callbacks=[WandbCallback()],  # `callbacks` is documented as a list of callbacks
  )

  # ---- Inference models (reuse the trained layers by name) ----
  _, *final_encoder_states = model.get_layer('encoder_hidden_' + str(n_encoder_layers)).output
  encoder_model = keras.Model(model.input[0], final_encoder_states)

  decoder_inputs = model.input[1]
  decoder_outputs = model.get_layer('decoder_embedding')(decoder_inputs)
  decoder_states_inputs = []
  decoder_states = []

  for j in range(1, n_decoder_layers + 1):
    # One fresh state input per state tensor of this layer (h, then c for LSTM).
    current_states_inputs = [keras.Input(shape=(latent_dimension,)) for _ in range(n_states)]
    decoder = model.get_layer('decoder_hidden_' + str(j))
    decoder_outputs, *new_states = decoder(decoder_outputs, initial_state=current_states_inputs)
    decoder_states += new_states
    decoder_states_inputs += current_states_inputs

  decoder_outputs = model.get_layer('decoder_output')(decoder_outputs)
  decoder_model = keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
  return encoder_model, decoder_model
      

In [None]:
def accuracy(val_encoder_input_data, val_target_data,n_decoder_layers,encoder_model,decoder_model, verbose=False,
             token_index=None, reverse_index=None, max_length=None):
    """Return the exact-match word accuracy (in percent) of greedy decoding.

    Each row of `val_encoder_input_data` is encoded once; the decoder is then
    stepped one character at a time, always feeding back the arg-max token,
    until it emits "\\n" or the decoded text is longer than `max_length`.
    A prediction counts as correct when it equals the matching entry of
    `val_target_data` after both have been stripped of surrounding whitespace.

    Parameters
    ----------
    val_encoder_input_data : array-like
        Encoder inputs, one sample per row.
    val_target_data : sequence of str
        Reference strings, indexed like `val_encoder_input_data`.
    n_decoder_layers : int
        Number of decoder layers; the encoder state(s) are repeated once per
        layer to form the initial decoder states.
    encoder_model, decoder_model
        Objects exposing `predict`. `decoder_model.predict` receives
        `[token] + states` and must return `[token_probabilities] + new_states`.
    verbose : bool
        Print every prediction next to its reference.
    token_index, reverse_index, max_length
        Character -> index lookup, index -> character lookup and decoding
        length limit. When omitted they default to the notebook-level
        `target_token_idx`, `reverse_target_char_index` and
        `decoder_max_length`.

    Returns
    -------
    float
        Percentage of exact matches; 0.0 when there are no samples.
    """
    if token_index is None:
        token_index = target_token_idx
    if reverse_index is None:
        reverse_index = reverse_target_char_index
    if max_length is None:
        max_length = decoder_max_length

    n_val_data = len(val_encoder_input_data)
    if n_val_data == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct_count = 0
    for seq_idx in range(n_val_data):
        # Taking one sequence
        input_charseq = val_encoder_input_data[seq_idx: seq_idx + 1]

        # The encoder yields one state array (RNN/GRU) or several (LSTM).
        # Building a flat, per-layer list of states works for every cell type,
        # so no `cell_type` variable (which is not in scope here) is needed.
        encoder_states = encoder_model.predict(input_charseq)
        if not isinstance(encoder_states, (list, tuple)):
            encoder_states = [encoder_states]
        states_val = list(encoder_states) * n_decoder_layers

        # adding first character of target sequence with the start character.
        target_charseq = np.zeros((1, 1))
        target_charseq[0, 0] = token_index["\t"]

        decoded_sentence = ""
        stop_cond = False
        while not stop_cond:
            step_outputs = decoder_model.predict([target_charseq] + states_val)
            output_tokens, states_val = step_outputs[0], list(step_outputs[1:])

            # Sample a token
            sampled_token_idx = np.argmax(output_tokens[0, -1, :])
            sampled_character = reverse_index[sampled_token_idx]
            decoded_sentence += sampled_character

            if sampled_character == "\n" or len(decoded_sentence) > max_length:
                stop_cond = True

            # Updating the target sequence.
            target_charseq = np.zeros((1, 1))
            target_charseq[0, 0] = sampled_token_idx

        if decoded_sentence.strip() == val_target_data[seq_idx].strip():
            correct_count += 1

        if verbose:
            print('Prediction ', decoded_sentence.strip(), ',Ground Truth ', val_target_data[seq_idx].strip())

    return correct_count * 100.0 / n_val_data

In [None]:
!pip install wandb
import wandb
from wandb.keras import WandbCallback

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 26.9 MB/s 
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.11-py2.py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 44.6 MB/s 
[?25hCollecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 52.1 MB/s 
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
Collecting sm

In [None]:
def fit():
  """Run one W&B-tracked experiment: train, score on validation data, log.

  Starts a W&B run with the defaults below, reads the hyperparameters back
  from `wandb.config`, trains through `seq2seq`, evaluates the returned
  inference models with `accuracy` on the validation arrays, logs the result
  as `val_accuracy`, names the run after its hyperparameters and finishes it.
  """
  defaults = dict(
      cell_type='LSTM',
      num_encoder_layers=2,
      num_decoder_layers=3,
      embedding_size=256,
      latent_dimension=256,
      dropout=0.2,
      epochs=25,
  )
  wandb.init(config=defaults)
  cfg = wandb.config

  # Pull every hyperparameter out of the run configuration up front.
  cell_type = cfg.cell_type
  n_encoder_layers = cfg.num_encoder_layers
  n_decoder_layers = cfg.num_decoder_layers
  embedding_size = cfg.embedding_size
  latent_dimension = cfg.latent_dimension
  dropout = cfg.dropout
  epochs = cfg.epochs

  run_name = "cell_type_{}_nel_{}_ndl_{}_drop_{}_emd_{}_ld_{}".format(
      cell_type, n_encoder_layers, n_decoder_layers, dropout, embedding_size, latent_dimension)

  # Train and obtain the two inference-time models.
  encoder_model, decoder_model = seq2seq(
      embedding_size, num_encoder_tokens, num_decoder_tokens,
      n_encoder_layers, n_decoder_layers, latent_dimension, cell_type,
      target_token_idx, decoder_max_length, reverse_target_char_index,
      dropout, encoder_input_data, decoder_input_data, decoder_target_data,
      batch_size, epochs)

  # Word-level exact-match accuracy on the validation set.
  val_accuracy = accuracy(val_encoder_input_data, val_target_data, n_decoder_layers,
                          encoder_model, decoder_model)
  print("Validation Accuracy:", val_accuracy)
  wandb.log({'val_accuracy': val_accuracy})

  wandb.run.name = run_name
  wandb.run.save()
  wandb.run.finish()
  

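key = <redacted: W&B API key. Never commit credentials to a notebook — authenticate with `wandb login` or the `WANDB_API_KEY` environment variable, and revoke any key that was previously published here.>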

In [None]:
# run sweeps
# Bayesian search that maximises the `val_accuracy` value logged by `fit`.
sweep_config = {
    'method': 'bayes',  # grid, random
    'metric': dict(name='val_accuracy', goal='maximize'),
    # Every hyperparameter is a discrete choice among the listed values.
    'parameters': {
        hyperparameter: {'values': choices}
        for hyperparameter, choices in [
            ('embedding_size', [64, 128, 256]),
            ('num_encoder_layers', [1, 2, 3]),
            ('num_decoder_layers', [1, 2, 3]),
            ('latent_dimension', [64, 256, 512]),
            ('cell_type', ['RNN', 'GRU', 'LSTM']),
            ('dropout', [0.3, 0.4, 0.5, 0.0, 0.2]),
            ('epochs', [25, 20, 30]),
        ]
    },
}

# How the sweep below was originally registered and run (kept for reference):
#sweep_id = wandb.sweep(sweep_config,entity="anandh" ,project="CS6910_Assignment3_S2S")
# wandb.agent(sweep_id, fit, count=10)

# Attach one more agent run to the existing sweep.
sweep_id = "5y0u4iyv"
wandb.agent(sweep_id, fit, entity="anandh", project="CS6910_Assignment3_S2S", count=1)


[34m[1mwandb[0m: Agent Starting Run: jwj2eoo2 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	latent_dimension: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1




Epoch 1/25
Epoch 2/25
Epoch 3/25

In [None]:
#Test accuracy

def test_accuracy(val_encoder_input_data, val_target_texts,n_decoder_layers,encoder_model,decoder_model,test_input_texts ,verbose=False, cell_type='GRU'):
    """Decode every input, save the predictions to CSV and return the accuracy.

    Parameters
    ----------
    val_encoder_input_data : array-like
        Encoder inputs, one sample per row.
    val_target_texts : sequence of str
        Reference strings, indexed like `val_encoder_input_data`.
    n_decoder_layers : int
        Forwarded to `decode_sequence`.
    encoder_model, decoder_model
        Inference models forwarded to `decode_sequence`.
    test_input_texts : sequence of str
        Source strings written to the "Input" column of the CSV.
    verbose : bool
        Print every prediction next to its reference.
    cell_type : str
        Cell type forwarded to `decode_sequence`. Defaults to 'GRU', the value
        that used to be hard-coded; pass the type the models were built with.

    Returns
    -------
    float
        Percentage of predictions equal to their reference after stripping
        surrounding whitespace; 0.0 when there are no samples.

    Side effects
    ------------
    Writes `predictions_seq2seq2.csv` (columns "Input", "Ground Truth",
    "Model output") to the working directory.
    """
    # NOTE(review): `decode_sequence` is not defined in the visible part of this
    # notebook; it is assumed to take (input_seq, n_decoder_layers, cell_type,
    # encoder_model, decoder_model) and return the decoded string - confirm.
    n_correct = 0
    n_total = 0
    inputs = []
    outputs = []
    ground_truth = []

    for seq_index in range(len(val_encoder_input_data)):
        # Take one sequence, keeping the leading batch axis.
        input_seq = val_encoder_input_data[seq_index: seq_index + 1]
        decoded_sentence = decode_sequence(input_seq, n_decoder_layers, cell_type, encoder_model, decoder_model)

        prediction = decoded_sentence.strip()
        reference = val_target_texts[seq_index].strip()

        if prediction == reference:
            n_correct += 1
        n_total += 1

        if verbose:
            print('Prediction ', prediction, ',Ground Truth ', reference)

        inputs.append(test_input_texts[seq_index])
        outputs.append(prediction)
        ground_truth.append(reference)

    predictions = pd.DataFrame({"Input": inputs, "Ground Truth" : ground_truth, "Model output":outputs})
    predictions.to_csv('predictions_seq2seq2.csv', index=False)

    if n_total == 0:
        # Nothing was scored; avoid dividing by zero.
        return 0.0
    return n_correct * 100.0 / n_total

In [None]:
# Test accuracy

# Number of test samples to score; any value <= 0 scores the whole test set.
subset = 50
eval_count = subset if subset > 0 else None  # a slice end of None means "up to the end"

# NOTE(review): this rebinds the name `test_accuracy`, which hides the
# `test_accuracy` function defined earlier in the notebook; the name is kept
# here in case later cells read the score from it.
# NOTE(review): `test_encoder_input_data`, `test_target_data`, `n_decoder_layers`,
# `encoder_model` and `decoder_model` are not defined at notebook level in the
# visible cells - confirm they exist before running on a fresh kernel.
test_accuracy = accuracy(test_encoder_input_data[:eval_count], test_target_data[:eval_count],
                         n_decoder_layers, encoder_model, decoder_model)
# These are test-set results, so label them as such.
print('Test accuracy: ', test_accuracy)