In [None]:
from random import randint
from numpy import array
from numpy import argmax
import keras.backend as K
from tensorflow.keras import models
from numpy import array_equal
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Bidirectional, SimpleRNN, GRU
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Input
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from keras.optimizers import Adam

from tensorflow.keras.layers import Lambda
from tensorflow.keras import backend as K
import tensorflow as tf
import io

tf.keras.backend.set_floatx('float64')

In [None]:
!git clone https://github.com/borate267/lexicon-dataset.git

fatal: destination path 'lexicon-dataset' already exists and is not an empty directory.


In [None]:
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

In [None]:
# Paths of the train / dev / test TSV files inside the cloned lexicon-dataset directory.
train_dir = "lexicon-dataset/ta.translit.sampled.train.tsv"
dev_dir = "lexicon-dataset/ta.translit.sampled.dev.tsv"
test_dir = "lexicon-dataset/ta.translit.sampled.test.tsv"

# The following function reads a tab-separated corpus file and returns two parallel lists: the romanized (Latin) words and the native (Tamil) words.

def read_corpus(corpus_file):
  """Read a tab-separated lexicon file into parallel Latin and Tamil word lists.

  The first column of each usable line is taken as the native (Tamil) word and
  the second column as its romanized (Latin) form; further columns are ignored.

  Args:
    corpus_file: Path of a UTF-8 encoded, tab-separated text file.

  Returns:
    A tuple ``(latin_words, tamil_words)`` of equally long lists, in file order.
  """
  latin_words = []
  tamil_words = []
  with io.open(corpus_file, encoding='utf-8') as f:
    for line in f:
      tokens = line.rstrip().split("\t")
      # Skip lines that do not provide both columns. This also covers lines
      # whose second column is empty: rstrip() removes the trailing tab, which
      # would otherwise make tokens[1] raise an IndexError.
      if len(tokens) < 2:
        continue
      tamil_words.append(tokens[0])
      latin_words.append(tokens[1])
  return latin_words, tamil_words

# Load the three splits; each call returns (romanized words, Tamil words).
train_source, train_target = read_corpus(train_dir)
valid_source, valid_target = read_corpus(dev_dir)
test_source, test_target = read_corpus(test_dir)

# Report how many word pairs each split contains.
print("Number of training samples: ", len(train_source))
print("Number of validation samples: ", len(valid_source))
print("Number of testing samples: ", len(test_source))


Number of training samples:  68218
Number of validation samples:  6827
Number of testing samples:  6864


In [None]:
# Random permutations used to shuffle the training and validation pairs.
arr = np.arange(len(train_source))
np.random.shuffle(arr)
arr1 = np.arange(len(valid_source))
np.random.shuffle(arr1)

# Unshuffled ("_ns") texts. Every target word is wrapped in the marker
# characters "B" (begin of sequence) and "E" (end of sequence).
input_texts_ns = list(train_source)
target_texts_ns = ["B" + word + "E" for word in train_target]
val_input_texts_ns = list(valid_source)
val_target_texts_ns = ["B" + word + "E" for word in valid_target]

# Shuffle sources and targets with the same permutation so pairs stay aligned.
input_texts = [input_texts_ns[i] for i in arr]
target_texts = [target_texts_ns[i] for i in arr]
val_input_texts = [val_input_texts_ns[i] for i in arr1]
val_target_texts = [val_target_texts_ns[i] for i in arr1]

# Sorted character vocabularies over the training and validation splits.
# " " is added to both vocabularies; the encoding cell uses it as the padding symbol.
input_characters = sorted(set("".join(input_texts_ns + val_input_texts_ns)) | {" "})
target_characters = sorted(set("".join(target_texts_ns + val_target_texts_ns)) | {" "})

num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)
val_max_encoder_seq_length = max(len(txt) for txt in val_input_texts)
val_max_decoder_seq_length = max(len(txt) for txt in val_target_texts)

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)
print("Max sequence length for val inputs:", val_max_encoder_seq_length)
print("Max sequence length for val outputs:", val_max_decoder_seq_length)

print(input_characters)
print(target_characters)

Number of samples: 68218
Number of unique input tokens: 27
Number of unique output tokens: 49
Max sequence length for inputs: 30
Max sequence length for outputs: 28
Max sequence length for val inputs: 23
Max sequence length for val outputs: 22
[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[' ', 'B', 'E', 'ஃ', 'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'க', 'ங', 'ச', 'ஜ', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ற', 'ல', 'ள', 'ழ', 'வ', 'ஷ', 'ஸ', 'ஹ', 'ா', 'ி', 'ீ', 'ு', 'ூ', 'ெ', 'ே', 'ை', 'ொ', 'ோ', 'ௌ', '்']


In [None]:
# Lookup tables mapping each character to its position in the sorted vocabulary.
input_token_index = {char: index for index, char in enumerate(input_characters)}
target_token_index = {char: index for index, char in enumerate(target_characters)}
print(input_token_index)
print(target_token_index)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ஃ': 3, 'அ': 4, 'ஆ': 5, 'இ': 6, 'ஈ': 7, 'உ': 8, 'ஊ': 9, 'எ': 10, 'ஏ': 11, 'ஐ': 12, 'ஒ': 13, 'ஓ': 14, 'க': 15, 'ங': 16, 'ச': 17, 'ஜ': 18, 'ஞ': 19, 'ட': 20, 'ண': 21, 'த': 22, 'ந': 23, 'ன': 24, 'ப': 25, 'ம': 26, 'ய': 27, 'ர': 28, 'ற': 29, 'ல': 30, 'ள': 31, 'ழ': 32, 'வ': 33, 'ஷ': 34, 'ஸ': 35, 'ஹ': 36, 'ா': 37, 'ி': 38, 'ீ': 39, 'ு': 40, 'ூ': 41, 'ெ': 42, 'ே': 43, 'ை': 44, 'ொ': 45, 'ோ': 46, 'ௌ': 47, '்': 48}


In [None]:
def _one_hot_encode(texts, token_index, max_length, num_tokens):
    """One-hot encode `texts` into a (len(texts), max_length, num_tokens) float64 array.

    Args:
        texts: Sequence of strings; every character must be a key of `token_index`
            and no text may be longer than `max_length`.
        token_index: Dict mapping a character to its one-hot position; must contain " ".
        max_length: Number of time steps of the encoded array.
        num_tokens: Size of the one-hot dimension.

    Returns:
        The encoded array. Time steps after the end of a text are set to the
        one-hot vector of " ", which acts as the padding symbol.
    """
    encoded = np.zeros((len(texts), max_length, num_tokens), dtype="float64")
    pad_index = token_index[" "]
    for row, text in enumerate(texts):
        for step, char in enumerate(text):
            encoded[row, step, token_index[char]] = 1.0
        # Pad from the true end of the text. Relying on the inner loop variable
        # instead would use a stale (or undefined) position for an empty text.
        encoded[row, len(text):, pad_index] = 1.0
    return encoded


# 68096 = 532 * 128. Presumably chosen so the training set splits into full
# batches for the batch sizes in use (64 / 128), because MyRNN_atten builds its
# decoder start input with a fixed batch dimension -- TODO confirm.
trunc_input_texts = input_texts[:68096]
trunc_target_texts = target_texts[:68096]

# Training arrays.
encoder_input_data = _one_hot_encode(
    trunc_input_texts, input_token_index, max_encoder_seq_length, num_encoder_tokens
)
decoder_target_data = _one_hot_encode(
    trunc_target_texts, target_token_index, max_decoder_seq_length, num_decoder_tokens
)

# Validation arrays, padded to the training sequence lengths.
val_encoder_input_data = _one_hot_encode(
    val_input_texts, input_token_index, max_encoder_seq_length, num_encoder_tokens
)
val_decoder_target_data = _one_hot_encode(
    val_target_texts, target_token_index, max_decoder_seq_length, num_decoder_tokens
)
  


# ATTENTION MECHANISM 

In [None]:
class BahdanauAttention(tf.keras.layers.Layer):
  """Additive attention layer: score = V(tanh(W1(query) + W2(values)))."""

  def __init__(self, units):
    super().__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    """Return ``(context_vector, attention_weights)`` for one query.

    Expected shapes: `query` is (batch_size, hidden size) and `values` is
    (batch_size, max_len, hidden size).
    """
    # Insert a time axis -> (batch_size, 1, hidden size) so the projected query
    # broadcasts against every time step of the projected values.
    expanded_query = tf.expand_dims(query, 1)

    # (batch_size, max_len, units) before V, (batch_size, max_len, 1) after it.
    projected = self.W1(expanded_query) + self.W2(values)
    score = self.V(tf.nn.tanh(projected))

    # Normalize over the time axis: (batch_size, max_len, 1).
    attention_weights = tf.nn.softmax(score, axis=1)

    # Weighted sum of the values over time: (batch_size, hidden size).
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)
    return context_vector, attention_weights


In [None]:
#import tensorflow 
class LuongAttention(tf.keras.layers.Layer):
  """Dot-product attention layer: score = values . query."""

  def __init__(self, units):
    super().__init__()
    # These layers mirror BahdanauAttention's attributes, but call() never
    # applies them: the dot-product score below has no trainable weights.
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    """Return ``(context_vector, attention_weights)`` for one query.

    Expected shapes: `query` is (batch_size, hidden size) and `values` is
    (batch_size, max_len, hidden size).
    """
    expanded_query = tf.expand_dims(query, 1)
    transposed_values = tf.transpose(values, perm=[0, 2, 1])

    # (batch, 1, hidden) x (batch, hidden, max_len) -> (batch, 1, max_len),
    # then swap the last two axes to get one score per time step.
    dot_products = tf.matmul(expanded_query, transposed_values)
    score = tf.transpose(dot_products, perm=[0, 2, 1])

    # Normalize over the time axis: (batch_size, max_len, 1).
    attention_weights = tf.nn.softmax(score, axis=1)

    # Weighted sum of the values over time: (batch_size, hidden size).
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)
    return context_vector, attention_weights

In [None]:
class MyRNN_atten(object):
  """Encoder-decoder transliteration model with attention over the encoder outputs.

  The encoder is one recurrent layer (SimpleRNN, GRU or LSTM). The decoder is a
  recurrent layer of the same kind, unrolled for ``max_decoder_seq_length``
  steps; at each step it receives the attention context vector concatenated
  with its own previous softmax output.

  Args:
    cell_type: 'RNN', 'GRU' or 'LSTM'.
    hidden_size: Number of units of the encoder, decoder and attention layers.
    learning_rate: Learning rate of the Adam optimizer.
    dropout: Value of the `dropout` argument of the encoder and decoder layers.
    epochs: Number of training epochs.
    batch_size: Batch size for training and prediction. The decoder's start
      input is created with this fixed batch dimension, so every batch fed to
      the model has to contain exactly `batch_size` samples.
    attention: 'bahdanau' or 'luong'.
  """

  _CELL_TYPES = ('LSTM', 'GRU', 'RNN')
  _ATTENTION_TYPES = ('bahdanau', 'luong')

  def __init__(self, cell_type='RNN', hidden_size=32,
               learning_rate=1e-3, dropout=0.3, epochs=10, batch_size=32,
               attention='bahdanau'):
    self.cell_type = cell_type
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate
    self.dropout = dropout
    self.epochs = epochs
    self.batch_size = batch_size
    self.attention = attention

  def build_fit(self, encoder_input_data, decoder_target_data):
    """Build the model, train it, and report exact-match validation accuracy.

    The trained model is kept in ``self.model`` and the validation accuracy in
    ``self.val_accuracy``. Sequence lengths, vocabulary sizes and the validation
    arrays are read from the notebook's module-level variables.

    Args:
      encoder_input_data: One-hot encoded source sequences; the number of
        samples should be a multiple of `batch_size` (see the class docstring).
      decoder_target_data: One-hot encoded target sequences.

    Raises:
      ValueError: If `cell_type` or `attention` is not a supported option.
    """
    # Fail early with a clear message instead of a NameError deep in the build.
    if self.cell_type not in self._CELL_TYPES:
      raise ValueError("cell_type must be one of %s, got %r"
                       % (self._CELL_TYPES, self.cell_type))
    if self.attention not in self._ATTENTION_TYPES:
      raise ValueError("attention must be one of %s, got %r"
                       % (self._ATTENTION_TYPES, self.attention))

    model = self._build_model()

    # tf.keras optimizer with `learning_rate`: the `lr` alias is deprecated, and
    # this avoids mixing a standalone-keras optimizer into a tf.keras model.
    optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                         beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(encoder_input_data, decoder_target_data,
              batch_size=self.batch_size,
              epochs=self.epochs,
              #callbacks = [WandbCallback()]
              )

    self.model = model
    self.val_accuracy = self._log_validation_accuracy(model)

  def _build_model(self):
    """Assemble the unrolled encoder / attention / decoder graph as a Keras Model."""
    rnn_layer = {'LSTM': LSTM, 'GRU': GRU, 'RNN': SimpleRNN}[self.cell_type]
    layer_suffix = self.cell_type.lower()  # 'lstm' / 'gru' / 'rnn'

    # Encoder: returns the full output sequence plus its final state(s)
    # ([h, c] for LSTM, [h] for GRU and SimpleRNN).
    encoder_inputs = Input(shape=(max_encoder_seq_length, num_encoder_tokens), name='encoder_inputs')
    encoder = rnn_layer(self.hidden_size, return_sequences=True, return_state=True,
                        dropout=self.dropout, name='encoder_' + layer_suffix)
    encoder_outputs, *states = encoder(encoder_inputs)

    # Attention layer.
    if self.attention == 'bahdanau':
      attention = BahdanauAttention(self.hidden_size)
    else:
      attention = LuongAttention(self.hidden_size)

    # Decoder layers, shared across all unrolled time steps.
    decoder = rnn_layer(self.hidden_size, dropout=self.dropout, return_state=True,
                        name='decoder_' + layer_suffix)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='decoder_dense')

    # Start input of the decoder: a one-hot vector at index 0 for every sample.
    # This constant fixes the batch dimension of the graph to self.batch_size.
    inputs = np.zeros((self.batch_size, 1, num_decoder_tokens))
    inputs[:, 0, 0] = 1

    # The first attention query is the encoder's final hidden state.
    decoder_outputs = states[0]
    all_outputs = []

    for _step in range(max_decoder_seq_length):
      context_vector, _weights = attention(decoder_outputs, encoder_outputs)
      context_vector = tf.expand_dims(context_vector, 1)

      # One decoder step on [context ; previous prediction].
      inputs = tf.concat([context_vector, inputs], axis=-1)
      decoder_outputs, *states = decoder(inputs, initial_state=states)

      outputs = decoder_dense(decoder_outputs)
      outputs = tf.expand_dims(outputs, 1)
      all_outputs.append(outputs)
      # Feed the prediction back in as the next step's input.
      inputs = outputs

    # Concatenate the per-step predictions along the time axis.
    decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
    return Model(encoder_inputs, decoder_outputs, name='model_encoder_decoder')

  def _log_validation_accuracy(self, model):
    """Score exact-match accuracy on the validation arrays and log it.

    Only the largest prefix of the validation set whose size is a multiple of
    `batch_size` is evaluated (6784 samples for batch sizes 64 and 128 on the
    6827-sample split), because the model only accepts full batches.

    Returns:
      The exact-match accuracy, or None if the validation set is smaller than
      one batch.
    """
    usable = (len(val_encoder_input_data) // self.batch_size) * self.batch_size
    if usable == 0:
      print('Validation set is smaller than one batch; skipping evaluation.')
      return None

    # Predict with the batch size the graph was built for; any other value
    # makes the concat with the constant start input fail.
    pred = model.predict(val_encoder_input_data[:usable], batch_size=self.batch_size)

    # A sample counts as correct only if every time step matches the target.
    predicted_ids = np.argmax(pred, axis=-1)
    target_ids = np.argmax(val_decoder_target_data[:usable], axis=-1)
    exact_match = np.all(predicted_ids == target_ids, axis=1)
    running_correct = np.cumsum(exact_match)

    # wandb.log needs an active run; fall back to printing when there is none
    # (e.g. when build_fit is called directly instead of through a sweep).
    has_run = wandb.run is not None

    if has_run:
      # Running accuracy after every 50 evaluated samples.
      for seen in range(50, usable + 1, 50):
        wandb.log({'epoch_accuracy': float(running_correct[seen - 1]) / seen})

    val_accuracy = float(running_correct[-1]) / usable
    if has_run:
      wandb.log({'val_accuracy': val_accuracy})
    else:
      print('val_accuracy:', val_accuracy)
    return val_accuracy

In [None]:
# Manual configuration outside the sweep: LSTM cells with Bahdanau attention, 2 epochs.
model_rnn = MyRNN_atten(cell_type = 'LSTM', hidden_size=128, learning_rate= 1e-3,
                        dropout=0.2,epochs = 2, batch_size = 128, attention = 'bahdanau')

In [None]:
# Build and train on the one-hot training arrays; build_fit also scores the validation split.
model_rnn.build_fit(encoder_input_data,decoder_target_data)

In [None]:
##########

Sweep

In [None]:
# Bayesian hyperparameter search that maximizes the logged 'val_accuracy' metric.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'dropout': {'values': [0.0, 0.1, 0.2]},
        'learning_rate': {'values': [1e-3, 1e-4]},
        'batch_size': {'values': [64, 128]},
        'hidden_size': {'values': [32, 64, 128]},
        'cell_type': {'values': ['RNN', 'GRU', 'LSTM']},
        'attention': {'values': ['bahdanau', 'luong']},
    },
}

In [None]:
# Register a new sweep from sweep_config under the cs6910assignment3/RNN project
# and keep the ID that wandb returns for it.
sweep_id = wandb.sweep(sweep_config, entity="cs6910assignment3", project="RNN")

Create sweep with ID: xfqc0dsg
Sweep URL: https://wandb.ai/cs6910assignment3/RNN/sweeps/xfqc0dsg


In [None]:
def train_sweep():
  """Run one sweep trial: start a wandb run, build the model from its config, and train it."""
  # Fallback hyperparameters; values supplied by the sweep take their place in wandb.config.
  defaults = dict(
      dropout=0.3,
      learning_rate=1e-3,
      batch_size=128,
      epochs=15,
      hidden_size=128,
      cell_type='LSTM',
      attention='bahdanau',
  )

  # Start the run, then read the hyperparameters back from wandb.
  wandb.init(config=defaults)
  cfg = wandb.config

  # Name the run after its main settings, e.g. "LSTM_bahdanau_bs_128".
  wandb.run.name = '_'.join([str(cfg.cell_type), cfg.attention, 'bs', str(cfg.batch_size)])

  trial_model = MyRNN_atten(
      cell_type=cfg.cell_type,
      hidden_size=cfg.hidden_size,
      learning_rate=cfg.learning_rate,
      dropout=cfg.dropout,
      epochs=cfg.epochs,
      batch_size=cfg.batch_size,
      attention=cfg.attention,
  )
  trial_model.build_fit(encoder_input_data, decoder_target_data)

In [None]:
# Launch an agent that runs train_sweep for up to 100 trials.
# NOTE(review): the agent attaches to the hard-coded sweep "ub6frmrd", not to the
# `sweep_id` created earlier in this notebook (that variant is commented out
# below) -- confirm that resuming the older sweep is intended.
#wandb.agent(sweep_id, train_sweep,count=100)
wandb.agent("ub6frmrd", entity="cs6910assignment3",project="RNN", function =train_sweep,count=100)

[34m[1mwandb[0m: Agent Starting Run: nk1pzqg2 with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.10MB of 0.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run nk1pzqg2 errored: ValueError('in user code:\n\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1478 predict_function  *\n        return step_function(self, iterator)\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1468 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica\n        return fn(*args, **kwargs)\n    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:1461

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.12MB of 0.12MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.44563
_runtime,333.0
_timestamp,1620902335.0
_step,135.0
val_accuracy,0.44575


0,1
epoch_accuracy,█▆▄▄▅▄▃▃▃▂▂▁▁▁▁▁▂▂▃▃▂▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂
_runtime,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆███████
_timestamp,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆███████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: n154sj92 with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.13MB of 0.13MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.0
_runtime,346.0
_timestamp,1620902689.0
_step,135.0
val_accuracy,0.0


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆█████████
_timestamp,▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆█████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: u1d1hcdq with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.14MB of 0.14MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.0
_runtime,347.0
_timestamp,1620903044.0
_step,135.0
val_accuracy,0.0


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆█████████
_timestamp,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆█████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: yt1asl8b with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.00015
_runtime,349.0
_timestamp,1620903402.0
_step,135.0
val_accuracy,0.00015


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▄
_runtime,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆████████
_timestamp,▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: eqxp6mtk with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.0
_runtime,323.0
_timestamp,1620903739.0
_step,135.0
val_accuracy,0.0


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: b64f999l with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.0
_runtime,347.0
_timestamp,1620904100.0
_step,135.0
val_accuracy,0.0


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: ujxuvmga with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.41807
_runtime,350.0
_timestamp,1620904466.0
_step,135.0
val_accuracy,0.41819


0,1
epoch_accuracy,▁▇▅▅▇█▇██▇▇▇▇▆▆▆▆▆▇▇▆▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅███████████
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅███████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: 98d93khe with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.0
_runtime,351.0
_timestamp,1620904831.0
_step,135.0
val_accuracy,0.0


0,1
epoch_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆███████
_timestamp,▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆███████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: w4051a6p with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch_accuracy,0.40548
_runtime,351.0
_timestamp,1620905198.0
_step,135.0
val_accuracy,0.40537


0,1
epoch_accuracy,▁█▅▆▆▇▆▆▇▆▅▅▅▅▅▄▄▅▅▅▄▅▅▅▅▅▅▅▅▆▆▅▅▅▅▅▅▅▆▆
_runtime,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆██████
_timestamp,▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆██████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: 7vtw3lii with config:
[34m[1mwandb[0m: 	attention: luong
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15

In [None]:
## function for plotting the attention weights
def plot_attention(attention, sequence, predicted_sequence):
  """Show an attention matrix as a heat map with labelled axes.

  Args:
    attention: 2-D array-like of attention weights; rows are labelled with
      `predicted_sequence`, columns with `sequence`.
    sequence: Column labels (any sequence: list, string, array).
    predicted_sequence: Row labels (any sequence: list, string, array).
  """
  fig = plt.figure(figsize=(8, 8))
  ax = fig.add_subplot(1, 1, 1)
  ax.matshow(attention, cmap='viridis')

  fontdict = {'fontsize': 14}
  x_labels = list(sequence)
  y_labels = list(predicted_sequence)

  # Pin one tick to the centre of each labelled cell before assigning labels.
  # Setting tick labels without fixed tick positions depends on where the
  # locator happens to place its first tick, and matplotlib warns about it.
  ax.set_xticks(list(range(len(x_labels))))
  ax.set_yticks(list(range(len(y_labels))))
  ax.set_xticklabels(x_labels, fontdict=fontdict, rotation=90)
  ax.set_yticklabels(y_labels, fontdict=fontdict)

  plt.show()



time: 7.22 ms (started: 2021-05-12 14:09:22 +00:00)
