<a href="https://colab.research.google.com/github/akshaygrao77/DeepLearning-Assignment3/blob/main/DL_Assignment_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from keras.utils.vis_utils import plot_model
from keras.callbacks import History

In [2]:
# Download the Dakshina dataset archive as daksh.tar and unpack it in the current working directory.
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output daksh.tar
!tar -xvf  'daksh.tar' 

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1915M  100 1915M    0     0   127M      0  0:00:15  0:00:15 --:--:--  116M
dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script

In [3]:
!pip install wandb
!wandb login
import wandb
from wandb.keras import WandbCallback

Collecting wandb
  Downloading wandb-0.12.14-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 3.9 MB/s 
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.10-py2.py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 40.7 MB/s 
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 15.5 MB/s 
[?25hCollecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 906 kB/s 
Collecting smm

In [4]:
# wandb.init(project="DeepLearningAssignment-3", entity='cs21s002-ee21s113-dlassignment-1')

In [5]:
# Start a Weights & Biases run for this project under the 'akshaygrao' entity.
# NOTE(review): HP_tuning_run() calls wandb.init again for every sweep run, so this run may be redundant - confirm.
wandb.init(project="DeepLearningAssignment-3", entity='akshaygrao')

[34m[1mwandb[0m: Currently logged in as: [33makshaygrao[0m (use `wandb login --relogin` to force relogin)


In [6]:
def obtain_input_target_data_from_path(path,tokenizer_obj):
  """Read a tab-separated lexicon file and return its (input, target) word pairs.

  The file is read with three columns named "1", "2" and "3". Column "1" is
  used as the target word and column "2" as the input word; column "3" is
  read but never used. Every target word is wrapped with a leading tab
  (start-of-sequence marker) and a trailing newline (end-of-sequence marker).

  Args:
    path: Path of the TSV file to read.
    tokenizer_obj: Only tested against None. When it is None the rows are
      shuffled with a fixed seed; otherwise the file order is kept.

  Returns:
    A tuple (input_texts, target_texts) of two equally long lists of str.
  """
  # dtype=str + keep_default_na=False stop pandas from turning genuine words
  # such as "null", "NA" or "None" into NaN (which astype(str) would render
  # as the bogus word "nan"). fillna("") covers rows with missing columns.
  df = pd.read_csv(path,sep="\t",names=["1", "2","3"],dtype=str,keep_default_na=False)
  df = df.fillna("").astype(str)
  if tokenizer_obj is None:
    # Shuffle rows in random order with a fixed seed (for reproducibility)
    df=df.sample(frac=1,random_state=1)

  input_texts = []
  target_texts = []
  # Column-wise iteration avoids the per-row Series construction of iterrows().
  for target_text, input_text in zip(df["1"], df["2"]):
      # Skip empty words and sentence-end marker rows
      if target_text in ("", "</s>") or input_text in ("", "</s>"):
        continue
      input_texts.append(input_text)
      target_texts.append("\t" + target_text + "\n")

  return input_texts, target_texts

In [7]:
def convert_text_to_sequences(tokenizer_obj,inp_texts):
  """Turn a list of strings into a post-padded matrix of character indices.

  Args:
    tokenizer_obj: An already fitted tokenizer, or None. When None, a new
      character-level tokenizer (no filtered characters) is created and
      fitted on `inp_texts`.
    inp_texts: List of strings to convert.

  Returns:
    A tuple (padded_sequences, tokenizer). The sequences are padded at the
    end, up to the longest sequence in `inp_texts`.
  """
  tokenizer = tokenizer_obj
  if tokenizer is None:
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    tokenizer.fit_on_texts(inp_texts)

  index_sequences = tokenizer.texts_to_sequences(inp_texts)
  padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(index_sequences,padding='post')
  return padded_sequences,tokenizer

In [8]:
def _fit_to_length(tensor,length):
  """Zero-pad (or, if needed, truncate) a 2-D batch along axis 1 to `length` columns."""
  if tensor.shape[1] > length:
    # A split can contain a longer sequence than the one the length was taken
    # from; tf.zeros would then receive a negative dimension and raise.
    import warnings
    warnings.warn("Sequences of length %d truncated to %d" % (tensor.shape[1], length))
    tensor = tensor[:, :length]
  missing_columns = length - tensor.shape[1]
  return tf.concat([tensor,tf.zeros((tensor.shape[0],missing_columns))],axis=1)

# This method converts a dataset(from path) to input and target sequences
def pre_process_data(path,input_tokenizer=None,target_tokenizer=None,input_length=None,target_length=None):
  """Load a lexicon file and convert it to padded input/target index tensors.

  Args:
    path: Path of the TSV lexicon file.
    input_tokenizer: Fitted tokenizer for the input words, or None to fit a
      new one on this file. It is also forwarded to
      obtain_input_target_data_from_path, which shuffles the rows only when
      it is None.
    target_tokenizer: Fitted tokenizer for the target words, or None to fit
      a new one on this file.
    input_length: Optional number of columns the input tensor must have.
    target_length: Optional number of columns the target tensor must have.

  Returns:
    A tuple (input_texts, input_tensor, input_tokenizer, target_texts,
    target_tensor, target_tokenizer).
  """
  input_texts, target_texts = obtain_input_target_data_from_path(path,input_tokenizer)

  input_tensor,input_tokenizer = convert_text_to_sequences(input_tokenizer,input_texts)

  target_tensor,target_tokenizer = convert_text_to_sequences(target_tokenizer,target_texts)

  # The calls above pad only up to the longest sequence of this file.
  # Bring each tensor to the requested width; each side is handled
  # independently so supplying only one length also works.
  if input_length is not None:
      input_tensor=_fit_to_length(input_tensor,input_length)
  if target_length is not None:
      target_tensor=_fit_to_length(target_tensor,target_length)
  return input_texts,input_tensor,input_tokenizer,target_texts,target_tensor,target_tokenizer

In [9]:
# Language code inserted into the dataset folder and lexicon file names when the data is loaded.
transliteration_target_language = 'kn'

In [10]:
def _lexicon_path(split_suffix):
  """Return the lexicon file path of the selected language for one dataset split."""
  return "/content/dakshina_dataset_v1.0/"+transliteration_target_language+"/lexicons/"+transliteration_target_language+split_suffix

# Only the training dataset is used to fit the tokenizers; the other splits
# reuse that vocabulary for pre-processing.
(train_input_texts, train_input_tensor, input_tokenizer,
 train_target_texts, train_target_tensor, target_tokenizer) = pre_process_data(_lexicon_path(".translit.sampled.train.tsv"))

# The padded widths are also taken from the training tensors.
(val_input_texts, val_input_tensor, val_input_tokenizer,
 val_target_texts, val_target_tensor, val_target_tokenizer) = pre_process_data(
    _lexicon_path(".translit.sampled.dev.tsv"),
    input_tokenizer, target_tokenizer,
    train_input_tensor.shape[1], train_target_tensor.shape[1])

(test_input_texts, test_input_tensor, test_input_tokenizer,
 test_target_texts, test_target_tensor, test_target_tokenizer) = pre_process_data(
    _lexicon_path(".translit.sampled.test.tsv"),
    input_tokenizer, target_tokenizer,
    train_input_tensor.shape[1], train_target_tensor.shape[1])

In [11]:
# Sanity check: shape of the padded test input tensor.
print(test_input_tensor.shape)

(5047, 26)


In [12]:
# Sanity check: number of training words and shape of the padded training input tensor.
# The commented-out prints below are optional spot checks of the raw texts and tensors.
# print(train_input_texts[:2])
# print(train_input_tensor[:2])
# print(val_input_texts[:2])
# print(val_input_tensor[:2])
# print(train_target_texts[:10])
# print(train_target_tensor[:10])
print(len(train_input_texts))
print(train_input_tensor.shape)
# print(len(train_target_texts))
# print(train_target_tensor.shape)

50624
(50624, 26)


In [13]:
# Vocabulary sizes: one more than the number of known characters, leaving room for index 0 (the zero padding value).
num_encoder_tokens = len(input_tokenizer.word_index)+1
num_decoder_tokens = len(target_tokenizer.word_index)+1
# Sequence lengths are the padded widths of the training tensors.
max_encoder_seq_length =  train_input_tensor.shape[1]
max_decoder_seq_length = train_target_tensor.shape[1]

In [14]:
# Show the vocabulary sizes and padded sequence lengths computed above.
print(num_encoder_tokens)
print(num_decoder_tokens)
print(max_encoder_seq_length)
print(max_decoder_seq_length)

27
64
26
26


In [15]:
# List the characters known to the input tokenizer.
print(input_tokenizer.word_index.keys())

dict_keys(['a', 'i', 'n', 'r', 'd', 't', 'u', 'h', 'e', 'l', 's', 'g', 'v', 'k', 'y', 'o', 'm', 'p', 'b', 'c', 'j', 'w', 'f', 'q', 'z', 'x'])


In [16]:
def build_layered_RNN_model(rnn_type,embedding_in_dim,embedding_out_dim,layers,dropout,inp_length,model_out_dim,initial_state = None):
  """Build an Input -> Embedding -> stacked recurrent layers graph.

  Every recurrent layer is created with return_sequences=True and
  return_state=True, and every layer is called with the same `initial_state`.

  Args:
    rnn_type: 'LSTM', 'GRU' or 'RNN' (SimpleRNN).
    embedding_in_dim: Vocabulary size given to the Embedding layer.
    embedding_out_dim: Size of the embedding vectors.
    layers: Number of stacked recurrent layers (at least 1).
    dropout: Dropout rate passed to each recurrent layer.
    inp_length: Length of the tokenized input sequences.
    model_out_dim: Number of units of each recurrent layer.
    initial_state: Optional initial state passed to every recurrent layer.

  Returns:
    A tuple (model_states, last_layer_output, model_inputs), where
    model_states is the list of state tensors of the last recurrent layer
    ([h, c] for LSTM, [state] otherwise).

  Raises:
    ValueError: If `rnn_type` is not supported or `layers` is smaller than 1.
  """
  # Validate first: previously an unknown type or zero layers surfaced as an
  # UnboundLocalError at the return statement.
  supported_types = ('LSTM', 'GRU', 'RNN')
  if rnn_type not in supported_types:
    raise ValueError("Unsupported rnn_type %r; expected one of %s" % (rnn_type, supported_types))
  if layers < 1:
    raise ValueError("layers must be at least 1, got %r" % (layers,))

  rnn_classes = {
      'LSTM': keras.layers.LSTM,
      'GRU': keras.layers.GRU,
      'RNN': keras.layers.SimpleRNN,
  }
  rnn_class = rnn_classes[rnn_type]

  # Input layer; takes in the tokenized input
  model_inputs = keras.Input(shape=(inp_length,))
  # Embedding layer; its output feeds the first recurrent layer
  layer_input = keras.layers.Embedding(embedding_in_dim, embedding_out_dim)(model_inputs)

  model_states = None
  for _ in range(layers):
    rnn_layer = rnn_class(model_out_dim, return_sequences=True,return_state=True,dropout=dropout)
    # With return_state=True the call yields the sequence output followed by the state(s).
    layer_outputs = rnn_layer(layer_input,initial_state)
    layer_input = layer_outputs[0]
    model_states = list(layer_outputs[1:])

  return model_states,layer_input,model_inputs


In [17]:

#Build the model
def build_model(rnn_type,embedding_dim,encoder_layers,decoder_layers,dropout,latent_dim):
  """Assemble the encoder-decoder training model.

  The encoder and decoder stacks are both built by build_layered_RNN_model.
  The decoder stack receives the encoder states as its initial state, and its
  sequence output goes through a softmax Dense layer named 'final'.

  Returns:
    A keras.Model taking [encoder_inputs, decoder_inputs] and producing the
    per-timestep distribution over decoder tokens.
  """
  shared_kwargs = dict(rnn_type=rnn_type, embedding_out_dim=embedding_dim, dropout=dropout, model_out_dim=latent_dim)

  enc_states, _, enc_inputs = build_layered_RNN_model(
      embedding_in_dim=num_encoder_tokens,
      layers=encoder_layers,
      inp_length=max_encoder_seq_length,
      **shared_kwargs)

  _, dec_sequence, dec_inputs = build_layered_RNN_model(
      embedding_in_dim=num_decoder_tokens,
      layers=decoder_layers,
      inp_length=max_decoder_seq_length,
      initial_state=enc_states,
      **shared_kwargs)

  token_probabilities = keras.layers.Dense(num_decoder_tokens, activation="softmax",name='final')(dec_sequence)

  return keras.Model([enc_inputs, dec_inputs], token_probabilities)

In [18]:
def get_inference_encoder_model(model,encoder_layers):
  """Extract an encoder-only model that maps encoder inputs to the final encoder states.

  Args:
    model: The trained encoder-decoder model.
    encoder_layers: Number of recurrent layers in the encoder.

  Returns:
    A keras.Model from the encoder input to the state tensor(s) of the layer
    at model.layers[encoder_layers+3] ([h, c] for LSTM, a single state otherwise).

  Raises:
    TypeError: If the layer found at that index is not a recurrent layer.
  """
  # NOTE(review): this assumes the last encoder recurrent layer sits at index
  # encoder_layers+3 of model.layers, which depends on how Keras orders the
  # layers of models produced by build_model - confirm if the architecture changes.
  last_encoder_layer = model.layers[encoder_layers+3]
  if not isinstance(last_encoder_layer, keras.layers.RNN):
    # Previously this case fell through both branches and failed with an UnboundLocalError.
    raise TypeError("Expected a recurrent layer at index %d, found %s" % (encoder_layers+3, type(last_encoder_layer).__name__))

  # The layer was built with return_state=True, so its output is the sequence
  # output followed by the state tensor(s); keep only the states.
  encoder_states = list(last_encoder_layer.output[1:])

  encoder_model = keras.Model(model.input[0], encoder_states)
  return encoder_model

In [19]:
def get_inference_decoder_model(model,encoder_layers,decoder_layers,latent_dim):
  """Build a single-step decoder model that reuses the trained decoder layers.

  The returned model takes one character index per word plus one set of state
  inputs per decoder layer. It outputs the token distribution followed by the
  updated states of every decoder layer, in the same order as the state inputs
  (layer by layer; h before c for LSTM).

  Args:
    model: The trained encoder-decoder model.
    encoder_layers: Number of recurrent layers in the encoder.
    decoder_layers: Number of recurrent layers in the decoder (at least 1).
    latent_dim: Number of units of each recurrent layer (size of each state input).

  Returns:
    A keras.Model mapping [decoder_input, *state_inputs] to [token_probabilities, *new_states].

  Raises:
    ValueError: If `decoder_layers` is smaller than 1.
    TypeError: If the first decoder layer is not a recurrent layer.
  """
  if decoder_layers < 1:
    raise ValueError("decoder_layers must be at least 1, got %r" % (decoder_layers,))

  # Decoder during inference takes just one character index. This is either
  # the previous prediction or the start-of-sequence marker.
  decoder_inputs = keras.Input(shape=(1,))

  # NOTE(review): the indices encoder_layers+2 (decoder embedding) and
  # encoder_layers+4+i (i-th decoder recurrent layer) assume the layer order
  # Keras produces for models built by build_model - confirm if the architecture changes.
  first_decoder_layer = model.layers[encoder_layers+4]
  if isinstance(first_decoder_layer, keras.layers.LSTM):
    # LSTM carries a hidden state and a cell state.
    state_input_prefixes = ('inp3_', 'inp4_')
  elif isinstance(first_decoder_layer, keras.layers.RNN):
    # GRU and SimpleRNN carry a single state.
    state_input_prefixes = ('inp3_',)
  else:
    raise TypeError("Expected a recurrent layer at index %d, found %s" % (encoder_layers+4, type(first_decoder_layer).__name__))

  # Contains the state inputs of each decoder layer
  decoder_states_inputs=[]
  # Contains the state outputs of each decoder layer
  decoder_states=[]

  layer_input = model.layers[encoder_layers+2](decoder_inputs)
  for i in range(decoder_layers):
    # Every layer must have inputs through which its state can be supplied
    init_state = [keras.Input(shape=(latent_dim,),name=prefix+str(i)) for prefix in state_input_prefixes]
    layer_outputs = model.layers[i+encoder_layers+4](layer_input, initial_state=init_state)
    layer_input = layer_outputs[0]
    decoder_states_inputs.extend(init_state)
    decoder_states.extend(layer_outputs[1:])

  decoder_dense = model.get_layer('final')
  decoder_outputs = decoder_dense(layer_input)
  decoder_model = keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

  return decoder_model

In [20]:
def build_inference_model(model,encoder_layers,decoder_layers,latent_dim):
    """Return the (encoder_model, decoder_model) pair used for inference.

    Both models are derived from the trained `model`: the encoder first,
    then the single-step decoder.
    """
    return (
        get_inference_encoder_model(model,encoder_layers),
        get_inference_decoder_model(model,encoder_layers,decoder_layers,latent_dim),
    )

In [21]:
# Reverse lookup of the target tokenizer: token index -> character.
index_to_char_target = {token_index: char for char, token_index in target_tokenizer.word_index.items()}

In [22]:
def decode_batch_of_sequences(rnn_type,input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers):
    """Greedily decode a whole batch of input sequences, one character per step.

    Args:
      rnn_type: Kept for interface compatibility; the number of encoder
        states is taken from the encoder output itself.
      input_seq: Batch of tokenized input sequences fed to the encoder.
      encoder_model: Inference encoder returning the final encoder state(s).
      decoder_model: Single-step inference decoder.
      batch_size: Number of words in `input_seq`.
      encoder_layers: Kept for interface compatibility; not used.
      decoder_layers: Number of decoder layers; the encoder state(s) are
        replicated once per layer for the first step.

    Returns:
      A list of `batch_size` predicted strings (without start/end markers).
    """
    # Get encoder output
    encoder_output_state_values = encoder_model.predict(input_seq)
    # The original test `rnn_type=='GRU' or 'RNN'` was always true, so the
    # [h, c] list of an LSTM encoder was wrapped in another list and the
    # decoder received nested state inputs. Normalise to one flat list instead.
    if isinstance(encoder_output_state_values, (list, tuple)):
        encoder_states = list(encoder_output_state_values)
    else:
        encoder_states = [encoder_output_state_values]

    # This is needed because the encoder state is fed to all decoder layers
    decoder_input_state_values = encoder_states * decoder_layers

    # Holds the previously predicted character index for every word in the batch.
    prev_char_index = np.zeros((batch_size, 1))
    # We start with the start-of-sequence marker for every word in the batch
    prev_char_index[:, 0] = target_tokenizer.word_index['\t']

    predicted_words = ["" for _ in range(batch_size)]
    done = [False] * batch_size
    for _ in range(max_decoder_seq_length):
        # Nothing left to decode: skip the remaining (expensive) predict calls.
        if all(done):
            break
        decoder_out = decoder_model.predict(tuple([prev_char_index] + decoder_input_state_values))
        # Decoder output holds the token probabilities followed by the layer states
        output_probability = decoder_out[0]
        # The next step starts from the states produced by this step
        decoder_input_state_values = list(decoder_out[1:])
        # Most likely token of the last timestep for every word at once
        sampled_indices = np.argmax(output_probability[:, -1, :], axis=-1)
        for j in range(batch_size):
            if done[j]:
                continue
            sampled_token_index = int(sampled_indices[j])
            # Index 0 (padding) is treated like the end-of-sequence marker.
            if sampled_token_index == 0 or index_to_char_target[sampled_token_index] == '\n':
                done[j] = True
                continue
            predicted_words[j] += index_to_char_target[sampled_token_index]
            # Feed the predicted character back in at the next step
            # (index_to_char_target is the inverse of target_tokenizer.word_index).
            prev_char_index[j, 0] = sampled_token_index
    return predicted_words

In [23]:
def test_accuracy(rnn_type,encoder_model,decoder_model,encoder_layers,decoder_layers):
  """Decode the test set, log every prediction to a file and return word accuracy.

  Each test word produces one line "<input> <target> <prediction>", appended
  to success.txt when the prediction equals the target and to failure.txt
  otherwise.

  Returns:
    Fraction of test words predicted exactly (0.0 for an empty test set).
  """
  num_words = test_input_tensor.shape[0]
  # Get all the predicted words
  pred=decode_batch_of_sequences(rnn_type,test_input_tensor,encoder_model,decoder_model,num_words,encoder_layers,decoder_layers)

  success_lines = []
  failure_lines = []
  for seq_index in range(num_words):
      predicted_word = pred[seq_index]
      # Strip the start and end markers from the target
      target_word=test_target_texts[seq_index][1:-1]
      line = test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n'
      if target_word == predicted_word:
        success_lines.append(line)
      else:
        failure_lines.append(line)

  # Write each file once (instead of reopening it per word), with an explicit
  # encoding because the words are non-ASCII. A file is only touched when
  # there is something to append, as before.
  for file_name, lines in (("success.txt", success_lines), ("failure.txt", failure_lines)):
    if lines:
      with open(file_name, "a", encoding="utf-8") as f:
        f.writelines(lines)

  if num_words == 0:
    return 0.0
  return float(len(success_lines))/float(num_words)

In [30]:
def batch_validate(rnn_type,encoder_model,decoder_model,encoder_layers,decoder_layers):
  """Decode the validation set and return the fraction of exactly matching words.

  Also prints the number of exact matches.
  """
  num_words = val_input_tensor.shape[0]
  # Get all the predicted words in one batch
  pred = decode_batch_of_sequences(rnn_type,val_input_tensor,encoder_model,decoder_model,num_words,encoder_layers,decoder_layers)

  # A prediction counts when it equals the target stripped of its start/end markers
  success = sum(
      1
      for seq_index in range(num_words)
      if pred[seq_index] == val_target_texts[seq_index][1:-1]
  )
  print("success:"+str(success))
  return float(success)/float(num_words)

In [25]:
# Hyperparameters used when a run is started without sweep-provided values.
default_config = dict(
    rnn_type="LSTM",
    dropout=0.5,
    encoder_layers=3,
    decoder_layers=4,
    latent_dim=64,
    epochs=10,
    lr=0.0001,
    embedding_out_dim=64,
    beam_search=False,
)

# Keras History callback instance; HP_tuning_run passes this same object to model.fit.
history = History()

In [26]:
def HP_tuning_run():
    """Train and validate one model with the hyperparameters of the current wandb run.

    Steps: pick a distribution strategy, start the wandb run, build and fit
    the model, save and reload it as an HDF5 file, derive the inference
    encoder/decoder, and log the validation word accuracy as "val_acc".
    """
    # Use a MirroredStrategy when a GPU is present, otherwise the default strategy.
    gpu_devices = tf.config.list_physical_devices('GPU')
    strategy = tf.distribute.MirroredStrategy() if gpu_devices else tf.distribute.get_strategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    wandb.init(config=default_config, magic=True,project="DeepLearningAssignment-3", entity='akshaygrao')
    config = wandb.config
    print("Config: "+str(config))
    # The run is named after its configuration
    run_name = str(config).replace("{", "").replace("}","").replace(":","-")
    wandb.run.name = run_name

    # Create the model inside the strategy scope
    with strategy.scope():
      model = build_model(config.rnn_type,config.embedding_out_dim,config.encoder_layers,config.decoder_layers,config.dropout,config.latent_dim)

    plot_model(model, to_file='model.png', show_shapes=True, show_dtype=True,show_layer_names=True)

    model.summary()

    model.compile(optimizer=keras.optimizers.Adam(config.lr), loss=keras.losses.SparseCategoricalCrossentropy(reduction='none'), metrics=["accuracy"])

    # Expected decoder output: the target sequence shifted left by one step,
    # with a zero appended so the width stays the same.
    num_train_words = train_target_tensor[:,:].shape[0]
    decoder_targets = tf.concat([train_target_tensor[:,1:],tf.zeros((num_train_words,1))], axis=1)
    model.fit([train_input_tensor, train_target_tensor],decoder_targets,batch_size=32,epochs=config.epochs,shuffle=True,callbacks=[WandbCallback(), history])

    # Save as an HDF5 file named after the run, then reload it for inference
    saved_model_path = f'{run_name.replace(",","-")}.h5'
    model.save(saved_model_path)
    inf = keras.models.load_model(saved_model_path)
    encoder_inference_model,decoder_inference_model=build_inference_model(inf,encoder_layers=config.encoder_layers,decoder_layers=config.decoder_layers,latent_dim=config.latent_dim)
    plot_model(encoder_inference_model, to_file='encoder_model.png', show_shapes=True)
    plot_model(decoder_inference_model, to_file='decoder_model.png', show_shapes=True)

    val_acc=batch_validate(config.rnn_type,encoder_inference_model,decoder_inference_model,config.encoder_layers,config.decoder_layers)
    print("val_acc"+str(val_acc))
    wandb.log({"val_acc":val_acc})


In [27]:
# Candidate values of every hyperparameter explored by the sweep.
_search_space = {
    "rnn_type": ["LSTM","RNN","GRU"],
    "dropout": [0.3,0.5,0.7],
    "encoder_layers": [2,5,7,10],
    "decoder_layers": [2,5,7,10],
    "latent_dim": [32,64,128],
    "epochs": [5,10,15],
    "lr": [0.0001],
    "embedding_out_dim": [32,64,128],
    "beam_search": [False],
}

# Bayesian sweep maximizing "val_acc", with hyperband early termination.
sweep_config = {
    "name": "Assignment 3 - alpha",
    "method": "bayes",
    "metric": {"goal": "maximize", "name": "val_acc"},
    "early_terminate": {"type": "hyperband", "min_iter": 2, "eta": 2},
    "project": 'DeepLearningAssignment-3',
    "parameters": {name: {"values": choices} for name, choices in _search_space.items()},
}

In [28]:
# Register the sweep described by sweep_config and keep its id for the agent.
sweep_id = wandb.sweep(sweep_config,  project='DeepLearningAssignment-3', entity='akshaygrao')

Create sweep with ID: 343i8hbd
Sweep URL: https://wandb.ai/akshaygrao/DeepLearningAssignment-3/sweeps/343i8hbd


In [None]:
# Start a sweep agent that calls HP_tuning_run for each configuration it receives.
wandb.agent(sweep_id, function=HP_tuning_run, project='DeepLearningAssignment-3', entity='akshaygrao')

[34m[1mwandb[0m: Agent Starting Run: su2cnce5 with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	latent_dim: 32
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: LSTM


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 2, 'dropout': 0.7, 'embedding_out_dim': 32, 'encoder_layers': 5, 'epochs': 5, 'latent_dim': 32, 'lr': 0.0001, 'rnn_type': 'LSTM'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 32)       864         ['input_1[0][0]']                
                                                                                                  
 lstm (LSTM)                    [(None, 26, 32),     8320        ['embedding[0][0]']              
                                 (None, 32),                                                      
              

VBox(children=(Label(value='0.990 MB of 0.990 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▄▇▇█
epoch,▁▃▅▆█
loss,█▃▂▁▁
val_acc,▁

0,1
accuracy,0.71053
epoch,4.0
loss,1.07817
val_acc,0.0


[34m[1mwandb[0m: Agent Starting Run: dwjt2w0g with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 7
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 5
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 7, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 5, 'epochs': 15, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 gru (GRU)                      [(None, 26, 128),    99072       ['embedding[0][0]']              
                                 (None, 128)]                                                     
            

VBox(children=(Label(value='1.552 MB of 1.552 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▄▄▅▆▆▆▇▇▇███
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▆▅▅▄▃▃▃▂▂▂▂▁▁▁
val_acc,▁

0,1
accuracy,0.79267
epoch,14.0
loss,0.71397
val_acc,0.00159


[34m[1mwandb[0m: Agent Starting Run: e44fn58m with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 7
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 10
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 7, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 10, 'epochs': 10, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 gru (GRU)                      [(None, 26, 128),    99072       ['embedding[0][0]']              
                                 (None, 128)]                                                     
           

VBox(children=(Label(value='2.182 MB of 2.182 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▅▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▄▃▂▂▁▁▁
val_acc,▁

0,1
accuracy,0.76384
epoch,9.0
loss,0.81518
val_acc,0.0006


[34m[1mwandb[0m: Agent Starting Run: jc8bnh12 with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 7
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 7
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 7, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 7, 'epochs': 10, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 gru (GRU)                      [(None, 26, 128),    99072       ['embedding[0][0]']              
                                 (None, 128)]                                                     
            

VBox(children=(Label(value='1.804 MB of 1.804 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▄▅▆▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▅▄▃▂▂▂▁▁
val_acc,▁

0,1
accuracy,0.76865
epoch,9.0
loss,0.80425
val_acc,0.0002


[34m[1mwandb[0m: Agent Starting Run: leg7y9mp with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 10
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 10, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 2, 'epochs': 10, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 gru (GRU) 

VBox(children=(Label(value='1.548 MB of 1.548 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂▃▄▅▆▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▅▄▃▃▂▁▁▁
val_acc,▁

0,1
accuracy,0.74529
epoch,9.0
loss,0.88858
val_acc,0.0


[34m[1mwandb[0m: Agent Starting Run: wnbv28iq with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 5
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 5, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 2, 'epochs': 15, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 gru (GRU)  

VBox(children=(Label(value='0.926 MB of 0.926 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇██
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▅▅▄▃▃▃▂▂▂▂▂▁▁▁
val_acc,▁

0,1
accuracy,0.78479
epoch,14.0
loss,0.74887
val_acc,0.0


[34m[1mwandb[0m: Agent Starting Run: v4v4z4rk with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 5
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 7
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	latent_dim: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: GRU


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 5, 'dropout': 0.7, 'embedding_out_dim': 128, 'encoder_layers': 7, 'epochs': 15, 'latent_dim': 128, 'lr': 0.0001, 'rnn_type': 'GRU'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 gru (GRU)                      [(None, 26, 128),    99072       ['embedding[0][0]']              
                                 (None, 128)]                                                     
            

VBox(children=(Label(value='1.556 MB of 1.556 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▄▅▆▆▆▇▇▇▇███
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▅▅▄▃▃▃▂▂▂▂▂▁▁▁
val_acc,▁

0,1
accuracy,0.79516
epoch,14.0
loss,0.70586
val_acc,0.0004


[34m[1mwandb[0m: Agent Starting Run: afsbagd3 with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 10
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_out_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	latent_dim: 64
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: RNN


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 10, 'dropout': 0.3, 'embedding_out_dim': 32, 'encoder_layers': 5, 'epochs': 5, 'latent_dim': 64, 'lr': 0.0001, 'rnn_type': 'RNN'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 32)       864         ['input_1[0][0]']                
                                                                                                  
 simple_rnn (SimpleRNN)         [(None, 26, 64),     6208        ['embedding[0][0]']              
                                 (None, 64)]                                                      
              

VBox(children=(Label(value='1.603 MB of 1.603 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▆▇██
epoch,▁▃▅▆█
loss,█▂▂▁▁
val_acc,▁

0,1
accuracy,0.68682
epoch,4.0
loss,1.12482
val_acc,0.0


[34m[1mwandb[0m: Agent Starting Run: svsq4o0p with config:
[34m[1mwandb[0m: 	beam_search: False
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_out_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 7
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	latent_dim: 64
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	rnn_type: RNN


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1




Config: {'beam_search': False, 'decoder_layers': 2, 'dropout': 0.3, 'embedding_out_dim': 128, 'encoder_layers': 7, 'epochs': 5, 'latent_dim': 64, 'lr': 0.0001, 'rnn_type': 'RNN'}
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 26)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 26, 128)      3456        ['input_1[0][0]']                
                                                                                                  
 simple_rnn (SimpleRNN)         [(None, 26, 64),     12352       ['embedding[0][0]']              
                                 (None, 64)]                                                      
              

In [None]:
# Build the model for a single standalone (non-sweep) training run.
# NOTE(review): build_model is defined elsewhere in the notebook; the positional
# arguments presumably map to (rnn_type, embedding_out_dim, encoder_layers,
# decoder_layers, dropout, latent_dim) -- confirm against its signature.
model = build_model(
    'LSTM',
    32,
    2,
    3,
    0.3,
    64,
)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
# The loss object is constructed with reduction='none', i.e. the loss instance
# itself is asked not to aggregate the individual loss values.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(reduction='none'),
    metrics=["accuracy"],
    optimizer="adam",
)

In [None]:
# Fit the model. Inputs are the source sequences plus the target sequences;
# the labels are the target sequences shifted left by one position, with a
# single column of zeros appended so the sequence length stays the same.
hist = model.fit(
    [train_input_tensor, train_target_tensor],
    tf.concat(
        [
            train_target_tensor[:, 1:],
            tf.zeros((train_target_tensor.shape[0], 1)),
        ],
        axis=1,
    ),
    batch_size=32,
    epochs=5,
    shuffle=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Save the trained model, then reload it to build the inference-time encoder
# and decoder.
# One path variable is shared by the save and the load so that both always
# refer to the same file and the cell does not depend on the working directory
# being /content.
model_path = "s2s.keras"
model.save(model_path)

# Run inference: restore the model and construct the encoder and decoder.
# NOTE(review): encoder_layers, decoder_layers and latent_dim presumably have
# to match the values the saved model was built with -- confirm against the
# build_model call and build_inference_model's definition.
inf = keras.models.load_model(model_path)
encoder_model, decoder_model = build_inference_model(
    inf,
    encoder_layers=2,
    decoder_layers=3,
    latent_dim=64,
)