In [59]:
import os
import datetime
from google.colab import drive
drive.mount('/content/gdrive')

os.chdir('/content/gdrive/My Drive/Deep Learning CS6910/rnn_test')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [60]:
# !pip install --upgrade wandb -qq

In [61]:
import torch 
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import wandb
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output

In [62]:
device_gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [63]:
path = './aksharantar_sampled/'
dirs = os.listdir(path)
# dirs

In [64]:
train_df = pd.read_csv(path+'hin'+'/'+'hin_train.csv', header=None)
print(train_df.head())
print(len(train_df))
train_data = train_df.values.T
train_data.shape

             0           1
0  shastragaar  शस्त्रागार
1      bindhya    बिन्द्या
2    kirankant    किरणकांत
3  yagyopaveet   यज्ञोपवीत
4      ratania     रटानिया
51200


(2, 51200)

In [65]:
valid_df = pd.read_csv(path+'hin'+'/'+'hin_valid.csv', header=None)
print(valid_df.head())
print(len(valid_df))
valid_data = valid_df.values

           0        1
0   jaisawal   जयसवाल
1      bajai     बजाई
2  sanghthan    संघठन
3    haiwaan    हैवान
4    nilgiri  नीलगिरि
4096


In [66]:
test_df = pd.read_csv(path+'hin'+'/'+'hin_valid.csv', header=None)
print(test_df.head())
print(len(test_df))
test_data = test_df.values.T

           0        1
0   jaisawal   जयसवाल
1      bajai     बजाई
2  sanghthan    संघठन
3    haiwaan    हैवान
4    nilgiri  नीलगिरि
4096


In [67]:
# storing all the alphabets of English and the pad char to a dictionary to create OHE representation later.
eng_alphabets = 'abcdefghijklmnopqrstuvwxyz'
pad_char = '-PAD-'

eng_alpha2index = {pad_char: 0}
for index, alpha in enumerate(eng_alphabets):
    eng_alpha2index[alpha] = index+1

print(eng_alpha2index)


{'-PAD-': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}


In [68]:
tensor_dict = {key: torch.tensor(value).to(device_gpu) for key, value in eng_alpha2index.items()}

In [69]:
hindi_alphabets = [chr(alpha) for alpha in range(2304, 2432)]
hindi_alphabet_size = len(hindi_alphabets)

hindi_alpha2index = {pad_char: 0}
for index, alpha in enumerate(hindi_alphabets):
    hindi_alpha2index[alpha] = index+1

print(hindi_alpha2index)

{'-PAD-': 0, 'ऀ': 1, 'ँ': 2, 'ं': 3, 'ः': 4, 'ऄ': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ऌ': 13, 'ऍ': 14, 'ऎ': 15, 'ए': 16, 'ऐ': 17, 'ऑ': 18, 'ऒ': 19, 'ओ': 20, 'औ': 21, 'क': 22, 'ख': 23, 'ग': 24, 'घ': 25, 'ङ': 26, 'च': 27, 'छ': 28, 'ज': 29, 'झ': 30, 'ञ': 31, 'ट': 32, 'ठ': 33, 'ड': 34, 'ढ': 35, 'ण': 36, 'त': 37, 'थ': 38, 'द': 39, 'ध': 40, 'न': 41, 'ऩ': 42, 'प': 43, 'फ': 44, 'ब': 45, 'भ': 46, 'म': 47, 'य': 48, 'र': 49, 'ऱ': 50, 'ल': 51, 'ळ': 52, 'ऴ': 53, 'व': 54, 'श': 55, 'ष': 56, 'स': 57, 'ह': 58, 'ऺ': 59, 'ऻ': 60, '़': 61, 'ऽ': 62, 'ा': 63, 'ि': 64, 'ी': 65, 'ु': 66, 'ू': 67, 'ृ': 68, 'ॄ': 69, 'ॅ': 70, 'ॆ': 71, 'े': 72, 'ै': 73, 'ॉ': 74, 'ॊ': 75, 'ो': 76, 'ौ': 77, '्': 78, 'ॎ': 79, 'ॏ': 80, 'ॐ': 81, '॑': 82, '॒': 83, '॓': 84, '॔': 85, 'ॕ': 86, 'ॖ': 87, 'ॗ': 88, 'क़': 89, 'ख़': 90, 'ग़': 91, 'ज़': 92, 'ड़': 93, 'ढ़': 94, 'फ़': 95, 'य़': 96, 'ॠ': 97, 'ॡ': 98, 'ॢ': 99, 'ॣ': 100, '।': 101, '॥': 102, '०': 103, '१': 104, '२': 105, '३': 106, '४': 107, '५': 108, '६': 109, '७': 

In [70]:
import re
non_eng_letters_regex = re.compile('[^a-zA-Z ]')

# Remove all English non-letters
def cleanEnglishVocab(line):
    line = line.replace('-', ' ').replace(',', ' ').upper()
    line = non_eng_letters_regex.sub('', line)
    return line.split()

# Remove all Hindi non-letters
def cleanHindiVocab(line):
    line = line.replace('-', ' ').replace(',', ' ')
    cleaned_line = ''
    for char in line:
        if char in hindi_alpha2index or char == ' ':
            cleaned_line += char
    return cleaned_line.split()

In [71]:

class TransliterationDataLoader(Dataset):
    def __init__(self, eng_words, hindi_words):
        self.eng_words = eng_words
        self.hindi_words = hindi_words
        self.shuffle_indices = list(range(len(self.eng_words)))
        random.shuffle(self.shuffle_indices)
        self.shuffle_start_index = 0
        
    def __len__(self):
        return len(self.eng_words)
    
    def __getitem__(self, idx):
        return self.eng_words[idx], self.hindi_words[idx]
    
    def get_random_sample(self):
        return self.__getitem__(np.random.randint(len(self.eng_words)))
    
    def get_batch_from_array(self, batch_size, array):
        end = self.shuffle_start_index + batch_size
        batch = []
        if end >= len(self.eng_words):
            batch = [array[i] for i in self.shuffle_indices[0:end % len(self.eng_words)]]
            end = len(self.eng_words)
        return batch + [array[i] for i in self.shuffle_indices[self.shuffle_start_index : end]]
    
    def get_batch(self, batch_size, postprocess=True):
        eng_batch = self.get_batch_from_array(batch_size, self.eng_words)
        hindi_batch = self.get_batch_from_array(batch_size, self.hindi_words)
        self.shuffle_start_index += batch_size + 1
        
        if self.shuffle_start_index >= len(self.eng_words):
            random.shuffle(self.shuffle_indices)
            self.shuffle_start_index = 0
            
        return eng_batch, hindi_batch


In [72]:
train_data = TransliterationDataLoader(train_data[0],train_data[1])

In [None]:
for i in range(10):
  eng, hindi = train_data.get_random_sample()
  print(eng + ' - ' + hindi)

grampanchayatimadhun - ग्रामपंचायतीमधून
kesharpur - केशरपुर
srinagarchya - श्रीनगरच्या
nipponese - निप्पोनीज़
gavavaril - गावावरील
imarna - इमरना
manasgangotri - मानसगंगोत्री
vinaypatrik - विनयपत्रिक
kahuara - कहुआरा
thakuri - ठाकुरी


In [73]:
def word_rep(word, letter2index, device = 'cpu'):
    rep = torch.zeros(len(word)+1, 1, len(letter2index)).to(device)
    for letter_index, letter in enumerate(word):
        pos = letter2index[letter]
        rep[letter_index][0][pos] = 1
    pad_pos = letter2index[pad_char]
    rep[letter_index+1][0][pad_pos] = 1
    return rep

def gt_rep(word, letter2index, device = 'cpu'):
    gt_rep = torch.zeros([len(word)+1, 1], dtype=torch.long).to(device)
    for letter_index, letter in enumerate(word):
        pos = letter2index[letter]
        gt_rep[letter_index][0] = pos
    gt_rep[letter_index+1][0] = letter2index[pad_char]
    return gt_rep

In [None]:
eng, hindi = train_data.get_random_sample()
eng_rep = word_rep(eng, eng_alpha2index)
# print(eng, eng_rep)

In [None]:
hindi_gt = gt_rep(hindi, hindi_alpha2index)
print(hindi, hindi_gt.shape[0])

कूतरने 7


In [74]:
MAX_OUTPUT_CHARS = 30

class Transliteration_EncoderDecoder_BeamSearch(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, type_ = 'gru', bidirectional=False, beam_width=5,embedding_size=128, num_layers=1, verbose=False):
    super(Transliteration_EncoderDecoder_BeamSearch, self).__init__()
    
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.beam_width = beam_width
    self.type_ = type_


    self.embedding = nn.Embedding(input_size, embedding_size)
    
    if type_ == 'gru':
      self.encoder_rnn_cell = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,bidirectional=bidirectional)
      self.decoder_rnn_cell = nn.GRU(input_size=output_size, hidden_size=hidden_size,num_layers=num_layers,bidirectional=bidirectional)
    
    elif type_ == 'rnn':
      self.encoder_rnn_cell = nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,bidirectional=bidirectional)
      self.decoder_rnn_cell = nn.RNN(input_size=output_size, hidden_size=hidden_size,num_layers=num_layers,bidirectional=bidirectional)
    
    elif type_ == 'lstm':
      self.encoder_rnn_cell = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,bidirectional=bidirectional)
      self.decoder_rnn_cell = nn.LSTM(input_size=output_size, hidden_size=hidden_size,num_layers=num_layers,bidirectional=bidirectional)
    
    self.h2o = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=2)
    
    self.verbose = verbose
  
  def forward(self, input, max_output_chars=MAX_OUTPUT_CHARS, device='cpu', ground_truth=None):
    # encoder
    # Convert input tensor to LongTensor
    input = input.long()

    # Embedding
    embedded_input = self.embedding(input)
    embedded_input = embedded_input.view((embedded_input.shape[0]*embedded_input.shape[2],1,embedded_input.shape[3]))
    if self.type_ == 'lstm':
      out, (hidden,_) = self.encoder_rnn_cell(embedded_input)
    else:  
      out, hidden = self.encoder_rnn_cell(embedded_input)
    
    if self.verbose:
      print('Encoder input', input.shape)
      print('Encoder output', out.shape)
      print('Encoder hidden', hidden.shape)
    
    # decoder
    decoder_state = hidden
    decoder_input = torch.zeros(1, 1, self.output_size).to(device)
    decoder_state_ = torch.zeros_like(decoder_state).to(device)
    outputs = []
    
    if self.verbose:
      print('Decoder state', decoder_state.shape)
      print('Decoder input', decoder_input.shape)
    
    for i in range(max_output_chars):
      if self.type_ == 'lstm':
        out, (decoder_state,_) = self.decoder_rnn_cell(decoder_input, (decoder_state,decoder_state_))
      else:
        out,decoder_state = self.decoder_rnn_cell(decoder_input,decoder_state)
      
      if self.verbose:
        print('Decoder intermediate output', out.shape)
      
      out = self.h2o(decoder_state)
      out = self.softmax(out)
      outputs.append(out.view(1, -1))
      
      if self.verbose:
          print('Decoder output', out.shape)
          self.verbose = False
      
      if ground_truth is not None:
          max_idx = ground_truth[i].reshape(1, 1, 1)
      else:
          topk_probs, topk_indices = out.topk(self.beam_width, dim=2)
          topk_probs = topk_probs.view(1, -1)
          topk_indices = topk_indices.view(1, -1)
          # Exclude invalid probabilities
          topk_probs[torch.isnan(topk_probs)] = 0
          topk_probs[torch.isinf(topk_probs)] = 0
          # Normalize probabilities
          topk_probs /= topk_probs.sum()
          # Perform multinomial sampling
          selected_indices = torch.multinomial(topk_probs, 1)
          max_idx = topk_indices[0][selected_indices[0]].reshape(1, 1, 1)
      
      one_hot = torch.FloatTensor(out.shape).to(device)
      one_hot.zero_()
      one_hot.scatter_(2, max_idx, 1)
      
      decoder_input = one_hot.detach()
    
    return outputs


In [None]:
def infer(net, eng_word,shape,device ='cpu'):
  # net.eval()
  net.to(device)
  input_ = word_rep(eng_word,tensor_dict,device) # convert the name into one hot encoding.
  outputs = net(input_,shape,device) # initilise the hidden layer.
  
  return outputs

In [None]:
net = Transliteration_EncoderDecoder_BeamSearch(len(eng_alpha2index), 256, len(hindi_alpha2index), verbose=True,bidirectional=True, num_layers=2)

In [None]:
out = infer(net, 'india', 30, device_gpu)
# type(out)

Encoder input torch.Size([6, 1, 27])
Encoder output torch.Size([162, 1, 512])
Encoder hidden torch.Size([4, 1, 256])
Decoder state torch.Size([4, 1, 256])
Decoder input torch.Size([1, 1, 129])
Decoder intermediate output torch.Size([1, 1, 512])
Decoder output torch.Size([4, 1, 129])


In [77]:
def train_batch(net, opt, criterion, batch_size, device = 'cpu', teacher_force = False):
    
  net.train().to(device)
  opt.zero_grad()
  eng_batch, hindi_batch = train_data.get_batch(batch_size)
  
  total_loss = 0
  accuracy = 0
  for i in range(batch_size):
      
    input = word_rep(eng_batch[i], eng_alpha2index, device)
    gt = gt_rep(hindi_batch[i], hindi_alpha2index, device)
    outputs = net(input, gt.shape[0], device, ground_truth = gt if teacher_force else None)
    
    correct = 0
    for index, output in enumerate(outputs):
      loss = criterion(output, gt[index]) / batch_size
      loss.backward(retain_graph = True)
      total_loss += loss

      val, indices = output.topk(1)
      hindi_pos = indices.tolist()[0]
      if hindi_pos[0] == gt[index][0]:
        correct += 1
    accuracy += correct/gt.shape[0]
  accuracy /= batch_size
  opt.step()


  return total_loss.cpu().detach().numpy()/batch_size,accuracy

In [81]:
def train_setup(net, lr = 0.01, n_batches = 100, batch_size = 10, momentum = 0.9, display_freq=5, device = 'cpu',name='model'):

  log = {}
    
  net = net.to(device)
  criterion = nn.NLLLoss(ignore_index = -1)
  opt = optim.Adam(net.parameters(), lr=lr)
  teacher_force_upto = n_batches//3
  
  # loss_arr = np.zeros(n_batches + 1)
  
  for i in range(n_batches):
    loss,accuracy = train_batch(net, opt, criterion, batch_size, device = device, teacher_force = i<teacher_force_upto )

    log['loss'] = loss
    log['acc'] = accuracy
    print(log)
    
    # if i%display_freq == display_freq-1:
    #     clear_output(wait=True)
        
        # print('Iteration', i, 'Loss', loss,'accuracy:',accuracy)
        # plt.figure()
        # plt.plot(loss, '-*')
        # plt.xlabel('Iteration')
        # plt.ylabel('Loss')
        # plt.show()
        # print('\n\n')

    # val_acc = calc_accuracy(net,valid_data) 
    # log['val_acc'] = val_acc
    # wandb.log(log)
    # print('val_acc',val_acc)    
  torch.save(net, name+'.pt')
  # return loss

In [79]:
net = Transliteration_EncoderDecoder_BeamSearch(len(eng_alpha2index), 256, len(hindi_alpha2index))

In [None]:
train_setup(net, lr=0.001, n_batches=2000, batch_size = 64, display_freq=5, device = device_gpu,name='best_model')

In [84]:
torch.save(net, 'best_model.pt')

In [85]:
def test(net, word, device = 'cpu'):
  net = net.eval().to(device)
  outputs = infer(net, word, 30, device)
  hindi_output = ''
  for out in outputs:
      val, indices = out.topk(1)
      index = indices.tolist()[0][0]
      if index == 0:
          break
      hindi_char = hindi_alphabets[index+1]
      hindi_output += hindi_char
  # print(word + ' - ' + hindi_output)
  return hindi_output

In [None]:
net = Transliteration_EncoderDecoder_BeamSearch(len(eng_alpha2index), 16, len(hindi_alpha2index))

In [None]:
net =torch.load('best.pt')

In [86]:
pred=np.array([])
for i in range(4096):
  pred =np.append(pred,test(net,test_data[0][i]))

In [88]:
import csv

def write_predictions_to_csv(predictions, actual_values, filename):
    # Prepare data as a list of dictionaries
    data = [{'predicted': pred, 'actual': actual} for pred, actual in zip(predictions, actual_values)]

    # Write data to CSV file
    with open(filename, 'w', newline='') as file:
        fieldnames = ['predicted', 'actual']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

In [89]:
write_predictions_to_csv(pred,test_data[1],'predictions_vanilla.csv')

In [None]:
def calc_accuracy(net,Data, device = 'cpu'):
  net = net.eval()#.to('cpu')
  predictions = []
  accuracy = 0
  for i in range(len(Data)):
    data = Data[i]
    eng, hindi = data[0],data[1]
    gt = gt_rep(hindi, hindi_alpha2index, device_gpu)
    outputs = infer(net, eng, gt.shape[0], device_gpu)
    correct = 0
    for index, out in enumerate(outputs):
      val, indices = out.topk(1)
      hindi_pos = indices.tolist()[0]
      if hindi_pos[0] == gt[index][0]:
        correct += 1      
    accuracy += correct/gt.shape[0]
  accuracy /= len(Data)
  # print(accuracy)
  return accuracy

In [None]:
calc_accuracy(net,valid_data)

0.0

In [None]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_acc',
      'goal': 'maximize'   
    },
    'parameters':{
        'embedding_size':{
            'values':[16,32,64,128,256]
        },
        'n_layers':{
            'values':[1,2,3]
        },
        'hidden_size':{
            'values':[16, 32, 64, 256],
        },
        'cell_type':{
            'values':['rnn','lstm','gru']
        },
        'bidirectional':{
            'values':[True,False]
        },
        'dropout':{
            'values':[0.2,0.3]
        },
        'beam_width':{
            'values':[3,4,5]
        }
    }
    }

In [None]:
sweep_id = wandb.sweep(sweep_config, entity='viswa_ee', project="CS6910_NLG")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: hqtp9ouh
Sweep URL: https://wandb.ai/viswa_ee/CS6910_NLG/sweeps/hqtp9ouh


In [None]:
def train():
  config_defaults={
      'embedding_size':16,
      'n_layers':1,
      'hidden_size':16,
      'cell_type':'gru',
      'bidirectional':False,
      'dropout':0.2,
      'beam_width':2
  }
  wandb.init(config=config_defaults)
  config = wandb.config
  net = Transliteration_EncoderDecoder_BeamSearch(input_size=len(eng_alpha2index), hidden_size=256, output_size=len(hindi_alpha2index),type_=config.cell_type,bidirectional=config.bidirectional,
                                                  beam_width=config.beam_width,embedding_size=config.embedding_size,num_layers=config.n_layers)
  
  train_setup(net, lr=0.001, n_batches=50, batch_size = 64, display_freq=5, device = device_gpu,name=wandb.run.name)

In [None]:
wandb.agent(sweep_id,function=train,count=20)

[34m[1mwandb[0m: Agent Starting Run: nfv7gpcc with config:
[34m[1mwandb[0m: 	beam_width: 4
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	n_layers: 3
[34m[1mwandb[0m: Currently logged in as: [33mviswa_ee[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.979942…

0,1
acc,▁▁▂▇▇▆▇▆▆▆▆▇▆▇▇▇▆▆▆▆▆▇▇▇█▇▇▇▇▇▇▆█▇█▇▇▇█▇
loss,█▇██▇█▆▆▃▃▃▂▃▃▂▂▂▂▂▂▃▂▂▁▂▂▂▄▁▂▂▂▃▂▂▂▂▂▂▂
val_acc,▁▂█▇▇▇▇▇▇▇█████▇▇▇▇▇███▇█████▇▇▇▇▇▇▇▇███

0,1
acc,0.17402
loss,0.45116
val_acc,0.17441


[34m[1mwandb[0m: Agent Starting Run: jr534f1g with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	n_layers: 1


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.980146…

0,1
acc,▃▁▂▂▂▂▃▂▃▃▃▃▂▂▂▃▃▃▄▇▇▇▆▆▆▆▆▆▇█▇▆█▇▆▆▆▆▆▇
loss,█▆▇▆█▆▇▅▆▇▇▆▄▃▃▂▂▂▂▂▁▁▂▂▂▂▂▁▂▂▁▂▁▁▁▃▂▃▂▁
val_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃█▇▇▇▇▇▇▇▇▇█████▇▇▇▇▇██

0,1
acc,0.14784
loss,0.44076
val_acc,0.17605


[34m[1mwandb[0m: Agent Starting Run: 2ie1obfs with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	n_layers: 3


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.979942…

0,1
acc,▁▂▅▄▆▇█▇▇▇▇█▇▆▇▆▇▇▇▇▇▇▇▇█▇█▇▇▇█▇▇▇▇▇▇▇▇▇
loss,█▇▇▇▆▆▅▅▄▄▂▂▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▁▃▂▂▁▂▂▂▂
val_acc,▁▄▄▄▇███████▇▇▇▇█▇▇▇▇▇▇▇█▇▇█▇▇▇▇▆▆▆▆▇▇▇▇

0,1
acc,0.14674
loss,0.4349
val_acc,0.16729


[34m[1mwandb[0m: Agent Starting Run: ykmm1gd1 with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	n_layers: 3


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.979839…

0,1
acc,▁▁▂▃▅▅▅▆█▇▇▇███▇▇██▇▇▇▇█▇███▇▇▇▇▇█▇▇▇▇▇█
loss,▇▇▆▇██▆▆▆▅▅▃▁▂▂▃▂▂▁▂▂▂▃▃▂▂▁▂▂▂▂▂▁▁▂▂▂▂▁▁
val_acc,▁▁▂▅▅▅▅█████████████████████████████████

0,1
acc,0.13755
loss,0.44386
val_acc,0.15457


[34m[1mwandb[0m: Agent Starting Run: lt0n83m7 with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	n_layers: 1


0,1
acc,▁▄▆▅▅▄▃▃▃▃▄▄█▇▇▇▇█▇▇▆▅▆▆▇▇███▇█▇▆█▇▇▆▇▇▇
loss,█▇▇▅▆▄▄▄▄▃▂▂▁▂▂▂▂▂▂▂▂▂▂▁▁▂▁▂▂▂▁▁▂▂▃▁▂▂▂▂
val_acc,▁▄▄▄▂▂▂▂▂▃▃▇█████▇▇▅▄▅▆▇████▇▇▆▆▇▇▇▇▇▇▆▆

0,1
acc,0.16641
loss,0.48359
val_acc,0.15168


[34m[1mwandb[0m: Agent Starting Run: jnj9fzhb with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	n_layers: 3
