In [1]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 5.1 MB/s 
[?25hCollecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 47.7 MB/s 
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.11-py2.py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 45.3 MB/s 
Collecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
Collecting smm

In [2]:
import shutil
import io
import numpy as np
import pandas as pd
import tensorflow 
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import plot_model
from math import log
from numpy import array
from numpy import argmax
from tensorflow.keras.optimizers import Adam
import keras
from keras.models import load_model
from math import log1p 

import wandb
from wandb.keras import WandbCallback

In [3]:
!curl --header "Host: storage.googleapis.com" --header "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36 Edg/89.0.774.77" --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" --header "Accept-Language: en-US,en;q=0.9" --header "Referer: https://github.com/google-research-datasets/dakshina" "https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar" -L -o "dakshina_dataset_v1.0.tar"
shutil.unpack_archive("/content/dakshina_dataset_v1.0.tar",'/content/')

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1915M  100 1915M    0     0   211M      0  0:00:09  0:00:09 --:--:--  215M


In [8]:
TRAIN_PATH = "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
VAL_PATH = "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
TEST_PATH = "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"

# Function used to load data from a given path
def load_data(path):
  df = pd.read_csv(path, sep='\t', header=None)
  df.columns = ['Devanagari', 'Romanized', 'Attestations']
  df = df.dropna()
  input_texts = df['Romanized'].tolist()
  target_texts = df['Devanagari'].apply(lambda x: 'S' + x + 'E').tolist()
  return input_texts, target_texts

#loading training , testing and validation data
train_texts, train_target_texts = load_data(TRAIN_PATH)
val_texts, val_target_texts = load_data(VAL_PATH)
test_texts, test_target_texts = load_data(TEST_PATH)

print("Number of training samples: ", len(train_texts))
print("Number of validation samples: ", len(val_texts))
print("Number of testing samples: ", len(test_texts))

train_indices = np.arange(len(train_texts))
val_indices = np.arange(len(val_texts))
test_indices = np.arange(len(test_texts))

np.random.shuffle(train_indices)
np.random.shuffle(val_indices)

# Used to store vocabulary of source and target language
input_characters = set()
target_characters = set()

# Used to store texts after adding start and end token
train_target_texts_processed = []
val_target_texts_processed = []
test_target_texts_processed = []

# Adding starting and ending token in training data
for (input_text, target_text) in zip(train_texts, train_target_texts):
    # "S" -> start token, "E" -> end token, " " -> pad token
    target_text = "S" + target_text + "E"
    train_target_texts_processed.append(target_text)
    for char in input_text:
      input_characters.add(char)
    for char in target_text:
      target_characters.add(char)

# Adding starting and ending token in validation data
for (input_text, target_text) in zip(val_texts, val_target_texts):
    # "S" -> start token, "E" -> end token, " " -> pad token
    target_text = "S" + target_text + "E"
    val_target_texts_processed.append(target_text)
    for char in input_text:
      input_characters.add(char)
    for char in target_text:
      target_characters.add(char)

# Adding starting and ending token in testing data
for (input_text, target_text) in zip(test_texts, test_target_texts):
    # "S" -> start token, "E" -> end token, " " -> pad token
    target_text = "S" + target_text + "E"
    test_target_texts_processed.append(target_text)
    for char in input_text:
      input_characters.add(char)
    for char in target_text:
      target_characters.add(char)

input_texts = list(map(train_texts.__getitem__, train_indices))
target_texts = list(map(train_target_texts_processed.__getitem__, train_indices))

val_input_texts = list(map(val_texts.__getitem__, val_indices))
val_target_texts = list(map(val_target_texts_processed.__getitem__, val_indices))

test_input_texts = list(map(test_texts.__getitem__, test_indices))
test_target_texts = list(map(test_target_texts_processed.__getitem__, test_indices))

# Creating sorted vocabulary of source and target language
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))

# Add pad tokens
input_characters.insert(0, " ")
target_characters.insert(0, " ")

# Creating essential parameters
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(t) for t in input_texts])
max_decoder_seq_length = max([len(t) for t in target_texts])
val_max_encoder_seq_length = max([len(t) for t in val_input_texts])
val_max_decoder_seq_length = max([len(t) for t in val_target_texts])

test_max_encoder_seq_length = max([len(t) for t in test_input_texts])
test_max_decoder_seq_length = max([len(t) for t in test_target_texts])

# Mapping each character of vocabulary to index
input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])

# defining shapes of input sequence of encoder after padding for training data
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype="float32")

# defining shapes of input and target sequence of decoder after padding for training data
decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length), dtype="float32")
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32")

# Adding training data
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    encoder_input_data[i, t+1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            decoder_target_data[i, t-1, target_token_index[char]] = 1.0
    decoder_input_data[i, t + 1: ] = target_token_index[" "]
    decoder_target_data[i, t:, target_token_index[" "]] = 1.0

# defining shapes of input sequence of encoder after padding for validation data
val_encoder_input_data = np.zeros((len(input_texts), val_max_encoder_seq_length), dtype="float32")

# defining shapes of input and target sequence of decoder after padding for validation data
val_decoder_input_data = np.zeros((len(input_texts), val_max_decoder_seq_length), dtype="float32")
val_decoder_target_data = np.zeros((len(input_texts), val_max_decoder_seq_length, num_decoder_tokens), dtype="float32")

# Adding validation data
for i, (input_text, target_text) in enumerate(zip(val_input_texts, val_target_texts)):
    for t, char in enumerate(input_text):
        val_encoder_input_data[i, t] = input_token_index[char]
    val_encoder_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        val_decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            val_decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    val_decoder_input_data[i, t + 1: ] = target_token_index[" "]
    val_decoder_target_data[i, t:, target_token_index[" "]] = 1.0

# defining shapes of input sequence of encoder after padding for testing data
test_encoder_input_data = np.zeros((len(input_texts), test_max_encoder_seq_length), dtype="float32")

# defining shapes of input and target sequence of decoder after padding for testing data
test_decoder_input_data = np.zeros((len(input_texts), test_max_decoder_seq_length), dtype="float32")
test_decoder_target_data = np.zeros((len(input_texts), test_max_decoder_seq_length, num_decoder_tokens), dtype="float32")

# Adding testing data
for i, (input_text, target_text) in enumerate(zip(test_input_texts, test_target_texts)):
    for t, char in enumerate(input_text):
        test_encoder_input_data[i, t] = input_token_index[char]
    test_encoder_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        test_decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            test_decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    test_decoder_input_data[i, t + 1: ] = target_token_index[" "]
    test_decoder_target_data[i, t:, target_token_index[" "]] = 1.0

# creating inverse map which maps integer to character
inverse_input_token_index = dict((i, char) for char, i in input_token_index.items())
inverse_target_token_index = dict((i, char) for char, i in target_token_index.items())

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for train inputs:", max_encoder_seq_length)
print("Max sequence length for train outputs:", max_decoder_seq_length)
print("Max sequence length for val inputs:", val_max_encoder_seq_length)
print("Max sequence length for val outputs:", val_max_decoder_seq_length)
print("Max sequence length for test inputs:", test_max_encoder_seq_length)
print("Max sequence length for test outputs:", test_max_decoder_seq_length)
print(input_characters)
print(target_characters)
print(input_token_index)
print(target_token_index)
print(encoder_input_data[10])
print(decoder_input_data[10])
print(decoder_target_data[10])


class TransliterationModel(object):
  def __init__(self, config):
    self.config = config

  def train_and_evaluate(self, encoder_input_data, decoder_input_data, decoder_target_data,
                         val_encoder_input_data, val_target_texts, test_encoder_input_data, test_target_texts):
    # Encoder
    encoder_inputs = Input(shape=(None, ),name = 'Encoder_inputs')

    # Embedding layer: (num_encoder_tokens, input_embedding_size)
    encoder_embedded =  Embedding(num_encoder_tokens, self.config.input_embedding_size,
                         mask_zero=True, name='Encoder_embeddings')(encoder_inputs)
    encoder_outputs = encoder_embedded

    # Adding encoder layers and storing encoder states according to cell type
    if self.config.cell_type == 'RNN':
      encoder_layers = [SimpleRNN(self.config.hidden_units, 
                                  dropout=self.config.dropout, 
                                  return_sequences=True, 
                                  return_state=True, 
                                  name=f"Encoder_{layer_idx}")
                        for layer_idx in range(self.config.num_encoder_layers)]
      encoder_outputs, hidden = encoder_layers[0](encoder_outputs)
      encoder_states = [hidden]
      for layer_idx in range(1, self.config.num_encoder_layers):
        encoder_outputs, hidden = encoder_layers[layer_idx](encoder_outputs, initial_state=encoder_states)
        encoder_states = [hidden]  
    elif self.config.cell_type == 'LSTM':
      encoder_layers = [LSTM(self.config.hidden_units, 
                             dropout=self.config.dropout, 
                             return_sequences=True, 
                             return_state=True, 
                             name=f"Encoder_{layer_idx}")
                        for layer_idx in range(self.config.num_encoder_layers)]
      encoder_outputs, hidden, context = encoder_layers[0](encoder_outputs)
      encoder_states = [hidden, context]
      for layer_idx in range(1, self.config.num_encoder_layers):
        encoder_outputs, hidden, context = encoder_layers[layer_idx](encoder_outputs, initial_state=encoder_states)
        encoder_states = [hidden, context]
    elif self.config.cell_type == 'GRU':
      encoder_layers = [GRU(self.config.hidden_units, 
                            dropout=self.config.dropout, 
                            return_sequences=True, 
                            return_state=True, 
                            name=f"Encoder_{layer_idx}")
                        for layer_idx in range(self.config.num_encoder_layers)]
      encoder_outputs, hidden = encoder_layers[0](encoder_outputs)
      encoder_states = [hidden]
      for layer_idx in range(1, self.config.num_encoder_layers):
        encoder_outputs, hidden = encoder_layers[layer_idx](encoder_outputs, initial_state=encoder_states)
        encoder_states = [hidden]

    # Decoder
    decoder_inputs = Input(shape=(None,), name = 'Decoder_inputs')

    # Embedding layer: (num_decoder_tokens, hidden_units)
    decoder_embedded = Embedding(num_decoder_tokens, self.config.hidden_units,
                       mask_zero=True, name='Decoder_embeddings')(decoder_inputs)
    decoder_outputs = decoder_embedded

    # Adding decoder layers and storing decoder states according to cell type
    if self.config.cell_type == 'RNN':
      decoder_layers = [SimpleRNN(self.config.hidden_units, 
                                  dropout=self.config.dropout, 
                                  return_sequences=True, 
                                  return_state=True, 
                                  name=f"Decoder_{layer_idx}")
                        for layer_idx in range(self.config.num_decoder_layers)]
      decoder_outputs, _ = decoder_layers[0](decoder_outputs, initial_state=encoder_states)
      for layer_idx in range(1, self.config.num_decoder_layers):
        decoder_outputs, _ = decoder_layers[layer_idx](decoder_outputs, initial_state = encoder_states)
    if self.config.cell_type == 'LSTM':
      decoder_layers = [LSTM(self.config.hidden_units, 
                             dropout=self.config.dropout, 
                             return_sequences=True, 
                             return_state=True, 
                             name=f"Decoder_{layer_idx}")
                        for layer_idx in range(self.config.num_decoder_layers)]
      decoder_outputs, _, _ = decoder_layers[0](decoder_outputs, initial_state=encoder_states)
      for layer_idx in range(1, self.config.num_decoder_layers):
        decoder_outputs, _, _ = decoder_layers[layer_idx](decoder_outputs, initial_state = encoder_states)
    elif self.config.cell_type == 'GRU':
      decoder_layers = [GRU(self.config.hidden_units, 
                            dropout=self.config.dropout, 
                            return_sequences=True, 
                            return_state=True, 
                            name=f"Decoder_{layer_idx}")
                        for layer_idx in range(self.config.num_decoder_layers)]
      decoder_outputs, _ = decoder_layers[0](decoder_outputs, initial_state=encoder_states)
      for layer_idx in range(1, self.config.num_decoder_layers):
        decoder_outputs, _ = decoder_layers[layer_idx](decoder_outputs, initial_state=encoder_states)
    decoder_outputs = Dense(num_decoder_tokens, activation='softmax', name='dense')(decoder_outputs)

    # Defining our Seq2seq model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    optimizer = Adam(learning_rate=self.config.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer,
                  metrics=['accuracy'])
    model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=self.config.batch_size,
        epochs=self.config.epochs,
        validation_data=([val_encoder_input_data, val_decoder_input_data], val_decoder_target_data)
    )

    
    # Wrap Encoder Decoder
    encoder_inputs = model.input[0]
    if self.config.cell_type in ['RNN', 'GRU']:
      encoder_outputs, hidden_state = model.get_layer(f'Encoder_{self.config.num_encoder_layers-1}').output
      encoder_states = [hidden_state]
      encoder = Model(encoder_inputs, encoder_states)
      decoder_inputs = model.input[1]
      decoder_outputs = model.get_layer('Decoder_embeddings')(decoder_inputs)
      decoder_states_inputs = []
      decoder_states = []
      for i in range(self.config.num_decoder_layers):
        decoder_hidden = keras.Input(shape=(self.config.hidden_units,))
        states = [decoder_hidden]
        decoder_outputs, hidden_state_decoder = model.get_layer(f'Decoder_{i}')(decoder_outputs, initial_state=states)
        decoder_states += [hidden_state_decoder]
        decoder_states_inputs += states
    elif self.config.cell_type == 'LSTM':
      encoder_outputs, hidden_state, context_state = model.get_layer(f'Encoder_{self.config.num_encoder_layers-1}').output
      encoder_states = [hidden_state, context_state]
      encoder = Model(encoder_inputs, encoder_states)
      decoder_inputs = model.input[1]  # input_1
      decoder_outputs = model.get_layer('Decoder_embeddings')(decoder_inputs)
      decoder_states_inputs = []
      decoder_states = []
      for i in range(self.config.num_decoder_layers):
        decoder_hidden = keras.Input(shape=(self.config.hidden_units,))
        decoder_context = keras.Input(shape=(self.config.hidden_units,))
        states = [decoder_hidden, decoder_context]
        decoder = model.get_layer(f'Decoder_{i}')
        decoder_outputs, hidden_state_decoder, context_state_decoder = decoder(decoder_outputs, initial_state=states)
        decoder_states += [hidden_state_decoder, context_state_decoder]
        decoder_states_inputs += states
    decoder_dense = model.get_layer('dense')
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
 
    #calculating test accuracy
    total, correct = 0, 0
    input_list,output_list, target_list = [], [], []
    for i in range(len(test_texts)):
      curr_input =  test_input_texts[i]
      input_list.append(curr_input.replace(' ', '').replace('S', '').replace('E', ''))
      output = self.decode_to_text(test_encoder_input_data[i:i+1], encoder, decoder)
      target = test_target_texts[i][1:len(test_target_texts[i])-1]
      output = output[0:len(output)-1]
      output = output.replace(' ', '').replace('S', '').replace('E', '')
      target = target.replace(' ', '').replace('S', '').replace('E', '')
      output_list.append(output)
      target_list.append(target)
      
      if output == target:
        correct += 1
      total += 1
      test_word_accuracy = correct / total
      
    word_test_accuracy = correct / total
    print("Test Accuracy: ",word_test_accuracy )

    # Making predictions_vanilla.csv
    df = pd.DataFrame({'inputs': input_list,'targets': target_list,'predictions': output_list})
    df.to_csv('predictions_vanilla.csv')
    
    
  def decode_to_text(self, inputs, encoder, decoder):
    encoder_states = [encoder.predict(inputs) for _ in range(self.config.num_decoder_layers)]
    target = np.array([[target_token_index['S']]])
    sentence, done = "", False
    beam_width = 1 if self.config.decoding_strategy == 'greedy' else self.config.beam_width
    sentence = self.beam_search_decoder(inputs, encoder, decoder, beam_width)
    return sentence
  

  def beam_search_decoder(self, inputs, encoder, decoder, beam_width):
        
        done, decoded_sentence = False, ""


        # Get encoder states
        encoder_states = [encoder.predict(inputs) for _ in range(self.config.num_decoder_layers)]

        # Decoder input begins with Start Token "S"
        target_sequence = np.array([[target_token_index["S"]]])

        # sum_of_log_probs (score), flag for end of current sequence, target_sequence, states , sequence_token, sequence_char
        sequences = [[0.0, 0,  target_sequence, encoder_states,  list(),list()]]
        while not done:
            candidates = list()
            for i in range(len(sequences)):
              output = decoder.predict([sequences[i][2]] + sequences[i][3])
              output_tokens, states = output[0], output[1:]
              prob = output_tokens[0,-1,:]
              
              score, flag, _, _, sequence_token, sequence_char = sequences[i]
              
              if flag == 0:
                for j in range(len(inverse_target_token_index)):
                  char = inverse_target_token_index[j]
                  target_sequence = np.array([[j]])
                  candidate = [score - np.log(prob[j]), 0, target_sequence, states,  sequence_token + [j] , sequence_char + [char] ]
                  candidates.append(candidate)
            sorted_candidates = sorted(candidates, key=lambda x:x[0])
            k = min(beam_width, len(sorted_candidates))
            sequences = sorted_candidates[:k]
            done = True
           
            for sequence in range(len(sequences)):
                score, flag, tgt_seq, states, sequence_token, sequence_char = sequences[sequence]
                if (len(sequence_char) > max_decoder_seq_length) or (sequence_char[-1] == "E"): 
                  flag = 1
                sequences[sequence][1] = flag
                done = False if flag == 0 else done
            if sequences[0][-1][-1]=="E": 
              done = True
        top_decoded_sentence = ''.join(sequences[0][5])
        return top_decoded_sentence

Number of training samples:  44202
Number of validation samples:  4358
Number of testing samples:  4502
Number of samples: 44202
Number of unique input tokens: 27
Number of unique output tokens: 66
Max sequence length for train inputs: 20
Max sequence length for train outputs: 23
Max sequence length for val inputs: 18
Max sequence length for val outputs: 18
Max sequence length for test inputs: 16
Max sequence length for test outputs: 19
[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[' ', 'E', 'S', 'ँ', 'ं', 'ः', 'अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ऑ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', '़', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'ॅ', 'े', 'ै', 'ॉ', 'ो', 'ौ', '्', 'ॐ']
{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k

In [9]:
# Best hyperparameter configuration
cfg_dict = {'batch_size': 256, 'beam_width': 5, 'cell_type': 'GRU', 'decoding_strategy': 'greedy', 'dropout': 0.3, 'epochs': 15, 'hidden_units': 512, 'input_embedding_size': 256, 'learning_rate': 0.0005, 'num_decoder_layers': 3, 'num_encoder_layers': 1}

class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

cfg = dotdict(cfg_dict)
model_transliteration = TransliterationModel(cfg)
model_transliteration.train_and_evaluate(encoder_input_data,decoder_input_data,decoder_target_data,val_encoder_input_data, val_target_texts, test_encoder_input_data, test_target_texts)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test Accuracy:  0.38360728565082186
