# Building your Recurrent Neural Network: Step by Step

In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
with open("dinos.txt", "r") as file:
    data = file.readlines()
    data = '<eos>'.join(data).lower()
    data = data.split('<eos>')
    random.shuffle(data)

chars = ''.join(data)
vocab = sorted(list(set(chars)))

In [3]:
data

['serendipaceratops\n',
 'peishansaurus\n',
 'pneumatoarthrus\n',
 'vulcanodon\n',
 'dimodosaurus\n',
 'coronosaurus\n',
 'shuangbaisaurus\n',
 'eugongbusaurus\n',
 'canardia\n',
 'megalosaurus\n',
 'kittysaurus\n',
 'saichania\n',
 'arcusaurus\n',
 'dandakosaurus\n',
 'siamodracon\n',
 'leptospondylus\n',
 'cheneosaurus\n',
 'opisthocoelicaudia\n',
 'gongpoquansaurus\n',
 'megadontosaurus\n',
 'adamantisaurus\n',
 'quilmesaurus\n',
 'notohypsilophodon\n',
 'diabloceratops\n',
 'pyroraptor\n',
 'loncosaurus\n',
 'aragosaurus\n',
 'mantellodon\n',
 'chuxiongosaurus\n',
 'brasileosaurus\n',
 'serikornis\n',
 'zhenyuanlong\n',
 'shidaisaurus\n',
 'mantellisaurus\n',
 'auroraceratops\n',
 'archaeornis\n',
 'yibinosaurus\n',
 'zupaysaurus\n',
 'vectensia\n',
 'adeopapposaurus\n',
 'velociraptor\n',
 'syntarsus\n',
 'pachyrhinosaurus\n',
 'byronosaurus\n',
 'avaceratops\n',
 'geminiraptor\n',
 'parrosaurus\n',
 'hierosaurus\n',
 'protorosaurus\n',
 'edmontonia\n',
 'trimucrodon\n',
 'xenopos

In [4]:
vocab

['\n',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [5]:
data_size = len(data)
vocab_size = len(vocab)

print('There are {} names and {} unique tokens in your data.'.format(data_size, vocab_size))

There are 1536 names and 27 unique tokens in your data.


In [6]:
char_to_idx = {ch:i for i,ch in enumerate(vocab)}
idx_to_char = {i:ch for i,ch in enumerate(vocab)}

In [7]:
char_to_idx

{'\n': 0,
 'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26}

In [8]:
idx_to_char

{0: '\n',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z'}

In [9]:
max_length_name = max([len(d) for d in data])+5
eos_id = char_to_idx['\n']

X_train = np.zeros(shape=(data_size, max_length_name, vocab_size), dtype='float32')
for i, name in enumerate(data):
    for j, character in enumerate(name):
        index = char_to_idx[character]
        X_train[i,j,index] = 1.0
    X_train[i,len(name):,eos_id] = 1.0

Y_train = np.zeros(shape=(data_size, max_length_name, vocab_size), dtype='float32')
for i in range(data_size):
    for j in range(max_length_name-1):
        Y_train[i,j,:] = X_train[i,j+1,:]
    Y_train[i,max_length_name-1,eos_id] = 1.0

In [10]:
class SimpleRNN():
    
    def __init__(self, units):
        self.units = units
        self.weights = []
        self.built = False
        
    def add_weight(self, shape):
        var_init = tf.random.normal(shape=shape, mean=0.0, stddev=0.05, dtype="float32")
        return tf.Variable(initial_value=var_init, trainable=True)
        
    def build(self, input_dims):
        self.w_xh = self.add_weight(shape=(input_dims, self.units))
        self.weights.append(self.w_xh)
        self.w_hh = self.add_weight(shape=(self.units, self.units))
        self.weights.append(self.w_hh)
        self.b = self.add_weight(shape=(1, self.units))
        self.weights.append(self.b)
        self.built = True

    def __call__(self, inputs):
        if not self.built:
            self.build(input_dims=inputs.shape[2])
        outputs = []
        h = tf.zeros(shape=(1,self.units))
        for t in range(inputs.shape[1]):
            h = tf.math.tanh(tf.matmul(inputs[:,t,:], self.w_xh) + tf.matmul(h, self.w_hh) + self.b)
            outputs.append(h)
        return tf.transpose(tf.convert_to_tensor(outputs), perm=[1,0,2])

class Dense():
    
    def __init__(self, units):
        self.units = units
        self.weights = []
        self.built = False
        
    def add_weight(self, shape):
        var_init = tf.random.normal(shape=shape, mean=0.0, stddev=0.05, dtype="float32")
        return tf.Variable(initial_value=var_init, trainable=True)
        
    def build(self, input_dims):
        self.w = self.add_weight(shape=(input_dims, self.units))
        self.weights.append(self.w)
        self.b = self.add_weight(shape=(1, self.units))
        self.weights.append(self.b)
        self.built = True

    def __call__(self, inputs):
        if not self.built:
            self.build(input_dims=inputs.shape[2])
        z = tf.einsum('dtj, ji ->dti',inputs,self.w) + self.b
        u = tf.math.exp(z)
        return u/tf.math.reduce_sum(u, axis=2, keepdims=True)

In [11]:
class CategoricalCrossentropy():
                    
    def __call__(self, y_true, y_pred):
        return -tf.math.reduce_mean(tf.math.reduce_mean(tf.math.reduce_sum(y_true*tf.math.log(y_pred), axis=2), axis=1), axis=0)

In [12]:
class CategoricalAccuracy():
                    
    def __call__(self, y_true, y_pred):
        y_pred_max = tf.cast(tf.math.argmax(y_pred, axis=2), dtype='float32')
        y_true_max = tf.cast(tf.math.argmax(y_true, axis=2), dtype='float32')
        return tf.math.reduce_mean(tf.math.reduce_mean(1-tf.square(tf.sign(y_true_max-y_pred_max)), axis=1), axis=0)

In [13]:
class Adam():

    def __init__(self, model, learning_rate, beta_1, beta_2, epsilon):
        self.model = model
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon= epsilon
        self.stop_training = False
        self.weights = []
        self.built = False
        
    def add_weight(self, shape):
        w_init = tf.zeros(shape=shape, dtype="float32")
        return  tf.Variable(initial_value=w_init, trainable=False)
    
    def build(self):
        for weight in self.model.weights:
            m = self.add_weight(shape=weight.shape)
            v = self.add_weight(shape=weight.shape)
            self.weights.append([m,v])
        self.built = True
            
    def apply_gradients(self, grads_and_vars):
        if not self.built:
            self.build()
        list_grads_and_vars = list(grads_and_vars)
        for i in range(len(list_grads_and_vars)):
            grad, var = list_grads_and_vars[i]
            m = self.weights[i][0]
            v = self.weights[i][1]
            self.weights[i][0].assign(self.beta_1*m + (1-self.beta_1)*grad)  
            self.weights[i][1].assign(self.beta_2*v + (1-self.beta_2)*grad*grad)
            m_ = (1/(1-self.beta_1))*self.weights[i][0]
            v_ = (1/(1-self.beta_2))*self.weights[i][1]
            var.assign(var - self.learning_rate*m_/(tf.math.sqrt(v_)+self.epsilon))

In [14]:
class ProgbarPrint():

    def __init__(self, model):
        self.model = model
    
    def on_epoch_begin(self, epoch):
        self.start_time = tf.timestamp()
        
    def on_epoch_end(self, epoch, logs):
        now = tf.timestamp()
        time = now - self.start_time
        tf.print('Epochs {}/{} - Loss: {} - Metric: {}'.format(epoch+1, self.model.epochs, logs['loss'], logs['metric']))
        tf.print('----- {}s -----'.format(tf.round(1000*time)/1000))
        
class ReduceLROnPlateau():
        
    def __init__(self, model, patience, error, reduce_factor, min_learning_rate):
        self.model = model
        self.patience = patience
        self.error = error
        self.reduce_factor = reduce_factor
        self.min_learning_rate = min_learning_rate
                        
    def on_epoch_end(self, epoch, logs):
        if epoch==0:
            self.loss = logs['loss']
            self.non_decreasing_epochs = 0
        else:
            if ((self.loss-logs['loss'])>self.error):
                self.loss = logs['loss']
                self.non_decreasing_epochs = 0
            else:
                self.non_decreasing_epochs = self.non_decreasing_epochs+1
        if (self.non_decreasing_epochs == self.patience):
            if (self.model.optimizer.learning_rate>self.min_learning_rate):
                self.model.optimizer.learning_rate = self.reduce_factor*self.model.optimizer.learning_rate
                self.non_decreasing_epochs = 0
        
class EarlyStopping():
        
    def __init__(self, model, patience, error):
        self.model = model
        self.patience = patience
        self.error = error
        
    def on_epoch_end(self, epoch, logs):
        if epoch==0:
            self.loss = logs['loss']
            self.non_decreasing_epochs = 0
        else:
            if ((self.loss-logs['loss'])>self.error):
                self.loss = logs['loss']
                self.non_decreasing_epochs = 0
            else:
                self.non_decreasing_epochs = self.non_decreasing_epochs+1
        if (self.non_decreasing_epochs == self.patience):
            self.model.optimizer.stop_training = True

In [15]:
class Model():
    
    def __init__(self):
        self.h1 = SimpleRNN(units=50)
        self.h2 = Dense(units=Y_train.shape[2])
        self.layers = [self.h1, self.h2]
        self.weights = []
        self.built = False
     
    def build(self):
        for layer in self.layers:
            for weight in layer.weights:
                self.weights.append(weight)
        self.built = True
        
    def __call__(self, inputs): 
        a1 = self.h1(inputs)
        y = self.h2(a1)
        if not self.built:
            self.build()
        return y 
                
    def train_step(self, X, Y):
        num_batches = X.shape[0]//self.batch_size
        for batch in range(num_batches+1):
            if batch<num_batches:
                X_batch = X[batch*self.batch_size:(batch+1)*self.batch_size]
                Y_batch = Y[batch*self.batch_size:(batch+1)*self.batch_size]
            else:
                X_batch = X[num_batches*self.batch_size:]
                Y_batch = Y[num_batches*self.batch_size:]
            with tf.GradientTape() as tape:
                H = self(X_batch)
                loss = self.loss(Y_batch, H)
            grads = tape.gradient(loss, self.weights)
            self.optimizer.apply_gradients(zip(grads, self.weights))
        H = self(X)
        loss = self.loss(Y, H)
        metric = self.metric(Y, H)
        logs = {'loss': loss,
                'metric': metric}
        return logs
        
    def fit(self, X, Y, epochs=1000, learning_rate=0.01, batch_size=64):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size 
        self.loss = CategoricalCrossentropy()
        self.metric = CategoricalAccuracy()
        self.optimizer = Adam(model=self, learning_rate=self.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-07) 
        self.callbacks = [ProgbarPrint(model=self),
                          ReduceLROnPlateau(model=self, patience=200, error=0.0001, reduce_factor=0.1, min_learning_rate=0.001),
                          EarlyStopping(model=self, patience=500, error=0.0001)]
        tf.print('Train on {} samples'.format(X.shape[0]))
        for epoch in range(epochs):
            self.callbacks[0].on_epoch_begin(epoch)
            logs = self.train_step(tf.constant(X, dtype="float32"), tf.constant(Y, dtype="float32"))
            for callback in self.callbacks:
                callback.on_epoch_end(epoch, logs)
            if self.optimizer.stop_training:
                break
            
    def predict(self, inputs):
        return self(tf.constant(inputs, dtype="float32")).numpy()
        
    def evaluate(self, X, Y):
        loss = self.loss(tf.constant(Y, dtype="float32"), self(tf.constant(X, dtype="float32")))
        metric = self.metric(tf.constant(Y, dtype="float32"), self(tf.constant(X, dtype="float32")))
        loss_numpy = loss.numpy()
        metric_numpy = metric.numpy()
        tf.print('Loss: {} - Metric: {}'.format(loss_numpy, metric_numpy))
        return [loss_numpy, metric_numpy]

In [16]:
model = Model()

model.fit(X_train, Y_train)

Train on 1536 samples
Epochs 1/1000 - Loss: 1.8629859685897827 - Metric: 0.6107177734375
----- 0.853s -----
Epochs 2/1000 - Loss: 1.7007414102554321 - Metric: 0.6677042841911316
----- 0.789s -----
Epochs 3/1000 - Loss: 1.0779882669448853 - Metric: 0.6991780400276184
----- 0.767s -----
Epochs 4/1000 - Loss: 0.9389252066612244 - Metric: 0.7223103642463684
----- 0.733s -----
Epochs 5/1000 - Loss: 0.8581793904304504 - Metric: 0.7483723759651184
----- 0.727s -----
Epochs 6/1000 - Loss: 0.8185132145881653 - Metric: 0.75201416015625
----- 0.781s -----
Epochs 7/1000 - Loss: 0.8040559887886047 - Metric: 0.7553914189338684
----- 0.848s -----
Epochs 8/1000 - Loss: 0.7582359313964844 - Metric: 0.7720133662223816
----- 0.765s -----
Epochs 9/1000 - Loss: 0.7414300441741943 - Metric: 0.78436279296875
----- 0.762s -----
Epochs 10/1000 - Loss: 0.7288547158241272 - Metric: 0.7892659306526184
----- 0.738s -----
Epochs 11/1000 - Loss: 0.7369187474250793 - Metric: 0.7849934697151184
----- 0.734s -----
Epoc

Epochs 93/1000 - Loss: 0.5513735413551331 - Metric: 0.8343505859375
----- 0.758s -----
Epochs 94/1000 - Loss: 0.5503204464912415 - Metric: 0.8348795771598816
----- 0.836s -----
Epochs 95/1000 - Loss: 0.5493125319480896 - Metric: 0.8350626826286316
----- 0.754s -----
Epochs 96/1000 - Loss: 0.5483300089836121 - Metric: 0.83538818359375
----- 0.773s -----
Epochs 97/1000 - Loss: 0.5473642349243164 - Metric: 0.8358154296875
----- 0.719s -----
Epochs 98/1000 - Loss: 0.5464163422584534 - Metric: 0.83612060546875
----- 0.757s -----
Epochs 99/1000 - Loss: 0.5454848408699036 - Metric: 0.8364054560661316
----- 0.725s -----
Epochs 100/1000 - Loss: 0.5445546507835388 - Metric: 0.8365071415901184
----- 0.736s -----
Epochs 101/1000 - Loss: 0.543606698513031 - Metric: 0.8369954228401184
----- 0.793s -----
Epochs 102/1000 - Loss: 0.5426365733146667 - Metric: 0.8369954228401184
----- 0.715s -----
Epochs 103/1000 - Loss: 0.5416595339775085 - Metric: 0.8373005986213684
----- 0.717s -----
Epochs 104/1000 -

----- 0.759s -----
Epochs 185/1000 - Loss: 0.4993503987789154 - Metric: 0.849853515625
----- 0.747s -----
Epochs 186/1000 - Loss: 0.49977168440818787 - Metric: 0.8497111201286316
----- 0.73s -----
Epochs 187/1000 - Loss: 0.4998803436756134 - Metric: 0.8498942255973816
----- 0.745s -----
Epochs 188/1000 - Loss: 0.49956226348876953 - Metric: 0.8500162959098816
----- 0.772s -----
Epochs 189/1000 - Loss: 0.49902796745300293 - Metric: 0.8499552607536316
----- 0.775s -----
Epochs 190/1000 - Loss: 0.4980524480342865 - Metric: 0.8504435420036316
----- 0.742s -----
Epochs 191/1000 - Loss: 0.49735260009765625 - Metric: 0.8507283329963684
----- 0.795s -----
Epochs 192/1000 - Loss: 0.4963715374469757 - Metric: 0.8512369990348816
----- 0.738s -----
Epochs 193/1000 - Loss: 0.49576857686042786 - Metric: 0.8512166142463684
----- 0.735s -----
Epochs 194/1000 - Loss: 0.4949217140674591 - Metric: 0.85162353515625
----- 1.173s -----
Epochs 195/1000 - Loss: 0.4945147931575775 - Metric: 0.8517863154411316
-

----- 0.742s -----
Epochs 276/1000 - Loss: 0.4772357642650604 - Metric: 0.8562214970588684
----- 0.741s -----
Epochs 277/1000 - Loss: 0.47860726714134216 - Metric: 0.85577392578125
----- 0.738s -----
Epochs 278/1000 - Loss: 0.486133337020874 - Metric: 0.8536173701286316
----- 0.734s -----
Epochs 279/1000 - Loss: 0.5236865878105164 - Metric: 0.8419392704963684
----- 0.737s -----
Epochs 280/1000 - Loss: 0.49306800961494446 - Metric: 0.8512369990348816
----- 0.751s -----
Epochs 281/1000 - Loss: 0.48156437277793884 - Metric: 0.8544108271598816
----- 0.775s -----
Epochs 282/1000 - Loss: 0.48410412669181824 - Metric: 0.8539225459098816
----- 0.761s -----
Epochs 283/1000 - Loss: 0.49358463287353516 - Metric: 0.8507487177848816
----- 0.729s -----
Epochs 284/1000 - Loss: 0.49190643429756165 - Metric: 0.8514607548713684
----- 0.735s -----
Epochs 285/1000 - Loss: 0.48143765330314636 - Metric: 0.8544921875
----- 0.747s -----
Epochs 286/1000 - Loss: 0.4777616560459137 - Metric: 0.8554890751838684
-

----- 0.76s -----
Epochs 367/1000 - Loss: 0.4676876962184906 - Metric: 0.8583170771598816
----- 0.76s -----
Epochs 368/1000 - Loss: 0.4673287868499756 - Metric: 0.85797119140625
----- 0.756s -----
Epochs 369/1000 - Loss: 0.4677465856075287 - Metric: 0.8579508662223816
----- 0.75s -----
Epochs 370/1000 - Loss: 0.46824121475219727 - Metric: 0.8573201298713684
----- 0.739s -----
Epochs 371/1000 - Loss: 0.4674740731716156 - Metric: 0.8570963740348816
----- 0.723s -----
Epochs 372/1000 - Loss: 0.46663951873779297 - Metric: 0.8571370244026184
----- 0.739s -----
Epochs 373/1000 - Loss: 0.47228431701660156 - Metric: 0.8565470576286316
----- 0.927s -----
Epochs 374/1000 - Loss: 0.47446155548095703 - Metric: 0.8554890751838684
----- 0.74s -----
Epochs 375/1000 - Loss: 0.46911969780921936 - Metric: 0.8568318486213684
----- 0.741s -----
Epochs 376/1000 - Loss: 0.47641924023628235 - Metric: 0.854736328125
----- 0.743s -----
Epochs 377/1000 - Loss: 0.47541508078575134 - Metric: 0.85546875
----- 0.72

----- 0.831s -----
Epochs 458/1000 - Loss: 0.4586373269557953 - Metric: 0.8601887822151184
----- 0.78s -----
Epochs 459/1000 - Loss: 0.45546814799308777 - Metric: 0.8612874150276184
----- 0.809s -----
Epochs 460/1000 - Loss: 0.456211656332016 - Metric: 0.8614705204963684
----- 0.795s -----
Epochs 461/1000 - Loss: 0.4553562104701996 - Metric: 0.8614094853401184
----- 0.798s -----
Epochs 462/1000 - Loss: 0.4549527168273926 - Metric: 0.8617756962776184
----- 0.798s -----
Epochs 463/1000 - Loss: 0.4521547257900238 - Metric: 0.86224365234375
----- 0.774s -----
Epochs 464/1000 - Loss: 0.4537382423877716 - Metric: 0.86260986328125
----- 0.761s -----
Epochs 465/1000 - Loss: 0.45313021540641785 - Metric: 0.8623453974723816
----- 0.779s -----
Epochs 466/1000 - Loss: 0.45276081562042236 - Metric: 0.862548828125
----- 0.774s -----
Epochs 467/1000 - Loss: 0.45141348242759705 - Metric: 0.8625285029411316
----- 0.75s -----
Epochs 468/1000 - Loss: 0.4544887840747833 - Metric: 0.86199951171875
----- 0.

----- 0.724s -----
Epochs 549/1000 - Loss: 0.4499329626560211 - Metric: 0.8635050654411316
----- 0.731s -----
Epochs 550/1000 - Loss: 0.45232895016670227 - Metric: 0.8628336787223816
----- 0.745s -----
Epochs 551/1000 - Loss: 0.4470764398574829 - Metric: 0.8642578125
----- 0.737s -----
Epochs 552/1000 - Loss: 0.4503974914550781 - Metric: 0.8626912236213684
----- 0.727s -----
Epochs 553/1000 - Loss: 0.45059147477149963 - Metric: 0.8629353642463684
----- 0.753s -----
Epochs 554/1000 - Loss: 0.4502764642238617 - Metric: 0.8628336787223816
----- 0.738s -----
Epochs 555/1000 - Loss: 0.45406675338745117 - Metric: 0.8614705204963684
----- 0.736s -----
Epochs 556/1000 - Loss: 0.4618602693080902 - Metric: 0.8594157099723816
----- 0.739s -----
Epochs 557/1000 - Loss: 0.4735022485256195 - Metric: 0.8559367060661316
----- 0.73s -----
Epochs 558/1000 - Loss: 0.47088584303855896 - Metric: 0.8568522334098816
----- 0.721s -----
Epochs 559/1000 - Loss: 0.4664480686187744 - Metric: 0.8578287959098816
--

----- 0.849s -----
Epochs 640/1000 - Loss: 0.4466297924518585 - Metric: 0.86297607421875
----- 0.746s -----
Epochs 641/1000 - Loss: 0.45055094361305237 - Metric: 0.8621419072151184
----- 0.759s -----
Epochs 642/1000 - Loss: 0.4572724401950836 - Metric: 0.8600260615348816
----- 0.784s -----
Epochs 643/1000 - Loss: 0.4548821449279785 - Metric: 0.8610636591911316
----- 0.753s -----
Epochs 644/1000 - Loss: 0.45175495743751526 - Metric: 0.861328125
----- 0.738s -----
Epochs 645/1000 - Loss: 0.4535259008407593 - Metric: 0.8604736328125
----- 0.76s -----
Epochs 646/1000 - Loss: 0.45785439014434814 - Metric: 0.8590291142463684
----- 0.752s -----
Epochs 647/1000 - Loss: 0.4671598970890045 - Metric: 0.8560994267463684
----- 0.773s -----
Epochs 648/1000 - Loss: 0.4576960504055023 - Metric: 0.8592529296875
----- 0.77s -----
Epochs 649/1000 - Loss: 0.44880518317222595 - Metric: 0.8620198369026184
----- 0.752s -----
Epochs 650/1000 - Loss: 0.44308510422706604 - Metric: 0.8631591796875
----- 0.726s -

----- 0.724s -----
Epochs 731/1000 - Loss: 0.43977198004722595 - Metric: 0.8653971552848816
----- 0.819s -----
Epochs 732/1000 - Loss: 0.43902459740638733 - Metric: 0.8650105595588684
----- 0.768s -----
Epochs 733/1000 - Loss: 0.4387001097202301 - Metric: 0.8658650517463684
----- 0.762s -----
Epochs 734/1000 - Loss: 0.43735471367836 - Metric: 0.8662922978401184
----- 0.859s -----
Epochs 735/1000 - Loss: 0.43741750717163086 - Metric: 0.8658244013786316
----- 1.074s -----
Epochs 736/1000 - Loss: 0.43977227807044983 - Metric: 0.8656005859375
----- 1.109s -----
Epochs 737/1000 - Loss: 0.43909287452697754 - Metric: 0.8653564453125
----- 1.38s -----
Epochs 738/1000 - Loss: 0.43637290596961975 - Metric: 0.8659871220588684
----- 1.032s -----
Epochs 739/1000 - Loss: 0.4372759759426117 - Metric: 0.86505126953125
----- 1.098s -----
Epochs 740/1000 - Loss: 0.4409407079219818 - Metric: 0.8655802607536316
----- 0.852s -----
Epochs 741/1000 - Loss: 0.44164538383483887 - Metric: 0.8642374873161316
---

----- 0.725s -----
Epochs 822/1000 - Loss: 0.4422447979450226 - Metric: 0.8635457158088684
----- 0.706s -----
Epochs 823/1000 - Loss: 0.4369446337223053 - Metric: 0.8656005859375
----- 0.799s -----
Epochs 824/1000 - Loss: 0.43362656235694885 - Metric: 0.8665364384651184
----- 0.737s -----
Epochs 825/1000 - Loss: 0.4316519796848297 - Metric: 0.8677978515625
----- 0.74s -----
Epochs 826/1000 - Loss: 0.43371906876564026 - Metric: 0.8674113154411316
----- 0.728s -----
Epochs 827/1000 - Loss: 0.4309968948364258 - Metric: 0.8676554560661316
----- 0.728s -----
Epochs 828/1000 - Loss: 0.43065890669822693 - Metric: 0.8678995966911316
----- 0.714s -----
Epochs 829/1000 - Loss: 0.42904844880104065 - Metric: 0.8682861328125
----- 0.704s -----
Epochs 830/1000 - Loss: 0.43002942204475403 - Metric: 0.8679606318473816
----- 0.704s -----
Epochs 831/1000 - Loss: 0.43142732977867126 - Metric: 0.86724853515625
----- 0.831s -----
Epochs 832/1000 - Loss: 0.42833948135375977 - Metric: 0.8685302734375
----- 0

----- 0.724s -----
Epochs 913/1000 - Loss: 0.43634772300720215 - Metric: 0.8662109375
----- 0.722s -----
Epochs 914/1000 - Loss: 0.44896364212036133 - Metric: 0.86212158203125
----- 0.711s -----
Epochs 915/1000 - Loss: 0.4576941728591919 - Metric: 0.8594767451286316
----- 0.743s -----
Epochs 916/1000 - Loss: 0.45166802406311035 - Metric: 0.8616536259651184
----- 0.768s -----
Epochs 917/1000 - Loss: 0.45692959427833557 - Metric: 0.8594767451286316
----- 0.756s -----
Epochs 918/1000 - Loss: 0.45469486713409424 - Metric: 0.8599039912223816
----- 0.725s -----
Epochs 919/1000 - Loss: 0.43829312920570374 - Metric: 0.8662516474723816
----- 0.734s -----
Epochs 920/1000 - Loss: 0.4384554922580719 - Metric: 0.8653767704963684
----- 0.728s -----
Epochs 921/1000 - Loss: 0.4307072162628174 - Metric: 0.867919921875
----- 0.72s -----
Epochs 922/1000 - Loss: 0.4393278658390045 - Metric: 0.8641154170036316
----- 0.721s -----
Epochs 923/1000 - Loss: 0.4431510269641876 - Metric: 0.8636271357536316
----- 

In [17]:
for i in range(100):
    name = ''
    for idx in np.argmax(X_train[i,:,:], axis=1):
        name = name + idx_to_char[idx]
        if idx==0:
            break
    name_prediction = name[0]
    for idx in np.argmax(model.predict(X_train)[i,:,:], axis=1):
        name_prediction = name_prediction + idx_to_char[idx]
        if idx==0:
            break
    print('Original Name: {} - Predicted Name: {}'.format(name.split()[0], name_prediction.split()[0]))

Original Name: serendipaceratops - Predicted Name: salenaosaseratops
Original Name: peishansaurus - Predicted Name: panshantaurus
Original Name: pneumatoarthrus - Predicted Name: paeuratossasous
Original Name: vulcanodon - Predicted Name: veatanoson
Original Name: dimodosaurus - Predicted Name: danohosaurus
Original Name: coronosaurus - Predicted Name: chlhsosaurus
Original Name: shuangbaisaurus - Predicted Name: saaanggisaaurus
Original Name: eugongbusaurus - Predicted Name: eoclngsasaurus
Original Name: canardia - Predicted Name: chmamoiau
Original Name: megalosaurus - Predicted Name: magalosaurus
Original Name: kittysaurus - Predicted Name: kuntasaurus
Original Name: saichania - Predicted Name: sauchanoa
Original Name: arcusaurus - Predicted Name: anchsaurus
Original Name: dandakosaurus - Predicted Name: dacdanisaurus
Original Name: siamodracon - Predicted Name: sanmisoittp
Original Name: leptospondylus - Predicted Name: laiiolaondylus
Original Name: cheneosaurus - Predicted Name: c