In [7]:
import tensorflow as tf
import numpy as np
import sys

Time to load data



1.   Load all data
2.   Make One Hot Encoding
3.   Prepare input and output sequence

a b i l i t y

a -> b

b -> i, a,b -> i

i -> l, a,b,i -> l

l -> i, a,b,i,l -> i

i -> t, a,b,i,l,i -> t

t -> y, a,b,i,l,i,t -> y

input sequence **a b i l i t**

output sequence **b i l i t y**



In [8]:
def prepare_data(word_filename,alphabet_filename):
    """Read the training vocabulary and the alphabet from two text files.

    Both files are read line by line; only the newline is stripped from each
    line, so any other whitespace (e.g. a space used as a character) is kept.

    Args:
        word_filename: path to the word list, one word per line. Words with
            fewer than 3 characters are dropped.
        alphabet_filename: path to the alphabet file, one character per line.

    Returns:
        (all_words, alphabets): two lists of strings, in file order.
    """
    # `with` closes each handle even if reading raises part-way through.
    with open(word_filename) as word_file:
        stripped_lines = (line.strip("\n") for line in word_file)
        all_words = [word for word in stripped_lines if len(word) > 2]

    with open(alphabet_filename) as alphabet_file:
        stripped_lines = (line.strip("\n") for line in alphabet_file)
        # A blank line would otherwise become an empty "character" that can
        # never match anything but still widens every one-hot vector.
        alphabets = [character for character in stripped_lines if character]

    return all_words,alphabets

def convert_to_onehot(character,all_characters):
    """Return the one-hot encoding of `character` within `all_characters`.

    The result is a 1-D float array of length len(all_characters) that is 1 at
    the position of `character` and 0 everywhere else. `list.index` raises
    ValueError when the character is not part of the alphabet.
    """
    position = all_characters.index(character)
    encoded = np.zeros([len(all_characters)])
    encoded[position] = 1
    return encoded

def make_train_and_target(word,all_characters):
    """Build the next-character training pair for a single word.

    The word is lower-cased and apostrophes are removed. Every character except
    the last becomes an input step, and the character that follows it becomes
    the matching target step.

    Returns:
        (x, y): two equally long lists of one-hot vectors; both are empty when
        the cleaned word has fewer than two characters.
    """
    cleaned = word.lower().replace("'","")
    x, y = [], []
    # Walk adjacent character pairs: (c0, c1), (c1, c2), ...
    for current_ch, following_ch in zip(cleaned, cleaned[1:]):
        x.append(convert_to_onehot(current_ch,all_characters))
        y.append(convert_to_onehot(following_ch,all_characters))
    return x,y

Test Data loading

In [9]:
# Load the word list and the alphabet, then peek at both.
words,characters = prepare_data(
    '/content/sample_data/words.txt',
    '/content/sample_data/alphabates.txt',
)
print(words[:10])
print(characters)

# Encode one sample word and show the word followed by its input/target sequences.
x,y = make_train_and_target(words[100],characters)
for shown in (words[100], x, y):
    print(shown)

['ability', 'able', 'about', 'above', 'accept', 'according', 'account', 'across', 'act', 'action']
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
blue
[array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0.])]
[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0.]), array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])]


Create RNN Encoder with three hardcoded layers

In [10]:
class MYRNN(tf.keras.Model):
  """Stack of three GRU layers followed by a softmax Dense layer.

  Every GRU is created with return_sequences=True, so the network emits one
  output per timestep (many-to-many).

  Args:
    Nc: number of units in the softmax output layer (number of classes).
  """

  def __init__(self,Nc):
    super().__init__()
    # GRU widths grow 16 -> 32 -> 64. For a many-to-one model the last GRU
    # would use return_sequences=False instead.
    self.rnn_1 = tf.keras.layers.GRU(16,return_sequences=True)
    self.rnn_2 = tf.keras.layers.GRU(32,return_sequences=True)
    self.rnn_3 = tf.keras.layers.GRU(64,return_sequences=True)
    # Dense is applied at each timestep and already normalises with softmax.
    self.output_layer = tf.keras.layers.Dense(Nc,activation='softmax')
    print("Initalized")

  def call(self,inputs):
    """Forward pass: inputs (N,T,F) -> per-timestep class probabilities (N,T,Nc)."""
    hidden = inputs
    # Feature width after each layer: 16, 32, 64.
    for recurrent_layer in (self.rnn_1, self.rnn_2, self.rnn_3):
      hidden = recurrent_layer(hidden)
    return self.output_layer(hidden)

Use RNN in models, optimize for next character prediction

In [11]:
class CharacterModel:
  """Trains and runs a MYRNN next-character predictor, one word per step.

  Args:
    Nc: number of character classes (length of the one-hot alphabet).
    save_dir: directory handed to tf.train.CheckpointManager for checkpoints.
  """

  def __init__(self,Nc,save_dir):
    self.network = MYRNN(Nc)
    self.optimizer = tf.keras.optimizers.Adam(0.0001)
    # Only the network is registered with the checkpoint; optimizer state is not.
    self.checkpoint = tf.train.Checkpoint(model=self.network)
    self.savepath = save_dir
    self.weight_manager = tf.train.CheckpointManager(self.checkpoint,self.savepath,max_to_keep=1)

  def train_step(self,x,y):
    """Apply one optimizer update for a single sequence and return its loss.

    Args:
      x: input sequence of one-hot vectors (T,Nc).
      y: target sequence of one-hot vectors (T,Nc).

    Returns:
      Scalar tensor: categorical cross-entropy averaged over the timesteps.
    """
    x = tf.expand_dims(x,0) # 1,T,Nc
    y = tf.expand_dims(y,0) # 1,T,Nc
    with tf.GradientTape() as tape:
      # The output layer already applies softmax, hence from_logits=False.
      probabilities = self.network(x)
      loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y,probabilities,from_logits=False))
    # Only the forward pass needs to be recorded; differentiate after the tape closes.
    weights = self.network.trainable_variables
    grads = tape.gradient(loss,weights)
    self.optimizer.apply_gradients(zip(grads,weights))
    return loss

  def train(self,train_data,all_characters,epochs):
    """Train for `epochs` passes over `train_data`, saving a checkpoint per epoch.

    Args:
      train_data: sequence of words; it is NOT modified by this method.
      all_characters: alphabet used for one-hot encoding.
      epochs: number of passes over the data.

    Raises:
      ValueError: if `train_data` is empty.
    """
    total = len(train_data)
    if total == 0:
      raise ValueError("train_data is empty; nothing to train on")
    for e in range(epochs):
      e_loss = 0.0
      trained = 0
      # Shuffle a copy so the caller's list keeps its order across calls.
      epoch_words = list(train_data)
      np.random.shuffle(epoch_words)
      for i in range(total):
        word = epoch_words[i]
        x,y = make_train_and_target(word,all_characters)
        if len(x) == 0:
          # Fewer than two usable characters: no (input, target) pair to learn from.
          continue
        b_loss = float(self.train_step(x,y))
        e_loss += b_loss
        trained += 1
        sys.stdout.write("\rBatch %d/%d Loss %0.4f word %s"%(i,total,b_loss,word))
        sys.stdout.flush()
      e_loss /= max(trained,1) # average over the words actually trained this epoch
      self.weight_manager.save()
      print("\n Epoch %d/%d Loss %0.4f"%(e,epochs,e_loss))

  def load_model(self):
    """Restore network weights from the newest checkpoint in the save directory."""
    restore_from = tf.train.latest_checkpoint(self.savepath)
    if restore_from is None:
      # Nothing to restore: say so instead of reporting a restore from None.
      print("no checkpoint found in %s"%self.savepath)
      return
    self.checkpoint.restore(restore_from)
    print("model restored from %s"%restore_from)

  def predict(self,x):
    """Return the most likely class index per timestep as a (1,T) numpy array."""
    x = tf.expand_dims(x,0)
    probabilities = self.network(x)
    prediction = tf.argmax(probabilities,-1)
    return prediction.numpy()

Time to train model

In [12]:
SAVE_IN = 'Weights/'
# Pass SAVE_IN as the checkpoint directory; it was previously defined but unused ('' was passed instead).
model = CharacterModel(len(characters),SAVE_IN)
model.train(words,characters,20)

Initalized


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Batch 971/972 Loss 3.5695 word though
 Epoch 0/20 Loss 3.0772
Batch 971/972 Loss 2.6072 word rise
 Epoch 1/20 Loss 2.9012
Batch 971/972 Loss 2.5007 word none
 Epoch 2/20 Loss 2.8675
Batch 971/972 Loss 2.9112 word argue
 Epoch 3/20 Loss 2.8445
Batch 971/972 Loss 2.9157 word account
 Epoch 4/20 Loss 2.8196
Batch 971/972 Loss 2.6047 word sell
 Epoch 5/20 Loss 2.7810
Batch 971/972 Loss 2.4604 word safe
 Epoch 6/20 Loss 2.7227
Batch 971/972 Loss 2.7657 word administration
 Epoch 7/20 Loss 2.6733
Batch 971/972 Loss 3.6018 word staff
 Epoch 8/20 Loss 2.6333
Batch 971/972 Loss 3.0845 word floor
 Epoch 9/20 Loss 2.5997
Batch 971/972 Loss 2.7986 word serious
 Epoch 10/20 Loss 2.5704
Batch 

**Exercise**



1.   Implement a batching mechanism with batch size (BS) > 2
2.   Change the architecture
3.   Extend the code so that it can handle more than one word (include space as a character)

