In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import numpy as np
import random
import re
from collections import Counter 
from tqdm import tqdm

In [2]:
datatf = tfds.load('tiny_shakespeare')

In [3]:
shakes_train, shakes_test = datatf['train'], datatf['test']
shakes_train = shakes_train.map(lambda t: t['text'])
# shakes_train = shakes_train.map(lambda t: t.numpy())

In [4]:
i = 1
string= '123456'
s = string[:-i]
s

'12345'

In [5]:
id_counter = 1
dict_codes = {}
dict_words = {}
dict_counts = {}
word_counts = 0

In [6]:
data =[]
bunches = []
coded_data = None

window_length = 20
for c, i in enumerate(shakes_train):
  
  text = i.numpy()
  text = str(text).strip('b"')
  text = text.lower()

  text = text.replace("\\n", " ")
  text = text.replace("  ", " ")

  data = [char for char in text]
  chars = set(data)
  dict_codes = {}
  dict_chars = {}
  for i, char in enumerate(chars):
    dict_codes[char] = i
    dict_chars[i] = char

  coded_data = [[float(dict_codes[char])] for char in data]

 
print(coded_data[:20])

[[20.0], [11.0], [9.0], [25.0], [17.0], [35.0], [5.0], [11.0], [17.0], [11.0], [23.0], [13.0], [34.0], [18.0], [35.0], [7.0], [13.0], [20.0], [2.0], [9.0]]


In [7]:
input = np.array([coded_data[ i : i + 20] for i in range(len(coded_data) - 21)])
target = np.array([coded_data[i + 1: i + 21] for i in range(len(coded_data) - 21)])


In [8]:
input.shape, target.shape

((997536, 20, 1), (997536, 20, 1))

In [9]:
dataset = tf.data.Dataset.from_tensor_slices(input)
labels = tf.data.Dataset.from_tensor_slices(target)

datas = dataset.batch(64)
targets = labels.batch(64)

In [10]:
ds = tf.data.Dataset.zip((dataset, labels)).batch(64)

In [11]:
datas

<BatchDataset shapes: (None, 20, 1), types: tf.float64>

In [25]:
# We use a dynamic learning rate which decays exponantially
# As an optimiser we use adam

lr = tf.keras.optimizers.schedules.ExponentialDecay(0.001, 
                                                    5000, 
                                                    0.96,
                                                    staircase=True)
opt = tf.optimizers.Adam(lr)

In [28]:
def most_similar(code, emb):
  csf = keras.losses.CosineSimilarity()
  emb_word = emb[code]
  cos_sim = [csf(emb_word, emb[i]) if code != i else 1 for i in range(10000)]
  min_val = min(cos_sim)
  min_index = cos_sim.index(min_val)
  return dict_words_new[min_index]


In [31]:
test_sent = "I sincerely like pla"
for c, i in enumerate(test_sent):
  
  data = [char for char in test_sent]
  chars = set(data)
  dict_codes = {}
  dict_chars = {}
  for i, char in enumerate(chars):
    dict_codes[char] = i
    dict_chars[i] = char

  coded_data = [[float(dict_codes[char])] for char in data]
 
print(coded_data[:20])

test = tf.reshape(tf.convert_to_tensor(coded_data), (1,20,1))
test

[[8.0], [5.0], [7.0], [9.0], [3.0], [2.0], [11.0], [10.0], [11.0], [0.0], [12.0], [5.0], [0.0], [9.0], [4.0], [11.0], [5.0], [6.0], [0.0], [1.0]]


<tf.Tensor: shape=(1, 20, 1), dtype=float32, numpy=
array([[[ 8.],
        [ 5.],
        [ 7.],
        [ 9.],
        [ 3.],
        [ 2.],
        [11.],
        [10.],
        [11.],
        [ 0.],
        [12.],
        [ 5.],
        [ 0.],
        [ 9.],
        [ 4.],
        [11.],
        [ 5.],
        [ 6.],
        [ 0.],
        [ 1.]]], dtype=float32)>

In [32]:
def testing(model):
  pred = model(test).numpy()
  sentence = [dict_chars[int(i)] for i in pred[0][1]]

  return sentence

In [33]:
class LSTM(tf.keras.Model):
    
    def __init__(self, return_sequences=True):
        
        super(LSTM, self).__init__()
        self.rnn = tf.keras.layers.LSTM(8, return_sequences=True)
        # self.rnn = tf.keras.layers.RNN([tf.keras.layers.LSTMCell(8, return_sequences=True) for _ in range(5)], return_sequences)
        self.output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
        
    
    def call(self, x):
        
        x = self.rnn(x)
        x = self.output_layer(x)
        return x

In [34]:
def train(model, input, target, loss_f, optimizer): 
  with tf.GradientTape() as tape: 
    prediction = model(input)
    loss = loss_f(target, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  acc = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)


  return np.mean(loss.numpy()), np.mean(acc)

In [35]:
tf.keras.backend.clear_session()

num_epochs = 10
learning_rate = lr
running_average_factor = 0.95


cross_entropy_loss = tf.keras.losses.categorical_crossentropy

optimizer = opt

In [36]:
model = LSTM(return_sequences=True)

# Custom training loop
# Each epoch the model will learn on the shuffled and batched training data and will then evaluate the training step on the whole test dataset

for epoch in range(num_epochs):
  print('Epoch:__' + str(epoch))

  # tr_ds = ds.shuffle(buffer_size=128).prefetch(2)
  # te_ds = te_ds.shuffle(buffer_size=128).prefetch(2)
  train_losses = []
  train_accuracies = [] 

  running_average = 0
  batch_acc = []
  for (input, target) in tqdm(ds):

    train_l, train_acc = train(model, input, target, cross_entropy_loss, optimizer)
    running_average = (running_average_factor * running_average) + (1 - running_average_factor) * train_l
    batch_acc.append(train_acc)

  pred = testing(model)
  print(pred)
  train_losses.append(running_average)
  train_accuracies.append(np.mean(batch_acc))
  print('Train Accuracy: ', train_accuracies[-1])

  # for word in track_codes:
  #   sim_word = most_similar(word, model.layer_1.get_weights()[0])
  #   print( dict_words_new[word], ': ', sim_word)

  0%|          | 0/15587 [00:00<?, ?it/s]

Epoch:__0


100%|██████████| 15587/15587 [03:01<00:00, 86.03it/s]


TypeError: ignored

In [None]:
# Visualize accuracy and loss for training and test data. 
# One plot training and test loss.
# One plot training and test accuracy.
plt.figure()
line1, = plt.plot(train_losses)
line2, = plt.plot(test_losses)
plt.xlabel("Training steps")
plt.ylabel("Loss")
plt.legend((line1,line2),("training","test"))
plt.show()

plt.figure()
line1, = plt.plot(train_accuracies)
line2, = plt.plot(test_accuracies)
plt.xlabel("Training steps")
plt.ylabel("Accuracy")
plt.show()