In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import numpy as np
import random
import re
from collections import Counter 
from tqdm import tqdm
import sklearn
from sklearn.metrics import pairwise_distances

In [2]:
datatf = tfds.load('tiny_shakespeare')

In [3]:
shakes_train, shakes_test = datatf['train'], datatf['test']
shakes_train = shakes_train.map(lambda t: t['text'])
# shakes_train = shakes_train.map(lambda t: t.numpy())

In [4]:
id_counter = 1
dict_codes = {}
dict_words = {}
dict_counts = {}
word_counts = 0

In [5]:
data =[]
for c, i in enumerate(shakes_train):
  
  text = i.numpy()
  text = str(text).strip('b"')
  text = text.lower()

  text = text.replace("\\n", " ")
  text = text.replace("  ", " ")
  text = re.sub('[^a-z0-9 ]+', '', text)
  text = text.split(' ')
  print(text)
  data = data + text
  for word in text:
    word_counts += 1
    if word in dict_codes:
      dict_counts[word] += 1
    else:
      dict_codes[word] = id_counter
      dict_counts[word] = 1
      dict_words[id_counter] = word
      id_counter += 1
  break
print(data)



In [6]:
print(len(data))

182504


In [7]:
len(dict_counts)

12073

In [8]:
k = Counter(dict_counts)
top_words = k.most_common(10000)


In [9]:
print(top_words)



In [10]:
dict_codes_new = {}
dict_words_new = {}

for word, _ in top_words:
  dict_codes_new[word] = dict_codes[word]
  dict_words_new[dict_codes[word]] = word

In [11]:
keys = list(dict_codes_new.keys())
coded_data = [dict_codes_new[x] if x in keys else 0 for x in data]

In [12]:
word = []
context = []

for i in range(len(coded_data[:-4])):
  for m in range(1,5):
    # print(m)
    word.append(coded_data[i])
    word.append(coded_data[i+m])
    context.append(coded_data[i+m])
    context.append(coded_data[i])

In [13]:
ds_x = tf.data.Dataset.from_tensor_slices(word)
ds_y = tf.data.Dataset.from_tensor_slices(context)
ds_x = ds_x.map(lambda te: tf.one_hot(te, 10000))
ds_x = ds_x.shuffle(2).batch(64)
ds_y = ds_y.map(lambda te: tf.one_hot(te, 10000))
ds_y = ds_y.shuffle(2).batch(64)

ds = tf.data.Dataset.zip((ds_x,ds_y))
# ds = ds.map(lambda t : print(t))

In [14]:
from keras import Model
class Skip_Gram(Model): 
  def __init__(self):
    super(Skip_Gram, self).__init__()

    self.layer_1 = keras.layers.Dense(64, 'relu', use_bias=False)
    self.layer_2 = keras.layers.Dense(10000, 'softmax', use_bias=False)
    
  def call(self, x):
    emb = self.layer_1(x)
    out = self.layer_2(emb)

    return out

In [15]:
# We use a dynamic learning rate which decays exponantially
# As an optimiser we use adam

lr = tf.keras.optimizers.schedules.ExponentialDecay(0.001, 
                                                    5000, 
                                                    0.96,
                                                    staircase=True)
opt = tf.optimizers.Adam(lr)

In [16]:
# Training method returns mean loss and mean accuracy of the batch

def train(model, input, target, loss_f, optimizer): 
  with tf.GradientTape() as tape: 
    prediction = model(input)
    loss = loss_f(target, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  acc = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)


  return np.mean(loss.numpy()), np.mean(acc)


# Test method takes in whole test dataset and returns mean loss and mean accuracy on the whole test data
def test(model, test_data, loss_f): 
  test_acc = []
  test_loss = []

  for (input, target) in test_data: 
    prediction = model(input, training=False)
    loss = loss_f(target, prediction)
    loss = np.mean(loss.numpy())
    acc = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    test_loss.append(loss)
    test_acc.append(np.mean(acc))
  

  fin_loss = np.mean(np.array(test_loss))
  fin_acc = np.mean(test_acc)

  return fin_loss, fin_acc

In [17]:
# Define additional hyperparameters

# Loss is categorical crossentropy
# The model will train for 30 epochs

tf.keras.backend.clear_session()

num_epochs = 10
learning_rate = lr
running_average_factor = 0.95


cross_entropy_loss = tf.keras.losses.categorical_crossentropy

optimizer = opt

train_losses = []
train_accuracies = []
track_words = ['queen', 'throne', 'wine', 'poison', 'love', 'strong', 'day']
track_codes = [dict_codes_new[word] for word in track_words]
#track_vectors = [tf.one_hot(word, 10000) for word in track_codes]

In [36]:
def most_similar(code, emb):
  csf = keras.losses.CosineSimilarity()
  emb_word = emb[code]

  new_m = np.concatenate([emb, emb_word[None,:]], axis=0)
  distance_matrix = sklearn.metrics.pairwise_distances(new_m, metric="cosine")
  distances = distance_matrix[-1,:-1]

  inx = np.argsort(distances)

  return dict_words_new[inx[1]]


In [37]:
for word in track_codes: 
  sim = most_similar(word, model.layer_1.get_weights()[0])
  print(sim)

elizabeth
sink
yorks
enrolld
appetite
churchmen
time


In [None]:
# csf = keras.losses.CosineSimilarity()
# csf(tf.constant([1,0]), tf.constant([1,0]))

In [21]:
model = Skip_Gram()
model.layer_1.get_weights()

[]

In [38]:
model = Skip_Gram()

# Custom training loop
# Each epoch the model will learn on the shuffled and batched training data and will then evaluate the training step on the whole test dataset

for epoch in range(num_epochs):
  print('Epoch:__' + str(epoch))

  tr_ds = ds.shuffle(buffer_size=128).prefetch(2)
  # te_ds = te_ds.shuffle(buffer_size=128).prefetch(2)


  running_average = 0
  batch_acc = []
  for (input, target) in tqdm(ds): 
    train_l, train_acc = train(model, input, target, cross_entropy_loss, optimizer)
    running_average = (running_average_factor * running_average) + (1 - running_average_factor) * train_l
    batch_acc.append(train_acc)

  train_losses.append(running_average)
  train_accuracies.append(np.mean(batch_acc))
  print('Train Accuracy: ', train_accuracies[-1])

  for word in track_codes:
    sim_word = most_similar(word, model.layer_1.get_weights()[0])
    print( dict_words_new[word], ': ', sim_word)

  0%|          | 6/22813 [00:00<06:41, 56.81it/s]

Epoch:__0


100%|██████████| 22813/22813 [05:08<00:00, 73.92it/s]


Train Accuracy:  0.11186398654276071
queen :  york
throne :  bears
wine :  skyaspiring
poison :  darling
love :  faint
strong :  idolatry


  0%|          | 6/22813 [00:00<06:27, 58.90it/s]

day :  this
Epoch:__1


100%|██████████| 22813/22813 [05:11<00:00, 73.31it/s]


Train Accuracy:  0.15246515144873538
queen :  margaret
throne :  plantagenet
wine :  bestrides
poison :  ifs
love :  oak
strong :  swear


  0%|          | 7/22813 [00:00<05:45, 66.00it/s]

day :  contrary
Epoch:__2


100%|██████████| 22813/22813 [05:07<00:00, 74.11it/s]


Train Accuracy:  0.16520391333888573
queen :  margaret
throne :  york
wine :  howsoever
poison :  dancing
love :  far
strong :  swear


  0%|          | 7/22813 [00:00<05:37, 67.53it/s]

day :  contrary
Epoch:__3


100%|██████████| 22813/22813 [05:06<00:00, 74.35it/s]


Train Accuracy:  0.17049352342962346
queen :  margaret
throne :  york
wine :  howsoever
poison :  same
love :  leg
strong :  bold


  0%|          | 7/22813 [00:00<05:34, 68.11it/s]

day :  iv
Epoch:__4


100%|██████████| 22813/22813 [05:07<00:00, 74.27it/s]


Train Accuracy:  0.17322360057861746
queen :  margaret
throne :  york
wine :  croppd
poison :  same
love :  leg
strong :  humane


  0%|          | 6/22813 [00:00<07:02, 53.95it/s]

day :  contrary
Epoch:__5


100%|██████████| 22813/22813 [05:07<00:00, 74.24it/s]


Train Accuracy:  0.17559889098321133
queen :  margaret
throne :  iv
wine :  sage
poison :  grains
love :  far
strong :  humane


  0%|          | 6/22813 [00:00<06:40, 56.90it/s]

day :  contrary
Epoch:__6


100%|██████████| 22813/22813 [05:08<00:00, 73.83it/s]


Train Accuracy:  0.17744405602068997
queen :  margaret
throne :  iv
wine :  archbishop
poison :  same
love :  leg
strong :  powerful


  0%|          | 7/22813 [00:00<05:57, 63.71it/s]

day :  contrary
Epoch:__7


100%|██████████| 22813/22813 [05:07<00:00, 74.18it/s]


Train Accuracy:  0.17954674966028142
queen :  margaret
throne :  iv
wine :  leather
poison :  same
love :  leg
strong :  humane


  0%|          | 7/22813 [00:00<05:44, 66.12it/s]

day :  mockery
Epoch:__8


100%|██████████| 22813/22813 [05:05<00:00, 74.74it/s]


Train Accuracy:  0.18185902774733703
queen :  margaret
throne :  iv
wine :  leather
poison :  grains
love :  leg
strong :  minutes


  0%|          | 7/22813 [00:00<05:46, 65.87it/s]

day :  division
Epoch:__9


100%|██████████| 22813/22813 [05:05<00:00, 74.60it/s]


Train Accuracy:  0.18244120676807082
queen :  margaret
throne :  iv
wine :  leather
poison :  grains
love :  leg
strong :  minutes
day :  division


In [None]:
  for word in track_codes:
    sim_word = most_similar(word, model.layer_1.get_weights()[0])
    print( word, ': ', sim_word)

In [None]:
emb = model.layer_1.get_weights()[0]
tf.shape(emb)

In [None]:
# Visualize accuracy and loss for training and test data. 
# One plot training and test loss.
# One plot training and test accuracy.
plt.figure()
line1, = plt.plot(train_losses)
line2, = plt.plot(test_losses)
plt.xlabel("Training steps")
plt.ylabel("Loss")
plt.legend((line1,line2),("training","test"))
plt.show()

plt.figure()
line1, = plt.plot(train_accuracies)
line2, = plt.plot(test_accuracies)
plt.xlabel("Training steps")
plt.ylabel("Accuracy")
plt.show()