# **IDL Assignment 5 - Language Modeling & Recurrent Neural Networks**

## **Assigning Tensorflow version and importing the libraries required for the tasks**


In [2]:
%tensorflow_version 2.x

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os

tf.__version__

  import pandas.util.testing as tm


'2.2.0'

In [None]:
# Directory on the mounted Google Drive that holds prepare_data.py and the
# Shakespeare input file; the later cells open files relative to it.
HELPER_DIR = "/content/drive/My Drive/Colab Notebooks/IDL /IDL Assignments/Assignment helper files"

os.chdir(HELPER_DIR)
# Last expression of the cell, so the notebook displays the directory we
# actually ended up in (previously the value was computed and discarded).
os.getcwd()


## Preprocess the text data

In [4]:
!python prepare_data.py shakespeare_input.txt skp

2020-05-22 14:45:22.652495: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 22981 sequences...
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences...
Serialized 2000 sequences...
Serialized 2100 sequences...
Serialized 2200 sequences...
Serialized 2300 sequences...
Serialized 2400 sequences...
Serialized 2500 sequences...
Serialized 2600 sequences...
Serialized 2700 sequences...
Serialized 2800 sequences...
Serialized 2900

**Total number of serialized sequences: 22981**

**Loading the data from skp.tfrecords and skp_vocab**

The files mentioned above are the output of running *prepare_data.py* on the Shakespeare data. The TFRecords file is loaded as a dataset with `tf.data`, and the vocabulary file is unpickled into a dictionary.

**Note:** `vocab` is a dict whose (key, value) pairs are (character, index). The reverse mapping is built and stored as `ind_to_ch`, whose (key, value) pairs are (index, character).

In [5]:
from prepare_data import parse_seq
import pickle

# Sequence length the parser is told to expect for every record.
# If you change the sequence length in preprocessing you also need to change it here.
SEQ_LEN = 200

# The raw TFRecord dataset only yields serialized "bytes" (not understandable yet).
data = tf.data.TFRecordDataset("skp.tfrecords")

# Map the parser function over the dataset so the bytes are properly
# interpreted as fixed-length sequences.
data = data.map(lambda x: parse_seq(x, SEQ_LEN))

# A map from characters to indices.
# NOTE(review): pickle.load can execute arbitrary code; only load vocab files
# that were produced locally by prepare_data.py.
with open("skp_vocab", mode="rb") as vocab_file:  # closes the handle deterministically
    vocab = pickle.load(vocab_file)
vocab_size = len(vocab)
# Inverse mapping: indices to characters.
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)

print("Indices to char")
print(ind_to_ch)

{'-': 1, '.': 2, 'u': 3, 'o': 4, 'Z': 5, 'U': 6, 'i': 7, ';': 8, '[': 9, 'y': 10, '?': 11, 'n': 12, 'z': 13, ',': 14, 'S': 15, 't': 16, 'N': 17, ':': 18, 'R': 19, '$': 20, 'e': 21, 'J': 22, 'q': 23, 'L': 24, 'l': 25, 'W': 26, 'w': 27, 'F': 28, 'V': 29, 'E': 30, 'Q': 31, 'I': 32, '!': 33, 'G': 34, 'a': 35, 'd': 36, 'v': 37, 'b': 38, 'O': 39, 'K': 40, '3': 41, 'T': 42, 'm': 43, 'Y': 44, ']': 45, ' ': 46, 'g': 47, 'r': 48, 'P': 49, 'k': 50, "'": 51, 'H': 52, 'A': 53, 'C': 54, 'B': 55, 'x': 56, 'p': 57, 'D': 58, 'h': 59, '&': 60, 'M': 61, 'X': 62, '\n': 63, 'f': 64, 'c': 65, 'j': 66, 's': 67, '<S>': 0}
68
Indices to char
{1: '-', 2: '.', 3: 'u', 4: 'o', 5: 'Z', 6: 'U', 7: 'i', 8: ';', 9: '[', 10: 'y', 11: '?', 12: 'n', 13: 'z', 14: ',', 15: 'S', 16: 't', 17: 'N', 18: ':', 19: 'R', 20: '$', 21: 'e', 22: 'J', 23: 'q', 24: 'L', 25: 'l', 26: 'W', 27: 'w', 28: 'F', 29: 'V', 30: 'E', 31: 'Q', 32: 'I', 33: '!', 34: 'G', 35: 'a', 36: 'd', 37: 'v', 38: 'b', 39: 'O', 40: 'K', 41: '3', 42: 'T', 43: '

In [6]:
# Display the mapped dataset object to check its element shape and dtype.
data

<MapDataset shapes: (200,), types: tf.int32>

In [None]:
## Declare the sizes of batch, shuffle and repeat

# Buffer size handed to Dataset.shuffle in batch_shuffle_repeat.
SHUFFLE_SIZE = 1000
# Number of sequences per batch, handed to Dataset.padded_batch.
BATCH_SIZE = 128
# Count handed to Dataset.repeat, i.e. how many passes over the batched data.
REPEAT_TIMES = 20

In [None]:
def batch_shuffle_repeat(data, shuffle_size=None, batch_size=None, repeat_times=None):
    """Shuffle, batch and repeat a dataset, in that order.

    Args:
        data: Object exposing ``shuffle``, ``padded_batch`` and ``repeat``
            (a ``tf.data.Dataset`` in this notebook).
        shuffle_size: Shuffle buffer size. Defaults to ``SHUFFLE_SIZE``.
        batch_size: Elements per batch. Defaults to ``BATCH_SIZE``.
        repeat_times: Number of repetitions. Defaults to ``REPEAT_TIMES``.

    Returns:
        The transformed dataset. The last batch of each pass is kept even if
        it is smaller than ``batch_size`` (``drop_remainder=False``).
    """
    # Fall back to the notebook-level constants only when no override is
    # given, so existing one-argument calls behave exactly as before.
    if shuffle_size is None:
        shuffle_size = SHUFFLE_SIZE
    if batch_size is None:
        batch_size = BATCH_SIZE
    if repeat_times is None:
        repeat_times = REPEAT_TIMES

    data = data.shuffle(shuffle_size)
    data = data.padded_batch(batch_size, padded_shapes=None, drop_remainder=False)
    data = data.repeat(repeat_times)

    return data

In [None]:
# Build the training pipeline (shuffle -> batch -> repeat) from the parsed sequences.
dataset = batch_shuffle_repeat(data)


In [10]:
# Sanity-check one batch: its shape tensor, its Python type, the repr of the
# shape tensor, and finally the batch contents.
for x in dataset.take(1):
    batch_shape = tf.shape(x)
    print(batch_shape)
    print(type(x))
    print(repr(batch_shape))
    print(x)

tf.Tensor([128 200], shape=(2,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([128, 200], dtype=int32)>
tf.Tensor(
[[ 0 26 59 ... 67 16 46]
 [ 0 16 46 ... 48 35 16]
 [ 0 21 46 ...  7 67 46]
 ...
 [ 0 25  7 ... 16 46 67]
 [ 0 63 32 ... 46 16 48]
 [ 0 46 59 ... 67 16  4]], shape=(128, 200), dtype=int32)


In [None]:
# Number of hidden units in the recurrent layer.
n_h = 512


def _glorot_variable(shape):
    """Return a trainable variable of `shape` drawn from a fresh Glorot-uniform initializer."""
    return tf.Variable(tf.initializers.glorot_uniform()(shape))


# Parameter naming (literature symbol in parentheses):
#   w_xh  input-to-hidden weights   (U)
#   w_hh  hidden-to-hidden weights  (W)
#   w_ho  hidden-to-output weights  (V)
#   b_h, b_o  biases of the hidden and output layers
w_xh = _glorot_variable([vocab_size, n_h])

w_hh = _glorot_variable([n_h, n_h])
b_h = tf.Variable(tf.zeros([n_h]))

w_ho = _glorot_variable([n_h, vocab_size])
b_o = tf.Variable(tf.zeros([vocab_size]))

# Everything the optimizer is allowed to update, in a fixed order.
variables = [w_xh, w_hh, b_h, w_ho, b_o]

In [None]:
# Adam optimizer with its default hyper-parameters; used by rnn_sequence.
opt = tf.optimizers.Adam()
# Cross-entropy over integer targets; from_logits=True because the model
# outputs are passed to it before any softmax is applied.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


@tf.function
def rnn_sequence(batch_data):
    """Run one training step of the hand-written RNN on a batch.

    The network is unrolled over every position but the last; at each position
    the character at `t` is the input and the character at `t + 1` the target.
    The per-step losses are averaged, gradients are taken w.r.t. `variables`
    and applied with `opt`.

    Args:
        batch_data: 2-D integer tensor indexed as [batch, time].

    Returns:
        Scalar tensor: the mean of the per-timestep losses for this batch.
    """
    n_rows = tf.shape(batch_data)[0]
    n_steps = tf.shape(batch_data)[1] - 1

    with tf.GradientTape() as tape:
        # The hidden state starts at zero for every sequence in the batch.
        state = tf.zeros([n_rows, n_h])
        step_losses = tf.TensorArray(tf.float32, size=n_steps)

        for t in tf.range(n_steps):
            inputs = tf.one_hot(batch_data[:, t], vocab_size)
            state = tf.nn.tanh(tf.matmul(inputs, w_xh) + tf.matmul(state, w_hh) + b_h)
            logits = tf.matmul(state, w_ho) + b_o

            # Target is the next character of each sequence.
            step_losses = step_losses.write(t, loss_fn(batch_data[:, t + 1], logits))

        batch_loss = tf.reduce_mean(step_losses.stack())

    grads = tape.gradient(batch_loss, variables)
    opt.apply_gradients(zip(grads, variables))

    return batch_loss


In [None]:
##### DONT RUN THIS BLOCK .. THIS IS JUST FOR TRIAL PURPOSES
# NOTE(review): the two assignments below rebind the module-level `opt` and
# `loss_fn` names that are already defined earlier in the notebook --
# presumably a leftover copy; confirm before running this cell.


###############################################################################################################################################
opt = tf.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

"""
        tf.print(tf.shape(batch_data))
        tf.print(type(batch_data))
        tf.print(repr(tf.shape(batch_data)))
        tf.print(batch_data)

        for i in tf.range(tf.shape(batch_data)[1]-1):
          tf.print(i)
"""



@tf.function
def rnn_try(batch_data):
    """Debug-only forward pass that prints shapes and types at every step.

    Runs the same recurrence and per-step loss as the training step but does
    not compute or apply gradients, so no GradientTape is opened (the earlier
    version recorded a tape it never used).

    Args:
        batch_data: 2-D integer tensor indexed as [batch, time].

    Returns:
        1-D tensor holding the loss of each timestep (previously the losses
        were computed and then discarded).
    """
    n_steps = tf.shape(batch_data)[1] - 1
    h_t = tf.zeros([tf.shape(batch_data)[0], n_h])
    loss = tf.TensorArray(tf.float32, size=n_steps)

    for timestep in tf.range(n_steps):
        tf.print("="*100)
        tf.print(timestep)

        # One-hot encoding of the current character of every sequence.
        x_t = tf.one_hot(batch_data[:, timestep], vocab_size)
        tf.print(tf.shape(x_t))
        tf.print(type(x_t))
        tf.print(repr(tf.shape(x_t)))

        # New hidden state.
        h_t = tf.nn.tanh(tf.matmul(x_t, w_xh) + tf.matmul(h_t, w_hh) + b_h)
        tf.print(tf.shape(h_t))
        tf.print(type(h_t))
        tf.print(repr(tf.shape(h_t)))

        # Unnormalised scores over the vocabulary.
        logits = tf.matmul(h_t, w_ho) + b_o
        tf.print(tf.shape(logits))
        tf.print(type(logits))
        tf.print(repr(tf.shape(logits)))

        # Loss against the next character.
        local_loss = loss_fn(batch_data[:, timestep + 1], logits)
        tf.print(tf.shape(local_loss))
        tf.print(type(local_loss))
        tf.print(repr(tf.shape(local_loss)))

        loss = loss.write(timestep, local_loss)

    loss = loss.stack()
    tf.print(tf.shape(loss))
    tf.print(type(loss))
    tf.print(repr(tf.shape(loss)))

    return loss





In [13]:
import time

# perf_counter is monotonic, so the measured durations cannot be distorted by
# system clock adjustments the way time.time() differences can.
start = time.perf_counter()

batch_nr = 0  # stays 0 if the dataset yields no batches
for batch_nr, batch_data in enumerate(dataset, start=1):
    batch_start = time.perf_counter()
    batch_loss = rnn_sequence(batch_data)
    # Read the loss back as a Python float before stopping the timer so the
    # measurement covers the step up to the point its result is available.
    loss_value = float(batch_loss)
    batch_stop = time.perf_counter()
    print("Batch Number: {} Loss: {} Time taken: {}".format(batch_nr, loss_value, batch_stop - batch_start))

stop = time.perf_counter()
print("took {} seconds\n".format(stop - start))

Batch Number: 1 Loss: 4.268254280090332 Time taken: 3.021419048309326
Batch Number: 2 Loss: 4.130809307098389 Time taken: 0.3425624370574951
Batch Number: 3 Loss: 3.7822329998016357 Time taken: 0.3167083263397217
Batch Number: 4 Loss: 3.567951202392578 Time taken: 0.32003140449523926
Batch Number: 5 Loss: 3.523487091064453 Time taken: 0.32482409477233887
Batch Number: 6 Loss: 3.432318687438965 Time taken: 0.3274343013763428
Batch Number: 7 Loss: 3.3816611766815186 Time taken: 0.3256051540374756
Batch Number: 8 Loss: 3.381779432296753 Time taken: 0.32646989822387695
Batch Number: 9 Loss: 3.3538711071014404 Time taken: 0.3188135623931885
Batch Number: 10 Loss: 3.3600902557373047 Time taken: 0.32323265075683594
Batch Number: 11 Loss: 3.341378927230835 Time taken: 0.32181215286254883
Batch Number: 12 Loss: 3.309678554534912 Time taken: 0.31246161460876465
Batch Number: 13 Loss: 3.2677252292633057 Time taken: 0.32370471954345703
Batch Number: 14 Loss: 3.300065040588379 Time taken: 0.3307433

In [None]:
### DONT RUN THIS BLOCK
# Scratch cell: run the trained RNN over one batch and sample one character
# per sequence from the distribution predicted at the final timestep.

for dat in dataset.take(1):
    h_t = tf.zeros([tf.shape(dat)[0], n_h])
    for time_step in tf.range(tf.shape(dat)[1] - 1):
        # Use a fresh name here: the earlier version assigned to `data` and
        # thereby overwrote the module-level dataset of the same name.
        x_t = tf.one_hot(dat[:, time_step], vocab_size)

        h_t = tf.nn.tanh(tf.matmul(x_t, w_xh) + tf.matmul(h_t, w_hh) + b_h)
        logits = tf.matmul(h_t, w_ho) + b_o
        output = tf.nn.softmax(logits)

# tf.random.categorical expects unnormalised log-probabilities, so it must be
# given the logits; feeding it the softmax output samples from the wrong
# (much flatter) distribution.
sampled_indices = tf.random.categorical(logits, num_samples=1)
print(sampled_indices)

# Translate each sampled index into its character. The earlier
# map(sampled_indices, ind_to_ch) had its arguments swapped and raised a
# TypeError because a tensor is not callable.
gen = [ind_to_ch[int(index)] for index in sampled_indices[:, 0].numpy()]
print(gen)

In [16]:
# Small demo: slicing with [-1:] yields a one-element *list* holding the last
# item (as opposed to [-1], which would yield the bare item).
characters = [0, 5, 6]
print(characters)

last_as_list = characters[-1:]
print(last_as_list)

[0, 5, 6]
[6]


In [24]:

# Generate text one character at a time by sampling from the model's
# predicted distribution and feeding the sampled character back in.
characters_to_generate = 2500

# Hidden state for a single sequence, starting at zero.
h_pred = tf.zeros([1, n_h])

# History of character indices; generation is seeded with index 0.
character = [0]

text = []
for _ in range(characters_to_generate):
    # One-hot encode only the most recent index (the slice keeps a batch axis of 1).
    x_pred = tf.one_hot(character[-1:], vocab_size)
    h_pred = tf.nn.tanh(tf.matmul(x_pred, w_xh) + tf.matmul(h_pred, w_hh) + b_h)
    logits = tf.matmul(h_pred, w_ho) + b_o

    # Probabilities for the single sequence in the batch.
    preds = tf.nn.softmax(logits).numpy()[0]
    next_index = np.random.choice(vocab_size, p=preds)

    character.append(next_index)
    text.append(ind_to_ch[next_index])


print(''.join(text))

this accy and not is he go
did spin and blowd you, and matoce the sunving Turth's forsers;
Ween, we she set hus awh be codved alaush.

CUSTIL:
I say find away from I would you thought you cume,
The prentable sufter of up you?

istapp'd is, she reled both, for a rate more in his headt,
Which, be a valiant I duly ewelton to him? I Franch
And bare that leave the retlabied to my sun
This rause a greet caper to make thy wars in trilld I before discilen,
The ispecient crieve what he be a kingly.

AUS:
Cret'l I ming it, on Wand here
Ap courtious dain.
A servent of men he wall be eved extect is hend, within the ceptardont. Thy trausation,
If I maulted before thee, lieces,
Then all then you their mideh courtest.

PoIN
AS:
Dround I besuting and a bestution of my fiese Who own diemned:
Why, and come thy hawning to fortune love;
Thou wilt, my I'll from things, you shouse.

VIOLA:
Nay, there's fallfore thee, Palabur in the formones hear:
O, for the unthated in this enes.

BUSTIA:
O, this ho! who ch

#### **Using np.random.choice**

In [49]:
# Number of characters to generate.
num_generate = 100

text_generated = []
# Hidden state for a single sequence, starting at zero.
h_t = tf.zeros([1, n_h])
# History of character indices; generation is seeded with index 0.
choice = [0]

for _ in range(num_generate):
    # One-hot encode only the most recent index (the slice keeps a batch axis of 1).
    x_t = tf.one_hot(choice[-1:], depth=vocab_size)
    h_t = tf.nn.tanh(tf.matmul(x_t, w_xh) + tf.matmul(h_t, w_hh) + b_h)
    logits = tf.matmul(h_t, w_ho) + b_o

    # Convert to a NumPy probability vector for the single sequence.
    predictions = tf.nn.softmax(logits).numpy()[0]

    # Sample the next character index from the predicted distribution.
    char_choice = np.random.choice(vocab_size, p=predictions)
    choice.append(char_choice)
    text_generated.append(ind_to_ch[char_choice])

# Print only the generated text. The earlier version prepended `start_char`,
# which by this point held the *last* generated character, so the output
# began with a stray duplicate of its own final character.
print(''.join(text_generated))

lchess
Some nent
break an ey;
Yor's quest of England, thyoria?

VIANA:
To, now he or fill I dame:
I'l


#### **Using np.argmax**

In [39]:
# Number of characters to generate.
num_generate = 500

text_generated = []
# Hidden state for a single sequence, starting at zero.
h_t = tf.zeros([1, n_h])
# History of character indices; generation is seeded with index 0.
choice = [0]

for _ in range(num_generate):
    # One-hot encode only the most recent index (the slice keeps a batch axis of 1).
    x_t = tf.one_hot(choice[-1:], depth=vocab_size)
    h_t = tf.nn.tanh(tf.matmul(x_t, w_xh) + tf.matmul(h_t, w_hh) + b_h)
    logits = tf.matmul(h_t, w_ho) + b_o

    # Convert to a NumPy probability vector for the single sequence.
    predictions = tf.nn.softmax(logits).numpy()[0]

    # Greedy decoding: always pick the most probable character.
    char_choice = np.argmax(predictions)
    choice.append(char_choice)
    text_generated.append(ind_to_ch[char_choice])

# Print only the generated text. The earlier version prepended `start_char`,
# which by this point held the *last* generated character, so the output
# began with a stray duplicate of its own final character.
print(''.join(text_generated))

s the best of the country shall be so much and my lord,
The sense of my lord, and the common the country shall be so much and my lord,
The sense of my lord, and the common the country shall be so much and my lord,
The sense of my lord, and the common the country shall be so much and my lord,
The sense of my lord, and the common the country shall be so much and my lord,
The sense of my lord, and the common the country shall be so much and my lord,
The sense of my lord, and the common the country s
