In [5]:
import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time

In [6]:
data = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [7]:
text = open(data).read()
# lets look at the text
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [8]:
# set-> Build an unordered collection of unique elements.
vocab = sorted(set(text))
print(vocab)
print('Number of unique chars: {}'.format(len(vocab)))

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Number of unique chars: 65


In [9]:
# creating a mapping from unique chars to index and vice versa
idx_from_char = {u:i for i,u in enumerate(vocab)}
char_from_idx = np.array(vocab)

text_as_int = np.array([idx_from_char[c] for c in text])

In [11]:
print('{} ------mapped--to-----> {}'.format(text[:13], text_as_int[:13]))

First Citizen ------mapped--to-----> [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [10]:
# Creating training samples
# the maximum length sentence we want for a single input in characters
seq_length = 100

# breaking the text into chunks of seq_lenght+1
chunks = tf.data.Dataset.from_tensor_slices(text_as_int).batch(seq_length+1, drop_remainder=True)

# text broken into chunks of seq_lenght+1 parts to create training and target data
for item in chunks.take(5):
    print(repr(''.join(char_from_idx[item.numpy()])))
    
# example: 
# chunk = 'Apple' (seq_lenght+1 = 5)
# training point = 'Appl'
# target point = 'pple'

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [12]:
def split_train_target(chunk):
    train = chunk[:-1]
    target = chunk[1:]
    return train, target

dataset = chunks.map(split_train_target)

In [13]:
for in_example, out_example in dataset.take(1):
    print('input string: ', repr(''.join(char_from_idx[in_example.numpy()])))
    print('target string: ', repr(''.join(char_from_idx[out_example.numpy()])))

input string:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
target string:  'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [14]:
for i,(in_idx, out_idx) in enumerate(zip(in_example[:5],out_example[:5])):
    print('Step {:2d}'.format(i))
    print('    input: {} ({:s})'.format(in_idx,char_from_idx[in_idx]))
    print('    expected out: {} ({:s})'.format(out_idx,char_from_idx[out_idx]))

Step  0
    input: 18 (F)
    expected out: 47 (i)
Step  1
    input: 47 (i)
    expected out: 56 (r)
Step  2
    input: 56 (r)
    expected out: 57 (s)
Step  3
    input: 57 (s)
    expected out: 58 (t)
Step  4
    input: 58 (t)
    expected out: 1 ( )


In [15]:
# Batch size 
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences, 
# so it doesn't attempt to shuffle the entire sequence in memory. Instead, 
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset_shuffled = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# text broken into chunks of seq_lenght+1 parts to create training and target data
# then dataset of 'in' and 'out' created
# now SHUFFLED
for in_example, out_example in dataset_shuffled.take(1):
    print('input dataset shape: ', in_example.shape)
    print('target dataset shape: ', out_example.shape)
    # This Shape will be the inpput and output shape of our Model

input dataset shape:  (64, 100)
target dataset shape:  (64, 100)


In [17]:
# The MODEL (using Functional API)
class Model(tf.keras.Model):
    def __init__(self,vocab_size, embedding_dim, units):
        super(Model, self).__init__()
        self.units = units
        self.embedding  = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       recurrent_activation='sigmoid',
                                       return_sequences=True,
                                       recurrent_initializer='glorot_uniform',
                                       stateful=True)
        self.fc = tf.keras.layers.Dense(vocab_size)
        
    def call(self, x):
        embedding = self.embedding(x)
        output = self.gru(embedding)
        prediction = self.fc(output)
        return prediction

vocab_size = len(vocab)   # (y=[vocab_size,1], Wya=[vocab_size,units])
embedding_dim = 256       # (x=[embedding_dim,1], Wax=[embedding_dim,units])
units = 1024              # units are the number of nodes in the kernel layer (a=[units,1] ,Waa=[units,units])

model = Model(vocab_size,embedding_dim,units)

# OR
#
# This approach requires you to enter the specifications of the 
# model (if there, like number of units in wach layer) with the 
# inputs when creating a model 
# 
# The above method can solve this problem by using a constructor to use the 
# specifications when creating an instance of the model, which later can be
# called with inputs to create the model. 
# 
# def Model_2(inputs,vocab_size,embedding_dim,units):
#     embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)
#     x = tf.keras.layers.GRU(units,
#                                  recurrent_activation='sigmoid',
#                                  return_sequences=True,
#                                  recurrent_initializer='glorot_uniform',
#                                  stateful=True)(embedding)
#     outputs = tf.keras.layers.Dense(vocab_size)(x)
#     model = tf.keras.Model(inputs=inputs, outputs=outputs)
#     return model

# model = Model_2(inputs,vocab_size,embedding_dim,units)

In [18]:
optimizer = tf.train.AdamOptimizer()

# using sparse_.. so that we do not nedd to convert to one-hot vectors
def loss_func(real,pred):
    return  tf.losses.sparse_softmax_cross_entropy(labels=real, logits=pred)

In [19]:
# The above method of using a SubClass to instantiate the Model
# require us to 'build' to show the shape of the input layer

model.build(tf.TensorShape([BATCH_SIZE, seq_length]))

In [26]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  16640     
_________________________________________________________________
gru_1 (GRU)                  multiple                  3935232   
_________________________________________________________________
dense_1 (Dense)              multiple                  66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [21]:
model.variables

[<tf.Variable 'embedding/embeddings:0' shape=(65, 256) dtype=float32, numpy=
 array([[ 0.00512834,  0.0436937 ,  0.02530426, ...,  0.04631868,
          0.04996283, -0.01745743],
        [ 0.0203251 ,  0.00948094, -0.0430881 , ...,  0.02432409,
         -0.0276683 ,  0.00305264],
        [ 0.02338693,  0.00400842, -0.01564399, ...,  0.01133979,
          0.03369172, -0.00371819],
        ...,
        [ 0.01701743,  0.03860707, -0.03241114, ...,  0.0488064 ,
          0.01927694, -0.0384011 ],
        [ 0.03603819, -0.01086976, -0.04505264, ...,  0.02887638,
         -0.0352305 ,  0.04337858],
        [-0.02075713, -0.02915847,  0.03976054, ...,  0.02994708,
          0.01739   ,  0.00872176]], dtype=float32)>,
 <tf.Variable 'gru/kernel:0' shape=(256, 3072) dtype=float32, numpy=
 array([[-0.02622047,  0.01068696, -0.02080663, ...,  0.02202471,
          0.0102331 , -0.03938706],
        [-0.00127871,  0.03858697,  0.03139468, ..., -0.04026121,
         -0.04024152,  0.02658733],
       

In [22]:
# Directory where the checkpoints will be saved
checkpoint_dir = 'C:/Users/Tarunbir Singh/Documents/Machine Learning/text_generation/training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir,'ckpt')
# Checkpoint instance
checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

In [88]:
epochs = 30

for epoch in range(epochs):
    start = time.time()
    
    # initialising the hidden state at the start of every epoch
    # initial hidden state is None
    hidden = model.reset_states()
    
    for (batch, (inp,target)) in enumerate(dataset_shuffled):
        with tf.GradientTape() as tape:
            # feeding the hidden state back into the model
            predictions = model(inp)
            loss = loss_func(target, predictions)
            
        # gradients for back-propagation using Gradient Taping
        grads = tape.gradient(loss, model.variables)
        # applying the updation of variables using the calcuated grads
        optimizer.apply_gradients(zip(grads,model.variables))
        
        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1,batch,loss))
            
    # saving (checkpoint) the model every 5 epochs
    if (epoch+1)%5==0:
        checkpoint.save(file_prefix=checkpoint_prefix)

    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


IndentationError: unexpected indent (<ipython-input-88-a795d74e0f42>, line 4)

In [23]:
# model needs to train only once
# can be restored from the checkpoints created
model = Model(vocab_size, embedding_dim, units)

checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1,None]))

In [25]:
# Evaluation Step

# number of characters to generate
num_generate = 1000

# The Starting String input, 
# can be experimented
starting_string = 'f'

# convert to index our starting string
input_eval = [idx_from_char[i] for i in starting_string]
input_eval = tf.expand_dims(input_eval, 0)

# variable to store the generated text
text_generated = []

# Low temperatures results in more predictable text.
# Higher temperatures results in more surprising text.
# Experiment to find the best setting.
temperature = 1.0

# here batch size == 1
model.reset_states()
for i in range(num_generate):
    prediction = model(input_eval)
    # remove the batch dimensions (flatten)
    prediction = tf.squeeze(prediction, 0)
    
    # using a multinomial distribution to predict the word returned by the model
    prediction = prediction / temperature
    predicted_id = tf.multinomial(prediction, num_samples=1)[-1,0].numpy()
    
    # we pass the predicted word as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id],0)
    
    text_generated.append(char_from_idx[predicted_id])
    
print(starting_string + ''.join(text_generated))
    

f you mistoking and she falls.

PAULINA:
There's comfort's bloody throne of it!
Twenty-challenges, where we were alive,
For John of Gaunt indeed, I will understand thee from so suffer in.
Where you we'll find you know
That you thing entle than a man of victory.
Mayac, leaving this?
Of love be done. Good morrow, fellow.

FLORIZALET:
The mailing of our other sights.
You are made, my liege,
The warrants of a woman's humour.

Vessenger:
There is a man-form load: you must strike?

Shepherd:
Ay, sir.

QUEEN ELIZABETH:
Ay, What work he was?
Hark you, that Margaret's battle are abaded,
For valiant Clifford, with his state, our justice comes to fight:
And I have noned from your feeding, become my saying,
HADY ANNE:
And, sir, he for his mean: madam, safe, that is not so remedy.

FRIARNLLO:
Why with thy cogonax,
From my remembrance would have blush up my thoughty af her head, I swear,
But one that are near youd?

Second Keeper:
Ind brought ut ill:
And every cager, therein flosts do wrink so unpro