In [41]:
import numpy as np
import random

In [36]:
# Data preprocessing: load the corpus and build the character <-> index lookup tables.

# Read through a context manager so the file handle is closed as soon as the
# text is in memory (a bare open(...).read() leaves the closing to the garbage collector).
with open('dinos.txt', 'r') as corpus_file:
    data = corpus_file.read().lower()

# Sort the unique characters once: both lookup tables below then share a single,
# reproducible ordering instead of depending on set iteration order.
chars = sorted(set(data))
data_size, char_size = len(data), len(chars)
print("The size of total Chars is %d and size of unique is %d " %(data_size,char_size))

# char_to_ix maps a character to its index; ix_to_char is the inverse mapping.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

The size of total Chars is 19909 and size of unique is 27 


RNNs essentially have two problems:
- Exploding parameters/gradient
- Vanishing gradient 

The exploding gradient problem can be solved with a clipping technique, whereas the vanishing gradient problem is much more complex, because it requires the network to remember information across many time steps. To address that, I will further explore memory-cell-based models such as GRU and LSTM, which are more recommended than the basic RNN cell built and trained here.

Let us create the Gradient clipping function to handle the exploding gradient problem. 

In [44]:
# Gradient Clipping 

def clip(gradients, maxvalue):
    """Clip the gradient arrays element-wise into [-maxvalue, maxvalue].

    Arguments:
    gradients -- dictionary holding the arrays 'dWaa', 'dWax', 'dWya', 'db' and 'dby'
    maxvalue -- bound applied on both sides; larger entries become maxvalue,
                smaller ones become -maxvalue

    Returns:
    A new dictionary with the keys 'dWax', 'dWaa', 'dWya', 'db', 'dby'. The
    arrays are clipped in place, so the returned dictionary refers to the
    same (now modified) array objects that were passed in.
    """
    # Look every array up before touching any of them, so a missing key
    # raises before anything has been modified.
    fetched = {key: gradients[key] for key in ('dWaa', 'dWax', 'dWya', 'db', 'dby')}

    # out=grad makes numpy write the clipped values back into the same buffer.
    for grad in fetched.values():
        np.clip(grad, -maxvalue, maxvalue, out=grad)

    return {key: fetched[key] for key in ('dWax', 'dWaa', 'dWya', 'db', 'dby')}

In [60]:
# Sanity check for clip(): build reproducible random gradients scaled by 10,
# clip them at +/-10 and print one entry.
np.random.seed(3)

# Insertion order matters: the arrays are drawn from the seeded generator in
# exactly this sequence.
grad_shapes = {"dWax": (5, 3), "dWaa": (5, 5), "dWya": (2, 5), "db": (5, 1), "dby": (2, 1)}
gradients = {name: np.random.randn(*shape) * 10 for name, shape in grad_shapes.items()}
dWax, dWaa, dWya, db, dby = (gradients[name] for name in ("dWax", "dWaa", "dWya", "db", "dby"))

gradients = clip(gradients, 10)
print("testing value : " + str(gradients['dWax'][0][1]))

testing value : 4.36509850512


In [62]:
def softmax(x, axis=None):
    """Numerically stable softmax.

    Arguments:
    x -- array-like of scores
    axis -- axis (or axes) along which to normalise. The default, None,
            normalises over every element of x, which is what a single
            column vector of scores needs. Pass e.g. axis=1 to turn each
            row of a 2-D batch into its own probability distribution.

    Returns:
    Array with the same shape as x whose entries sum to 1 along `axis`.
    """
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    # keepdims=True lets the normaliser broadcast back against e_x.
    return e_x / e_x.sum(axis=axis, keepdims=True)

### Sampling 

The other key question is what to do once the model is trained. We would like to generate new outputs by sampling from it. This can be done with a forward pass in which each sampled character is fed back in as the input for the next time step.


https://img-blog.csdn.net/20180206070620233?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvbGJmNDYxNg==/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast



In [114]:
# GRADED FUNCTION: sample

def sample(parameters, char_to_ix, seed, max_length=50):
    """Sample a sequence of character indices from the RNN, one character per time step.

    Arguments:
    parameters -- dictionary with the arrays 'Waa', 'Wax', 'Wya', 'by' and 'b'.
                  The vocabulary size is read from by.shape[0] and the hidden
                  state size from Waa.shape[1].
    char_to_ix -- dictionary mapping each character to its index; it must
                  contain the newline character '\n', which ends a sequence
    seed -- integer used to reseed numpy's global random generator at every step
    max_length -- maximum number of characters to sample before the sequence
                  is cut off (default 50); guards against looping forever

    Returns:
    indices -- list of sampled character indices. The list always ends with
               exactly one newline index: either the sampled one, or one
               appended because max_length was reached first.

    Note: this function reseeds numpy's global random generator on every
    time step, so it changes global random state for the caller.
    """
    Waa, Wax, Wya, by, b = (parameters[key] for key in ('Waa', 'Wax', 'Wya', 'by', 'b'))
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    newline_character = char_to_ix['\n']

    # The candidate indices are the same at every step, so build them once.
    vocab_indices = list(range(vocab_size))

    # The first input is an all-zero vector and the initial hidden state is zero.
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))

    indices = []
    idx = -1  # -1 is never a valid index, so the loop body runs at least once when max_length > 0
    counter = 0

    while idx != newline_character and counter < max_length:
        # Forward step: new hidden state, then a probability distribution over the vocabulary.
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        y = softmax(np.dot(Wya, a) + by)

        # Reseeding at every step keeps the run reproducible. Because both
        # `seed` and `counter` grow by one per step, the effective seed is
        # the initial seed plus twice the step number.
        np.random.seed(counter + seed)
        idx = np.random.choice(vocab_indices, p=y.ravel())
        indices.append(idx)

        # The sampled character, one-hot encoded, becomes the next input.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        a_prev = a

        seed += 1
        counter += 1

    # Terminate the sequence only if it was cut off before a newline was
    # sampled. Checking the step counter instead would append a second
    # newline when the newline is drawn on the very last permitted step.
    if idx != newline_character:
        indices.append(newline_character)

    return indices

In [115]:
# Randomly initialised parameters for exercising sample(); seeded so that the
# cell produces the same arrays on every run.
np.random.seed(3)
n_a = 100  # hidden state size: Waa is (n_a, n_a)

# The draw order (Wax, Waa, Wya, b, by) determines which values each array
# receives from the seeded generator, so keep it as is.
Wax, Waa, Wya = np.random.randn(n_a, char_size), np.random.randn(n_a, n_a), np.random.randn(char_size, n_a)
b, by = np.random.randn(n_a, 1), np.random.randn(char_size, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}

In [116]:
# Draw one sequence from the randomly initialised model (seed 0) and show it
# both as raw indices and as the characters they stand for.
indices = sample(parameters, char_to_ix, seed=0)

print("Sampling:")
print("list of sampled indices:", indices)
sampled_chars = [ix_to_char[index] for index in indices]
print("list of sampled characters:", sampled_chars)

Sampling:
list of sampled indices: [10, 7, 14, 26, 19, 10, 0]
list of sampled characters: ['j', 'g', 'n', 'z', 's', 'j', '\n']


In [150]:
# Scratch example: a 2x4 integer array and its number of rows.
# NOTE(review): `b` is also the name given to the bias vector where the
# sampling parameters are built; rebinding it here replaces that global.
a = np.array([
    [1, 2, 3, 4],
    [2, 2, 3, 4],
])
b = len(a)
a

array([[1, 2, 3, 4],
       [2, 2, 3, 4]])

In [153]:
# Every row, columns 2 and 3 (the slice stop, 4, is exclusive).
a[:,2:4]

array([[3, 4],
       [3, 4]])