In [103]:
import numpy as np
from utils import *
import copy

In [104]:
# Read the whole corpus inside a context manager so the file handle is
# closed as soon as the text is in memory (a bare open(...).read() leaves
# the handle to be reclaimed by the garbage collector).
with open("./dinos.txt") as corpus_file:
    data = corpus_file.read()
# Lower-case everything so the vocabulary does not distinguish letter case.
data = data.lower()
# Unique characters in the corpus; set order is arbitrary at this point.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f"There are {data_size} total characters and {vocab_size} unique characters!")

There are 19909 total characters and 27 unique characters!


In [105]:
# `chars` was built from a set, whose iteration order is arbitrary; sorting
# makes the character order (and therefore every index derived from it)
# deterministic across runs.
chars = sorted(chars)
print(chars)

['\n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [106]:
# Two inverse lookup tables over the character list:
#   ix_to_char: position in `chars` -> character
#   char_to_ix: character -> position in `chars`
ix_to_char = dict(enumerate(chars))
char_to_ix = {symbol: position for position, symbol in ix_to_char.items()}

In [107]:
# Display the index -> character table to check the mapping by eye.
ix_to_char

{0: '\n',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z'}

In [108]:
def clip(gradients, maxValue):
    '''
    Clips every gradient element-wise to the interval [-maxValue, maxValue].
    
    Arguments:
    gradients -- a dictionary mapping gradient names (e.g. "dWaa", "dWax", "dWya", "db", "dby") to array-like values;
                 every entry of the dictionary is clipped, whatever its name
    maxValue -- everything above this number is set to this number, and everything less than -maxValue is set to -maxValue
    
    Returns: 
    gradients -- a new dictionary with the same keys, in the same order, holding the clipped arrays.
                 The input dictionary and the arrays it holds are left unmodified.
    '''
    # np.clip without an `out` argument allocates a fresh result array, so the
    # caller's arrays are never written to and no deep copy is required. It also
    # avoids the casting error an in-place clip raises when an integer array is
    # clipped with a float threshold.
    return {
        name: np.clip(grad, -maxValue, maxValue)
        for name, grad in gradients.items()
    }

In [109]:
def sample(parameters, char_to_ix, max_length=None):
    """
    Sample a sequence of characters according to a sequence of probability distributions output of the RNN

    Starting from an all-zero hidden state and an all-zero input vector, the
    function repeatedly runs one RNN step, draws the next character index from
    the resulting distribution and feeds that index back in as a one-hot input.
    Sampling stops right after the newline index has been drawn.

    Arguments:
    parameters -- Python dictionary containing the parameters Waa, Wax, Wya, by, and b. 
    char_to_ix -- Python dictionary mapping each character to an index; only the entry for "\\n" is read.
    max_length -- optional upper bound on the number of sampled indices. The default (None) means
                  "no bound": sampling continues until a newline is drawn. When the bound is hit
                  the sequence is returned as is, without a trailing newline index.

    Returns:
    indices -- A list containing the indices of the sampled characters, in sampling order.
    """
    Waa, Wax, Wya, by, b = parameters["Waa"], parameters["Wax"], parameters["Wya"], parameters["by"], parameters["b"]
    newline_ix = char_to_ix["\n"]

    a_prev = np.zeros((Waa.shape[1], 1))
    x = np.zeros((Wax.shape[1], 1))
    indices = []
    while max_length is None or len(indices) < max_length:
        # One RNN step: new hidden state, then a distribution over the vocabulary.
        a_prev = np.tanh(np.matmul(Waa, a_prev) + np.matmul(Wax, x) + b)
        probs = softmax(np.matmul(Wya, a_prev) + by).reshape(-1,)
        # Entry k of `probs` belongs to index k, so draw the index directly
        # instead of pairing the probabilities with the dictionary's value order.
        choice = np.random.choice(len(probs), p=probs)
        indices.append(choice)
        if choice == newline_ix:
            break
        # Feed the sampled character back in as a one-hot vector.
        x = np.zeros_like(x)
        x[choice] = 1
    return indices

In [110]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    """
    Run a single training step: forward pass, backward pass, gradient
    clipping (threshold 5) and parameter update.
    
    Arguments:
    X -- list of integers, where each integer is a number that maps to a character in the vocabulary.
    Y -- list of integers, exactly the same as X but shifted one index to the left.
    a_prev -- previous hidden state.
    parameters -- python dictionary containing:
                        Wax -- Weight matrix multiplying the input, numpy array of shape (n_a, n_x)
                        Waa -- Weight matrix multiplying the hidden state, numpy array of shape (n_a, n_a)
                        Wya -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        b --  Bias, numpy array of shape (n_a, 1)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)
    learning_rate -- learning rate for the model.
    
    Returns:
    loss -- value of the loss function (cross-entropy)
    gradients -- python dictionary with the clipped gradients:
                        dWax -- Gradients of input-to-hidden weights, of shape (n_a, n_x)
                        dWaa -- Gradients of hidden-to-hidden weights, of shape (n_a, n_a)
                        dWya -- Gradients of hidden-to-output weights, of shape (n_y, n_a)
                        db -- Gradients of bias vector, of shape (n_a, 1)
                        dby -- Gradients of output bias vector, of shape (n_y, 1)
    a[len(X)-1] -- the last hidden state, of shape (n_a, 1)

    Note: the value returned by update_parameters is not handed back to the
    caller, so the caller only observes the update if update_parameters
    modifies `parameters` in place -- TODO confirm against its definition.
    """
    # Forward pass yields the loss plus whatever the backward pass needs.
    loss, forward_cache = rnn_forward(X, Y, a_prev, parameters)
    raw_gradients, hidden_states = rnn_backward(X, Y, parameters, forward_cache)

    # Bound every gradient entry to [-5, 5] before taking the step.
    clipped_gradients = clip(raw_gradients, 5)
    update_parameters(parameters, clipped_gradients, learning_rate)

    final_state = hidden_states[len(X) - 1]
    return loss, clipped_gradients, final_state

In [111]:
def model(data_x, ix_to_char, char_to_ix, num_iterations = 35000, n_a = 50, dino_names = 7, vocab_size = 27):
    """
    Trains the model and generates dinosaur names. 
    
    The examples are visited in a shuffled order, one per iteration, wrapping
    around when the list is exhausted. Every 1000 iterations (including
    iteration 0) `dino_names` names are sampled and printed.
    
    Arguments:
    data_x -- text corpus, divided in words. The sequence is not modified: a shuffled copy is used.
    ix_to_char -- dictionary that maps the index to a character
    char_to_ix -- dictionary that maps a character to an index
    num_iterations -- number of iterations to train the model for
    n_a -- number of units of the RNN cell
    dino_names -- number of dinosaur names you want to sample at each iteration. 
    vocab_size -- number of unique characters found in the text (size of the vocabulary)
    
    Returns:
    parameters -- the parameter dictionary created by initialize_parameters and passed to every
                  optimize call (it holds the learned values provided optimize updates it in place
                  -- TODO confirm against the helper that performs the update)
    last_name -- the most recently sampled name, including its trailing newline if one was sampled;
                 the placeholder 'abc' when nothing was sampled
    """
    # Shuffle a private copy so the caller's list keeps its original order.
    examples = list(data_x)
    np.random.shuffle(examples)

    n_x, n_y = vocab_size, vocab_size
    parameters = initialize_parameters(n_a, n_x, n_y)
    a_prev = np.zeros((n_a, 1))
    newline_ix = char_to_ix["\n"]
    last_name = 'abc'

    for i in range(num_iterations):
        example = examples[i % len(examples)]
        # Inputs are the characters of the name; targets are the same
        # characters shifted left by one, terminated by a newline.
        # NOTE(review): the inputs start at the first character, so training
        # never presents the all-zero input that sample() starts from --
        # confirm whether X should begin with a "no input" marker.
        X = [char_to_ix[c] for c in example]
        Y = X[1:] + [newline_ix]

        # NOTE(review): the hidden state returned by optimize is discarded, so
        # every example starts from the zero state -- confirm this is intended.
        optimize(X, Y, a_prev, parameters)

        if i % 1000 == 0:
            for _ in range(dino_names):
                sampled_indices = sample(parameters, char_to_ix)
                last_name = ''.join(ix_to_char[k] for k in sampled_indices)
                print(last_name.replace("\n", ""))
            print("\n")
    return parameters, last_name


In [112]:
# Train on one name per line of the corpus. 52001 iterations means the last
# iteration index is 52000, which is a multiple of 1000 and therefore ends the
# run with a final batch of printed samples.
parameters, last_name = model(
    data.split("\n"),
    ix_to_char,
    char_to_ix,
    num_iterations=52001,
)

flcaudczarjdgswgikvtentlneqaijpyenupscvhkbrvmbbcmp
azglqsbbxhsslbxvklnjmlgwaitmumzw
lwiskbhhqyshrbjerqiddfhwcdipafvqichfshrkiuelohsxadmuq
hqrmapmsjvhw
tvrxeifvtwidzlsqcvzzixcyakvssgkojmnvsq
dwtdadvztu
wrimyaednfbmvr


anaranounus
isahjintanaaoit
urubepap
nuctoliiolctisluturar
airaigosaus
lraicosaurus
mavyanosaur


olan
anys
unenosaurus
thures
girashurus
athos
gtipviochidhaerus


oran
huaneanbsaurus
iasateraus
donoshus
rachausaurus
anzhorlater
anvenasauruprus


ipshcepdresausuy
eysacrus
onociniozin
ucholudh
inotis
ridnamadrs
ichocgobkga


auchelosaurus
anceravur
andreptorka
iakithitos
aldvemmisoritous
iopontosaurus
ambous


abavosaurus
ocarayrus
apasaurus
gicorosaurus
althisausus
enmisirur
aropodon


orhanatos
orophodyin
aneodimus
invenodoma
apuijachus
avonsaurus
renyiosaurus


enionus
ichiliahinooulaphosius
ucirnia
aishetesaurus
ighomecotvus
oruchulosaurus
iptoraptor


engochyis
imchuenius
eplosaurus
usanya
irhasauru
inbasaurus
acelipon


angiakoscondusus
auoeros
oceitonpthoma
luairasa

In [113]:
def generate_names(num, parameters, char_to_ix, ix_to_char):
    """
    Sample names from the model and print them, one per line.

    Arguments:
    num -- number of names to sample and print
    parameters -- parameter dictionary handed to sample()
    char_to_ix -- dictionary that maps a character to an index
    ix_to_char -- dictionary that maps the index to a character

    Returns:
    None. Each newline in a sampled name is replaced by a single space before
    printing, so a name that ends with a newline is printed with a trailing space.
    """
    for _ in range(num):
        letters = [ix_to_char[k] for k in sample(parameters, char_to_ix)]
        print(''.join(letters).replace('\n', ' '))

In [114]:
# Print ten names sampled with the parameters returned by the training run.
generate_names(10, parameters, char_to_ix, ix_to_char)

enqanasor 
levasauruvus 
enanusaurus 
hutalaptos 
andcoscurus 
agrahodaosochus 
illocres 
troslantor 
annong 
anxaniavelatops 
