In [1]:
import numpy as np
from utils import *
import random

In [2]:
data = open("dinos.txt","r").read()
data = data.lower()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 19910 total characters and 27 unique characters in your data.


In [34]:
char_to_ix = {ch:i for i, ch in enumerate(sorted(chars))}
ix_to_char = {i:ch for i, ch in enumerate(sorted(chars))}
print("char_to_ix : \n" + str(char_to_ix) + "\n")
print("ix_to_char : \n" + str(ix_to_char))

char_to_ix : 
{'\n': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}

ix_to_char : 
{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


In [58]:
def clip(gradients, maxValue):
    
    dWaa, dWax, dWya, db, dby = gradients["dWaa"], gradients["dWax"], gradients["dWya"], gradients["db"], gradients["dby"]
     
     
    for gradient in [dWaa, dWax, dWya, db, dby]:
        
        np.clip(gradient, -maxValue, maxValue, out = gradient)
    
    gradients = {"dWaa": dWaa, "dWax" : dWax, "dWya": dWya, "db" : db, "dby" : dby}
    
    
    
    return gradients
    

In [62]:
def sample(parameters, char_to_ix, seed):
    
    Waa, Wax, Wya, ba, by = parameters["Waa"], parameters["Wax"], parameters["Wya"], parameters["b"], parameters["by"]
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    
    x = np.zeros((vocab_size, 1))  # 初始化第一个x输入：字符级别
    a_prev = np.zeros((n_a, 1))  # 初始化第一个a值
    
    indices = []
    idx = -1
    
    counter = 0
    newline_character = char_to_ix["\n"]
    
    while(idx != newline_character and counter != 50):
        
        a = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, x) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)
        
        np.random.seed(seed + counter)
        
        idx = np.random.choice(list(range(vocab_size)), p = y.ravel())
        
        indices.append(idx)
        
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        
        a_prev = a
        
        seed = seed + 1 
        counter = counter + 1
        
    
    if(counter == 50):
        
        indices.append(newline_character)
        
    
    return indices

In [63]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    
    
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    gradients, a = rnn_backward(X, Y, parameters, cache)
    gradients = clip(gradients, 5)
    parameters = update_parameters(parameters, gradients, learning_rate)
    
    
    
    
    return loss, gradients, a[len(X)-1]

In [74]:
def model(data, ix_to_char, char_to_ix, num_iterations = 3500, n_a = 50, dino_names = 7, vocab_size = 27):
    
    n_x, n_y = vocab_size, vocab_size
    print("1、n_x = n_y :  " + str(vocab_size))
    
    parameters = initialize_parameters(n_a, n_x, n_y) # (50, 27, 27)   Waa: 50 x 50 Wax: 50 x 27 Wya: 27 x 50 by: 27 x 1 ba(b): 50 x 1
    loss = get_initial_loss(vocab_size, dino_names)
    
    with open("dinos.txt") as f:
        
        examples = f.readlines()
    
    examples = [x.lower().strip() for x in examples]    # 去掉空格
    
    print("################################")

    for i in range(10):
        
        print(examples[i])
        
    np.random.seed(0)
    np.random.shuffle(examples)
    
    print("################################")

    for i in range(10):
        
        print(examples[i])
    
    a_prev = np.zeros((n_a, 1))  # 50 x 1
    
    for j in range(num_iterations):
        
        index = j % len(examples)
        
        X = [None] + [char_to_ix[ch] for ch in examples[index]]
        Y = X[1:] + [char_to_ix["\n"]]
        
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters)
        loss = smooth(loss, curr_loss)
        
        if j % 2000 == 0:
            
            print("Iterations: %d ,Loss : %f" %(j, loss) + "\n")
            seed = 0
            
            for name in range(dino_names):
                
                sample_indices = sample(parameters, char_to_ix, seed)
                print_sample(sample_indices, ix_to_char)
                seed = seed + 1
            
            print("\n")
    
    return parameters

In [75]:
parameters = model(data, ix_to_char, char_to_ix)

1、n_x = n_y :  27
################################
aachenosaurus
aardonyx
abdallahsaurus
abelisaurus
abrictosaurus
abrosaurus
abydosaurus
acanthopholis
achelousaurus
acheroraptor
################################
turiasaurus
pandoravenator
ilokelesia
chubutisaurus
quaesitosaurus
orthomerus
selimanosaurus
thecocoelurus
postosuchus
lirainosaurus
Iterations: 0 ,Loss : 23.087336



ValueError: operands could not be broadcast together with shapes (50,1) (100,1) 