# RNN text generation

In [1]:
#### some helpful imports ####
# import autograd functionality
import autograd.numpy as np

# import testing libraries
import sys
sys.path.append('../')
from mlrefined_libraries import system_identification_lib as syslib
from mlrefined_libraries import text_generation_lib as textlib

# import dataset path
datapath = '../datasets/'

# import various other libraries e.g., for plotting, deep copying
import copy
import matplotlib.pyplot as plt

# this is needed to compensate for %matplotl+ib notebook's tendancy to blow up images when plotted inline
from matplotlib import rcParams
rcParams['figure.autolayout'] = True
%matplotlib notebook

# autoreload function - so if anything behind the scenes is changeed those changes
# are reflected in the notebook without having to restart the kernel
%load_ext autoreload
%autoreload 2

# load in text, preprocess, and parse into characters

Load in the raw characters of the text, and preprocess (remove jargon characters, lower-case all, etc.,).

In [2]:
# load in and preprocess text
csvname = datapath + "war_of_the_worlds.txt"
demo = textlib.text_utils_experimental.Text()
demo.load_preprocess(csvname)

Convert characters to keys, and create functions to go back and forth between keys and characters.

In [3]:
# parse into individual characters
demo.parse_chars()

Window text with given order, and convert window-d input to one-hot encoded vectors.

In [24]:
order = 5
x_onehot,y,y_onehot = demo.encode_io_pairs_fixed(demo.keys,order)

# take subsample of data

In [25]:
num_samples = 10000
x_samples = x_onehot[:,:num_samples]
y_samples = y_onehot[:,:num_samples]
y_orig = y[:,:num_samples]

# Build a higher order model

In [26]:
# an implementation of the least squares cost function for linear regression
def least_squares(w,x,y):
    # compute cost over batch
    cost = np.sum((model(x,w) - y)**2)
    return cost/float(np.size(y))

In [27]:
def f_h(x_t_prev,h_t_prev,w):   
    h_t =  w[0] + np.dot(w[1],h_t_prev) + np.dot(w[2],x_t_prev)
    return h_t

def f_y(h_t,w):
    y_hat_t = w[0] + np.dot(w[1].T,h_t)
    return y_hat_t

# exponential average function
def model(x,w):
    # set initial conditions of h 
    h_t = np.zeros((num_chars,1))
    y_hat = [v[:,np.newaxis] for v in y_samples[:,:order].T]

    # range over x and create h
    for t in range(np.shape(x)[1] - order):
        # update state, get previous point 
        x_t_prev = x[:,t:t+1]

        # update hidden state
        h_t = f_h(x_t_prev,h_t,w[0])
        
        # update y approximate
        y_hat_t = f_y(h_t,w[1])
        y_hat.append(y_hat_t)

    # array-afy the hidden sequence
    y_hat = np.array(y_hat)[:,:,0]
    return y_hat.T

# multiclass softmax
def multiclass_softmax(w,x,y):        
    # pre-compute predictions on all points
    all_evals = model(x,w)

    # compute softmax across data points
    a = np.log(np.sum(np.exp(all_evals),axis = 0)) 

    # compute cost in compact form using numpy broadcasting
    b = all_evals[y.astype(int).flatten(),np.arange(np.size(y))]
    cost = np.sum(a - b)

    # return average
    return cost/float(np.size(y))

In [28]:
scale = 0.01
num_chars = int(x_onehot.shape[0]/float(order))
whh = scale*np.random.randn(num_chars,num_chars)
whx = scale*np.random.randn(num_chars,x_onehot.shape[0])
whb = scale*np.random.randn(num_chars,1)
wh = [whb,whh,whx]

wyh = scale*np.random.randn(num_chars,num_chars)
wyb = scale*np.random.randn(num_chars,1)
wy = [wyb,wyh]
w = [wh,wy]

In [29]:
blah = multiclass_softmax(w,x_samples,y_orig)

In [31]:
# This code cell will not be shown in the HTML version of this notebook
# initialize with input/output data
mylib1 = textlib.rnn_lib.super_setup.Setup(x_samples,y_orig)

# perform preprocessing step(s) - especially input normalization
#mylib1.preprocessing_steps(normalizer = 'standard')

# split into training and validation sets
mylib1.make_train_val_split(train_portion = 1)

# choose cost
mylib1.choose_cost(name = 'multiclass_softmax',model = model)

# fit an optimization
# mylib1.fit(max_its = 1,alpha_choice = 'diminishing',optimizer = 'zero_order',w_init = w,verbose = True)
mylib1.fit(max_its = 10,alpha_choice = 10**(-1),optimizer = 'gradient_descent',w_init = w,verbose = True)

step 2 done in 11.0 secs, train cost = 3.6286
step 3 done in 10.9 secs, train cost = 3.6235
step 4 done in 10.8 secs, train cost = 3.6186


KeyboardInterrupt: 

In [None]:
# show cost function history
mylib1.show_histories()

In [15]:
np.size(w)

2

In [17]:
w.flatten()

AttributeError: 'list' object has no attribute 'flatten'