In [1]:
# As usual, a bit of setup

import time, os, json
import numpy as np
import matplotlib.pyplot as plt

from metropolis_hastings import *
from deshuffling_utils import *
import sys  

from nn.rnn_layers import *
from nn.char_nn import *
from nn.optim import *
from nn.sequence_classifier import *
from nn.gradient_check import *

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
# Default image rendering: no interpolation between pixels, grayscale colormap.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# Mode 2 reloads all imported modules before each cell is executed, so edits
# to the local helper modules imported above are picked up without a restart.
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each absolute difference |x - y| is divided by |x| + |y|; the divisor is
  floored at 1e-8 so that a pair of zeros does not cause a division by zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)



In [12]:
def train_model(X_train, X_test, char_to_ix, vocab_size, reg, iters=1000, print_every=50, batch_size=50):
    """Train a CharNN with Adam on random mini-batches and plot the loss curves.

    Parameters
    ----------
    X_train, X_test : 2-D arrays
        One sequence per row. Rows are sampled with replacement to form each
        mini-batch (X_train) and each evaluation batch (X_test).
    char_to_ix : dict
        Character-to-index mapping handed to the CharNN constructor.
    vocab_size : int
        Accepted for interface compatibility; not used by this function.
    reg : float
        Accepted for interface compatibility; not used by this function.
    iters : int
        Number of parameter updates to perform.
    print_every : int
        Every ``print_every`` iterations the loss on a random test batch is
        computed, printed together with the training loss, and recorded.
    batch_size : int
        Number of rows drawn for every training / evaluation batch.

    Returns
    -------
    The trained CharNN model.

    Raises
    ------
    ValueError
        If ``X_train`` has no rows.
    """
    hidden_dims = 100
    N_train = X_train.shape[0]
    N_test = X_test.shape[0]
    if N_train == 0:
        raise ValueError('X_train is empty; nothing to train on')

    model = CharNN(char_to_ix, hidden_dims)

    # Per-parameter optimizer state; created lazily the first time a gradient
    # for that parameter is seen, then replaced by whatever adam() returns.
    config = {}

    loss_trains = []
    loss_tests = []

    for i in range(iters):
        batch_indices = np.random.choice(N_train, batch_size)
        batch_features = X_train[batch_indices, :]

        # NOTE(review): the saved notebook output ends with
        # "ValueError: too many values to unpack" and no traceback. If it is
        # raised here (or at the adam() call below), the unpacking arity no
        # longer matches what model.loss / adam return -- confirm against nn/.
        loss, grads, _ = model.loss(batch_features)

        # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
        for name, value in grads.items():
            if name not in config:
                config[name] = {'learning_rate': 1e-3}

            model.params[name], config[name] = adam(model.params[name], value, config[name])

        if i % print_every == 0:
            if N_test > 0:
                indices = np.random.choice(N_test, batch_size)
                loss_test, _, _ = model.loss(X_test[indices, :])
            else:
                # No held-out rows to sample from: log NaN rather than crash
                # inside np.random.choice.
                loss_test = float('nan')

            print("iter %d, loss_train %f, loss_test %f" % (i, loss, loss_test))
            loss_trains.append(loss)
            loss_tests.append(loss_test)

    # One point per logging step, i.e. every `print_every` iterations.
    plt.plot(loss_trains, label='train')
    plt.plot(loss_tests, label='test')
    plt.xlabel('logging step (every %d iterations)' % print_every)
    plt.ylabel('loss')
    plt.legend()

    plt.show()

    return model


In [13]:
def generate_data(data, seq_len=50):
    """Split a text into shuffled, integer-encoded, fixed-length sequences.

    The text is cut into ``len(data) // seq_len`` consecutive, non-overlapping
    chunks of ``seq_len`` characters (a trailing partial chunk is dropped).
    Every character is replaced by its vocabulary index, the chunks are
    shuffled with NumPy's global random state, and the first 70% of them
    become the training set.

    Parameters
    ----------
    data : str
        Raw text to encode.
    seq_len : int, optional
        Number of characters per sequence; must be at least 1.

    Returns
    -------
    X_train : int ndarray, shape (n_train, seq_len)
    X_test : int ndarray, shape (n_test, seq_len)
    char_to_ix : dict mapping character -> index
    ix_to_char : dict mapping index -> character
    vocab_size : int, number of distinct characters in ``data``

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError('seq_len must be at least 1, got %r' % (seq_len,))

    # Sort so the vocabulary indices are the same on every run; iteration
    # order of a bare set is not guaranteed to be stable.
    chars = sorted(set(data))
    data_size, vocab_size = len(data), len(chars)
    print('data has %d characters, %d unique.' % (data_size, vocab_size))
    char_to_ix = {ch: i for i, ch in enumerate(chars)}
    ix_to_char = {i: ch for i, ch in enumerate(chars)}

    # Floor division keeps this an integer on Python 3 as well.
    num_seqs = data_size // seq_len

    # Encode only the characters that fill whole chunks, then view them as one
    # chunk per row: row k holds data[k*seq_len:(k+1)*seq_len]. Indexing by
    # chunk rather than by character offset keeps the sequences disjoint, so
    # the train and test splits do not share text.
    encoded = np.array([char_to_ix[ch] for ch in data[:num_seqs * seq_len]],
                       dtype='int')
    chunks = encoded.reshape(num_seqs, seq_len)

    # Shuffle the rows before splitting.
    matrix = chunks[np.random.permutation(num_seqs)]

    # 70 / 30 train / test split.
    split = (num_seqs * 7) // 10
    X_train = matrix[:split, :]
    X_test = matrix[split:, :]

    return X_train, X_test, char_to_ix, ix_to_char, vocab_size

# Sanity check on a tiny slice of the corpus: the model should overfit it.
# The context manager closes the file handle as soon as the text is read.
with open('data/warpeace_input.txt', 'r') as corpus_file:
    data = corpus_file.read()
# NOTE(review): the slice starts at index 1, so the very first character is
# skipped and 499 characters are kept -- confirm this is intended rather than
# data[:500].
data = data[1:500]

X_train, X_test, char_to_ix, ix_to_char, vocab_size = generate_data(data)

model = train_model(X_train, X_test, char_to_ix, vocab_size, 0.0, iters=5, print_every=5, batch_size=50)


data has 499 characters, 45 unique.


ValueError: too many values to unpack