In [1]:
import pickle
import random
import numpy as np

import theano
import theano.tensor as T
import lasagne

from collections import Counter
from lasagne.utils import floatX

Using gpu device 0: GeForce GTX 770 (CNMeM is disabled)


In [2]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open('claims.txt') as corpus_file:
    corpus = corpus_file.read()

In [3]:
# Character-level vocabulary: the distinct characters that occur in the corpus.
# NOTE(review): the index tables are built by enumerating this set, so its
# iteration order decides which index each character gets. Presumably saved
# weights are only meaningful with the same order - confirm before changing it.
VOCABULARY = set(corpus)

In [4]:
VOCAB_SIZE = len(VOCABULARY)

# Index <-> character lookup tables. Both enumerate VOCABULARY without
# modifying it in between, so they are inverses of each other.
CHAR_TO_IX = {c: i for i, c in enumerate(VOCABULARY)}
IX_TO_CHAR = {i: c for i, c in enumerate(VOCABULARY)}

# One-hot row per character. The identity matrix is built once and its rows are
# shared, instead of allocating a fresh VOCAB_SIZE x VOCAB_SIZE matrix for
# every single character.
_ONEHOT_ROWS = np.eye(VOCAB_SIZE)
CHAR_TO_ONEHOT = {c: _ONEHOT_ROWS[i] for i, c in enumerate(VOCABULARY)}

In [23]:
# Number of characters (time steps) in each training window.
SEQUENCE_LENGTH = 50
# Number of windows processed together in one batch.
BATCH_SIZE = 50
# Number of units in each GRU layer.
RNN_HIDDEN_SIZE = 200

In [24]:
# Train on the first nine tenths of the corpus and hold out the remainder
# for validation.
_split_at = len(corpus) * 9 // 10
train_corpus, val_corpus = corpus[:_split_at], corpus[_split_at:]

In [104]:
def data_batch_generator(corpus, size=BATCH_SIZE):
    """Endlessly yield batches of overlapping text windows from ``corpus``.

    ``size`` random start offsets are drawn once. Every yielded batch is a
    numpy array with one window of SEQUENCE_LENGTH + 1 characters per offset.
    After each batch all offsets advance by SEQUENCE_LENGTH (wrapping around
    near the end of the corpus), so row ``i`` of consecutive batches continues
    the same stretch of text.
    """
    wrap_at = len(corpus) - SEQUENCE_LENGTH - 1
    window = SEQUENCE_LENGTH + 1
    offsets = np.random.randint(0, wrap_at, size=size)

    while True:
        yield np.array([corpus[begin:begin + window] for begin in offsets])
        offsets = (offsets + SEQUENCE_LENGTH) % wrap_at

In [26]:
def prep_batch_for_network(batch):
    """Turn a batch of text windows into network inputs and targets.

    Each item of ``batch`` must hold at least SEQUENCE_LENGTH + 1 characters.

    Returns:
        x_seq: float32 array of shape (len(batch), SEQUENCE_LENGTH, VOCAB_SIZE)
            with the one-hot encoding of characters 0 .. SEQUENCE_LENGTH - 1.
        y_seq: int32 array of shape (len(batch), SEQUENCE_LENGTH) with the
            vocabulary index of the character following each input position.
    """
    n_items = len(batch)
    x_seq = np.zeros((n_items, SEQUENCE_LENGTH, VOCAB_SIZE), dtype='float32')
    y_seq = np.zeros((n_items, SEQUENCE_LENGTH), dtype='int32')

    # Visit every (item, position) pair in row-major order.
    for row, pos in np.ndindex(n_items, SEQUENCE_LENGTH):
        text = batch[row]
        x_seq[row, pos] = CHAR_TO_ONEHOT[text[pos]]
        y_seq[row, pos] = CHAR_TO_IX[text[pos + 1]]

    return x_seq, y_seq

In [158]:
# Symbolic inputs: the one-hot input sequences, the integer target indices and
# the initial hidden state of each of the two GRU layers.
x_sym = T.tensor3()
y_sym = T.imatrix()
hid_init_sym = T.matrix()
hid2_init_sym = T.matrix()


# Input of shape (batch, SEQUENCE_LENGTH, VOCAB_SIZE); the batch size is left open.
l_input = lasagne.layers.InputLayer((None, SEQUENCE_LENGTH, VOCAB_SIZE))

# First GRU layer; its initial hidden state is supplied from outside through
# hid_init_sym rather than being fixed.
l_rnn = lasagne.layers.GRULayer(l_input,
                                  num_units=RNN_HIDDEN_SIZE,
                                  grad_clipping=5.,
                                  hid_init=hid_init_sym,
                                  )

# Second GRU layer stacked on the first, with its own external initial state.
l_rnn2 = lasagne.layers.GRULayer(l_rnn,
                                  num_units=RNN_HIDDEN_SIZE,
                                  grad_clipping=5.,
                                  hid_init=hid2_init_sym,
                                  )


# Collapse batch and time into one axis so the dense layer is applied to every
# time step independently.
l_shp = lasagne.layers.ReshapeLayer(l_rnn2, (-1, RNN_HIDDEN_SIZE))

# Softmax over the vocabulary for every time step.
l_decoder = lasagne.layers.DenseLayer(l_shp,
                                      num_units=VOCAB_SIZE,
                                      nonlinearity=lasagne.nonlinearities.softmax)

# Restore the (batch, SEQUENCE_LENGTH, VOCAB_SIZE) layout.
l_out = lasagne.layers.ReshapeLayer(l_decoder, (-1, SEQUENCE_LENGTH, VOCAB_SIZE))

In [159]:
# Symbolic outputs of both GRU layers and of the final reshape layer when the
# input layer is fed x_sym.
hid_out, hid2_out, prob_out = lasagne.layers.get_output([l_rnn, l_rnn2, l_out],
                                                        {l_input: x_sym})

# Keep only the last time step of each GRU output; the training loop passes
# these back in as the initial hidden states of the next batch.
hid_out = hid_out[:, -1]
hid2_out = hid2_out[:, -1]

In [160]:
def calc_cross_ent(net_output, targets):
    """Per-position categorical cross-entropy between predictions and targets.

    ``net_output`` is reshaped to rows of VOCAB_SIZE probabilities and
    ``targets`` is flattened to one index per row; the result holds one cost
    value per (sequence, time step) position.
    """
    return T.nnet.categorical_crossentropy(
        T.reshape(net_output, (-1, VOCAB_SIZE)),
        T.flatten(targets),
    )

# Scalar training objective: mean cross-entropy over all positions.
loss = T.mean(calc_cross_ent(prob_out, y_sym))

In [161]:
# Learning rate lives in a shared variable so it can be changed later with
# set_value() without recompiling the training function.
lr_sh = theano.shared(floatX(0.002))

# Upper bound on the combined gradient norm passed to total_norm_constraint.
MAX_GRAD_NORM = 15

all_params = lasagne.layers.get_all_params(l_out, trainable=True)

# Gradients are first clipped element-wise to [-5, 5], then rescaled together
# by total_norm_constraint; `norm` is the norm value it returns.
all_grads = T.grad(loss, all_params)
all_grads = [T.clip(g, -5, 5) for g in all_grads]
all_grads, norm = lasagne.updates.total_norm_constraint(
    all_grads, MAX_GRAD_NORM, return_norm=True)

updates = lasagne.updates.adam(all_grads, all_params, learning_rate=lr_sh)

# One training step: applies the Adam updates and returns the loss, the
# gradient norm and the last-step hidden state of each GRU layer.
f_train = theano.function([x_sym, y_sym, hid_init_sym, hid2_init_sym],
                          [loss, norm, hid_out, hid2_out],
                          updates=updates
                         )

# Same inputs, no parameter updates: returns the loss and both hidden states.
f_val = theano.function([x_sym, y_sym, hid_init_sym, hid2_init_sym], [loss, hid_out, hid2_out])

In [132]:
# Every sequence in the batch starts from an all-zero hidden state in both layers.
hid = np.zeros(shape=(BATCH_SIZE, RNN_HIDDEN_SIZE), dtype=np.float32)
hid2 = np.zeros(shape=(BATCH_SIZE, RNN_HIDDEN_SIZE), dtype=np.float32)

train_batch_gen = data_batch_generator(train_corpus)

for iteration in range(20000):
    x, y = prep_batch_for_network(next(train_batch_gen))
    # The returned hidden states are fed back in on the next iteration.
    loss_train, norm, hid, hid2 = f_train(x, y, hid, hid2)

    # Report progress only on every 250th iteration.
    if not iteration % 250:
        print('Iteration {}, loss_train: {}, norm: {}'.format(iteration, loss_train, norm))

Iteration 0, loss_train: 4.55749225616, norm: 0.760300338268
Iteration 250, loss_train: 1.66040706635, norm: 0.38279491663
Iteration 500, loss_train: 1.29986417294, norm: 0.293704122305
Iteration 750, loss_train: 1.25550210476, norm: 0.290590673685
Iteration 1000, loss_train: 1.13784217834, norm: 0.32186254859
Iteration 1250, loss_train: 1.19803094864, norm: 0.331361681223
Iteration 1500, loss_train: 1.05421900749, norm: 0.293191343546
Iteration 1750, loss_train: 1.02515113354, norm: 0.250425338745
Iteration 2000, loss_train: 0.989981472492, norm: 0.254016608
Iteration 2250, loss_train: 1.04801535606, norm: 0.27374368906
Iteration 2500, loss_train: 1.11283314228, norm: 0.29329881072
Iteration 2750, loss_train: 0.996952950954, norm: 0.26028880477
Iteration 3000, loss_train: 1.01600551605, norm: 0.25490385294
Iteration 3250, loss_train: 1.07221901417, norm: 0.287651866674
Iteration 3500, loss_train: 1.02552270889, norm: 0.286253511906
Iteration 3750, loss_train: 0.975072741508, norm: 0.2

In [164]:
# Snapshot the network weights together with the vocabulary tables that define
# what each input/output index means.
param_values = lasagne.layers.get_all_param_values(l_out)
d = {'param values': param_values,
     'VOCABULARY': VOCABULARY,
     'CHAR_TO_IX': CHAR_TO_IX,
     'IX_TO_CHAR': IX_TO_CHAR,
    }
# HIGHEST_PROTOCOL is a binary format, so the file must be opened in 'wb'
# mode. The context manager flushes and closes the file once the dump is done.
with open('gru_2layer_trained.pkl', 'wb') as checkpoint_file:
    pickle.dump(d, checkpoint_file, protocol=pickle.HIGHEST_PROTOCOL)

In [580]:
# NOTE(review): this reads 'lstm_trained.pkl' while the save cell writes
# 'gru_2layer_trained.pkl' - confirm which checkpoint is intended here.
# pickle.load can execute arbitrary code: only load files you created yourself.
# Pickles are binary, so open in 'rb' mode and close the handle afterwards.
with open('lstm_trained.pkl', 'rb') as checkpoint_file:
    d = pickle.load(checkpoint_file)

In [162]:
# Overwrite the network's parameters with the values stored in the checkpoint dict.
lasagne.layers.set_all_param_values(l_out, d['param values'])

In [163]:
# Continue training with the learning rate lowered to 0.0002.
lr_sh.set_value(0.0002)

# Restart from all-zero hidden states and a freshly drawn set of text offsets.
hid = np.zeros(shape=(BATCH_SIZE, RNN_HIDDEN_SIZE), dtype=np.float32)
hid2 = np.zeros(shape=(BATCH_SIZE, RNN_HIDDEN_SIZE), dtype=np.float32)

train_batch_gen = data_batch_generator(train_corpus)

for iteration in range(20000):
    x, y = prep_batch_for_network(next(train_batch_gen))
    # Hidden states are carried over from one batch to the next.
    loss_train, norm, hid, hid2 = f_train(x, y, hid, hid2)

    # Report progress only on every 250th iteration.
    if not iteration % 250:
        print('Iteration {}, loss_train: {}, norm: {}'.format(iteration, loss_train, norm))

Iteration 0, loss_train: 1.14895617962, norm: 0.31606566906
Iteration 250, loss_train: 0.866210460663, norm: 0.241797029972
Iteration 500, loss_train: 0.915615439415, norm: 0.24722892046
Iteration 750, loss_train: 0.904083192348, norm: 0.244569957256
Iteration 1000, loss_train: 0.920575082302, norm: 0.240894839168
Iteration 1250, loss_train: 0.873084366322, norm: 0.215926349163
Iteration 1500, loss_train: 0.942292571068, norm: 0.252579718828
Iteration 1750, loss_train: 0.850891292095, norm: 0.222936615348
Iteration 2000, loss_train: 0.932299792767, norm: 0.247698649764
Iteration 2250, loss_train: 0.837965250015, norm: 0.21953189373
Iteration 2500, loss_train: 0.86218047142, norm: 0.247443512082
Iteration 2750, loss_train: 0.897492289543, norm: 0.243839144707
Iteration 3000, loss_train: 0.859479665756, norm: 0.215487509966
Iteration 3250, loss_train: 0.836775183678, norm: 0.224701076746
Iteration 3500, loss_train: 0.861147880554, norm: 0.225737631321
Iteration 3750, loss_train: 0.908664

KeyboardInterrupt: 

In [38]:
# Forward pass without targets: takes the input and both initial hidden states,
# returns the character probabilities and the last-step state of each GRU layer.
predict_fn = theano.function([x_sym, hid_init_sym, hid2_init_sym], [prob_out, hid_out, hid2_out])

In [39]:
# Measure the loss on held-out text, carrying the hidden state from one batch
# to the next in the same way the training loop does.
hid = np.zeros((BATCH_SIZE, RNN_HIDDEN_SIZE), dtype='float32')
hid2 = np.zeros((BATCH_SIZE, RNN_HIDDEN_SIZE), dtype='float32')
losses = []

# The previous version called get_data_batch(), which is not defined anywhere
# in this notebook; draw validation batches from the same generator the
# training loop uses so the cell runs on a fresh kernel.
val_batch_gen = data_batch_generator(val_corpus)

for iteration in range(50):
    x, y = prep_batch_for_network(next(val_batch_gen))
    loss_val, hid, hid2 = f_val(x, y, hid, hid2)

    losses.append(loss_val)

In [40]:
# Average of the per-batch validation losses collected above.
np.mean(losses)

1.0524255

In [677]:
sentence = ''

# Sliding window holding the most recent SEQUENCE_LENGTH one-hot characters;
# it starts out empty (all zeros).
x0 = np.zeros((1, SEQUENCE_LENGTH, VOCAB_SIZE), dtype='float32')
# Seed both GRU layers with the first sequence's state left in hid / hid2.
# (Use np.zeros((1, RNN_HIDDEN_SIZE), dtype='float32') for a cold start.)
hid0 = np.copy(hid)[:1]
hid2_0 = np.copy(hid2)[:1]
for _step in range(500):
    # predict_fn takes both layers' initial states and returns three outputs;
    # only the probabilities are needed because the context is carried by the
    # sliding window x0, not by the returned hidden states.
    p = predict_fn(x0, hid0, hid2_0)[0]
    p = p[0, -1]
    # Renormalise to sum slightly below 1: np.random.multinomial rejects
    # probability vectors whose sum exceeds 1 through float32 rounding.
    p = p/(p.sum() + 1e-6)
    s = np.random.multinomial(1, p)
    sentence += IX_TO_CHAR[s.argmax(-1)]
    # Shift the window left by one step and append the sampled character.
    x0[:,:-1] = x0[:,1:]
    x0[:,-1,:] = s
    # A newline terminates the generated text.
    if sentence[-1] == '\n':
        break


In [678]:
# Display the sampled text.
sentence

'mpanilic assembly is an oxide state; amine cramped to generate units comprising a wex carbarr. \n'

In [439]:
sentence = 'A claim'

# One-hot rows for the primer text, limited to what fits in the input window.
primer = np.array([CHAR_TO_ONEHOT[c] for c in sentence[-SEQUENCE_LENGTH:]])

x0 = np.zeros((1, SEQUENCE_LENGTH, VOCAB_SIZE), dtype='float32')
# Right-align the primer in the window; its length is taken from the primer
# itself rather than hard-coded, so changing `sentence` keeps working.
if len(primer):
    x0[:, -len(primer):] = primer

# Initial states for both GRU layers, defined here so the cell does not depend
# on variables left behind by another sampling cell.
hid0 = np.copy(hid)[:1]
hid2_0 = np.copy(hid2)[:1]
for _step in range(500):
    # predict_fn takes both layers' initial states and returns three outputs;
    # only the probabilities are used, the context lives in the window x0.
    p = predict_fn(x0, hid0, hid2_0)[0]
    p = p[0, -1]
    # Renormalise to sum slightly below 1: np.random.multinomial rejects
    # probability vectors whose sum exceeds 1 through float32 rounding.
    p = p/(p.sum() + 1e-6)
    s = np.random.multinomial(1, p)
    sentence += IX_TO_CHAR[s.argmax(-1)]
    # Shift the window left by one step and append the sampled character.
    x0[:,:-1] = x0[:,1:]
    x0[:,-1,:] = s
    # A newline terminates the generated text.
    if sentence[-1] == '\n':
        break

In [440]:
# Display the primer followed by the sampled continuation.
sentence

"A claima', wherein the resentical wolk vatues on the interument instruction of said stalle diachssester into a varvable metal froquee in surface and the bond and their each orkering pradered. \n"

In [165]:
# Rebuild the same layer stack with a sequence length of 1, so text can be
# generated one character per call while the hidden state is passed explicitly.
# These are newly constructed layers; the trained values are loaded separately.
l_input = lasagne.layers.InputLayer((None, 1, VOCAB_SIZE))

# First GRU layer, initial state supplied through hid_init_sym.
l_rnn = lasagne.layers.GRULayer(l_input,
                                  num_units=RNN_HIDDEN_SIZE,
                                  grad_clipping=5.,
                                  hid_init=hid_init_sym,
                                  )

# Second GRU layer, initial state supplied through hid2_init_sym.
l_rnn2 = lasagne.layers.GRULayer(l_rnn,
                                  num_units=RNN_HIDDEN_SIZE,
                                  grad_clipping=5.,
                                  hid_init=hid2_init_sym,
                                  )


# Collapse batch and time so the dense layer sees one row per time step.
l_shp = lasagne.layers.ReshapeLayer(l_rnn2, (-1, RNN_HIDDEN_SIZE))

# Softmax over the vocabulary.
l_decoder = lasagne.layers.DenseLayer(l_shp,
                                      num_units=VOCAB_SIZE,
                                      nonlinearity=lasagne.nonlinearities.softmax)

# Back to (batch, 1, VOCAB_SIZE).
l_out = lasagne.layers.ReshapeLayer(l_decoder, (-1, 1, VOCAB_SIZE))

# Symbolic outputs of the rebuilt network; these rebind hid_out, hid2_out and
# prob_out, which previously referred to the SEQUENCE_LENGTH-step network.
hid_out, hid2_out, prob_out = lasagne.layers.get_output([l_rnn, l_rnn2, l_out], {
                        l_input: x_sym,
                    })
# Last (and only) time step of each GRU output, used as the next initial state.
hid_out = hid_out[:, -1]
hid2_out = hid2_out[:, -1]

In [166]:
# Copy the checkpointed parameter values into the rebuilt one-step network.
lasagne.layers.set_all_param_values(l_out, d['param values'])

In [167]:
# Recompile the prediction function against the one-step network: one input
# character plus both hidden states in, probabilities plus both new states out.
predict_fn = theano.function([x_sym, hid_init_sym, hid2_init_sym], [prob_out, hid_out, hid2_out])

In [168]:
sentence = ''
hid = np.zeros((1, RNN_HIDDEN_SIZE), dtype='float32')
hid2 = np.zeros((1, RNN_HIDDEN_SIZE), dtype='float32')
x = np.zeros((1, 1, VOCAB_SIZE), dtype='float32')

# Text used to warm up the hidden state before sampling; '' means no warm-up.
# Other primers that can be tried here:
#   'An apparatus according to claim 1, characterized in that\n'
#   '\n' * SEQUENCE_LENGTH
#   'n-hydroxy'
#   'octane'
primer = ''

# With an empty primer the first input is a uniform distribution over characters.
x[:, :, :] = 1./VOCAB_SIZE

# Feed every primer character except the last one. The last character is only
# placed in x, because the sampling loop below starts by feeding x; running it
# through the network here as well would consume it twice.
for c in primer[:-1]:
    x[0, 0, :] = CHAR_TO_ONEHOT[c]
    p, hid, hid2 = predict_fn(x, hid, hid2)
if primer:
    x[0, 0, :] = CHAR_TO_ONEHOT[primer[-1]]

for _ in range(500):
    p, hid, hid2 = predict_fn(x, hid, hid2)
    p = p[0, 0]
    # Renormalise to sum slightly below 1: np.random.multinomial rejects
    # probability vectors whose sum exceeds 1 through float32 rounding.
    p = p/(p.sum() + 1e-6)
    s = np.random.multinomial(1, p)
    sentence += IX_TO_CHAR[s.argmax(-1)]
    # The sampled character becomes the next input.
    x[0, 0, :] = s
    # A newline terminates the generated text.
    if sentence[-1] == '\n':
        break

In [169]:
# Show the generated text.
print(sentence)

5 substituting the natural light eesition using the signal is smaller than, on the end of the block display. 



In [170]:
# Candidate primers: the validation text split on newline characters.
# NOTE(review): presumably one claim per line; the first and last pieces may be
# partial or empty because the split point is arbitrary - confirm if it matters.
primers = val_corpus.split('\n')

In [190]:
sentence = ''
hid = np.zeros((1, RNN_HIDDEN_SIZE), dtype='float32')
hid2 = np.zeros((1, RNN_HIDDEN_SIZE), dtype='float32')
x = np.zeros((1, 1, VOCAB_SIZE), dtype='float32')

# Pick one validation line at random and terminate it with a newline so the
# network continues with a new line of text.
primer = np.random.choice(primers) + '\n'

# Warm up the hidden state on the primer. Each character is written into x
# before the network is run, so the network never sees the all-zero x as an
# input. The last character (the newline) is only placed in x: the sampling
# loop below feeds it as its first step.
for c in primer[:-1]:
    x[0, 0, :] = CHAR_TO_ONEHOT[c]
    p, hid, hid2 = predict_fn(x, hid, hid2)
x[0, 0, :] = CHAR_TO_ONEHOT[primer[-1]]

for _ in range(500):
    p, hid, hid2 = predict_fn(x, hid, hid2)
    p = p[0, 0]
    # Renormalise to sum slightly below 1: np.random.multinomial rejects
    # probability vectors whose sum exceeds 1 through float32 rounding.
    p = p/(p.sum() + 1e-6)
    s = np.random.multinomial(1, p)
    sentence += IX_TO_CHAR[s.argmax(-1)]
    # The sampled character becomes the next input.
    x[0, 0, :] = s
    # A newline terminates the generated text.
    if sentence[-1] == '\n':
        break

print('PRIMER: ' + primer)
print('GENERATED: ' + sentence)

PRIMER: The intelligent imaging system of any of the preceding claims, further characterized in that said personality module (2114) is connected to said at least one print head, and wherein said personality module (2114) is connected to sense movement of said moving web and synchronizes the output of raster data from said staging memory to said at least one print head with movement of said web (72). 

GENERATED: A printhead capture according to any one of the preceding claims, characterized in that said incliner cabin (124) is outside the mold area c) that prevent that the hole pin is no more preventing second resistance of the tne form and with "(A). 

