In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import theano
import nnb

In [3]:
from nltk.tokenize import word_tokenize
from __future__ import print_function
import codecs

In [4]:
from chios import feats_glove

glove = feats_glove._get_glove_dict('data/glove.6B.50d.txt')

In [5]:
import csv
import sys

def glove_bow(sent, ndim=2):
    tokens = word_tokenize(sent)
    gtokens = [glove[t] for t in tokens if t in glove]
    if not gtokens:
        return np.zeros((1, 50)) if ndim == 2 else np.zeros(50)
    gtokens = np.array(gtokens)
    if ndim == 2:
        return gtokens
    else:
        return gtokens.mean(axis=0)

def load_sents(fname, ndim=2):
    labels = []
    sent0 = []
    sent1 = []
    sent2 = []
    i = 0
    print('', file=sys.stderr)
    with open(fname) as f:
        c = csv.DictReader(f)
        for l in c:
            labels.append(int(l['isCorrect']))
            sent0.append(l['qText'].decode('utf8'))
            sent1.append(l['aText'].decode('utf8'))
            sent2.append(l['abstract'].decode('utf8'))
            print('%d\r', i, end='', file=sys.stderr)
            i += 1
    print('', file=sys.stderr)
    return (np.array([glove_bow(s, ndim) for s in sent0]),
            np.array([glove_bow(s, ndim) for s in sent1]),
            np.array([glove_bow(s, ndim) for s in sent2]),
            np.array(labels, dtype='int32'))

In [6]:
s0, s1, s2, y = load_sents('solraoccur-ck12.csv')
s0a, s1a, s2a, y = load_sents('solraoccur-ck12.csv', ndim=1)


 4825

 4825


In [6]:
np.size(y[y==1]), np.size(y)

(1261, 4826)

In [276]:
s0[0][:,0], s0a[0]

(array([ 0.51564,  0.19855,  0.21705,  0.09245,  0.14504,  0.33042,
         0.21705,  0.11008, -0.14578]),
 array([ 0.18672222,  0.25739411, -0.50672167, -0.13433971,  0.57635244,
         0.41791667, -0.38240133, -0.31375211, -0.00928411,  0.05760128,
        -0.05973359,  0.17729222, -0.08681472,  0.00613817,  0.43688222,
        -0.10499633, -0.04779122,  0.21209866, -0.4491907 , -0.25576667,
        -0.12617167,  0.24664222,  0.08513444,  0.17559189,  0.31300111,
        -1.73918222, -0.66073689,  0.07377767,  0.45624778, -0.14804556,
         2.859     , -0.296494  , -0.36126567, -0.46518333, -0.01673733,
         0.03278956, -0.02011344,  0.060729  ,  0.17624778, -0.13548533,
        -0.05123767,  0.12163253, -0.185221  ,  0.43272011,  0.29413433,
        -0.11844867,  0.13815767, -0.12095689,  0.23342541,  0.43198778]))

In [7]:
def balance_dataset(tdata, y):
    tdata = list(tdata)
    class1 = np.where(y==1)[0]
    n_imbal = np.sum(y==-1) - np.sum(y==1)
    for i in np.random.choice(class1, size=n_imbal):
        tdata.append(tdata[i])
    return tdata

def try_model(model, s0, s1, s2, y, val_on_train=False, lrate=0.01, L2_reg=0.0):
    print('Preprocessing...')
    y = y*2-1  # classify as -1/+1 for tanh activation    
    
    n = s0.shape[0]
    tdata = [[s0[i], s1[i], s2[i], np.array([y[i]], dtype='int32')] for i in range(n)]
    n_train = int(n*0.8) if not val_on_train else 0    
    if n_train > 0:
        tdata_train = balance_dataset(tdata[:n_train], y[:n_train])
        tdata_val = balance_dataset(tdata[n_train:], y[n_train:])
    else:
        n_train = n
        tdata_train = balance_dataset(tdata, y)
        tdata_val = balance_dataset(tdata, y)
    
    print('Cost function...')
    expected_output = nnb.InputLayer(ndim=1, dtype='int32')
    cost_func = nnb.cost.MeanSquareError()
    network_cost = (model & expected_output) | cost_func
    
    print('Compiling...')
    trainer = nnb.train.SGDTrainer(model=network_cost, learning_rate=lrate, L2_reg=L2_reg)
    print('Training...')

    train_sup = nnb.train.TrainSupervisor(trainer=trainer, dataset=list(tdata_train), eval_dataset=list(tdata_val),
                                          batch_size=n_train//100 if n_train >= 100 else n_train,
                                          epochs_num=200, max_no_improve=40)
    train_sup.train()

    print('Checking...')
    ff = model.compile()
    for i in range(7):
        print(i, ff(tdata_train[i][0], tdata_train[i][1], tdata_train[i][2]), tdata_train[i][-1])
    def eval_dataset(ff, tdata):
        return np.array([ff(tdata[i][0], tdata[i][1], tdata[i][2])[0] for i in range(len(tdata))])
    def accuracy(tdata, yy):
        y = np.array([t[-1][0] for t in tdata])
        n_cor = np.sum(y[yy > 0]+1)/2 + np.sum(-y[yy < 0]+1)/2
        return n_cor / float(np.shape(yy)[0])
    print('Accuracy:',
          'train', accuracy(tdata_train, eval_dataset(ff, tdata_train)),
          'val', accuracy(tdata_val, eval_dataset(ff, tdata_val)),
          'val_base', accuracy(tdata_val, np.zeros(np.shape(tdata_val)[0])-1))

## Bag-of-words Models

In [7]:
# the simplest model that just tries to classify by linear combination of averages
L0 = nnb.InputLayer(ndim=1)
L1 = nnb.InputLayer(ndim=1)
L2 = nnb.InputLayer(ndim=1)
CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=50*3, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0a, s1a, s2a, y)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[--------- ] 
Finished. Took 0.00549978415171 minutes.
Evaluating...
Error = 0.501362623832
New best!
~Epoch 2~
[--------- ] 
Finished. Took 0.00539129972458 minutes.
Evaluating...
Error = 0.503881584326
~Epoch 3~
[--------- ] 
Finished. Took 0.0053907473882 minutes.
Evaluating...
Error = 0.497145278045
New best!
~Epoch 4~
[--------- ] 
Finished. Took 0.00527220169703 minutes.
Evaluating...
Error = 0.495405360828
New best!
~Epoch 5~
[--------- ] 
Finished. Took 0.00519735018412 minutes.
Evaluating...
Error = 0.494150887001
New best!
~Epoch 6~
[--------- ] 
Finished. Took 0.00517598390579 minutes.
Evaluating...
Error = 0.49417934948
~Epoch 7~
[--------- ] 
Finished. Took 0.00519626537959 minutes.
Evaluating...
Error = 0.494296162773
~Epoch 8~
[--------- ] 
Finished. Took 0.00520528554916 minutes.
Evaluating...
Error = 0.495565837841
~Epoch 9~
[--------- ] 
Finished. Took 0.0051739970843 minutes.
Evaluating...
Error = 0

In [568]:
# only slightly more complex model that first merges l0, l1, then tries to learn a linear combination of that with l2
L0 = nnb.InputLayer(ndim=1)
L1 = nnb.InputLayer(ndim=1)
C01 = nnb.ConcatenationModel()
C1 = nnb.PerceptronLayer(insize=2*50, outsize=50)
qajoin = (L0 & L1) | C01 | C1
L2 = nnb.InputLayer(ndim=1)
CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=2*50, outsize=1, activation_func=nnb.activation.tanh)
model = (qajoin & L2) | CC | C2
try_model(model, s0a, s1a, s2a, y)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[--------- ] 
Finished. Took 0.016385948658 minutes.
Evaluating...
Error = 0.50049292534
New best!
~Epoch 2~
[--------- ] 
Finished. Took 0.0174004832904 minutes.
Evaluating...
Error = 0.50610278228
~Epoch 3~
[--------- ] 
Finished. Took 0.0171181996663 minutes.
Evaluating...
Error = 0.506186629279
~Epoch 4~
[--------- ] 
Finished. Took 0.0171550989151 minutes.
Evaluating...
Error = 0.50113928957
~Epoch 5~
[--------- ] 
Finished. Took 0.016674331824 minutes.
Evaluating...
Error = 0.500789331228
~Epoch 6~
[--------- ] 
Finished. Took 0.0174194494883 minutes.
Evaluating...
Error = 0.501884327586
~Epoch 7~
[--------- ] 
Finished. Took 0.0167300343513 minutes.
Evaluating...
Error = 0.50029072991
New best!
~Epoch 8~
[--------- ] 
Finished. Took 0.0172538638115 minutes.
Evaluating...
Error = 0.501607817685
~Epoch 9~
[--------- ] 
Finished. Took 0.0180797855059 minutes.
Evaluating...
Error = 0.49996798107
New best!
~Epoch 10

In [577]:
# extension of the above with extra hidden layer
L0 = nnb.InputLayer(ndim=1)
L1 = nnb.InputLayer(ndim=1)
C01 = nnb.ConcatenationModel()
C1a = nnb.PerceptronLayer(insize=2*50, outsize=50)
C1b = nnb.PerceptronLayer(insize=50, outsize=25, activation_func=nnb.activation.tanh)
qajoin = (L0 & L1) | C01 | C1a | C1b
L2 = nnb.InputLayer(ndim=1)
CC = nnb.ConcatenationModel()
C2a = nnb.PerceptronLayer(insize=50+25, outsize=25)
C2b = nnb.PerceptronLayer(insize=25, outsize=1, activation_func=nnb.activation.tanh)
model = (qajoin & L2) | CC | C2a | C2b
try_model(model, s0a, s1a, s2a, y)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[--------- ] 
Finished. Took 0.0217814326286 minutes.
Evaluating...
Error = 0.500357608689
New best!
~Epoch 2~
[--------- ] 
Finished. Took 0.0217702666918 minutes.
Evaluating...
Error = 0.500007342751
New best!
~Epoch 3~
[--------- ] 
Finished. Took 0.021881600221 minutes.
Evaluating...
Error = 0.500145769065
~Epoch 4~
[--------- ] 
Finished. Took 0.0218525330226 minutes.
Evaluating...
Error = 0.500343043051
~Epoch 5~
[--------- ] 
Finished. Took 0.0218734184901 minutes.
Evaluating...
Error = 0.500018127542
~Epoch 6~
[--------- ] 
Finished. Took 0.0218481183052 minutes.
Evaluating...
Error = 0.500029008664
~Epoch 7~
[--------- ] 
Finished. Took 0.0223319689433 minutes.
Evaluating...
Error = 0.501230559823
~Epoch 8~
[--------- ] 
Finished. Took 0.0218985676765 minutes.
Evaluating...
Error = 0.500803193592
~Epoch 9~
[--------- ] 
Finished. Took 0.0218106150627 minutes.
Evaluating...
Error = 0.500096080358
~Epoch 10~
[-

## RNN Models

In [8]:
# a more complex model that does away with the averaging of words in sentences, instead using an RNN:
# [:200] subset for fast sanity check; without intermediate perceptrons
# FIRST SUCCESS-LOOKING STUFF!!!

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200])

  from scan_perform.scan_perform import *


Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0628002643585 minutes.
Evaluating...
Error = 0.579013365775
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0613608002663 minutes.
Evaluating...
Error = 0.526072313757
New best!
~Epoch 3~
[----------] 
Finished. Took 0.0614840467771 minutes.
Evaluating...
Error = 0.599939170187
~Epoch 4~
[----------] 
Finished. Took 0.0616254846255 minutes.
Evaluating...
Error = 0.538122519286
~Epoch 5~
[----------] 
Finished. Took 0.0624274810155 minutes.
Evaluating...
Error = 0.520889916575
New best!
~Epoch 6~
[----------] 
Finished. Took 0.0617220362027 minutes.
Evaluating...
Error = 0.508043564146
New best!
~Epoch 7~
[----------] 
Finished. Took 0.0608848492304 minutes.
Evaluating...
Error = 0.546115475313
~Epoch 8~
[----------] 
Finished. Took 0.0606798330943 minutes.
Evaluating...
Error = 0.49982176631
New best!
~Epoch 9~
[----------] 
Finished. Took 0.0618589520454 minutes.
Evaluating...
Error 

In [9]:
# cautious regularization even better

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1.0/(3*50+10))

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0582627534866 minutes.
Evaluating...
Error = 0.508872767739
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0580453356107 minutes.
Evaluating...
Error = 0.503538897169
New best!
~Epoch 3~
[----------] 
Finished. Took 0.0566072503726 minutes.
Evaluating...
Error = 0.50161568079
New best!
~Epoch 4~
[----------] 
Finished. Took 0.0579905351003 minutes.
Evaluating...
Error = 0.500734741892
New best!
~Epoch 5~
[----------] 
Finished. Took 0.0575432817141 minutes.
Evaluating...
Error = 0.509149692829
~Epoch 6~
[----------] 
Finished. Took 0.0577412327131 minutes.
Evaluating...
Error = 0.537933883044
~Epoch 7~
[----------] 
Finished. Took 0.0573727806409 minutes.
Evaluating...
Error = 0.501164665474
~Epoch 8~
[----------] 
Finished. Took 0.0585675199827 minutes.
Evaluating...
Error = 0.501627615539
~Epoch 9~
[----------] 
Finished. Took 0.0581838488579 minutes.
Evaluating...
Error = 0.501858

In [10]:
# moar experiments; current baseline

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
c
CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0579431136449 minutes.
Evaluating...
Error = 0.500822869849
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0585313359896 minutes.
Evaluating...
Error = 0.51528822822
~Epoch 3~
[----------] 
Finished. Took 0.0586636503537 minutes.
Evaluating...
Error = 0.515161762627
~Epoch 4~
[----------] 
Finished. Took 0.0584617535273 minutes.
Evaluating...
Error = 0.499726592052
New best!
~Epoch 5~
[----------] 
Finished. Took 0.0576244672139 minutes.
Evaluating...
Error = 0.509845852022
~Epoch 6~
[----------] 
Finished. Took 0.0579910993576 minutes.
Evaluating...
Error = 0.49871410408
New best!
~Epoch 7~
[----------] 
Finished. Took 0.0583647966385 minutes.
Evaluating...
Error = 0.501097660389
~Epoch 8~
[----------] 
Finished. Took 0.0589242339134 minutes.
Evaluating...
Error = 0.502250514144
~Epoch 9~
[----------] 
Finished. Took 0.058595597744 minutes.
Evaluating...
Error = 0.503446394983
~Epoc

In [596]:
# moar experiments

# XXX: this is invalid way to specify activation_func, why did it have an effect?
L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, activation_func=nnb.activation.tanh)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, activation_func=nnb.activation.tanh)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, activation_func=nnb.activation.tanh)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0547526836395 minutes.
Evaluating...
Error = 0.508826981799
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0535140156746 minutes.
Evaluating...
Error = 0.563419099929
~Epoch 3~
[----------] 
Finished. Took 0.0528361479441 minutes.
Evaluating...
Error = 0.510709208079
~Epoch 4~
[----------] 
Finished. Took 0.0533079147339 minutes.
Evaluating...
Error = 0.500050257785
New best!
~Epoch 5~
[----------] 
Finished. Took 0.0530267477036 minutes.
Evaluating...
Error = 0.536795323265
~Epoch 6~
[----------] 
Finished. Took 0.053573067983 minutes.
Evaluating...
Error = 0.517057083816
~Epoch 7~
[----------] 
Finished. Took 0.0535600821177 minutes.
Evaluating...
Error = 0.555145365423
~Epoch 8~
[----------] 
Finished. Took 0.0534473180771 minutes.
Evaluating...
Error = 0.521086990062
~Epoch 9~
[----------] 
Finished. Took 0.0530782183011 minutes.
Evaluating...
Error = 0.50045284589
~Epoch 10~
[--

In [597]:
# moar experiments

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.058017885685 minutes.
Evaluating...
Error = 0.506859348519
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0582812468211 minutes.
Evaluating...
Error = 0.513755296363
~Epoch 3~
[----------] 
Finished. Took 0.0579884489377 minutes.
Evaluating...
Error = 0.519423497121
~Epoch 4~
[----------] 
Finished. Took 0.0583023667336 minutes.
Evaluating...
Error = 0.52423108911
~Epoch 5~
[----------] 
Finished. Took 0.0586972633998 minutes.
Evaluating...
Error = 0.502146940568
New best!
~Epoch 6~
[----------] 
Finished. Took 0.0581946333249 minutes.
Evaluating...
Error = 0.501618242121
New best!
~Epoch 7~
[----------] 
Finished. Took 0.058636935552 minutes.
Evaluating...
Error = 0.50427002724
~Epoch 8~
[----------] 
Finished. Took 0.0585767865181 minutes.
Evaluating...
Error = 0.500344790499
New best!
~Epoch 9~
[----------] 
Finished. Took 0.0595157027245 minutes.
Evaluating...
Error = 0.541830910

In [598]:
# moar experiments

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=5)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=5)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=5)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*5, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0552168647448 minutes.
Evaluating...
Error = 0.533162661501
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0548652648926 minutes.
Evaluating...
Error = 0.500166086692
New best!
~Epoch 3~
[----------] 
Finished. Took 0.0550167520841 minutes.
Evaluating...
Error = 0.500813871845
~Epoch 4~
[----------] 
Finished. Took 0.0550339698792 minutes.
Evaluating...
Error = 0.499317301639
New best!
~Epoch 5~
[----------] 
Finished. Took 0.0544922828674 minutes.
Evaluating...
Error = 0.499207956887
New best!
~Epoch 6~
[----------] 
Finished. Took 0.056290547053 minutes.
Evaluating...
Error = 0.49893973001
New best!
~Epoch 7~
[----------] 
Finished. Took 0.0557192325592 minutes.
Evaluating...
Error = 0.505432846863
~Epoch 8~
[----------] 
Finished. Took 0.0547106822332 minutes.
Evaluating...
Error = 0.529741650527
~Epoch 9~
[----------] 
Finished. Took 0.0554574847221 minutes.
Evaluating...
Error =

In [11]:
# moar experiments - try hierarchy

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

C1 = nnb.PerceptronLayer(insize=2*10, outsize=10)  #, activation_func=nnb.activation.ReLU)
qamodel = (L0 & L1) | nnb.ConcatenationModel() | C1

C2 = nnb.PerceptronLayer(insize=2*10, outsize=1, activation_func=nnb.activation.tanh)
model = (qamodel & L2) | nnb.ConcatenationModel() | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-4)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0639068007469 minutes.
Evaluating...
Error = 0.52367817288
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0626420338949 minutes.
Evaluating...
Error = 0.509335139541
New best!
~Epoch 3~
[----------] 
Finished. Took 0.0624714811643 minutes.
Evaluating...
Error = 0.552403016943
~Epoch 4~
[----------] 
Finished. Took 0.0626221179962 minutes.
Evaluating...
Error = 0.517385515971
~Epoch 5~
[----------] 
Finished. Took 0.0631368517876 minutes.
Evaluating...
Error = 0.538668932572
~Epoch 6~
[----------] 
Finished. Took 0.063644250234 minutes.
Evaluating...
Error = 0.500204302745
New best!
~Epoch 7~
[----------] 
Finished. Took 0.0627485990524 minutes.
Evaluating...
Error = 0.511323317212
~Epoch 8~
[----------] 
Finished. Took 0.0628132184347 minutes.
Evaluating...
Error = 0.503261687349
~Epoch 9~
[----------] 
Finished. Took 0.0629855155945 minutes.
Evaluating...
Error = 0.502552247495
~Epo

In [8]:
# moar experiments

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, h0=np.zeros(shape=(10,), dtype=theano.config.floatX), model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, h0=np.zeros(shape=(10,), dtype=theano.config.floatX), model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, h0=np.zeros(shape=(10,), dtype=theano.config.floatX), model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0888191302617 minutes.
Evaluating...
Error = 0.500957958481
New best!
~Epoch 2~
[----------] 
Finished. Took 0.0870272358259 minutes.
Evaluating...
Error = 0.50158509735
~Epoch 3~
[----------] 
Finished. Took 0.0885104179382 minutes.
Evaluating...
Error = 0.504333870123
~Epoch 4~
[----------] 
Finished. Took 0.0876606663068 minutes.
Evaluating...
Error = 0.503840694988
~Epoch 5~
[----------] 
Finished. Took 0.0882599473 minutes.
Evaluating...
Error = 0.505143237825
~Epoch 6~
[----------] 
Finished. Took 0.0875824173292 minutes.
Evaluating...
Error = 0.501512641975
~Epoch 7~
[----------] 
Finished. Took 0.0871998349826 minutes.
Evaluating...
Error = 0.515675752182
~Epoch 8~
[----------] 
Finished. Took 0.0874417861303 minutes.
Evaluating...
Error = 0.495795221328
New best!
~Epoch 9~
[----------] 
Finished. Took 0.0871418992678 minutes.
Evaluating...
Error = 0.489819101383
New best!
~Epoch

  from scan_perform.scan_perform import *


In [9]:
# moar experiments

L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L0 & L1 & L2) | CC | C2
try_model(model, s0[:200], s1[:200], s2[:200], y[:200], L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[----------] 
Finished. Took 0.0845589836438 minutes.
Evaluating...
Error = 0.501358014788
New best!
~Epoch 2~
[----------] 
Finished. Took 0.085214749972 minutes.
Evaluating...
Error = 0.499122361749
New best!
~Epoch 3~
[----------] 
Finished. Took 0.0838472008705 minutes.
Evaluating...
Error = 0.498413014051
New best!
~Epoch 4~
[----------] 
Finished. Took 0.0840673685074 minutes.
Evaluating...
Error = 0.503338874358
~Epoch 5~
[----------] 
Finished. Took 0.0820211172104 minutes.
Evaluating...
Error = 0.497372318792
New best!
~Epoch 6~
[----------] 
Finished. Took 0.0860601345698 minutes.
Evaluating...
Error = 0.499694159084
~Epoch 7~
[----------] 
Finished. Took 0.0840774178505 minutes.
Evaluating...
Error = 0.529186830661
~Epoch 8~
[----------] 
Finished. Took 0.0852189024289 minutes.
Evaluating...
Error = 0.513377143498
~Epoch 9~
[----------] 
Finished. Took 0.0835673809052 minutes.
Evaluating...
Error = 0.511034

### RNN Models (full dataset)

In [None]:
L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model0 = (L0 & L1 & L2) | CC | C2
try_model(model0, s0, s1, s2, y, L2_reg=1e-3)

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~
[--------- ] 
Finished. Took 1.60305870374 minutes.
Evaluating...
Error = 0.500098191283
New best!
~Epoch 2~
[--------- ]

In [None]:
L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=20)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=20)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=20)[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*20, outsize=1, activation_func=nnb.activation.tanh)
model1 = (L0 & L1 & L2) | CC | C2
try_model(model1, s0, s1, s2, y, L2_reg=1e-3)

In [None]:
L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10, model=nnb.SimpleRecurrence(insize=50, outsize=10, activation_func=nnb.activation.ReLU))[-1]

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model2 = (L0 & L1 & L2) | CC | C2
try_model(model2, s0, s1, s2, y, L2_reg=1e-3)

In [None]:
L0 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L1 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]
L2 = nnb.InputLayer(ndim=2) | nnb.RecurrentNeuralNetwork(insize=50, outsize=10)[-1]

C1 = nnb.PerceptronLayer(insize=2*10, outsize=10)  #, activation_func=nnb.activation.ReLU)
qamodel = (L0 & L1) | nnb.ConcatenationModel() | C1

C2 = nnb.PerceptronLayer(insize=2*10, outsize=1, activation_func=nnb.activation.tanh)
model3 = (qamodel & L2) | nnb.ConcatenationModel() | C2
try_model(model3, s0, s1, s2, y, L2_reg=1e-4)

In [20]:
# CNN experiment

L = []
for i in range(3):
    L.append(nnb.InputLayer(ndim=2) |
             nnb.ConvolutionalLayer(insize=50, window=3, outsize=10, activation_func=nnb.activation.ReLU) |
             nnb.MaxPoolingLayer(window=1)[0])

CC = nnb.ConcatenationModel()
C2 = nnb.PerceptronLayer(insize=3*10, outsize=1, activation_func=nnb.activation.tanh)
model = (L[0] & L[1] & L[2]) | CC | C2
model.compile()
try_model(model, s0[:200], s1[:200], s2[:200], y[:200])

Preprocessing...
Cost function...
Compiling...
Training...
~Epoch 1~


ValueError: Output dimensions are not valid 1x-1
Apply node that caused the error: ConvOp{('imshp', (1, 50, None)),('kshp', (50, 3)),('nkern', 10),('bsize', 1),('dx', 1),('dy', 1),('out_mode', 'valid'),('unroll_batch', None),('unroll_kern', None),('unroll_patch', True),('imshp_logical', (1, 50, None)),('kshp_logical', (50, 3)),('kshp_logical_top_aligned', True)}(InplaceDimShuffle{x,x,1,0}.0, InplaceDimShuffle{0,x,1,2}.0)
Inputs types: [TensorType(float64, (True, True, False, False)), TensorType(float64, (False, True, False, False))]
Inputs shapes: [(1, 1, 50, 1), (10, 1, 50, 3)]
Inputs strides: [(400, 400, 8, 8), (1200, 1200, 24, 8)]
Inputs values: ['not shown', 'not shown']

Backtrace when the node is created:
  File "build/bdist.linux-x86_64/egg/nnb/nn_model.py", line 995, in apply
    subsample=(1, stride)

Debugprint of the apply node: 
ConvOp{('imshp', (1, 50, None)),('kshp', (50, 3)),('nkern', 10),('bsize', 1),('dx', 1),('dy', 1),('out_mode', 'valid'),('unroll_batch', None),('unroll_kern', None),('unroll_patch', True),('imshp_logical', (1, 50, None)),('kshp_logical', (50, 3)),('kshp_logical_top_aligned', True)} [@A] <TensorType(float64, (True, False, True, False))> ''   
 |InplaceDimShuffle{x,x,1,0} [@B] <TensorType(float64, (True, True, False, False))> ''   
 | |input [@C] <TensorType(float64, matrix)>
 |InplaceDimShuffle{0,x,1,2} [@D] <TensorType(float64, (False, True, False, False))> ''   
   |W [@E] <TensorType(float64, 3D)>

Storage map footprint:
 - TensorConstant{-3}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - b, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - TensorConstant{1.0}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - W, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - TensorConstant{(1, 1, 1) of 0.5}, Shape: (1, 1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - TensorConstant{-inf}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4.0 Byte(s)
 - <TensorType(float64, 3D)>, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - <TensorType(float64, 3D)>, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - b, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - b, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - Constant{-1}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - input, Shape: (461, 50), ElemSize: 8 Byte(s), TotalSize: 184400 Byte(s)
 - input, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - b, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - Constant{0}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{0}, Shape: (1,), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 - TensorConstant{(1, 1, 1, 1) of 0.5}, Shape: (1, 1, 1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - MakeVector.0, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
 - W, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - TensorConstant{(1,) of 2.0}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - input, Shape: (1, 50), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
 - W, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - input, Shape: (11, 50), ElemSize: 8 Byte(s), TotalSize: 4400 Byte(s)
 - TensorConstant{1}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1,) of 0.5}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - max, Shape: (10, 10), ElemSize: 8 Byte(s), TotalSize: 800 Byte(s)
 - InplaceDimShuffle{x,1,x,3}.0, Shape: (1, 10, 1, 9), ElemSize: 8 Byte(s), TotalSize: 720 Byte(s)
 - Shape_i{2}.0, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - MakeVector.0, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
 - Elemwise{Composite{(i0 + ((i1 + i2) // i0))}}[(0, 2)].0, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Alloc.0, Shape: (10, 10), ElemSize: 8 Byte(s), TotalSize: 800 Byte(s)
 - TensorConstant{10}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{0.0}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - <TensorType(float64, vector)>, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - <TensorType(float64, vector)>, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - <TensorType(float64, matrix)>, Shape: (30, 1), ElemSize: 8 Byte(s), TotalSize: 240 Byte(s)
 - <TensorType(float64, vector)>, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - Reshape{3}.0, Shape: (10, 1, 10), ElemSize: 8 Byte(s), TotalSize: 800 Byte(s)
 - Elemwise{Composite{(i0 / Cast{float64}(i1))}}.0, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - <TensorType(float64, 3D)>, Shape: (10, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - Subtensor{int64}.0, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - W, Shape: (30, 1), ElemSize: 8 Byte(s), TotalSize: 240 Byte(s)
 - TensorConstant{(1, 1) of 0.0}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - Alloc.0, Shape: (30,), ElemSize: 8 Byte(s), TotalSize: 240 Byte(s)
 - W.T, Shape: (1, 30), ElemSize: 8 Byte(s), TotalSize: 240 Byte(s)
 - TensorConstant{(1,) of 1.0}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - <TensorType(float64, vector)>, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - InplaceDimShuffle{x,x,1,0}.0, Shape: (1, 1, 50, 11), ElemSize: 8 Byte(s), TotalSize: 4400 Byte(s)
 - InplaceDimShuffle{0,x,1,2}.0, Shape: (10, 1, 50, 3), ElemSize: 8 Byte(s), TotalSize: 12000 Byte(s)
 - InplaceDimShuffle{x,x,1,0}.0, Shape: (1, 1, 50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)



In [17]:
theano.config.exception_verbosity = 'high'