In [40]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from load_new import getData
import math

srng = RandomStreams()

def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def rectify(X):
    return T.maximum(X, 0.)

def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates

def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X /= retain_prob
    return X


In [8]:
trX,trY,teX,teY=getData(oh=1)

2224
3106
536
759


In [3]:
def model(X, w, p_drop_input, p_drop_hidden):
    X = dropout(X, p_drop_input)
    H={}
    h=[]
    H["h0"]=X
    nbLay=len(w)-1
    for i in range(1,nbLay+1):
        id="h"+`i`
        idp="h"+`i-1`
        H[id] = rectify(T.dot(H[idp], w[i-1]))
        H[id] = dropout(H[id], p_drop_hidden)
        h=np.append(h,H[id])
    py_x = softmax(T.dot(H[id], w[i]))
    return h, py_x

def loss(result,Y):
    l=len(Y)
    err=0
    for i in range(l):
        err+=-math.log(result[i,Y[i]])
    return err/l

def one_hot(x,n):
    if type(x) == list:
        x = np.array(x)
    x = x.flatten().astype(int)
    o_h = np.zeros((len(x),n))
    o_h[np.arange(len(x)),x] = 1
    return o_h

def L2_sum(x):
    return sum(x.eval().flatten()**2)
L2_reg = np.vectorize(L2_sum)
    


In [43]:
#def neuraln(trX,trY,nb_step=5,nbLay=3,nbNodes=100,p_drop_input=0.2,p_drop_hidden=0.4,lambda2=10000,size_output=4):
import random
random.seed(1)
np.random.seed(1)

nb_step=300
nbLay=3
nbNodes=100
p_drop_input=0
p_drop_hidden=0
lambda2=10000
size_input=93
size_output=4

if 1:   
    print nb_step,nbLay,nbNodes,p_drop_input,p_drop_hidden
    check=nb_step/10
    
    params=[init_weights((size_input,nbNodes))]
    params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
    params.append(init_weights((nbNodes, size_output)))
    
    X = T.fmatrix()
    Y = T.fmatrix()

    noise_h, noise_py_x = model(X,params, p_drop_input, p_drop_hidden)
    h, py_x = model(X, params,0,0)
    y_x = T.argmax(py_x, axis=1)
    
    

    L1=T.sum([T.sum(abs(params[i])) for i in range(len(params))])

    L2 = T.sum([T.sum((params[i])**2) for i in range(len(params))])
    
    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))+0.0001*L1+0.001*L2
    
    updates = RMSprop(cost, params, lr=0.001)
    
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    predictProb=theano.function(inputs=[X], outputs=py_x, allow_input_downcast=True)

    print(len(trX))
    i=0
    while i<nb_step:
        i+=1
        for start, end in zip(range(0, len(trX), 10), range(10, len(trX), 10)):
            cost = train(trX[start:end], trY[start:end])
        #cost=train(trX,trY)
        scoreTr=np.mean(np.argmax(trY, axis=1) == predict(trX))
        
        if i%10==0:#(nb_step/1)==0:
            result=predictProb(trX)
            argY=np.argmax(trY, axis=1)
            logTr=loss(result,argY)
            print i,nbLay,scoreTr,logTr, np.mean(np.argmax(teY, axis=1) == predict(teX))
            print cost,0.0001*L1.eval(),0.001*L2.eval()

params

300 3 100 0 0
4283
10 3 0.749708148494 0.621113440328 0.750583839327
0.961685676823 261.537295277 22.6269652546
20 3 0.820919915947 0.456888448035 0.817375058384
0.427142039222 367.04002392 49.0565166036
30 3 0.867849638104 0.349509997249 0.852872489491
0.588412575587 410.901951329 73.4137053748
40 3 0.890030352557 0.289960283652 0.87202241943
0.617462033852 423.288886622 85.3330173215
50 3 0.902638337614 0.266288606489 0.875758991126
0.545374263771 432.062177293 94.4079544878
60 3 0.909409292552 0.250803008019 0.878561419897
0.55119096193 442.573689178 101.045786787
70 3 0.90637403689 0.253620164397 0.878561419897
0.584261020199 445.998177958 105.768305786
80 3 0.908241886528 0.246528883999 0.87435777674
0.633222736853 452.64828581 108.79101495
90 3 0.910109736166 0.238522138148 0.877160205511
0.617843539455 451.046754383 112.687784526
100 3 0.908241886528 0.245929822587 0.869219990659
0.553907867396 451.614674621 115.593436161
110 3 0.91127714219 0.235855060834 0.882765063055
0.54997

[<TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>]

In [44]:
if 1:  
    i=0
    while i<nb_step:
        i+=1
        for start, end in zip(range(0, len(trX), 10), range(10, len(trX), 10)):
            cost = train(trX[start:end], trY[start:end])
        #cost=train(trX,trY)
        scoreTr=np.mean(np.argmax(trY, axis=1) == predict(trX))
        
        if i%10==0:#(nb_step/1)==0:
            result=predictProb(trX)
            argY=np.argmax(trY, axis=1)
            logTr=loss(result,argY)
            print i,nbLay,scoreTr,logTr, np.mean(np.argmax(teY, axis=1) == predict(teX))
            print cost,0.0001*L1.eval(),0.001*L2.eval()

params

10 3 0.929021713752 0.189983357288 0.880429705745
0.53552300623 0.0426072876102 0.132001358373
20 3 0.929255194957 0.190375314442 0.879962634283
0.581625875113 0.0422055179308 0.130858573011
30 3 0.920382909176 0.213887109812 0.880429705745
0.617730668896 0.0418121430044 0.130455378825
40 3 0.919915946766 0.209644271054 0.875291919664
0.61533920482 0.0417405470809 0.130467886419
50 3 0.929488676162 0.19710415419 0.877627276973
0.603230618393 0.0416962118081 0.131067061212
60 3 0.922484240019 0.197055511259 0.878094348435
0.666728819961 0.0415854505629 0.131067499345
70 3 0.92598645809 0.202003314094 0.879962634283
0.624741431786 0.0418492837756 0.132223829614
80 3 0.925286014476 0.199647849402 0.880896777207
0.608079573738 0.0419239357775 0.133604083802
90 3 0.925519495681 0.194385417111 0.875758991126
0.655864897712 0.0415647763033 0.134706476557
100 3 0.92598645809 0.207991516082 0.875758991126
0.675580555092 0.0416251050576 0.136006757785
110 3 0.922951202428 0.217538295677 0.870154

[<TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>]

In [23]:
params=[init_weights((size_input,nbNodes))]
params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
params.append(init_weights((nbNodes, size_output)))

In [34]:
L2_sqr = T.sum(L2_reg(np.array(params)))


In [36]:
L2_sqr.eval()

array(2.9730365943150203)

In [37]:
params=[init_weights((size_input,nbNodes))]
params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
params.append(init_weights((nbNodes, size_output)))

In [38]:
L2_sqr.eval()

array(2.9730365943150203)

In [39]:
L2_sqr = T.sum(L2_reg(np.array(params)))
L2_sqr.eval()

array(2.950055420821496)

In [51]:
(np.array(params)**2).sum()

Elemwise{add,no_inplace}.0

In [None]:
    # symbolic Theano variable that represents the L1 regularization term
    L1  = T.sum(abs(P))

    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = T.sum(P ** 2)

In [27]:
L1  = T.sum(abs(params[0]))

In [28]:
L1

Sum{acc_dtype=float64}.0

In [29]:
L1.eval()

array(74.1269312188022)

In [31]:
params[0].eval()


array([[-0.0104779 , -0.01752108,  0.00152609, ..., -0.00392234,
        -0.01745957,  0.00917721],
       [-0.00469877,  0.00674319, -0.00766582, ..., -0.0167621 ,
         0.00526074,  0.00495152],
       [ 0.00108371,  0.00532334, -0.00675386, ...,  0.00297124,
        -0.00271686, -0.00603125],
       ..., 
       [ 0.01752274, -0.00239432,  0.0007575 , ...,  0.0155845 ,
         0.01754397, -0.00494991],
       [ 0.01123058, -0.0161085 , -0.00366557, ..., -0.0106872 ,
        -0.00701879,  0.00298605],
       [-0.00663227,  0.02798805, -0.02102916, ...,  0.00085213,
         0.00667791,  0.00051049]])