In [7]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from loadfft import getData
import math

# Shared Theano random stream; dropout() draws its binomial masks from it.
srng = RandomStreams()

def floatX(X):
    """Return ``X`` as a NumPy array whose dtype is Theano's configured float type.

    The target dtype is read from ``theano.config.floatX`` at call time.
    """
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def init_weights(shape, scale=0.01):
    """Create a Theano shared variable filled with small Gaussian noise.

    Parameters
    ----------
    shape : sequence of int
        Dimensions of the weight array; unpacked into ``np.random.randn``.
    scale : float, optional
        Factor applied to the standard-normal samples. Defaults to 0.01,
        the value that was previously hard-coded.

    Returns
    -------
    The result of ``theano.shared`` wrapping the ``floatX``-converted samples.
    """
    return theano.shared(floatX(np.random.randn(*shape) * scale))

def rectify(X):
    """Rectified linear unit: element-wise maximum of ``X`` and zero."""
    floor = 0.
    return T.maximum(X, floor)

def softmax(X):
    """Row-wise softmax of a 2-D symbolic matrix.

    Each row's maximum is subtracted before exponentiating, then every row
    of exponentials is divided by its own sum.
    """
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exps = T.exp(X - row_max)
    row_total = exps.sum(axis=1).dimshuffle(0, 'x')
    return exps / row_total

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the RMSprop update list for ``params`` with respect to ``cost``.

    For every parameter a shared accumulator (created as the parameter's
    value times zero) tracks a decayed average of the squared gradient.
    The gradient is divided by ``sqrt(accumulator + epsilon)`` before the
    step of size ``lr`` is taken.

    Returns a list of ``(shared_variable, new_expression)`` pairs: for each
    parameter, the accumulator update followed by the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        running_sq = theano.shared(param.get_value() * 0.)
        running_sq_next = rho * running_sq + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(running_sq_next + epsilon)
        updates.extend([
            (running_sq, running_sq_next),
            (param, param - lr * scaled_grad),
        ])
    return updates

def dropout(X, p=0.):
    """Apply inverted dropout to ``X`` with drop probability ``p``.

    Parameters
    ----------
    X : symbolic tensor
        Activations to mask.
    p : float, optional
        Probability of dropping a unit. Any value ``<= 0`` disables dropout
        and returns ``X`` untouched.

    Returns
    -------
    ``X`` itself when ``p <= 0``; otherwise ``X`` multiplied by a binomial
    keep-mask and divided by the keep probability ``1 - p``.

    Raises
    ------
    ValueError
        If ``p >= 1``: the keep probability would be zero or negative, which
        leads to a division by zero or an invalid binomial probability.
    """
    if p >= 1:
        raise ValueError("dropout probability p must be < 1, got %r" % (p,))
    if p > 0:
        retain_prob = 1 - p
        mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        # Build a new expression instead of using augmented assignment so a
        # mutable input can never be modified in place.
        X = X * mask / retain_prob
    return X


In [8]:
# Load the training and test splits from the local loadfft helper.
# NOTE(review): oh=1 presumably requests one-hot labels (later cells take argmax over axis 1 of trY/teY) — confirm in loadfft.getData.
trX,trY,teX,teY=getData(oh=1)

2224
3106
536
759


In [9]:
def model(X, w, p_drop_input, p_drop_hidden):
    """Build the symbolic forward pass of a feed-forward softmax classifier.

    Parameters
    ----------
    X : symbolic matrix
        Network input.
    w : sequence of weight matrices
        Every matrix except the last feeds a rectified hidden layer; the
        last one feeds the softmax output. A single matrix therefore yields
        a plain softmax layer on the (dropped-out) input.
    p_drop_input : float
        Dropout probability applied to the input.
    p_drop_hidden : float
        Dropout probability applied after every hidden layer.

    Returns
    -------
    (h, py_x)
        ``h`` collects the hidden-layer expressions in order; ``py_x`` is the
        softmax output expression.
    """
    layer = dropout(X, p_drop_input)
    h = []
    for weights in w[:-1]:
        layer = dropout(rectify(T.dot(layer, weights)), p_drop_hidden)
        # np.append is kept so the returned container is the same kind of
        # object existing callers already receive.
        h = np.append(h, layer)
    # Use the running `layer` rather than variables left over from the loop,
    # so the output layer is well defined even when there is no hidden layer.
    py_x = softmax(T.dot(layer, w[-1]))
    return h, py_x

def loss(result, Y, eps=1e-15):
    """Mean negative log-likelihood of the true classes.

    Parameters
    ----------
    result : 2-D array
        ``result[i, k]`` is the predicted probability of class ``k`` for
        sample ``i``.
    Y : sequence of int
        True class index of every sample.
    eps : float, optional
        Lower bound applied to each probability before taking the log, so a
        probability of exactly 0 gives a large finite penalty instead of
        ``math.log`` raising ``ValueError``.

    Returns
    -------
    float
        Average of ``-log(result[i, Y[i]])`` over all samples.

    Raises
    ------
    ZeroDivisionError
        If ``Y`` is empty.
    """
    n_samples = len(Y)
    total = 0.
    for row, label in enumerate(Y):
        total -= math.log(max(result[row, label], eps))
    return total / n_samples

def one_hot(x, n):
    """One-hot encode integer class labels.

    Parameters
    ----------
    x : array-like of int
        Class indices. Lists, tuples and arrays of any shape are accepted;
        the values are flattened and cast to ``int``.
    n : int
        Number of classes (width of the encoding).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(labels), n)`` with a single 1 per row.
    """
    # np.asarray handles every sequence type, not only `list`.
    labels = np.asarray(x).ravel().astype(int)
    encoded = np.zeros((len(labels), n))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded

    


In [13]:
# Training cell: builds the network graph, compiles the Theano functions and
# runs nb_step passes over the training data, printing metrics after each pass.
#def neuraln(trX,trY,nb_step=5,nbLay=3,nbNodes=100,p_drop_input=0.2,p_drop_hidden=0.4,lambda2=10000,size_output=4):
import random
# Seed both Python's and NumPy's generators before the weights are drawn.
random.seed(1)
np.random.seed(1)

# Hyper-parameters of this run.
nb_step=2
nbLay=3
nbNodes=100
p_drop_input=0
p_drop_hidden=0
# NOTE(review): lambda2 is not used anywhere in this cell.
lambda2=10000
size_input=93
size_output=4

if 1:   
    print nb_step,nbLay,nbNodes,p_drop_input,p_drop_hidden
    # NOTE(review): `check` is assigned but never read in this cell.
    check=nb_step/10
    
    # Weight matrices: input->hidden, (nbLay-1) hidden->hidden, hidden->output.
    params=[init_weights((size_input,nbNodes))]
    params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
    params.append(init_weights((nbNodes, size_output)))
    
    # Symbolic inputs: features and target matrix.
    X = T.fmatrix()
    Y = T.fmatrix()

    # Two graphs over the same weights: one with the configured dropout (used
    # for the training cost) and one with dropout disabled (used for prediction).
    noise_h, noise_py_x = model(X,params, p_drop_input, p_drop_hidden)
    h, py_x = model(X, params,0,0)
    y_x = T.argmax(py_x, axis=1)
    
    

    # L1 penalty: sum of absolute values over every weight matrix.
    L1=T.sum([T.sum(abs(params[i])) for i in range(len(params))])
    print(L1.eval())
    # Squared-L2 penalty: sum of squares over every weight matrix.
    L2 = T.sum([T.sum((params[i])**2) for i in range(len(params))])
    print(L2.eval())
    # Training objective: mean cross-entropy of the dropout graph plus both weighted penalties.
    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))+0.00005*L1+0.005*L2
    
    updates = RMSprop(cost, params, lr=0.001)
    
    # train applies the RMSprop updates and returns the cost; predict returns
    # class indices; predictProb returns the softmax probabilities.
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    predictProb=theano.function(inputs=[X], outputs=py_x, allow_input_downcast=True)

    print(len(trX))
    i=0
    while i<nb_step:
        i+=1
        # Mini-batches of 10 samples. zip() stops at the shorter range, so the
        # samples after the last `end` value are never passed to train().
        for start, end in zip(range(0, len(trX), 10), range(10, len(trX), 10)):
            # NOTE: rebinds `cost` from the symbolic expression to the numeric
            # cost of the most recent mini-batch.
            cost = train(trX[start:end], trY[start:end])
        #cost=train(trX,trY)
        # Training accuracy after this pass.
        scoreTr=np.mean(np.argmax(trY, axis=1) == predict(trX))
        
        # i%1 is always 0, so the metrics are printed after every pass.
        if i%1==0:#(nb_step/1)==0:
            result=predictProb(trX)
            argY=np.argmax(trY, axis=1)
            logTr=loss(result,argY)
            # Pass index, layer count, train accuracy, train log-loss, test accuracy.
            print i,nbLay,scoreTr,logTr, np.mean(np.argmax(teY, axis=1) == predict(teX))
            # Last mini-batch cost and the current weighted L1 / L2 penalty values.
            print cost,0.00005*L1.eval(),0.005*L2.eval()

# Display the list of shared weight variables as the cell output.
params

2 3 100 0 0
236.443763208
2.9619058228
4283
1 3 0.461825823021 1.00038153878 0.481550677254
0.881840229289 0.00703385966472 0.0178950271077
2 3 0.738968013075 0.696749575213 0.732368052312
0.73103068153 0.0103915367519 0.0424203539006


[<TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>,
 <TensorType(float64, matrix)>]

In [23]:
# Re-create the weight list with fresh random values:
# input->hidden, (nbLay-1) hidden->hidden, hidden->output.
params=[init_weights((size_input,nbNodes))]
params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
params.append(init_weights((nbNodes, size_output)))

In [34]:
# Build a symbolic sum over L2_reg applied to the current weight list.
# NOTE(review): L2_reg is not defined in any visible cell — presumably left over from a deleted cell; confirm.
L2_sqr = T.sum(L2_reg(np.array(params)))


In [36]:
# Evaluate the symbolic penalty to a concrete number.
L2_sqr.eval()

array(2.9730365943150203)

In [37]:
# Draw a fresh set of weights and rebind `params` to the new list.
params=[init_weights((size_input,nbNodes))]
params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
params.append(init_weights((nbNodes, size_output)))

In [38]:
# Re-evaluate after `params` was rebound. L2_sqr was built from the previous
# weight list, and the recorded output is unchanged from the earlier evaluation.
L2_sqr.eval()

array(2.9730365943150203)

In [39]:
# Rebuild the expression from the current `params` so the value reflects the new weights.
# NOTE(review): L2_reg is not defined in any visible cell — confirm where it comes from.
L2_sqr = T.sum(L2_reg(np.array(params)))
L2_sqr.eval()

array(2.950055420821496)

In [51]:
# Square-and-sum through NumPy over the list of shared variables. The recorded
# output is a symbolic Elemwise expression, not a number; .eval() would be needed for a value.
(np.array(params)**2).sum()

Elemwise{add,no_inplace}.0

In [None]:
    # NOTE(review): `P` is not defined in any visible cell and this cell has no execution count — presumably a pasted reference snippet; confirm before running.
    # Symbolic Theano expression for the L1 regularization term: sum of absolute values of P.
    L1  = T.sum(abs(P))

    # Symbolic Theano expression for the squared L2 term: sum of squared entries of P.
    L2_sqr = T.sum(P ** 2)

In [27]:
# L1 penalty restricted to the first weight matrix only.
L1  = T.sum(abs(params[0]))

In [28]:
# Display the symbolic expression itself (not its value).
L1

Sum{acc_dtype=float64}.0

In [29]:
# Evaluate the first-layer L1 penalty to a number.
L1.eval()

array(74.1269312188022)

In [31]:
# Inspect the current values of the first weight matrix.
params[0].eval()


array([[-0.0104779 , -0.01752108,  0.00152609, ..., -0.00392234,
        -0.01745957,  0.00917721],
       [-0.00469877,  0.00674319, -0.00766582, ..., -0.0167621 ,
         0.00526074,  0.00495152],
       [ 0.00108371,  0.00532334, -0.00675386, ...,  0.00297124,
        -0.00271686, -0.00603125],
       ..., 
       [ 0.01752274, -0.00239432,  0.0007575 , ...,  0.0155845 ,
         0.01754397, -0.00494991],
       [ 0.01123058, -0.0161085 , -0.00366557, ..., -0.0106872 ,
        -0.00701879,  0.00298605],
       [-0.00663227,  0.02798805, -0.02102916, ...,  0.00085213,
         0.00667791,  0.00051049]])

In [52]:
# Scratch check: draw a 3x2 array of standard-normal samples.
np.random.randn(3,2)

array([[ 0.8366137 , -0.21894838],
       [-0.39036628,  0.52636657],
       [ 0.7701492 ,  0.11877007]])

In [59]:
# NOTE(review): init_weights is already defined in the notebook's first cell;
# this re-declaration shadows it. Consider keeping a single definition.
def init_weights(shape, scale=0.01):
    """Create a Theano shared variable filled with small Gaussian noise.

    Parameters
    ----------
    shape : sequence of int
        Dimensions of the weight array; unpacked into ``np.random.randn``.
    scale : float, optional
        Factor applied to the standard-normal samples. Defaults to 0.01,
        the value that was previously hard-coded.

    Returns
    -------
    The result of ``theano.shared`` wrapping the ``floatX``-converted samples.
    """
    return theano.shared(floatX(np.random.randn(*shape) * scale))

# Scratch experiment: rebuild the weights with a 95-wide input and 600 outputs,
# then print the L1 and squared-L2 penalties of the freshly drawn weights.
params=[init_weights((95,nbNodes))]
params.extend([init_weights((nbNodes,nbNodes)) for i in range(nbLay-1)])
params.append(init_weights((nbNodes, 600)))

# Sum of absolute values over every weight matrix.
L1=T.sum([T.sum(abs(params[i])) for i in range(len(params))])
print(L1.eval())
# Sum of squares over every weight matrix.
L2 = T.sum([T.sum((params[i])**2) for i in range(len(params))])
print(L2.eval())

714.479668685
8.93347761021
