In [341]:
import numpy as np
import theano
import theano.tensor as T
from theano import function
from theano.ifelse import ifelse
from scipy.stats import zscore

### Test translating from numpy --> Theano

In [337]:
class MyClass:
    def __init__(self, distr='poisson'):
        self.distr = distr
    
    #--------------
    # nonlinearity
    #--------------
    def qu(self, z):
        if(self.distr=='poisson'):
            eps = T.dscalar('eps')
            eps = 0.1
            q = T.log(1+eps+T.exp(z))
        elif(self.distr=='normal'):
            q = z
        elif(self.distr=='binomial'):
            q = T.exp(z)/(1+T.exp(z))
        return q
    
    #-----------------------
    # conditional intensity
    #-----------------------
    def lmb(self, beta0, beta, x):
        z = beta0 + T.dot(x,beta)
        l = self.qu(z)
        return l
    
    #-----------------
    # log likelihoods
    #-----------------
    def logL(self, beta0, beta, x, y):
        l = self.lmb(beta0, beta, x)
        if(self.distr=='poisson'):
            logL = T.sum(y*T.log(l) - l)
        elif(self.distr=='normal'):
            logL = -0.5*T.sum((y-l)**2)
        elif(self.distr=='binomial'):
            z = beta0 + T.dot(x,beta)
            logL = T.sum(y*z - T.log(1+T.exp(z)))
        return logL

    #---------
    # penalty
    #---------
    def penalty(self, alpha, beta):
        P = 0.5*(1-alpha)*T.sum(beta**2) + alpha*T.sum(T.abs_(beta))
        return P

    #---------------
    # loss function
    #---------------
    def loss(self, beta0, beta, alpha, reg_lambda, x, y):    
        L = self.logL(beta0, beta, x, y)
        P = self.penalty(alpha, beta)
        J = -L + reg_lambda*P
        return J

    #--------------------------------------
    # differentiable part of loss function
    #--------------------------------------
    def L2loss(self, beta0, beta, alpha, reg_lambda, x, y):
        L = self.logL(beta0, beta, x, y)
        P = 0.5*(1-alpha)*T.sum(beta**2)
        J = -L + reg_lambda*P
        return J

    #-------------------
    # proximal operator
    #-------------------
    def prox(self,x,l):
        sx = T.sgn(x) * (T.abs_(x) - l) * (T.abs_(x) > l)
        return sx

    #-----------
    # fit model
    #-----------
    def fit(self, x, y, reg_params, opt_params):
    # Implements batch gradient descent (i.e. vanilla gradient descent by computing gradient over entire training set)
        
        # Dataset shape
        n = x.shape[0]
        p = x.shape[1]
        
        # Initialize shared variable
        beta0 = theano.shared(np.random.randn(), name='beta0')
        beta = theano.shared(np.random.randn(p), name='beta')
        
        # Regularization parameters
        reg_lambda = reg_params['reg_lambda']
        alpha = reg_params['alpha']

        # Optimization parameters
        max_iter = opt_params['max_iter']
        e = opt_params['learning_rate']

        # Initialize parameters
        beta0_hat = np.random.randn()
        beta_hat = np.random.randn(p)
        fit = []

        # Outer loop with descending lambda
        for l,rl in enumerate(reg_lambda):
            fit.append({'beta0': 0., 'beta': np.zeros(p), 'L': 10., 'DL': 10.})
            len(fit)
            print('Lambda: {}\n').format(rl)

            # Warm initialize parameters
            if(l == 0):
                fit[-1]['beta0'] = beta0_hat
                fit[-1]['beta'] = beta_hat
            else:
                fit[-1]['beta0'] = fit[-2]['beta0']
                fit[-1]['beta'] = fit[-2]['beta']

            #---------------------------
            # Iterate until convergence
            #---------------------------
            no_convergence = 1
            convergence_threshold = 1e-3
            t = 0

            # Initialize parameters
            beta0.set_value(fit[-1]['beta0'])
            beta.set_value(fit[-1]['beta'])

            # Initialize loss
            L = []
            DL = []

            #Give formula for gradient
            L2loss = self.L2loss(beta0, beta, alpha, rl, x, y)
            grad_beta0, grad_beta = T.grad(L2loss, [beta0, beta])
                
            while(no_convergence and t < max_iter):
                # Update time step
                t = t+1
                
                # Update parameters
                beta0 = beta0 -e*grad_beta0
                beta = self.prox(beta -e*grad_beta, rl*alpha)

                # Calculate loss
                L.append(self.loss(beta0, beta, alpha, rl, x, y))
                print('    iter:{}, loss:{}'.format(t, L[-1]))
                # Delta loss and convergence criterion
                if t > 1:
                    DL.append(L[-1] - L[-2])
                    no_convergence = ifelse(T.lt(T.abs_())
                    
                    #if(np.abs(DL[-1]/L[-1]) < convergence_threshold):
                    #    no_convergence = 0
                    #    print('Converged')
                    #    print('    Loss function: {}').format(L[-1])
                    #    print('    dL/L: {}\n').format(DL[-1]/L[-1])

                #if t==99:
                #        no_convergence = 0
                #        print('Converged')
                        
            #Store the parameters after convergence
            print beta0.eval()
            fit[-1]['beta0'] = beta0.eval()
            fit[-1]['beta'] = beta.eval()

        return fit

    #-----------------------------
    # Define the predict function
    #-----------------------------
    def predict(self, x, fitparams):
        yhat = self.lmb(fitparams['beta0'], fitparams['beta'], zscore(x))
        return yhat


In [None]:
T.

In [338]:
mm = MyClass('poisson')

In [339]:
N = 1000
p = 100

x = np.random.randn(N,p)
beta = np.random.randn(p)
beta0 = np.random.randn()
y = np.random.poisson(mm.lmb(beta0, beta, x).eval())

In [340]:
# Set regularization parameters
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
alpha = 0.1

fit_params = dict()
fit_params['reg_lambda'] = reg_lambda[-2:]
fit_params['alpha'] = alpha

# Set optimization parameters
opt_params = dict()
opt_params['learning_rate'] = 1e-4
opt_params['max_iter'] = 1000

# Fit model to training data
fit = mm.fit(zscore(x),y,fit_params,opt_params)


Lambda: 0.0154445210495

    iter:1, loss:4310.28965689
    iter:2, loss:4192.27711123
    iter:3, loss:4075.6015873
    iter:4, loss:3959.83978039
    iter:5, loss:3845.31912734
    iter:6, loss:3733.23665665
    iter:7, loss:3623.10008018
    iter:8, loss:3515.87762522
    iter:9, loss:3411.22580035
    iter:10, loss:3309.35706867
    iter:11, loss:3210.4256545
    iter:12, loss:3114.41531115
    iter:13, loss:3021.60904789
    iter:14, loss:2931.35894833
    iter:15, loss:2843.839223
    iter:16, loss:2758.56428024
    iter:17, loss:2675.38734385


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:18, loss:2594.55170854
    iter:19, loss:2515.53683654
    iter:20, loss:2438.24151255
    iter:21, loss:2362.61445301
    iter:22, loss:2288.78344351
    iter:23, loss:2216.39099458
    iter:24, loss:2145.60063094
    iter:25, loss:2076.06548037
    iter:26, loss:2007.65498874
    iter:27, loss:1940.4823149
    iter:28, loss:1874.08168382
    iter:29, loss:1808.28306943
    iter:30, loss:1742.80174357
    iter:31, loss:1677.35931761
    iter:32, loss:1611.6503691
    iter:33, loss:1545.777923
    iter:34, loss:1479.40685396


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:35, loss:1411.84162464
    iter:36, loss:1342.82994653
    iter:37, loss:1272.17392708
    iter:38, loss:1199.89944119
    iter:39, loss:1126.54969195
    iter:40, loss:1051.93209739
    iter:41, loss:976.41838035
    iter:42, loss:900.432283919
    iter:43, loss:824.394892584
    iter:44, loss:748.681621523
    iter:45, loss:673.597338771
    iter:46, loss:600.322072678


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:47, loss:528.223632269
    iter:48, loss:457.215938175
    iter:49, loss:387.595231499
    iter:50, loss:320.151903059
    iter:51, loss:254.884767844
    iter:52, loss:190.981942431
    iter:53, loss:128.511878633
    iter:54, loss:67.8475039217
    iter:55, loss:8.72536016944
    iter:56, loss:-48.8057142468


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:57, loss:-104.708934447
    iter:58, loss:-158.960595588
    iter:59, loss:-211.550511477
    iter:60, loss:-262.482035639
    iter:61, loss:-311.771835287
    iter:62, loss:-359.44953309
    iter:63, loss:-405.367419614
    iter:64, loss:-449.77939127


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:65, loss:-492.750614542
    iter:66, loss:-534.356297395
    iter:67, loss:-574.472533228
    iter:68, loss:-613.25686
    iter:69, loss:-650.97296782
    iter:70, loss:-687.710452211
    iter:71, loss:-723.431935102


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:72, loss:-758.392985314
    iter:73, loss:-792.694567623
    iter:74, loss:-826.430623096
    iter:75, loss:-859.684958428
    iter:76, loss:-892.274766287
    iter:77, loss:-924.511759337


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:78, loss:-956.428332876
    iter:79, loss:-988.031855053
    iter:80, loss:-1019.30105209
    iter:81, loss:-1050.18406908
    iter:82, loss:-1080.59962033
    iter:83, loss:-1110.44191476


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:84, loss:-1139.58888746
    iter:85, loss:-1167.91232003
    iter:86, loss:-1195.2096134
    iter:87, loss:-1221.4580179
    iter:88, loss:-1246.56859364


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:89, loss:-1270.47743047
    iter:90, loss:-1292.8998859
    iter:91, loss:-1314.0969458
    iter:92, loss:-1334.08217738
    iter:93, loss:-1352.88824078


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:94, loss:-1370.15821353
    iter:95, loss:-1386.37594911
    iter:96, loss:-1401.59879234
    iter:97, loss:-1415.88462989
    iter:98, loss:-1429.28851046


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:99, loss:-1441.85976946
    iter:100, loss:-1453.4552542
    iter:101, loss:-1464.29478384
    iter:102, loss:-1474.40111205


INFO (theano.gof.compilelock): Refreshing lock /home/pavan/.theano/compiledir_Linux-3.16--generic-x86_64-with-debian-jessie-sid-x86_64-2.7.11-64/lock_dir/lock


    iter:103, loss:-1483.62067707


KeyboardInterrupt: 

### Time numpy vs. theano fixed point operations

In [108]:
z = T.dvector('z')
q = T.log(1+0.1+T.exp(z))
qu = function([z], q)


In [109]:
z0 = np.random.randn(200000)
type(z0)

numpy.ndarray

In [110]:
%timeit qu(z0)

100 loops, best of 3: 13.5 ms per loop


In [111]:
def qu_np(z):
    eps = 0.1
    q = np.log(1+eps+np.exp(z))
    return q

In [112]:
%timeit qu_np(z0)

100 loops, best of 3: 12.4 ms per loop


In [99]:
class MyClass:
    # qu function in theano
    z = T.dvector('z')
    q = T.log(1+0.1+T.exp(z))
    qu = function([z], q)
    
    # lmb function in theano
    x = T.dmatrix('x')
    beta = T.dvector('beta')
    beta0 = T.dscalar('beta0')
    z = beta0 + T.dot(x,beta)
    l = qu(z)
    #l = T.log(1+0.1+T.exp(z))
    lmb = function([beta0, beta, x], l)