In [None]:
'''
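This notebook fits an L2-penalised logistic regression by minimising the negative log-likelihood with scipy's BFGS optimiser, and compares the estimates to those of the sklearn package's LogisticRegression.
The two sets of coefficients agree closely, though not digit for digit.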
'''

In [7]:
import numpy as np
import sys
import timeit
import pandas as pd
import scipy as sp
import scipy.optimize
from sklearn import cross_validation, metrics, linear_model

In [15]:
# Read the tab-separated file keeping every field as text, then drop down to
# a plain 2-D array of strings.
raw_table = pd.read_table('./data/Adrenal_Gland_1.5.txt', sep='\t', dtype=str)
data = raw_table.values

In [16]:
# Columns 0-1 stay as strings, column 2 becomes the float vector `e`,
# and every remaining column is converted to a float predictor.
label = data[:, 0:2]
e = data[:, 2].astype(float)
features = data[:, 3:].astype(float)

# Prepend a column of ones so the first coefficient acts as the intercept.
g = np.hstack((np.ones((features.shape[0], 1)), features))

# Starting point for the optimiser: one zero per column of g.
vBeta = np.zeros(g.shape[1])

In [17]:
# cost function
def logLikelihoodLogitVerbose(vBeta, mX, vY):
    """Return the L2-penalised negative log-likelihood of a logistic model.

    The value is ``sum(log(1 + exp(eta)) - vY * eta) + 0.5 * vBeta . vBeta``
    with ``eta = mX . vBeta``.

    Parameters
    ----------
    vBeta : array of shape (k,)
        Coefficient vector, one entry per column of ``mX``.
    mX : array of shape (n, k)
        Design matrix, one row per observation.
    vY : array of shape (n,)
        Response, one entry per row of ``mX``.

    Returns
    -------
    float
        The penalised negative log-likelihood (lower is better).
    """
    vEta = np.dot(mX, vBeta)
    # log(1 + exp(eta)) computed as logaddexp(0, eta): the naive form overflows
    # to inf for large eta, and (1 - vY) * (-inf) then turns the sum into nan.
    vLogNorm = np.logaddexp(0.0, vEta)
    # vY*(eta - L) + (1 - vY)*(-L) simplifies to vY*eta - L; negated below.
    return np.sum(vLogNorm - vY * vEta) + 0.5 * np.dot(vBeta, vBeta)

# gradient
def likelihoodScore(vBeta, mX, vY):
    """Gradient of the penalised negative log-likelihood with respect to vBeta.

    Parameters
    ----------
    vBeta : coefficient vector, one entry per column of ``mX``.
    mX : design matrix, one row per observation.
    vY : response vector, one entry per row of ``mX``.

    Returns
    -------
    Array with one entry per coefficient: ``mX.T . (logit(mX, vBeta) - vY)``
    plus the penalty contribution ``vBeta``.
    """
    # Difference between fitted probabilities and the observed response.
    vResidual = logit(mX, vBeta) - vY
    vDataTerm = np.dot(mX.T, vResidual)
    # Derivative of the 0.5 * 1 * vBeta.vBeta penalty in the cost function.
    vPenaltyTerm = 1 * vBeta
    return vDataTerm + vPenaltyTerm

# logistic transformation
def logit(mX, vBeta):
    """Return the logistic transform ``1 / (1 + exp(-eta))`` of ``eta = mX . vBeta``.

    Despite the name, this is the inverse-logit (sigmoid): it maps the linear
    predictor to a value in [0, 1].

    Parameters
    ----------
    mX : design matrix, one row per observation.
    vBeta : coefficient vector, one entry per column of ``mX``.

    Returns
    -------
    Array with one value per row of ``mX``.
    """
    vEta = np.dot(mX, vBeta)
    # sigma(eta) = exp(-log(1 + exp(-eta))). Going through logaddexp avoids the
    # inf / inf = nan that exp(eta) / (1 + exp(eta)) produces for large eta.
    return np.exp(-np.logaddexp(0.0, -vEta))

def hessian(vBeta, mX, vY):
    """Hessian of the penalised negative log-likelihood with respect to vBeta.

    Returns ``mX.T . diag(p * (1 - p)) . mX + I`` where ``p = logit(mX, vBeta)``;
    the identity term is the second derivative of the ``0.5 * 1 * vBeta.vBeta``
    penalty used by the cost function.

    Parameters
    ----------
    vBeta : coefficient vector, one entry per column of ``mX``.
    mX : design matrix of shape (n, k).
    vY : response vector. Unused (the Hessian does not depend on it); kept so
        the signature matches the cost and gradient functions.

    Returns
    -------
    Array of shape (k, k).
    """
    vProb = logit(mX, vBeta)
    vWeight = vProb * (1.0 - vProb)
    # Broadcasting scales column j of mX.T (observation j) by its weight, which
    # forms X^T W X without materialising the n-by-n diagonal matrix W.
    mDataTerm = np.dot(mX.T * vWeight, mX)
    return mDataTerm + 1 * np.eye(len(vBeta))

In [18]:
# Minimise the cost function with BFGS, supplying the analytic gradient and
# passing the design matrix and response through `args`.
optimLogit2 = scipy.optimize.fmin_bfgs(
    logLikelihoodLogitVerbose,
    x0=vBeta,
    fprime=likelihoodScore,
    args=(g, e),
    gtol=1e-3
)

Optimization terminated successfully.
         Current function value: 258634.265528
         Iterations: 33
         Function evaluations: 55
         Gradient evaluations: 55


In [19]:
# Coefficient vector returned by fmin_bfgs.
optimLogit2

array([ -1.98314002e+00,   6.99299926e-03,   4.51243143e-03,
         3.47479161e-04,  -3.30699330e-02,  -1.31112290e-02,
        -9.07980634e-03,   4.32768908e-03,   5.15298043e-02,
         1.49497331e-02,   1.21935400e-02,  -3.15637468e-03,
         3.59416750e-03,   1.65979582e-03,   1.53012206e-04,
         4.23161809e-03,  -3.55467217e-03,   1.28982748e-02,
         1.80667489e-03,  -1.26059371e-03,   1.02337669e-03,
         4.41668379e-03])

In [22]:
# Use the `linear_model` name bound by `from sklearn import ... linear_model`;
# the bare name `sklearn` is never imported, so `sklearn.linear_model...`
# raises NameError on a fresh kernel.
# fit_intercept=False because g already carries a leading column of ones.
# NOTE(review): the hand-written cost uses a penalty weight of 1, which would
# correspond to C=1 here; C=1000 is a much weaker penalty - confirm intended.
l = linear_model.LogisticRegression(penalty='l2', C=1000, tol=.0001, fit_intercept=False)
l.fit(g, e)

LogisticRegression(C=1000, class_weight=None, dual=False, fit_intercept=False,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [23]:
# Coefficients of the fitted sklearn model, for comparison with optimLogit2.
l.coef_

array([[ -1.98316831e+00,   6.99432345e-03,   4.51613356e-03,
          3.46184547e-04,  -3.30760905e-02,  -1.31107447e-02,
         -9.08353036e-03,   4.32865791e-03,   5.15343319e-02,
          1.49490909e-02,   1.22050212e-02,  -3.15271138e-03,
          3.57927622e-03,   1.67033138e-03,   1.45697545e-04,
          4.24224710e-03,  -3.56664032e-03,   1.28971170e-02,
          1.80343684e-03,  -1.26125757e-03,   1.02405507e-03,
          4.41623951e-03]])