In [1]:
import numpy as np
from sklearn import datasets
from scipy.optimize import fmin_cg
from sklearn.linear_model import LogisticRegression
import time
import pandas as pd
import random

## Loading Data for Logistic Regression

In [2]:
#dataset with a binary Y
data = datasets.load_breast_cancer()
X = data.data
Y = data.target
data.data.shape


(569L, 30L)

## Regularized Logistic Regression

In [3]:
def rlr(Lambda, X, Y):
    '''Regularized Logistic Regression
            Parameters: Lambda - postive float
                        X      - matrix, data
                        Y      - vector, binary
            Returns:    w_hat  - vector, coefficient 
                                of the features in the 
                                decision function
    '''
    N, D = X.shape
    
    def sigm(w, x):
        '''Sigmoid function
            Parameters: w      - vector, weights
                        x      - vector, data
            Returns:    1/(1+e^(w^Tx))
        '''
        return 1./(1 + np.exp(-x.dot(w)))

    def F(w):
        '''Negative Log Likelihood Function
            Parameters: w      - vector, initial guess
            Returns:    w_hat  - vector, coefficient of 
                                the features in the 
                                decision functioon
        '''
        return np.sum(Y *np.log(1+np.exp(-X.dot(w))) + (1-Y)* np.log(np.exp(X.dot(w))+1))+Lambda * np.transpose(w[1:]).dot(w[1:])

    #Using Newton's Method to find w_hat
    w_hat = fmin_cg(F, np.zeros(D), disp = False)
    
    return w_hat

## Logistic Regression

In [4]:
def logistic_reg(Lambda, X, Y):
    '''Logistic Regression

        Parameters: Lambda - postive float
                    X      - matrix, data
                    Y      - vector, binary
        Returns:    w_hat  - vector, coefficient 
                            of the features in the 
                            decision function
    '''
    N, D = X.shape

    def sigm(w, x):
        '''Sigmoid function
            Parameters: w      - vector, weights
                        x      - vector, data
            Returns:    1/(1+e^(w^Tx))
        '''
        return 1./(1 + np.exp(-x.dot(w)))

    def F(w):
        '''Negative Log Likelihood Function
            Parameters: w      - vector, initial guess
            Returns:    w_hat  - vector, coefficient of 
                                the features in the 
                                decision functioon
        '''
        return np.sum(Y *np.log(1+np.exp(-X.dot(w))) + (1-Y)* np.log(np.exp(X.dot(w))+1))

    #Using Newton's Method to find w_hat
    w_hat = fmin_cg(F, np.zeros(D), disp = False)
    
    return w_hat

#### Predict Function for Logistic Regression

In [65]:
def predict(x, w):
    '''Predict function for logistic regression (a.k.a. bias)
        Parameters: x - vector, data
                    w - vector, w_hat, or coefficients of the 
                        features of the decision function
        Returns:    k - vector of true and false values
    '''
    def sigm(w, x):
        '''Sigmoid function
            Parameters: w      - vector, weights
                        x      - vector, data
            Returns:    1/(1+e^(w^Tx))
        '''
        return 1./(1 + np.exp(-x.dot(w)))
    k = sigm(w, x)
    return k >= .5

## Logistic Regression Timing

In [66]:
#Timing Implemented Logistic Regression
begin1 = time.time()
predict(X, logistic_reg(Lambda, X, Y))
end1 = time.time()
diff1 = end1 - begin1
print "Time it take to fit and predict bias for Logistic Regression Solver is \t\t\t" + str(diff1)

#Timing Sklearn Logistic Regression
begin = time.time()
model = LogisticRegression(solver = 'lbfgs', C = 1e15, tol=1e-6)
coef = model.fit(X, Y)
what = coef.coef_
bias = coef.intercept_
end = time.time()
diff = end- begin
print "Time it take to fit and predict bias for Scikit learn Logisitic Regression Solver is \t" + str(diff)



Time it take to fit and predict bias for Logistic Regression Solver is 			0.509999990463
Time it take to fit and predict bias for Scikit learn Logisitic Regression Solver is 	0.0360000133514


## Regularized Logistic Regression Timing

In [67]:
#Timing Implemented Regularized Logistic Regression
begin1 = time.time()
for Lambda in xrange(-15, 16):
    w_hat1 = rlr(Lambda, X, Y)
    print "When lambda is 10e" + str(Lambda) + " then w_hat is " + str(w_hat1)
end1 = time.time()
diff1 = end1 - begin1
print "Time it takes to fit for Regularized Logistic Regression is " + str(diff1)

#Timing Implemented Logistic Regression
begin2 = time.time()
w_hat2 = logistic_reg(0, X, Y)
print "When there is no regularization term, then w_hat is  " + str(w_hat2)
end2 = time.time()
diff2 = end2- begin2
print "Time it takes to fit for Logistic Regresstion is " + str(diff2)

#Timing Sklearn Regularized Logistic Regression
begin = time.time()
for Lambda in xrange(-15, 16):
    lam = 2**Lambda
    model = LogisticRegression(solver = 'lbfgs', C = 1./(2*lam), tol=1e-6)
    coef = model.fit(X, Y)
    what = coef.coef_
end = time.time()
diff = end- begin
print "Time it take to fit and predict bias for Scikit learn Logisitic Regression Solver is \t" + str(diff)


When lambda is 10e-15 then w_hat is [  5.92174763e+00   1.69506705e+01   3.18917434e+01  -3.94051434e+02
   1.04580611e-01  -2.04454136e-02  -1.64729501e-01  -8.84521047e-02
   1.98774344e-01   8.54893915e-02  -2.58724903e-01   1.67105920e+00
  -1.89139222e+00  -7.34051296e+01   1.05659749e-02   7.56194277e-03
   3.86702884e-03   3.04734582e-03   2.80897806e-02   4.07896670e-03
   2.76398397e+00   2.03503617e+01   1.01833415e+01  -9.52426276e+02
   1.29822802e-01  -1.33046680e-01  -3.37845615e-01  -1.12588799e-01
   2.60541015e-01   8.36311774e-02]
When lambda is 10e-14 then w_hat is [  5.92174763e+00   1.69506705e+01   3.18917434e+01  -3.94051434e+02
   1.04580611e-01  -2.04454136e-02  -1.64729501e-01  -8.84521047e-02
   1.98774344e-01   8.54893915e-02  -2.58724903e-01   1.67105920e+00
  -1.89139222e+00  -7.34051296e+01   1.05659749e-02   7.56194277e-03
   3.86702884e-03   3.04734582e-03   2.80897806e-02   4.07896670e-03
   2.76398397e+00   2.03503617e+01   1.01833415e+01  -9.52426276




Time it takes to fit for Regularized Logistic Regression is 18.2969999313
When there is no regularization term, then w_hat is  [  5.92174763e+00   1.69506705e+01   3.18917434e+01  -3.94051434e+02
   1.04580611e-01  -2.04454136e-02  -1.64729501e-01  -8.84521047e-02
   1.98774344e-01   8.54893915e-02  -2.58724903e-01   1.67105920e+00
  -1.89139222e+00  -7.34051296e+01   1.05659749e-02   7.56194277e-03
   3.86702884e-03   3.04734582e-03   2.80897806e-02   4.07896670e-03
   2.76398397e+00   2.03503617e+01   1.01833415e+01  -9.52426276e+02
   1.29822802e-01  -1.33046680e-01  -3.37845615e-01  -1.12588799e-01
   2.60541015e-01   8.36311774e-02]
Time it takes to fit for Logistic Regresstion is 0.542000055313
Time it take to fit and predict bias for Scikit learn Logisitic Regression Solver is 	0.802999973297




## Multiclass Logistic Regression

In [46]:
#Loading data for Multiclass Logistic Regression
data = datasets.load_digits()
X = data.data
Y = data.target

#Using Sklearn Multiclass Logistic Regression
for Lambda in xrange(-15, 16):
    lam = 2**Lambda
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial', C = 1./(2*lam), tol=1e-6)
    coef = model.fit(X, Y)
    w_hat0 = coef.coef_
