In [4]:
import numpy as np
import pandas as pd
from sklearn import datasets


In [5]:
# Load the iris dataset through the `datasets` module (imported from sklearn in this notebook).
iris = datasets.load_iris()

In [6]:
def sigmoid(x):
    """
    Logistic function: 1 / (1 + exp(-x)).

    Inputs
        x: A real number, or a numpy array of real numbers
    Outputs
        The logistic transform of x, in the range 0 to 1
        (applied element-wise when x is an array)
    """
    # exp(-x) is the only term that depends on the input
    decay = np.exp(x * -1)
    return 1 / (decay + 1)

In [179]:
def getLoss(X, y, w):
    """
    Compute the mean binary cross-entropy (negative log-likelihood) loss.

    Inputs
        X: Numpy array of predictors, one row per observation
        y: Binary (0/1) outcomes, one per row of X; a column vector is
           flattened before use
        w: Weight vector with one entry per column of X
    Outputs
        The loss averaged over the rows of X
    """
    N = X.shape[0]
    y = np.ravel(y)

    # linear scores; the predicted probability would be sigmoid(z)
    z = np.dot(X, w)

    # -log(sigmoid(z))     == log(1 + exp(-z)) == logaddexp(0, -z)
    # -log(1 - sigmoid(z)) == log(1 + exp(z))  == logaddexp(0,  z)
    # Working from the scores avoids log(0) (inf / nan) when the sigmoid
    # saturates at exactly 0 or 1 for large |z|.
    cost_if_one = np.logaddexp(0, -z)
    cost_if_zero = np.logaddexp(0, z)

    Loss = (1.0 / N) * (np.dot(y, cost_if_one) + np.dot(1 - y, cost_if_zero))
    return Loss
    

In [182]:
def getGrad(X, y, w):
    """
    Compute the gradient of the mean cross-entropy loss with respect to w.

    Inputs
        X: Numpy array of predictors, one row per observation
        y: Binary (0/1) outcomes, one per row of X; a column vector is
           flattened before use so it cannot broadcast against the
           predictions into an (N, N) matrix
        w: Weight vector with one entry per column of X
    Outputs
        Numpy array with one partial derivative per column of X
    """
    N = X.shape[0]

    # difference between y and y_hat for every row
    delta = np.ravel(y) - sigmoid(np.dot(X, w))

    # X.T . delta sums delta * X[:, i] across all rows for every parameter i
    # at once, replacing the per-parameter Python loop.
    grad = (-1.0 / N) * np.dot(X.T, delta)

    return grad
    

In [183]:
def fit(X, y, lr, e, max_epochs=1000):
    """
    Compute optimal parameters for Logistic Regression using gradient descent

    Inputs
        X: Numpy array of numeric or dummy coded predictor columns
        y: Binary outcome column
        lr: Learning rate; each step moves w by -lr * gradient
        e: Tolerance; iteration stops once the L2 norm of the latest
           update is no larger than e
        max_epochs: Safety cap; iteration is abandoned once the number of
           completed epochs exceeds this value (defaults to the previously
           hard-coded 1000)
    Output
        w = weight vector of parameters
    """
    #initialize loop state; weights start at random values in [0, 1)
    update_norm = np.inf
    w = np.random.random(X.shape[1])
    epoch = 0

    while update_norm > e:

        grad = getGrad(X, y, w)
        update = -1 * lr * grad
        # A single parenthesised argument prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print("update:  %s" % (update,))
        w += update
        epoch += 1
        update_norm = np.linalg.norm(update)
        print("norm:  %s" % (update_norm,))
        print("loss:  %s" % (getLoss(X, y, w),))
        print("----")
        if epoch > max_epochs:
            break

    # Only claim success when the tolerance was actually reached; hitting the
    # epoch cap (or a nan norm) is reported separately.
    if update_norm <= e:
        print("Parameters were optimized after %d epochs!" % epoch)
    else:
        print("Stopped after %d epochs without reaching tolerance %s" % (epoch, e))

    # The fitted weights were previously computed but never handed back.
    return w

In [72]:
# Scratch check of getGrad on the current data and weights.
# NOTE(review): no visible cell assigns `w` at notebook scope, and X / y are
# built in a later cell, so this relies on leftover kernel state; confirm.
getGrad(X, y, w)

(150,)
(150,)


array([ 0.54290304, -0.13896844,  1.37016705,  0.59597108])

In [43]:
# Sanity check: shape of the sigmoid output for the linear scores of X.
sigmoid(np.dot(X, w)).shape

(150,)

In [82]:
# Length-4 float array with every entry set to positive infinity.
a = np.array([np.inf] * 4)

In [83]:
# Display the array to confirm every entry was filled.
a

array([ inf,  inf,  inf,  inf])

In [174]:
# Read the feature matrix through its named attribute. Indexing
# iris.values() depends on dict ordering and fails on Python 3, where
# values() is a view that cannot be indexed.
X = iris.data
y = iris.target.reshape(-1, 1)

# Stack features and target side by side so rows can be filtered together;
# column 4 holds the target.
XY = pd.DataFrame(np.hstack((X, y)), columns = range(5))
# Drop class 2 so that only the labels 0 and 1 remain (binary problem).
XY = XY.loc[XY[4] != 2, :]
X = XY.drop(4, axis = 1).values
y = XY[4].values

In [193]:
# Run gradient descent with learning rate lr = 1 and tolerance e = 0.01.
fit(X, y, 1, 0.01)

update:  [-2.47855994 -1.69313589 -0.72383271 -0.12045088]
norm:  3.09005027582
loss:  7.69179499184
----
update:  [ 2.96799678  1.38499811  2.12999856  0.66299964]
norm:  3.96278778693
loss:  4.18771298738
----
update:  [-2.50231278 -1.70853526 -0.73180351 -0.12196962]
norm:  3.11946709783
loss:  3.42158567038
----
update:  [ 2.9640013   1.38312591  2.12705867  0.66208196]
norm:  3.95740724654
loss:  5.8742788082
----
update:  [-2.50297066 -1.70897987 -0.73199186 -0.12199876]
norm:  3.12028366194
loss:  0.120767725317
----
update:  [ 0.57898024  0.27135262  0.39809349  0.12353464]
norm:  0.763275572375
loss:  0.0247004300016
----
update:  [-0.1109525  -0.07453124 -0.03351009 -0.00546744]
norm:  0.137906425907
loss:  0.0120887647578
----
update:  [-0.04341851 -0.03032819 -0.0108301  -0.00111283]
norm:  0.054069358724
loss:  0.00976144855513
----
update:  [-0.02607735 -0.01942321 -0.00424076  0.00033914]
norm:  0.0327931154215
loss:  0.00886919460061
----
update:  [-0.01696463 -0.013857

In [137]:
# Hand-written copy of the getLoss formula with the row count hard-coded as 100.
# NOTE(review): `y_hat` is not assigned in any visible cell; presumably
# sigmoid(np.dot(X, w)) from an earlier session; confirm.
(1.0 / 100) *  (np.dot(y, -1 * np.log(y_hat)) + np.dot(1 - y, -1 *np.log(1 - y_hat)))

1.0356966726655659

In [180]:
# Evaluate the loss for the current X, y and w.
# NOTE(review): `w` is not assigned at notebook scope in any visible cell; confirm its origin.
getLoss(X, y, w)

2.64517076490896

# testing class

In [1]:
from npLogistic import BinaryLogistic
from sklearn import datasets
import numpy as np

In [2]:
iris = datasets.load_iris()

# Read the feature matrix through its named attribute. Indexing
# iris.values() depends on dict ordering and fails on Python 3, where
# values() is a view that cannot be indexed.
X = iris.data
# Random 0/1 labels, one per row of X. They are drawn without a seed, so
# they differ on every run.
y = np.random.randint(0, 2, size = X.shape[0])

In [3]:
# Instantiate the classifier imported from npLogistic, passing no arguments.
log = BinaryLogistic()

In [4]:
# Fit the classifier on X and the random labels y.
# NOTE(review): the recorded output reports 1001 epochs, which presumably
# means an epoch cap was hit rather than convergence; confirm against npLogistic.
log.fit(X, y)

Parameters were optimized after 1001 epochs!


In [20]:
# Predicted values from the fitted model; presumably P(y = 1) per row; confirm against npLogistic.
proba = log.predict_proba(X)
# Boolean mask marking the entries whose predicted value is at least 0.5.
mask = proba >= 0.5

# Number of elements in the mask.
mask.size

150

In [19]:
# Turn the boolean mask into 1.0 / 0.0 values: 1.0 where mask is True, 0.0 elsewhere.
n_rows = proba.shape[0]
np.where(
    mask,
    np.ones(n_rows),
    np.zeros(n_rows),
)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.])