In [0]:
# For a sample array
import pandas as pd
import numpy as np

In [0]:
# The sigmoid function is the hypothesis in logistic regression:
#     h(x) = g(theta^T x), where g(z) = 1 / (1 + e^(-z)) and 0 < h(x) < 1


# Gradient descent (or SGD) can be used to minimize the loss (cost function):
# cost = -log(h(x))       if y = 1
#      = -log(1 - h(x))   if y = 0
# or, as a single expression: cost = -y*log(h(x)) - (1 - y)*log(1 - h(x))
# ^ i.e. if the actual class is 1 and the model predicts 0 (or vice versa), the prediction is penalized

# If the weighted sum of inputs > 0, the predicted class is 1 else 0 - binomial Logistic Regression



In [0]:
def sigmoid(X, coeff):
    """Logistic hypothesis h(x) = g(theta^T x), with g(z) = 1 / (1 + e^(-z)).

    Args:
        X: feature array, typically of shape (n_samples, n_features).
        coeff: coefficient vector of shape (n_features,).

    Returns:
        The sigmoid of ``np.dot(X, coeff.T)``; for a 2-D ``X`` and a 1-D
        ``coeff`` that is one value per row of ``X``.
    """
    # The weighted sum must be a dot product over the features; an
    # element-wise product would yield one value per (sample, feature) pair.
    z = np.dot(X, np.transpose(coeff))
    return 1.0 / (1.0 + np.exp(-z))

def cost_function(X, y, coeff):
    """Mean logistic (cross-entropy) cost of ``coeff`` on the data.

    Implements cost = mean(-y*log(h) - (1 - y)*log(1 - h)) with
    h = 1 / (1 + exp(-z)) and z = np.dot(X, coeff).

    Args:
        X: design matrix of shape (n_samples, n_features).
        y: array of 0/1 labels, one per row of ``X``.
        coeff: coefficient vector of shape (n_features,).

    Returns:
        The mean cost over all samples as a scalar.
    """
    y = np.asarray(y)
    z = np.dot(X, coeff)
    # -y*log(h) - (1 - y)*log(1 - h) simplifies to log(1 + e^z) - y*z.
    # np.logaddexp(0, z) evaluates log(1 + e^z) without overflowing and
    # without ever taking log(0), so the cost stays finite for large |z|.
    return np.mean(np.logaddexp(0.0, z) - y * z)

def gradient(X, y, coeff):
    """Gradient of the mean logistic cost with respect to ``coeff``.

    Computes X^T (h - y) / m, where h = 1 / (1 + exp(-np.dot(X, coeff)))
    and m is the number of rows of ``X``.

    Args:
        X: design matrix of shape (n_samples, n_features).
        y: array of 0/1 labels, one per row of ``X``.
        coeff: coefficient vector of shape (n_features,).

    Returns:
        Array of shape (n_features,), so it can be subtracted from ``coeff``
        without changing its shape.
    """
    X = np.asarray(X)
    m = X.shape[0]
    predicted = 1.0 / (1.0 + np.exp(-np.dot(X, coeff)))
    # The per-sample residuals must be summed over the samples (a matrix
    # product with X^T), not merely multiplied element-wise by X^T.
    return np.dot(X.T, predicted - y) / m
    
def gradient_descent(X, y, coeff, learning_rate = 0.01, min_cost_change= 0.001,
                     max_epochs = 100000):
    """Minimize the cost by repeatedly stepping against the gradient.

    Each iteration replaces ``coeff`` with
    ``coeff - learning_rate * gradient(X, y, coeff)`` and re-evaluates
    ``cost_function``. Iteration stops as soon as the cost decreases by no
    more than ``min_cost_change`` (a step that increases the cost also ends
    the loop) or after ``max_epochs`` updates.

    Args:
        X: data passed through to ``cost_function`` and ``gradient``.
        y: labels passed through to ``cost_function`` and ``gradient``.
        coeff: initial coefficients; the caller's array is not modified.
        learning_rate: step size applied to the gradient.
        min_cost_change: minimum cost improvement required to keep iterating.
        max_epochs: upper bound on the number of updates, guarding against a
            loop that would otherwise never terminate (e.g. a non-positive
            ``min_cost_change``).

    Returns:
        Tuple ``(coeff, epoch)``: the final coefficients and the number of
        updates actually performed.
    """
    cost = cost_function(X, y, coeff)
    # Start from +inf so the first update always runs, whatever the relation
    # between learning_rate and min_cost_change.
    cost_change = np.inf
    epoch = 0

    while cost_change > min_cost_change and epoch < max_epochs:
        prev_cost = cost
        coeff = coeff - (learning_rate * gradient(X, y, coeff))
        cost = cost_function(X, y, coeff)
        cost_change = prev_cost - cost
        epoch += 1

    return coeff, epoch

def predict_y(X, coeff):
    """Predict the class (0 or 1) for every row of ``X``.

    The predicted probability 1 / (1 + exp(-z)) is >= 0.5 exactly when the
    weighted sum z = np.dot(X, coeff) is >= 0, so the class is decided
    directly from the sign of the weighted sum.

    Args:
        X: design matrix of shape (n_samples, n_features).
        coeff: coefficient vector of shape (n_features,).

    Returns:
        Integer array with one 0/1 label per row of ``X``.
    """
    weighted_sum = np.dot(X, coeff)
    return np.where(weighted_sum >= 0, 1, 0)

if __name__ == "__main__":

    # Sample data: one feature value and one binary label per observation.
    feature = np.array([0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75,
                        3.00, 3.25, 3.50, 3.75, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50])
    y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

    # Build a 2-D design matrix: a column of ones (intercept term) next to the
    # feature column. A 1-D array has no second axis, so X.shape[1] below
    # would raise IndexError without this step.
    X = np.column_stack((np.ones_like(feature), feature))

    # initial values: one coefficient per column of X
    coeff = np.zeros(X.shape[1])

    coeff, num_epochs = gradient_descent(X, y, coeff)

    print("Estimated regression coefficients:", coeff)
    print("No. of iterations:", num_epochs)

    y_pred = predict_y(X, coeff)

    # number of correctly predicted labels
    print("Total labels: ", y.size)
    print("Correctly predicted labels:", np.sum(y == y_pred))