In [1]:
from matplotlib import pyplot as plt
import numpy as np    

def abline(slope, intercept):
    """Plot a line from slope and intercept"""
    axes = plt.gca()
    
    x_vals = np.array(axes.get_xlim())
    y_vals = intercept + slope * x_vals
    plt.plot(x_vals, y_vals, '--')

In [2]:
# predicted y for all elements of dataset = X*theta
# X - rows are data points
# theta - column feature vector

def sigmoid(x):
    return 1/(1+np.exp(-x))


def hypothesis(X,theta):
    return sigmoid(np.dot(X,theta))

In [3]:
# initialise example
data_x = np.array([[0,1,2,4,5,6],[2,1,0,2,1,0]])
data_x = np.transpose(data_x)
data_y = np.array([0,0,0,1,1,1])

# prepare X by appending an extra column of 1s to the begining
m = len(data_x)
x_0 = np.ones(m)
X = np.column_stack((x_0,data_x))
theta = np.array([0,0,0])

In [4]:
X

array([[1., 0., 2.],
       [1., 1., 1.],
       [1., 2., 0.],
       [1., 4., 2.],
       [1., 5., 1.],
       [1., 6., 0.]])

In [5]:
np.log(hypothesis(X,theta))

array([-0.69314718, -0.69314718, -0.69314718, -0.69314718, -0.69314718,
       -0.69314718])

In [51]:
def costFunction(X,theta,y):
    h = hypothesis(X,theta)
    return -np.dot(np.log(h),y)+np.dot(np.log(1-h),(1-y))/len(X)

In [52]:
# gradients for each parameter writen as 
#    v = hypothesis(X,theta) - y :- results in a column vector with length equal to the number of data points
#    gradient(theta) = (X' * v) / m
def gradient(X,theta,y):
    v = hypothesis(X,theta)-y
    return (np.dot(np.transpose(X),v))/(len(X))

In [53]:
gradient(X,theta,data_y)

array([ 0.0010855 , -0.00027063, -0.00027276])

In [60]:
import copy

# learning rate
alpha = 0.5

# Stopping threshold
delta = 10**-6

# initialised prev_cost
prev_cost = 1000000

while True:
    
    # stopping variable
    endLoop = True
    
    # copy the previous theta vector
    theta_prev = copy.deepcopy(theta)
    
    # call the cost function
    cost = costFunction(X,theta,data_y)
    gradients = gradient(X,theta,data_y)
    # ensure that we are moving in the right direction
    if prev_cost<cost:
        raise ValueError
    
    # reassign prev_cost to current cost
    prev_cost = cost

    theta = theta - alpha*gradients
    
    # check if the algorithm can terminate and terminate
    if np.all(abs(theta - theta_prev)<delta):
        break


print(theta)

[-23.97719519   6.35237439   5.39783058]


In [63]:
costFunction(X,theta,data_y)

4.249996745633031e-06

In [59]:
hypothesis(X,np.array([np.inf,1,1]))

array([0.11920292, 0.11920292, 0.11920292, 0.88079708, 0.88079708,
       0.88079708])