In [None]:
# Fitting logistic regression from scratch: applying the formulas covered in lecture to a small example
import numpy as np

In [3]:
# Toy training set: six samples with two features each (one row per sample),
# and the matching binary labels (first three samples are class 0, last three class 1).
X_train = np.array(
    [
        [0.5, 1.5],
        [1.0, 1.0],
        [1.5, 0.5],
        [3.0, 0.5],
        [2.0, 2.0],
        [1.0, 2.5],
    ]
)
y_train = np.array([0, 0, 0, 1, 1, 1])

In [None]:
# Sigmoid function to compute the probability given z
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^(-z)), evaluated element-wise.

    The value is computed from exp(-|z|), which always lies in (0, 1], so the
    exponential cannot overflow no matter how large |z| is:

        z >= 0:  1 / (1 + e^(-|z|))
        z <  0:  e^(-|z|) / (1 + e^(-|z|))

    Both expressions are algebraically equal to 1 / (1 + e^(-z)).

    Parameters
    ----------
    z : real scalar or array-like
        Input value(s); anything ``np.asarray`` accepts.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # in (0, 1]; may underflow to 0 but never overflows
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a NumPy scalar and simply
    # returns a view for arrays with one or more dimensions.
    return result[()]

In [None]:
# Function to calculate current cost given the weight and bias
def cost(X, y, w, b):
    """Return the mean binary cross-entropy of a logistic-regression model.

    With z = X.w + b the per-sample loss is

        -[ y * log(sigmoid(z)) + (1 - y) * log(1 - sigmoid(z)) ]

    It is evaluated through the identities

        -log(sigmoid(z))     = log(1 + e^(-z)) = logaddexp(0, -z)
        -log(1 - sigmoid(z)) = log(1 + e^(z))  = logaddexp(0,  z)

    so the result stays finite even when the sigmoid saturates to exactly
    0 or 1 (where taking log() of the probability would give -inf / nan).

    Parameters
    ----------
    X : array-like
        Feature matrix with one row per sample.
    y : array-like
        Binary labels (0 or 1), one per sample.
    w : array-like
        Weights, one per feature.
    b : scalar
        Bias term.

    Returns
    -------
    NumPy float: the loss averaged over all samples.

    Raises
    ------
    ValueError
        If ``X`` contains no samples (the mean would be undefined).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.shape[0] == 0:
        raise ValueError("cost() requires at least one training example")

    z = np.dot(X, w) + b
    per_sample = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
    return np.mean(per_sample)
    

In [None]:
# Baseline check: evaluate the cost with all-zero weights and a zero bias
# before any training has happened.
w_initial = np.zeros(X_train.shape[1], dtype=X_train.dtype)
b = 0
print(f"Starting cost is: {cost(X_train, y_train, w_initial, b)}")

[0. 0. 0. 0. 0. 0.]
Starting cost is: 0.6931471805599453


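With zero weights and a zero bias the model predicts 0.5 for every sample, so the starting cost is $-\ln(0.5) = \ln 2 \approx 0.693$, which matches the value above. Because the labels are balanced, this is also the lowest cost any constant prediction can achieve.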

In [62]:
# Vectorized gradient descent for logistic regression
def v_gradientDescent(X, y, learningRate, iterations, log_every=500):
    """Fit logistic-regression weights and bias with batch gradient descent.

    Every iteration computes predictions for the whole data set, forms the
    mean gradient of the cross-entropy cost with respect to the weights and
    the bias, and takes one step of size ``learningRate`` against it.

    Parameters
    ----------
    X : array-like
        Feature matrix with one row per sample.
    y : array-like
        Binary labels (0 or 1), one per sample.
    learningRate : float
        Step size applied to both the weight and the bias gradient.
    iterations : int
        Number of gradient-descent steps to run.
    log_every : int or None, optional
        Print the current cost on every ``log_every``-th iteration (starting
        with iteration 0). ``0`` or ``None`` disables the progress output.
        Defaults to 500.

    Returns
    -------
    (w, b) : the fitted weights (float ndarray, one entry per feature) and
    the fitted bias.

    Raises
    ------
    ValueError
        If ``X`` contains no samples.
    """
    # Work in floating point so the in-place weight update below cannot fail
    # when the caller passes integer-valued features.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    m = X.shape[0]
    if m == 0:
        raise ValueError("v_gradientDescent() requires at least one training example")

    w = np.zeros(X.shape[1:])  # one weight per feature, always float
    b = 0.0

    for i in range(iterations):
        y_hat = sigmoid(np.dot(X, w) + b)
        error = y_hat - y

        # Mean gradient of the cross-entropy cost over all m samples.
        dw = np.dot(error, X) / m
        db = np.sum(error) / m

        w -= learningRate * dw
        b -= learningRate * db

        # Report the cost on the data being fitted (the X, y arguments),
        # not on whatever training set happens to exist at module level.
        if log_every and i % log_every == 0:
            print(f"The current cost is: {cost(X, y, w, b)}")

    return w, b

In [63]:
# Train on the toy data set; the bias is stored as `bb` so the `b` used for
# the starting-cost check is left untouched.
w, bb = v_gradientDescent(
    X_train,
    y_train,
    learningRate=0.5,
    iterations=3000,
)

The current cost is: 0.6602202061210146
The current cost is: 0.06799240790224992
The current cost is: 0.03427380297424198
The current cost is: 0.022828741719718947
The current cost is: 0.017097449006341425
The current cost is: 0.013661447591949634


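Because the dataset is small and the features are already on similar scales, the relatively large learning rate of 0.5 converges smoothly: the cost falls from about 0.66 after the first step to about 0.014 by iteration 2500, so the model fits the training data well.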