In [95]:
import math
import numpy as np
import matplotlib.pyplot as plt

In [96]:
# Training data: one row per example (3 examples, 4 features each) and one
# target value per example, in matching order.
X_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
y_train = np.array([460, 232, 178])

In [97]:
def linear_regression_model(X_set, W_set, b):
    """Evaluate the linear model: dot(W_set, X_set) + b.

    X_set holds the feature values, W_set the weights, and b is the bias
    added to their dot product.
    """
    weighted_sum = np.dot(W_set, X_set)
    return weighted_sum + b

In [121]:
def get_errors(X_matrix, y_set, W_set, b):
    """Compute the residual (prediction - target) for every training example.

    Parameters
    ----------
    X_matrix : iterable of feature vectors, one per training example.
    y_set : indexable of targets; y_set[i] is the target for the i-th example.
    W_set : weights, passed through to linear_regression_model.
    b : bias, passed through to linear_regression_model.

    Returns
    -------
    numpy.ndarray
        1-D float array of residuals, in the same order as X_matrix.
        Empty when X_matrix has no examples.
    """
    # Gather the residuals in a list and convert once at the end: np.append
    # returns a fresh copy of the whole array on every call, so growing an
    # array inside the loop costs O(m^2) copies for m examples.
    residuals = [
        linear_regression_model(training_example, W_set, b) - y_set[index]
        for index, training_example in enumerate(X_matrix)
    ]

    # dtype=float keeps the result float64 even for integer inputs, and
    # ravel() keeps it one-dimensional.
    return np.array(residuals, dtype=float).ravel()

: 

In [43]:
def calculate_cost(X_matrix, y_set, W_set, b):
    """Return the squared-error cost of the model on the given examples.

    The cost is the sum of the squared residuals from get_errors divided by
    2 * m, where m is the number of rows in X_matrix.
    """
    num_examples = X_matrix.shape[0]
    residuals = get_errors(X_matrix, y_set, W_set, b)

    sum_of_squares = np.sum(np.square(residuals))
    return sum_of_squares / (2 * num_examples)

In [66]:
def calculate_gradient(X_matrix, y_set, W_set, b):
    """Return (dj_dW, dj_db) for the given parameters.

    dj_dW is the residual vector from get_errors multiplied into X_matrix and
    divided by m; dj_db is the sum of the residuals divided by m, where m is
    the number of rows in X_matrix.
    """
    num_examples = X_matrix.shape[0]
    residuals = get_errors(X_matrix, y_set, W_set, b)

    weight_gradient = np.matmul(residuals, X_matrix) / num_examples
    bias_gradient = np.sum(residuals) / num_examples
    return weight_gradient, bias_gradient

In [110]:
# Sanity-check parameters: a weight of 1 for each of the 4 features and an
# arbitrary bias.
test_b = 3
test_W = np.ones(4)

# Print the per-example residuals first, then the cost derived from them.
print(get_errors(X_train, y_train, test_W, test_b))
print(calculate_cost(X_train, y_train, test_W, test_b))

[1698. 1232.  715.]
818708.8333333334


In [111]:
# Print the (dj_dW, dj_db) tuple that calculate_gradient returns for the
# sanity-check parameters test_W / test_b.
print(calculate_gradient(X_train, y_train, test_W, test_b))

(array([1.97542800e+06, 4.53866667e+03, 1.62566667e+03, 5.02383333e+04]), 1215.0)


In [119]:
def gradient_descent(X_matrix, y_set, W_set, b, alpha, threshold_value=1e-02, log_interval=100000, max_iters=None):
    """Run batch gradient descent until every partial derivative is small.

    Parameters
    ----------
    X_matrix, y_set : training examples and targets, forwarded to
        calculate_gradient and calculate_cost.
    W_set, b : starting weights and bias. They are not modified in place;
        updated values are returned.
    alpha : learning rate (step size).
    threshold_value : descent stops once the absolute value of every
        component of dj_dW and of dj_db is at or below this value.
    log_interval : positive int; a progress line is printed and the cost is
        recorded every log_interval iterations (starting at iteration 0).
    max_iters : optional cap on the number of update steps. None (the
        default) means no cap.

    Returns
    -------
    tuple
        (W_set, b, iter_progress, cost_progress, num_iters) where
        iter_progress / cost_progress are the recorded iteration numbers and
        costs, and num_iters is the number of update steps performed.
    """
    num_iters = 0
    iter_progress = []
    cost_progress = []

    dj_dW, dj_db = calculate_gradient(X_matrix, y_set, W_set, b)

    # Keep stepping while ANY partial derivative is still above the threshold.
    # Requiring all weight derivatives to be large (`.all()`) would stop the
    # descent as soon as a single one became small.
    while (np.abs(dj_dW) > threshold_value).any() or abs(dj_db) > threshold_value:
        if max_iters is not None and num_iters >= max_iters:
            break

        if num_iters % log_interval == 0:
            iter_progress.append(num_iters)
            cost_progress.append(calculate_cost(X_matrix, y_set, W_set, b))
            print(f"Iteration: {num_iters}  W_set: {W_set}  b: {b}  Cost: {cost_progress[-1]}  dj_dW: {dj_dW}  dj_db: {dj_db}")

        W_set = W_set - alpha * dj_dW
        b = b - alpha * dj_db
        num_iters += 1

        # Re-evaluate at the updated parameters so that the stopping test and
        # the progress line always describe the parameters currently held.
        dj_dW, dj_db = calculate_gradient(X_matrix, y_set, W_set, b)

    return W_set, b, iter_progress, cost_progress, num_iters

In [120]:
# Starting parameters and step size for the descent run.
learning_rate = 1e-7
initial_b = 1.239884067710518
initial_W = np.array(
    [
        0.15443468,
        23.4629676,
        -65.66840253,
        1.82612959,
    ]
)

# Left as the cell's last expression so the returned tuple is displayed.
gradient_descent(X_train, y_train, initial_W, initial_b, learning_rate)

(array([  0.15443468,  23.4629676 , -65.66840253,   1.82612959]),
 1.239884067710518,
 [],
 0)

In [118]:
# Hand-evaluate a loop-continuation test of the form
#   all(threshold < |dj_dW|) or threshold < |dj_db|
# on one sample gradient.
check_dj_db = -0.00010778756040963344
check_dj_dW = [7.42220891e-06, -1.94809402e-03, 5.44821455e-03, -2.87624082e-04]

threshold_value = 1e-2
threshold_array = np.full(4, threshold_value)

# The operands being compared ...
print(threshold_array, np.abs(check_dj_dW))
# ... each half of the condition on its own ...
print((threshold_array < np.abs(check_dj_dW)).all(), threshold_value < abs(check_dj_db))
# ... and the combined condition.
print((threshold_array < np.abs(check_dj_dW)).all() or threshold_value < abs(check_dj_db))

[0.01 0.01 0.01 0.01] [7.42220891e-06 1.94809402e-03 5.44821455e-03 2.87624082e-04]
False False
False


In [None]:
def learning_curve(cost_progress, iter_progress):
    """Plot recorded cost values against the iterations they were recorded at.

    Parameters
    ----------
    cost_progress : sequence of cost values.
    iter_progress : sequence of iteration numbers, one per cost value.

    Draws a red line on the current matplotlib axes and labels both axes.
    """
    # matplotlib's plot takes x first, then y: iterations go on the x-axis and
    # cost on the y-axis so the curve reads as cost over training time.
    plt.plot(iter_progress, cost_progress, marker = None, color = "r")
    plt.xlabel("Iteration")
    plt.ylabel("Cost")

def mutiple_learning_curves(X_matrix, y_set, W_set, b, alpha_in, alpha_fin):
    """Run gradient descent over a sweep of learning rates and plot each cost history.

    The sweep starts at alpha_in and alternately multiplies the rate by 3 and
    by 10/3 (so 1, 3, 10, 30, 100, ... times alpha_in), continuing while the
    rate is <= alpha_fin. Every run starts from the same W_set and b.

    Parameters
    ----------
    X_matrix, y_set, W_set, b : forwarded unchanged to gradient_descent.
    alpha_in : first learning rate of the sweep; must be positive.
    alpha_fin : largest learning rate to try.

    Raises
    ------
    ValueError
        If alpha_in is not positive, because multiplying it would never move
        the rate past alpha_fin.
    """
    if alpha_in <= 0:
        raise ValueError("alpha_in must be positive")

    # Named learning_rate so it does not shadow the learning_curve function.
    learning_rate = alpha_in
    step_index = 0
    while learning_rate <= alpha_fin:
        _, _, iter_progress, cost_progress, _ = gradient_descent(X_matrix, y_set, W_set, b, learning_rate)

        # One labelled line per learning rate so the curves can be told apart.
        plt.plot(iter_progress, cost_progress, label=f"alpha = {learning_rate:g}")

        if step_index % 2 == 0:
            learning_rate *= 3
        else:
            learning_rate *= 10/3
        # Advance the counter so the x3 and x10/3 steps actually alternate.
        step_index += 1

    # Only decorate the axes when at least one curve was drawn.
    if step_index:
        plt.xlabel("Iteration")
        plt.ylabel("Cost")
        plt.legend()
