In [1]:
import numpy as np
from numpy.typing import NDArray
import pandas as pd
import matplotlib.pyplot as plt
from numba import jit

In [3]:
# MODEL: y = wx + b
# PARAMETERS: w, b
# MODEL: f()
# COST FUNCTION: j()

# w and b are collectively called the coefficients (or parameters) of the model; separately, w is the weight and b is the bias.
# we need to find the best possible values for w and b, such that our estimates end up as close as possible to the real targets.
# what the cost function does is that it measures the difference between the model's predictions and the true target values.

# ___$j(w, b) = \frac{1}{2N}\sum_{i = 1}^{N}(f(x_i) - y_i)^2$___

In [4]:
# f(x_i) is the prediction for the ith record (y_hat_i)
# f(x_i) - y_i gives the difference between the prediction and the actual y value.
# (f(x_i) - y_i)^2 gives us the squared error; squaring discards the sign, so positive and negative errors cannot cancel each other out.

# for this reason, this cost function is also called the squared error cost function.
# there are a plethora of cost functions available to use with linear regression models, but the squared error cost function is by far the most common
# type of cost function used in linear regression.

# the cost function gives us half of the average of the squares of such differences across all N records.
# our goal is to make j(w, b) as small as possible, i.e. bring the predictions and targets as close as possible :)

In [4]:
# load the dry bean dataset; the relative path is resolved against the notebook's working directory
beans = pd.read_csv(filepath_or_buffer = r"../anoban/Dry_Bean_Dataset.csv")

In [5]:
# feature (x) and target (y), pulled out of the dataframe as plain NumPy arrays

x = beans["MajorAxisLength"].to_numpy()    # predictor column
y = beans["Area"].to_numpy()               # column we want to predict

In [6]:
# let's think of an oversimplified version of the model,
# say that we do not have an intercept (bias) term in the model, so the model could simply be defined in the form of y = wx
# now we only need to find the gradient (slope or weight)

In [35]:
# let's handroll a linear regression model

@jit(nopython = True, parallel = False, fastmath = True)
def gradient_descent(predictions: NDArray[np.float64], targets: NDArray[np.float64]) -> np.float64:
    """
    computes the squared error cost j = (1 / 2N) * sum((prediction_i - target_i)^2),
    i.e. half of the mean squared difference between the predictions and the actual targets.

    NOTE: despite its name, this function does not perform gradient descent, it only evaluates the cost.
    the name is kept as is so that existing callers keep working.

    predictions: model estimates, one per record.
    targets: true values, same size as predictions.

    returns the cost as a floating point scalar.
    raises AssertionError when the two arrays differ in size or are empty.
    """
    # the messages are plain literals on purpose: f-strings with format specs (e.g. {n:10,d})
    # are not expected to compile under numba's nopython mode
    assert predictions.size == targets.size, "Both predictions and targets must have the same size!"
    # an empty input would otherwise lead to a division by zero below
    assert targets.size > 0, "predictions and targets must not be empty!"
    return np.square(predictions - targets).sum() / (2 * targets.size)


@jit(nopython = True, parallel = False, fastmath = True)
def linear_regression(predictor: NDArray[np.float64], targets: NDArray[np.float64], learning_rate: float = 0.1, epsilon: float = 10.0, max_iterations: int = 10_000) -> np.float64:
    """
    fits the bias-less model y_hat = w * x by gradient descent on the squared error cost
    j(w) = (1 / 2N) * sum((w * x_i - y_i)^2) and returns the fitted slope w.

    predictor: feature values (x), one per record.
    targets: true values (y), same size as predictor.
    learning_rate: initial step size, must be positive. it is halved whenever a step would not
        decrease the cost sufficiently, so a value that is too large for the scale of the data
        does not make the search diverge.
    epsilon: acceptable cost. the search stops as soon as j(w) drops below this value.
    max_iterations: hard upper bound on the number of update attempts.

    the search also stops once an update becomes too small to change w in floating point,
    which is what happens when the smallest achievable cost is still above epsilon.

    raises AssertionError when the arrays differ in size, are empty, or learning_rate is not positive.
    """
    assert predictor.size == targets.size, "Both predictor and targets must have the same size!"
    assert predictor.size > 0, "predictor and targets must not be empty!"
    assert learning_rate > 0.0, "learning_rate must be positive!"

    n_records = predictor.size
    step = learning_rate

    # starting guess (same as before); j(w) is convex in w, so the starting point only affects
    # how many iterations are needed, not where the search ends up
    slope = (targets - predictor).mean()
    residuals = (slope * predictor) - targets
    cost = np.square(residuals).sum() / (2 * n_records)

    for _ in range(max_iterations):
        if cost < epsilon:
            break

        # dj/dw = (1 / N) * sum((w * x_i - y_i) * x_i)
        gradient = (residuals * predictor).sum() / n_records
        candidate = slope - (step * gradient)
        if candidate == slope:
            # zero gradient, or the update is below floating point resolution: converged
            break

        candidate_residuals = (candidate * predictor) - targets
        candidate_cost = np.square(candidate_residuals).sum() / (2 * n_records)

        # accept the step only if it lowers the cost sufficiently (Armijo condition),
        # otherwise retry from the same slope with half the step size
        if candidate_cost <= cost - (0.5 * step * gradient * gradient):
            slope = candidate
            residuals = candidate_residuals
            cost = candidate_cost
        else:
            step *= 0.5

    return slope


In [None]:
# for our bias-less model,
# y_hat = f(x) = wx
# as we do not need a bias term, we only have one parameter - w

# ___$j(w) = \frac{1}{2N}\sum_{i=1}^{N}(f(x_i) - y_i)^2$___

In [None]:
# which becomes

# ___$j(w) = \frac{1}{2N}\sum_{i=1}^{N}(w \cdot x_i - y_i)^2$___