In [None]:
import numpy as np

import os, sys, plotly.graph_objects as go
import plotly.figure_factory as ff
module_path = os.path.abspath(os.path.join('../../../../../..'))
if module_path not in sys.path:
    sys.path.append(module_path) 

from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression
from sklearn import datasets 

from erudition.learning.helpers.plots.plotly_render import render, scatter

In [None]:
# Generate a synthetic regression problem with a single feature and noisy
# targets. The seed is fixed so that Restart & Run All reproduces the same
# data (and therefore the same fitted parameters) every time.
x, y = datasets.make_regression(n_features=1, n_samples=100, noise=25, random_state=42)

# Plot the raw samples: the single feature column against the target.
render(go.Figure(data=[scatter(x[:, 0], y, 'Data', mode='markers')]), title='Random scatter')

# Hypothesis

Let's define our linear hypothesis to fit the feature space:

$$h_\theta(x) = \theta_0 + \theta_1.x$$

We measure how well the hypothesis fits the data with the mean squared error (halved, which simplifies the derivative later) and take that as the cost function. The aim is to minimize the cost function with respect to the parameters $\theta$:

$$J(\theta) = \frac {1}{2n} \sum_{i=1}^n(h_\theta(x_i) - y_i)^2$$

As we vary the parameter values we will see an increase or decrease in the cost function. The goal here is to find the minimum of the cost function.

By taking the partial derivative of the cost function with respect to each parameter, we obtain the slope of the cost function at that point. Because the cost function is convex, stepping down the slope by a certain amount (the learning rate) moves us towards its minimum, which for a convex function is the global minimum.

$$\frac{\partial J(\theta_0, \theta_1)}{\partial \theta_0} = \frac{\partial}{\partial \theta_0}\frac {1}{2n} \sum_{i=1}^n(h_\theta(x_i) - y_i)^2 $$

Then, using the sum rule we can write:

$$\frac{\partial J(\theta_0, \theta_1)}{\partial \theta_0} = \frac {1}{2n} \sum_{i=1}^n\frac{\partial}{\partial \theta_0}(h_\theta(x_i) - y_i)^2 $$

Next we use the power rule together with the chain rule to get:

$$\frac{\partial J(\theta_0, \theta_1)}{\partial \theta_0} = \frac {1}{2n} \sum_{i=1}^n2(h_\theta(x_i) - y_i)\frac{\partial}{\partial \theta_0}(h_\theta(x_i) - y_i)$$


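And finally, since $\frac{\partial}{\partial \theta_0}(h_\theta(x_i) - y_i) = 1$ and $\frac{\partial}{\partial \theta_1}(h_\theta(x_i) - y_i) = x_i$, the factor of 2 cancels and differentiating with respect to each parameter gives: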

$$\frac{\partial J(\theta_0, \theta_1)}{\partial \theta_0} = \frac {1}{n} \sum_{i=1}^n(h_\theta(x_i) - y_i)$$

and

$$\frac{\partial J(\theta_0, \theta_1)}{\partial \theta_1} = \frac {1}{n} \sum_{i=1}^n(h_\theta(x_i) - y_i).x_i$$





In [None]:
def cost_function(X, y, thetas):
    """Evaluate the halved mean squared error J(theta) of a linear model.

    Args:
        X: design matrix with one row per sample.
        y: target values, shaped so they can be subtracted from the
            product of ``X`` and ``thetas``.
        thetas: parameter values at which the cost is evaluated.

    Returns:
        ``r.T . r / (2 * len(X))`` where ``r = X . thetas - y``.
    """
    residuals = np.dot(X, thetas) - y
    sum_squared_residuals = np.dot(residuals.T, residuals)
    n_samples = len(X)

    return sum_squared_residuals / (2 * n_samples)

In [None]:
def gradient_descent(X, y, thetas, alpha, iters=1000):
    """Fit linear-model parameters with batch gradient descent.

    Every iteration applies the update
    ``thetas <- thetas - alpha * X.T . (X . thetas - y) / n`` with
    ``n = len(X)``, i.e. one step against the gradient of the halved mean
    squared error.

    Args:
        X: design matrix with one row per sample.
        y: target values, shaped so they can be subtracted from the
            product of ``X`` and ``thetas``.
        thetas: initial parameter values. This array is not modified.
        alpha: learning rate (step size).
        iters: number of update steps to perform.

    Returns:
        A new float array, shaped like ``thetas``, holding the parameters
        after ``iters`` updates.
    """
    # Normalise by the actual number of samples rather than a fixed constant,
    # so the step size is right for any dataset size.
    n_samples = len(X)

    # Work on a float copy: the caller's starting point stays untouched (so
    # repeated calls start from the same place) and integer inputs are safe.
    current = np.array(thetas, dtype=float)

    for _ in range(iters):
        residuals = np.dot(X, current) - y
        gradient = np.dot(X.T, residuals) / n_samples
        current -= alpha * gradient
    return current


In [None]:
# Build the design matrix: a column of 1s (multiplied by the intercept
# theta_0) next to the single feature column. The row count is taken from the
# data so this cell keeps working if the number of samples changes.
X = np.column_stack((np.ones(len(x)), x[:, 0]))

# make y a 2d column vector so it lines up with the product of X and thetas
yr = y.reshape(-1, 1)

# set the initial values for the parameters (theta_0, theta_1)
thetas = np.zeros((2, 1))

# set the learning rate
alpha = 0.2

# set the number of iterations for the gradient descent algo
iters = 1000

In [None]:
# Start from a fresh copy of the initial parameters so that re-running this
# cell always begins at the same point, and use the configured iteration
# count instead of a separate literal.
theta_grad = gradient_descent(X, yr, thetas.copy(), alpha, iters=iters)
theta_grad

In [None]:
x_range = np.arange(-2, 2, 0.01)

# Draw the fitted line from the parameters learned by gradient descent rather
# than from hand-copied numbers, so the line always matches the current data.
intercept, slope = np.ravel(theta_grad)

# NOTE(review): both traces pass 'Data' as the third argument; presumably this
# is the legend label, in which case the line trace deserves its own name -
# confirm against the `scatter` helper.
render(
    go.Figure(
        data=[
            scatter(x[:,0], y, 'Data', mode='markers', opacity=1),
            scatter(x_range, intercept + slope * x_range, 'Data', mode='lines', color='pink', opacity=1)
        ]),
    title='Actually From Scratch'
)