# Collection of Gradient Descent Methods

In [None]:
import math
import os
import sys

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.datasets import make_regression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Add the directory five levels up to sys.path so the local `erudition`
# package can be imported below.
module_path = os.path.abspath(os.path.join('../../../../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from erudition.learning.helpers.plots.plotly_render import render, scatter, bar, shape_rect

# Hand crafted linear regression

In [None]:
# Six hand-picked (x, y) points used for the single-feature example.
X = np.array([1, 1.2, 4, 3.8, 4, 5])
y = np.array([3, 2, 3, 3.5, 5, 7])

# Shape check on the arrays defined above (the original referenced
# `diabetes_X` / `diabetes_y`, which do not exist in this notebook).
X.shape, y.shape

render(
    go.Figure(data=[scatter(X, y, 'Data', mode='markers', color='yellow', opacity=0.9, size=5)]),
    title='Scatter',
    x_axis_title='X',
    y_axis_title='y',
)

In [None]:
# Rough slope guess: the mean of the per-point ratios y_i / x_i.
theta_1x = np.mean(y / X)

# Column vectors so the normal equation can be written with matrix products.
# reshape(-1, 1) adapts to however many samples X and y hold.
Xr = X.reshape(-1, 1)
yr = y.reshape(-1, 1)
# Least-squares slope without an intercept term: (X^T X)^-1 X^T y.
theta_1 = np.linalg.inv(Xr.T.dot(Xr)).dot(Xr.T.dot(yr))

In [None]:
# Fit scikit-learn's ordinary least squares on the single-column design matrix;
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(Xr, y)

# Show the learned slope(s) and intercept.
model.coef_, model.intercept_

In [None]:
# theta_0 is assigned here but not referenced by the plot below.
theta_0 = 0

# x positions at which each fitted line is evaluated.
x_range = np.arange(0, 6, 0.1)

# One trace for the raw points and one per slope estimate.
figure = go.Figure(data=[
    scatter(X, y, 'Data', mode='markers', color='yellow', opacity=0.9, size=5),
    scatter(x_range, x_range * theta_1x, 'Guess', mode='lines', size=5, color='yellow'),
    scatter(x_range, x_range * theta_1[0], 'Gaussian', mode='lines', size=5, color='orange'),
    scatter(
        x_range,
        x_range * model.coef_[0] + model.intercept_,
        'LinearRegression',
        mode='lines',
        size=5,
        color='pink',
    ),
])
render(figure, 'Data Plot', x_axis_title='X', y_axis_title='y')


# Cost function

The cost function is defined as:
    
$$J(\theta) = \frac {1}{2n} \sum_{i=1}^n(h_\theta(x_i) - y_i)^2$$

Which can be written in matrix form as:

$$J(\theta) = \frac {1}{2n}(X\theta-y)^T(X\theta-y)$$

Setting the gradient of $J(\theta)$ to zero gives the following closed-form solution for the minimising parameters (the normal equation):

$$\hat\theta = (X^TX)^{-1}X^Ty$$

Instead of solving for $\theta$ directly, we can search for the values that minimise the cost function using a technique called gradient descent. The key idea behind gradient descent is that we take the partial derivatives of the cost function with respect to each $\theta$ value to find its slope, and then repeatedly step in the downhill direction. The cost function is assumed to be convex and Lipschitz continuous (https://en.wikipedia.org/wiki/Lipschitz_continuity).

Let's plot some values of $\theta$ to show the form of the cost function.

In [None]:
# Candidate values for both parameters: the integers -10 .. 9.
theta_0 = np.arange(-10, 10)
theta_1 = np.arange(-10, 10)

# Print the candidates pairwise (same index in both arrays).
for t0, t1 in np.stack([theta_0, theta_1], axis=1):
    print(t0, t1)

In [None]:
def cost_function(thetas, n, inputs=None, targets=None):
    """Return the squared-error cost J(theta) = 1/(2n) * ||X theta - y||^2.

    Args:
        thetas: Parameter vector with one entry per column of ``inputs``.
        n: Number of samples used to normalise the summed squared error.
        inputs: Design matrix with one row per sample. A 1-D array is treated
            as a single feature column. Defaults to the notebook-level ``X``.
        targets: Target values, one per row of ``inputs``. Defaults to the
            notebook-level ``y``.

    Returns:
        The cost as a Python float.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive sample count")

    design = np.asarray(X if inputs is None else inputs, dtype=float)
    if design.ndim == 1:
        design = design.reshape(-1, 1)
    # Flatten so column-vector and 1-D inputs give the same residuals instead
    # of broadcasting into an (n, n) matrix.
    observed = np.ravel(np.asarray(y if targets is None else targets, dtype=float))
    weights = np.ravel(np.asarray(thetas, dtype=float))

    residuals = design @ weights - observed
    return float(residuals @ residuals) / (2 * n)

In [None]:
# Two-element parameter vector; transposing a 1-D array leaves its shape unchanged.
theta = np.array([1, 1])
theta.T.shape

# Stochastic Gradient Descent

$$J(\mathbf w) = \frac {1}{2} \sum_{i=1}^N \left( y^{(i)}-\phi \left(\mathbf w^T \mathbf x^{(i)}\right) \right )^2$$

In [None]:
# Synthetic regression problem with seven features (fixed seed for repeatability).
X, y = make_regression(n_features=7, random_state=42)

# Standardise every feature column before plotting; the plot below reads
# X_scaled, which the original cell never defined.
X_scaled = StandardScaler().fit_transform(X)
render(
    go.Figure(data=[scatter(X_scaled[:, 0], y, 'data', mode='markers', size=5, color='yellow')]),
    'Data Plot',
    x_axis_title='X',
    y_axis_title='y',
)

In [None]:
# Hold out part of the data for evaluation (train_test_split's default split).
X_train, X_test, y_train, y_test = train_test_split(X, y)

render(go.Figure(
           data=[
               scatter(X_train[:, 0], y_train, 'data', mode='markers', size=5, color='yellow', opacity=1)
           ]), 'Data Plot', x_axis_title='X', y_axis_title='y')

# Fit on the first feature only. reshape(-1, 1) builds the required 2-D column
# without hard-coding the number of training rows.
reg = SGDRegressor()
fit = reg.fit(X_train[:, 0].reshape(-1, 1), y_train)

In [None]:
# Predict from the first feature of the held-out rows; reshape(-1, 1) keeps
# this working for any test-set size.
y_pred = reg.predict(X_test[:, 0].reshape(-1, 1))

In [None]:
# Evenly spaced x grid starting at -3 and stopping before 3, in steps of 0.24.
x_pred = np.arange(start=-3, stop=3, step=0.24)

In [None]:
# Display the predictions computed for the held-out rows.
y_pred

In [None]:
# 6 / 25 = 0.24; presumably how the 0.24 step of x_pred was chosen
# (a range of width 6 split into 25 points) — TODO confirm.
6/25

In [None]:
# Display the x grid built with np.arange.
x_pred

In [None]:
# y_pred holds the model's predictions for X_test[:, 0], so the fitted line
# must be drawn against those same x values, not against an unrelated grid.
# Sorting by x makes the line trace run left to right.
order = np.argsort(X_test[:, 0])

render(go.Figure(
           data=[
               scatter(X_train[:, 0], y_train, 'data', mode='markers', size=5, color='yellow', opacity=1),
               scatter(X_test[order, 0], y_pred[order], 'fit', mode='lines', size=5, color='pink', opacity=1)
           ]), 'Data Plot', x_axis_title='X', y_axis_title='y')

# Refit a fresh model, this time on every feature column.
reg = SGDRegressor()
fit = reg.fit(X_train, y_train)

In [None]:
# Display the full feature matrix.
X

In [None]:
# X has one column per feature while y is 1-D, so a single x/y trace cannot
# pair them directly; plot the first feature, as the other cells do.
render(go.Figure(
           data=[
               scatter(X[:, 0], y, 'data', mode='markers', size=5, color='yellow', opacity=1)
           ]), 'Data Plot', x_axis_title='X', y_axis_title='y')

# Fit a fresh model on every feature column of the training split.
reg = SGDRegressor()
fit = reg.fit(X_train, y_train)