In [6]:
import numpy as np
import matplotlib.pyplot as plt

# Gradient descent

## Analytical gradient descent

In [7]:
def gradient_descent(x0, f, g, learning_rate=0.05, T=1000):
    """
    Minimise a function by fixed-step gradient descent using its analytical gradient.

    x0: starting point (a scalar or a NumPy array); it is never modified in place
    f: objective function; kept in the signature but not evaluated by this routine
    g: gradient of the objective, called as g(x)
    learning_rate: step size multiplying the gradient at each update
    T: number of update steps to perform

    Returns the point reached after exactly T updates (x0 itself when T <= 0).
    """
    x = x0
    t = 0
    # `t < T` gives exactly T updates; the previous `t <= T` performed T + 1.
    while t < T:
        # Rebind instead of using `x -= ...`: the in-place form would overwrite
        # the caller's array and raises a casting error on integer arrays.
        x = x - learning_rate * g(x)
        t += 1
    return x

In [8]:
def test_function(x):
    return 3*(x**2) + 4*x + 1

In [9]:
def test_function_gradient(x):
    return 6*x + 4

In [10]:
# Draw 10 samples from the uniform distribution on [0, 1).
# NOTE(review): no seed is set, so these values change on every run.
X = np.random.rand(10)

In [11]:
# Start at x0 = 10; the exact minimiser of 3x^2 + 4x + 1 is x = -2/3.
gradient_descent(x0=10, f=test_function, g=test_function_gradient)

-0.6666666666666665

In [12]:
# Display the 10-element array bound to X in the earlier cell.
X

array([0.36961263, 0.39049903, 0.56103978, 0.58544124, 0.84428874,
       0.3250809 , 0.65846291, 0.86540455, 0.41428863, 0.78735864])

In [16]:
# Same run from a much more distant start (x0 = 100); it should reach the same minimiser, -2/3.
gradient_descent(x0=100, f=test_function, g=test_function_gradient)

-0.6666666666666665

## Numerical gradient descent

In [18]:
def numerical_gradient_descent(x0, f, dx, learning_rate=0.05, T=1000):
    """
    Minimise f by fixed-step gradient descent with a forward-difference gradient.

    x0: starting point (a scalar or a NumPy array); it is never modified in place
    f: objective function, evaluated at x and at x + dx on every step
    dx: finite-difference step used in (f(x + dx) - f(x)) / dx; must be non-zero
    learning_rate: step size multiplying the estimated gradient at each update
    T: number of update steps to perform

    Returns the point reached after exactly T updates (x0 itself when T <= 0).
    The forward difference is only first-order accurate in dx, so the result is
    the stationary point of the approximated gradient, not of the exact one.
    """
    x = x0
    t = 0
    # `t < T` gives exactly T updates; the previous `t <= T` performed T + 1.
    while t < T:
        slope = (f(x + dx) - f(x)) / dx
        # Rebind instead of using `x -= ...` so the caller's array is left intact
        # and integer starting arrays do not trigger a casting error.
        x = x - learning_rate * slope
        t += 1
    return x

In [19]:
# The forward difference with dx = 0.01 overestimates the slope of the quadratic
# by 3 * dx, so the result settles slightly below the true minimiser -2/3.
numerical_gradient_descent(x0=10, f=test_function, dx=0.01)

-0.6716666666666643

# Stochastic gradient descent

In [11]:
def logistic_cost(ytrue, yhat):
    """
    Logistic loss log(1 + exp(-ytrue * yhat)), evaluated element-wise.

    ytrue: true label(s); the margin form used here presumably assumes labels
        encoded as -1 / +1 -- TODO confirm against callers
    yhat: raw model score(s), broadcast against ytrue

    Returns the loss as a NumPy float or array.
    """
    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), but computed
    # without overflowing when z is large (the naive form returns inf there).
    return np.logaddexp(0, -ytrue * yhat)

# Use Rosenbrock's function to test gradient descent

In [1]:
# Improved version of October 8th
def stochastic_gradient_descent(X, ytrue, T=1000, epsilon=0.1, learning_rate=0.5):
    """
    Fit linear weights with single-sample stochastic gradient descent.

    On every step one row of X is drawn uniformly at random, its cost is
    recorded through quadratic_cost(prediction, y), and the weights move
    against the residual-weighted sample.

    X: 2-D array of samples, one row per observation
    ytrue: 1-D array of targets, one per row of X
    T: iteration bound; the loop runs for t = 0..T inclusive, i.e. T + 1 updates,
        so `costs` has T + 1 entries
    epsilon: kept for backward compatibility; it has no effect. The previous
        stopping test took np.gradient of a scalar cost, which yields no
        components, so its norm was always 0 and never influenced the loop.
    learning_rate: step size; note the update is additionally scaled by 1 / m

    Returns (w, costs): the final weight vector and the list of per-step costs.
    Raises ValueError when ytrue is empty.
    """
    X = np.asarray(X)
    ytrue = np.asarray(ytrue)
    m = len(ytrue)
    if m == 0:
        raise ValueError("ytrue must contain at least one sample")

    w = np.zeros(X.shape[1])  # initialise the weight vector with every w_i = 0
    costs = []

    t = 0
    # NOTE(review): T + 1 iterations are kept on purpose -- the notebook later
    # builds an index of length 1001, presumably to plot these costs; confirm.
    while t <= T:
        i = np.random.randint(m)
        x_i = X[i, :]
        y_i = ytrue[i]
        prediction = np.dot(x_i, w)
        costs.append(np.sum(quadratic_cost(prediction, y_i)))
        # NOTE(review): dividing a single-sample gradient by m shrinks the
        # effective step to learning_rate / m -- confirm this is intended.
        w = w - learning_rate * (1 / m * x_i.dot(prediction - y_i))
        t += 1
    return w, costs

In [38]:
from sklearn.datasets import make_classification
# Fix the seed so the synthetic dataset is identical on every run.
X, y = make_classification(n_samples=100, n_features=5, random_state=42)

In [18]:
# Integer index 0..1000 (1001 entries).
# NOTE(review): 1001 presumably matches the T + 1 costs recorded by
# stochastic_gradient_descent with its default T=1000 -- confirm.
t = np.arange(1001)

In [19]:
# Check the length of t: expect 1001 entries (indices 0 through 1000).
t.shape

(1001,)