# Gradient Descent

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import HTML

# Define the function to minimize
def f(x):
    """Return the square of ``x``; this is the objective being minimised.

    Works element-wise when ``x`` is a NumPy array.
    """
    return pow(x, 2)

# Define the derivative of the function
def df(x):
    """Return the derivative of ``x**2`` at ``x``, i.e. twice ``x``.

    Works element-wise when ``x`` is a NumPy array.
    """
    return x + x

# Define the gradient descent algorithm
def gradient_descent(x0, learning_rate, n_iterations, grad=None):
    """Minimise a function by repeatedly stepping against its gradient.

    Args:
        x0: Starting point (a scalar or a NumPy array).
        learning_rate: Step size multiplied into the gradient at every update.
        n_iterations: Number of update steps to perform.
        grad: Optional callable returning the gradient at ``x``. When omitted,
            the module-level ``df`` is used.

    Returns:
        A list of ``n_iterations + 1`` iterates: ``x0`` followed by the value
        obtained after each update.
    """
    if grad is None:
        grad = df
    x = x0
    x_values = [x0]
    for _ in range(n_iterations):
        # Rebind instead of ``x -= ...``: an in-place update on an array would
        # overwrite the caller's x0 and leave every entry of the history
        # pointing at one and the same array.
        x = x - learning_rate * grad(x)
        x_values.append(x)
    return x_values

# Set initial values and run the algorithm
# Starting point, step size and number of update steps for the 1-D demo
x0, learning_rate, n_iterations = 2.5, 0.1, 10
# x_values holds x0 followed by one iterate per update step
x_values = gradient_descent(
    x0,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
)

# Create the animation
fig, ax = plt.subplots()
# Draw the objective curve over 100 evenly spaced points of [-3, 3]
x = np.linspace(-3, 3, num=100)
ax.plot(x, f(x))
# Empty red-circle marker series; animate() fills it in frame by frame
(line,) = ax.plot([], [], 'ro')

def animate(i):
    """Draw frame ``i``: mark the first ``i + 1`` iterates on the curve."""
    visited = x_values[:i+1]
    heights = f(np.array(visited))
    line.set_data(visited, heights)

# One frame per recorded iterate: the starting point plus each update step
anim = animation.FuncAnimation(
    fig,
    func=animate,
    frames=n_iterations + 1,
    interval=500,
)

# Render the animation as interactive HTML so the notebook can display it
HTML(anim.to_jshtml())


# Linear Regression + Gradient Descent

In [None]:
import numpy as np


# Define the data
# Four samples with two features each
X = np.array([
    [1, 1],
    [1, 2],
    [2, 2],
    [2, 3],
])
# Targets follow y = 1*x1 + 2*x2 + 3.
# NOTE(review): X has no constant column, so a model of the form X.dot(theta)
# cannot represent the +3 offset exactly -- confirm this is intended.
y = X @ np.array([1, 2]) + 3

# Define the cost function
def f(theta, X, y):
    """Return the halved mean squared error of the linear model ``X.dot(theta)``.

    The squared differences between the predictions ``X.dot(theta)`` and the
    actual values ``y`` are summed and scaled by ``1 / (2 * m)``, where ``m``
    is ``len(y)``.

    Dividing by ``2 * m`` rather than ``m`` is a common convention for the
    linear-regression cost: the 2 in the denominator cancels the 2 that
    appears when the squared term is differentiated, which keeps the gradient
    used during gradient descent simpler.

    Args:
        theta: Coefficient vector.
        X: Matrix of independent variables, one row per sample.
        y: Actual values of the dependent variable.

    Returns:
        The scalar cost.
    """
    n_samples = len(y)
    residuals = X.dot(theta) - y
    scale = 1 / (2 * n_samples)
    return scale * np.sum(residuals ** 2)

# Define the derivative of the cost function
def df(theta, X, y):
    """Return the gradient of the cost with respect to ``theta``.

    Computed as ``(1 / m) * X.T.dot(X.dot(theta) - y)`` with ``m = len(y)``.

    Args:
        theta: Coefficient vector.
        X: Matrix of independent variables, one row per sample.
        y: Actual values of the dependent variable.

    Returns:
        An array with one partial derivative per coefficient.
    """
    n_samples = len(y)
    residuals = X.dot(theta) - y
    weighted = X.T.dot(residuals)
    return (1 / n_samples) * weighted

# Define the gradient descent algorithm
def gradient_descent(X, y, theta0, learning_rate, n_iterations, grad=None):
    """Fit ``theta`` by gradient descent and record every iterate.

    Prints the coefficients after each update step.

    Args:
        X: Matrix of independent variables, one row per sample.
        y: Actual values of the dependent variable.
        theta0: Initial coefficient vector. It is not modified.
        learning_rate: Step size multiplied into the gradient at every update.
        n_iterations: Number of update steps to perform.
        grad: Optional callable ``grad(theta, X, y)`` returning the gradient.
            When omitted, the module-level ``df`` is used.

    Returns:
        A list of ``n_iterations + 1`` coefficient vectors: ``theta0``
        followed by a distinct array for the result of each update.
    """
    if grad is None:
        grad = df
    theta = theta0
    theta_values = [theta0]
    for i in range(n_iterations):
        # Rebind instead of ``theta -= ...``: the in-place form overwrote the
        # caller's theta0 and appended the same array object every time, so
        # the whole history ended up showing only the final coefficients.
        # Rebinding also lets an integer theta0 be promoted to float.
        theta = theta - learning_rate * grad(theta, X, y)
        theta_values.append(theta)
        print(f"Iteration {i+1}: {theta}")
    return theta_values

# Set initial values and run the algorithm
# Initial guess for the two coefficients
theta0 = np.array([0.5, 0.5])
# Step size and number of update steps
learning_rate, n_iterations = 0.1, 10
theta_values = gradient_descent(
    X,
    y,
    theta0,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
)

# Show the recorded sequence of coefficient vectors
print(theta_values)
