# Homework 0

**Name:** -- Víctor Manuel Mariscal Cervantes --

**e-mail:** -- victor.mariscal4459@alumnos.udg.mx --

# MODULES

In [None]:
# Load modules
import random  # For generating random numbers

import matplotlib.pyplot as plt  # For plotting results
import numpy as np  # For numerical computations
from matplotlib.colors import LogNorm  # Logarithmic color scaling for contour plots

# This magic command ensures that plots are displayed inline in the notebook
%matplotlib inline

# Theory on the Gradient Descent algorithm

Gradient Descent is an iterative optimization algorithm used to find the minimum of a function. The algorithm works as follows:

1. **Initialization:** Start with an initial guess for the parameters (e.g., a point in the function's domain).

2. **Gradient Computation:** Calculate the gradient (i.e., the vector of partial derivatives) of the function at the current point. This gradient indicates the direction of steepest ascent.

3. **Parameter Update:** Update the parameters by moving in the opposite direction of the gradient. The update rule is:
   
   $$
   \mathbf{x}_{new} = \mathbf{x}_{current} - \alpha \nabla f(\mathbf{x}_{current})
   $$
   
   where $\alpha$ is the learning rate, which determines the size of the step.

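4. **Convergence Check:** Repeat steps 2 and 3 until the change in parameters between two consecutive iterations is smaller than a predefined tolerance, which indicates that the iterates have settled at a stationary point (ideally a minimum), or until a maximum number of iterations is reached.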

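In this notebook, we use the Himmelblau function, a well-known test function with multiple local minima, to demonstrate gradient descent. Because the function has several minima, the one the algorithm converges to depends on the starting point.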

In [None]:
# Function to be optimized

def himmelblau(x, y):
    r"""
    Evaluate the Himmelblau function at the point (x, y).

    \( f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2 \)

    This function has multiple local minima and is used as a performance
    test for optimization algorithms.

    The docstring is a raw string so that the LaTeX backslashes are stored
    literally instead of being parsed as (invalid) escape sequences.

    Parameters
    ----------
    x, y : number or array-like supporting arithmetic operators
        Coordinates at which to evaluate the function. Only `+`, `-` and `**`
        are applied, so NumPy arrays are evaluated element-wise.

    Returns
    -------
    Same kind as the inputs
        The value of f at (x, y); it is a sum of two squares.
    """
    first_term = (x**2 + y - 11)**2
    second_term = (x + y**2 - 7)**2
    return first_term + second_term

def grad_himmelblau(x, y):
    r"""
    Compute the gradient of the Himmelblau function with respect to x and y.

    The partial derivatives are:
    \( \frac{\partial f}{\partial x} = 4x(x^2 + y - 11) + 2(x + y^2 - 7) \)
    \( \frac{\partial f}{\partial y} = 2(x^2 + y - 11) + 4y(x + y^2 - 7) \)

    The docstring is a raw string: in a regular string literal the `\f` of
    `\frac` would be parsed as a form-feed character and corrupt the text.

    Parameters
    ----------
    x, y : number or array-like supporting arithmetic operators
        Coordinates at which to evaluate the gradient.

    Returns
    -------
    numpy.ndarray
        Array whose first entry is df/dx and whose second entry is df/dy.
    """
    # Both partial derivatives share these two inner expressions of f.
    a = x**2 + y - 11
    b = x + y**2 - 7

    dfdx = 4 * x * a + 2 * b
    dfdy = 2 * a + 4 * y * b
    return np.array([dfdx, dfdy])

# The Himmelblau function and its gradient are defined above. These will be used for the optimization process.

In [None]:
# Run gradient descent algorithm

# Set parameters for gradient descent
learning_rate = 0.001       # Step size applied to the gradient at each iteration
max_iters = 10000           # Maximum number of iterations allowed
tolerance = 1e-6            # Stop once a single step moves the point by less than this distance

# Seed the generator so that "Restart Kernel -> Run All" reproduces the same run.
# Set seed = None to draw a different starting point on every execution.
seed = 42
random.seed(seed)

# Choose a random initial point within the range [-6, 6] for both x and y
x0 = random.uniform(-6, 6)
y0 = random.uniform(-6, 6)
x_current = np.array([x0, y0])
print("Initial point:", x_current)

# List to store the path taken by the algorithm (for later visualization)
path = [x_current.copy()]

# Execute the gradient descent loop
for i in range(max_iters):
    # Compute the gradient at the current point
    grad = grad_himmelblau(x_current[0], x_current[1])

    # Candidate point: move against the gradient (direction of steepest descent)
    x_new = x_current - learning_rate * grad

    # Measure the step length before overwriting the current point
    step_size = np.linalg.norm(x_new - x_current)

    # Accept the step *before* the convergence test, so that x_current is always
    # the last point stored in path and the reported result is the final iterate.
    x_current = x_new
    path.append(x_current.copy())

    # Check for convergence: if the change in position is less than the tolerance, stop iterating
    if step_size < tolerance:
        print(f"Convergence reached at iteration {i}")
        break
else:
    # The loop ran to completion without hitting the tolerance.
    print(f"Stopped after {max_iters} iterations without reaching the tolerance")

# Convert the list of points to a NumPy array for easier manipulation
path = np.array(path)

# Output the final position and function value
print("Final position:", x_current)
print("Final function value:", himmelblau(x_current[0], x_current[1]))

In [None]:
# Plot the results

# Create a grid of x and y values to evaluate the Himmelblau function
x_vals = np.linspace(-6, 6, 400)
y_vals = np.linspace(-6, 6, 400)
X, Y = np.meshgrid(x_vals, y_vals)

# Evaluate the Himmelblau function on the grid
Z = himmelblau(X, Y)

# Set up the plot using the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the contour lines of the Himmelblau function.
# The levels are logarithmically spaced, so the colors are mapped with a logarithmic norm too.
# LogNorm is provided by matplotlib.colors; pyplot does not export it, so plt.LogNorm() fails.
contour_levels = np.logspace(-0.5, 5, 20)
contour_plot = ax.contour(X, Y, Z, levels=contour_levels, norm=LogNorm(), cmap='jet')
ax.clabel(contour_plot, inline=True, fontsize=8)

# Plot the path taken by the gradient descent algorithm
ax.plot(path[:, 0], path[:, 1], 'ro-', markersize=3, label='Gradient Descent Path')

# Label the plot
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Gradient Descent on the Himmelblau Function')
ax.legend()
fig.colorbar(contour_plot, ax=ax)

# Display the plot
plt.show()