In [None]:
%run supportvectors-common.ipynb

In [None]:
from typing import Callable, Optional, Tuple

import matplotlib.pyplot as plt
import torch

# Implementing Gradient Descent

Objective: In this exercise, you will complete the implementation of the gradient descent optimization algorithm. The function gradient_descent has been partially provided with comments and a docstring to guide you. Your task is to fill in the missing code sections to complete the function.

## Instructions:

 - **Review the Provided Code**: Examine the partially implemented gradient_descent function. Comments and a docstring have been added to describe what each part of the code should accomplish.

 - **Complete the Function**: Fill in the missing code as indicated by the comments.

 - **Test Your Implementation**: Once you've completed the function, test it with the sample objective function provided to ensure it works correctly. Verify that the optimization process converges as expected.

This exercise will help you apply the concepts learnt in class and solidify your understanding of gradient descent.

In [None]:
def f(x: torch.Tensor) -> torch.Tensor:
    """
    Sample objective function: the quadratic f(x) = x^2 + 3x + 2.

    The expression is applied element-wise, so the result has the same shape as `x`.
    Analytically the minimum is at x = -1.5, where f(x) = -0.25.

    Args:
        x (torch.Tensor): Point(s) at which to evaluate the function.

    Returns:
        torch.Tensor: The value of the quadratic at each element of `x`.
    """
    return x**2 + 3*x + 2

## Plot the function

In [None]:
# Sample the interval [-5, 5] at 100 evenly spaced points; these are the
# x-coordinates used to plot the objective function.
Xs = torch.linspace(
    start=-5,
    end=5,
    steps=100,
)

In [None]:
# Plot f(Xs) against Xs on an explicit Axes, with a title and axis labels so
# the figure is readable on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(Xs, f(Xs))
ax.set(title="$f(x) = x^2 + 3x + 2$", xlabel="x", ylabel="f(x)");

Fill in the missing code of the `gradient_descent` function given below. Refer to Section 1.3 of the book *Inside Deep Learning*.

In [None]:
import torch

def gradient_descent(
    x: torch.Tensor,
    eta: float,
    epsilon: float,
    func: Callable[[torch.Tensor], torch.Tensor],
    max_iter: Optional[int] = None,
) -> Tuple[torch.Tensor, list]:
    """
    Performs gradient descent optimization to minimize a given function.

    Starting from `x`, the function repeatedly evaluates `func`, backpropagates to obtain the gradient,
    and takes a step of size `eta` in the direction of the negative gradient. The process stops when the
    Euclidean norm of the change between two successive iterates is no longer greater than `epsilon`
    (or when `max_iter` updates have been performed, if a limit is given). The value of the iterate
    after every update is recorded in a log.

    The optimization runs on a private copy of `x`; the tensor passed in by the caller is neither
    modified nor given a gradient.

    Args:
        x (torch.Tensor): The initial point. Must be a floating-point tensor so that gradients can be
            computed; it does not itself need `requires_grad=True`.
        eta (float): The learning rate (step size) for the gradient descent.
        epsilon (float): The convergence threshold. The optimization stops when the change in `x`
            between successive steps is not greater than this value.
        func (Callable): The function to minimize. It must return a single-element tensor, because
            `backward()` is called on its output without an explicit gradient argument.
        max_iter (Optional[int]): Upper bound on the number of update steps. `None` (the default)
            means no bound. Use it to guard against learning rates for which the iterates oscillate
            and never converge.

    Returns:
        Tuple[torch.Tensor, list]: The optimized tensor (detached from the autograd graph) and a log
        holding one NumPy array per update step with the value of the iterate after that step.
    """
    # Optimize a fresh leaf tensor: this keeps the caller's tensor untouched and guarantees that
    # `.grad` is populated by `backward()` on every iteration.
    x = x.detach().clone().requires_grad_(True)

    # Initialize x_min to the initial value of x. This will be updated during gradient descent.
    x_min: torch.Tensor = x.detach().clone()

    # List to log the values of x during the optimization process.
    learning_log = []

    # The convergence test is made *after* each update, so no artificial "previous" value is needed
    # to enter the loop (a sentinel such as `x * 100` fails when the starting point is zero).
    iteration = 0
    while max_iter is None or iteration < max_iter:
        x_min_prev = x_min

        # Apply the function to x.
        value = func(x)

        # Perform backpropagation.
        value.backward()

        # Update x by taking a step in the direction of the negative gradient. The update itself
        # must not be recorded in the autograd graph.
        with torch.no_grad():
            x -= eta * x.grad

        # Zero the gradients of x to prepare for the next iteration (backward() accumulates).
        x.grad.zero_()

        # Update x_min with the new value of x.
        x_min = x.detach().clone()

        # Log the current value of x. The extra clone keeps the log independent of the returned tensor.
        learning_log.append(x_min.clone().cpu().numpy())
        iteration += 1

        # Stop once the step is no longer larger than the tolerance. Written as `not (... > epsilon)`
        # so that a NaN step (diverged iterates) also terminates the loop.
        if not torch.linalg.norm(x_min - x_min_prev) > epsilon:
            break

    return x_min, learning_log


In [None]:
# Starting point of the search: a one-element tensor with gradient tracking enabled.
x = torch.tensor([10.0], requires_grad=True)

# Learning rate: the step size applied to every update.
eta = 0.001

# Convergence threshold: how close successive iterates must be before stopping.
epsilon = 1e-5

argmin, learning_log = gradient_descent(x=x, eta=eta, epsilon=epsilon, func=f)

print(argmin)

Finally, let's see how the value of `x_min` changed at each step of learning.

In [None]:
# Plot the logged iterate against the step index, with a title and axis labels
# so the convergence curve is readable on its own.
fig, ax = plt.subplots()
ax.plot(learning_log)
ax.set(title="Gradient descent trajectory", xlabel="Step", ylabel="x_min");