In [2]:
import numpy as np

def gradient_descent(f, grad_f, x0, learning_rate, max_iters, i=None):
    """
    f        : function to minimize
    grad_f   : gradient of f
    x0       : initial point (float or numpy array)
    learning_rate : step size
    max_iters     : total iterations
    i        : if given, return the result at i-th iteration (1-indexed)
    """
    x = np.array(x0, dtype=float)

    history = [x.copy()]
    for k in range(0, max_iters + 1):
        f_val = f(x)
        # Print current iteration
        if x.ndim == 0:  # scalar
            print(f"Iter {k:02d}: x={x:.8f}, f(x)={f_val:.8f}")
        else:  # vector
            x_str = "[" + ", ".join(f"{xi:.4f}" for xi in np.ravel(x)) + "]"
            print(f"Iter {k:02d}: x={x_str}, f(x)={f_val:.4f}")

        # Gradient update
        grad = grad_f(x)
        x = x - learning_rate * grad
        history.append(x.copy())

        if i is not None and k == i:
            return x, f(x), k
    
    return x, f(x), max_iters

In [6]:
# EXAMPLE Only using w

f = lambda w: w**6
grad_f = lambda w: 5*w**5
x0 = 1  # start at origin
learning_rate = 0.01
max_iters = 5
# CHANGE THIS

x_final, f_final, steps = gradient_descent(f, grad_f, x0, learning_rate, max_iters)
print(f"\nFinal after {steps} steps: x={x_final}, f(x)={f_final}")

Iter 00: x=1.00000000, f(x)=1.00000000
Iter 01: x=0.95000000, f(x)=0.73509189
Iter 02: x=0.91131095, f(x)=0.57279543
Iter 03: x=0.87988395, f(x)=0.46403675
Iter 04: x=0.85351475, f(x)=0.38660386
Iter 05: x=0.83086699, f(x)=0.32899481

Final after 5 steps: x=0.8110687106423597, f(x)=0.2846727424487952


In [None]:
# EXAMPLE LINEAR FUNCTION 

X = np.array([[0.5,1.2,-0.3],[-1,0.8,1.5],[2.3,-0.7,0.5],[0,1.5,-1]])
y = np.array([1,2,3,1]).reshape(-1,1)

f = lambda w: np.sum((X @ w - y)**2)
grad_f = lambda w: 2 * X.T @ (X @ w - y)
x0 = np.array([0,0])  # start at origin
learning_rate = 0.01
max_iters = 5
# CHANGE THIS

x_final, f_final, steps = gradient_descent(f, grad_f, x0, learning_rate, max_iters)
print(f"\nFinal after {steps} steps: x={x_final}, f(x)={f_final}")


Iter 00: x=[0.0000, 0.0000], f(x)=38.0000
Iter 01: x=[0.1400, 0.1600], f(x)=33.6152
Iter 02: x=[0.2712, 0.3108], f(x)=29.7393
Iter 03: x=[0.3941, 0.4529], f(x)=26.3130
Iter 04: x=[0.5093, 0.5869], f(x)=23.2842
Iter 05: x=[0.6172, 0.7133], f(x)=20.6066

Final after 5 steps: x=[0.71824674 0.83240436], f(x)=18.2393701674019
