In [1]:
import numpy as np

def gradient_descent(f, grad_f, x0, learning_rate, max_iters, i=None):
    """
    f        : function to minimize
    grad_f   : gradient of f
    x0       : initial point (float or numpy array)
    learning_rate : step size
    max_iters     : total iterations
    i        : if given, return the result at i-th iteration (1-indexed)
    """
    x = np.array(x0, dtype=float)

    history = [x.copy()]
    for k in range(0, max_iters + 1):
        f_val = f(x)
        # Print current iteration
        if x.ndim == 0:  # scalar
            print(f"Iter {k:02d}: x={x:.8f}, f(x)={f_val:.8f}")
        else:  # vector
            x_str = "[" + ", ".join(f"{xi:.4f}" for xi in np.ravel(x)) + "]"
            print(f"Iter {k:02d}: x={x_str}, f(x)={f_val:.4f}")

        # Gradient update
        grad = grad_f(x)
        x = x - learning_rate * grad
        history.append(x.copy())

        if i is not None and k == i:
            return x, f(x), k
    
    return x, f(x), max_iters

In [2]:
# Example: f(w) = (x-1)^2 + (y-2)^2
f = lambda w: pow(w,2)
grad_f = lambda w: 2*w
x0 = 1  # start at origin
learning_rate = 0.4
max_iters = 5
# CHANGE THIS

x_final, f_final, steps = gradient_descent(f, grad_f, x0, learning_rate, max_iters)
print(f"\nFinal after {steps} steps: x={x_final}, f(x)={f_final}")


Iter 00: x=1.00000000, f(x)=1.00000000
Iter 01: x=0.20000000, f(x)=0.04000000
Iter 02: x=0.04000000, f(x)=0.00160000
Iter 03: x=0.00800000, f(x)=0.00006400
Iter 04: x=0.00160000, f(x)=0.00000256
Iter 05: x=0.00032000, f(x)=0.00000010

Final after 5 steps: x=6.399999999999988e-05, f(x)=4.095999999999984e-09
