In [2]:
import numpy as np

def gradient_descent(f, grad_f, x0, learning_rate, max_iters, i=None):
    """
    f        : function to minimize
    grad_f   : gradient of f
    x0       : initial point (float or numpy array)
    learning_rate : step size
    max_iters     : total iterations
    i        : if given, return the result at i-th iteration (1-indexed)
    """
    x = np.array(x0, dtype=float)

    history = [x.copy()]
    for k in range(0, max_iters + 1):
        f_val = f(x)
        # Print current iteration
        if x.ndim == 0:  # scalar
            print(f"Iter {k:02d}: x={x:.8f}, f(x)={f_val:.8f}")
        else:  # vector
            x_str = "[" + ", ".join(f"{xi:.4f}" for xi in np.ravel(x)) + "]"
            print(f"Iter {k:02d}: x={x_str}, f(x)={f_val:.4f}")

        # Gradient update
        grad = grad_f(x)
        x = x - learning_rate * grad
        history.append(x.copy())

        if i is not None and k == i:
            return x, f(x), k
    
    return x, f(x), max_iters

In [7]:
# EXAMPLE Only using w

f = lambda w: np.sin(w)**2
grad_f = lambda w: np.sin(2*w)
x0 = 3  # start at origin
learning_rate = 0.1
max_iters = 5
# CHANGE THIS

x_final, f_final, steps = gradient_descent(f, grad_f, x0, learning_rate, max_iters)
print(f"\nFinal after {steps} steps: x={x_final}, f(x)={f_final}")

Iter 00: x=3.00000000, f(x)=0.01991486
Iter 01: x=3.02794155, f(x)=0.01286106
Iter 02: x=3.05047654, f(x)=0.00827920
Iter 03: x=3.06859907, f(x)=0.00531861
Iter 04: x=3.08314599, f(x)=0.00341212
Iter 05: x=3.09480872, f(x)=0.00218714

Final after 5 steps: x=3.104151859050176, f(x)=0.001401158191523737


In [None]:
# EXAMPLE LINEAR FUNCTION 

X = np.array([[0.5,1.2,-0.3],[-1,0.8,1.5],[2.3,-0.7,0.5],[0,1.5,-1]])
y = np.array([1,2,3,1]).reshape(-1,1)

f = lambda x,y: x**2 + x*y**2
grad_f = lambda w: 2 * X.T @ (X @ w - y)
x = 3  # start at origin
y = 2
learning_rate = 0.2
max_iters = 5
# CHANGE THIS

x_final, f_final, steps = gradient_descent(f, grad_f, x0, learning_rate, max_iters)
print(f"\nFinal after {steps} steps: x={x_final}, f(x)={f_final}")


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 3)