# MATH170 - Chapter *: Gradient Descent

In [8]:
# Part 1: Gradient Descent using SymPy for derivative

import sympy as sp

# 1. Symbolic setup: a real variable x and the objective y(x) = (x - 3)^2 + 1
x = sp.Symbol('x', real=True)
y_expr = (x - 3)**2 + 1

# 2. Let SymPy differentiate the objective with respect to x
dy_dx = sp.diff(y_expr, x)
print("Derivative of y:", dy_dx)

# 3. Compile the objective and its derivative into NumPy-backed callables
f, df = (sp.lambdify(x, expr, 'numpy') for expr in (y_expr, dy_dx))

# 4. Gradient descent by hand: x <- x - eta * y'(x), repeated 30 times
x_val, eta = 0.0, 0.1    # starting point, learning rate

for i in range(30):
    # Objective value and slope are both taken at the current point, before stepping.
    y_val, grad_val = f(x_val), df(x_val)
    x_val -= eta * grad_val
    # Printed row: iteration, x AFTER the step, y at the point BEFORE the step.
    print(i, x_val, y_val)


Derivative of y: 2*x - 6
0 0.6000000000000001 10.0
1 1.08 6.76
2 1.464 4.6864
3 1.7711999999999999 3.359296
4 2.01696 2.50994944
5 2.213568 1.9663676415999998
6 2.3708544 1.618475290624
7 2.49668352 1.3958241859993603
8 2.597346816 1.2533274790395905
9 2.6778774528 1.1621295865853378
10 2.74230196224 1.1037629354146163
11 2.793841569792 1.0664082786653544
12 2.8350732558336 1.0425012983458268
13 2.86805860466688 1.0272008309413292
14 2.894446883733504 1.0174085318024506
15 2.9155575069868034 1.0111414603535684
16 2.932446005589443 1.0071305346262838
17 2.945956804471554 1.0045635421608217
18 2.9567654435772432 1.0029206669829258
19 2.9654123548617948 1.0018692268690725
20 2.9723298838894356 1.0011963051962065
21 2.9778639071115487 1.000765635325572
22 2.982291125689239 1.0004900066083662
23 2.985832900551391 1.0003136042293543
24 2.988666320441113 1.0002007067067868
25 2.9909330563528904 1.0001284522923435
26 2.9927464450823122 1.0000822094670998
27 2.99419715606585 1.0000526140589439


## TODO: Hands-On 1
Task A. Change `y_expr` to `(x - 1)**4 + 0.5`.   
Task B. Rerun the derivative and iteration steps.   
Task C. Try two different learning rates (0.01 and 0.2) and describe what happens.  

In [14]:
# Part 2: Linear Regression with single slope a using MSE = mean((y - a*x)**2)

import sympy as sp
import numpy as np

# 1. Dataset: ten paired observations (x_i, y_i)
X = np.array([0.5, 1.0, 1.8, 2.2, 3.0, 3.7, 4.1, 4.8, 5.2, 5.9], dtype=float)
Y = np.array([1.2, 2.1, 3.7, 4.2, 6.0, 7.6, 8.4, 9.6, 10.5, 11.8], dtype=float)
n = len(X)

# 2. Symbolic mean-squared-error loss L(a) = (1/n) * sum (y_i - a*x_i)^2,
#    built term by term in dataset order with the slope a as the only unknown
a = sp.Symbol('a', real=True)
L_expr = sum((sp.Float(y_i) - a*sp.Float(x_i))**2 for x_i, y_i in zip(X, Y)) / n

# 3. Differentiate the loss with respect to the slope
dL_da = sp.diff(L_expr, a)
print("Loss L(a):", L_expr)
print("dL/da:", dL_da)

# 4. Compile loss and gradient into NumPy-backed callables
L = sp.lambdify(a, L_expr, 'numpy')
g = sp.lambdify(a, dL_da, 'numpy')

# 5. Gradient descent by hand: a <- a - eta * dL/da for a fixed number of steps
a_val, eta, max_iter = 0.0, 0.01, 200    # start value, learning rate, step budget

for i in range(max_iter):
    # Loss and gradient are both evaluated at the current slope, before stepping.
    L_val, grad_val = L(a_val), g(a_val)
    a_val -= eta * grad_val
    # Report every 20th step: the slope AFTER the step, the loss from BEFORE it.
    if i % 20 == 0:
        print(i, a_val, L_val)

print("Final a:", a_val, "Final loss:", L(a_val))

Loss L(a): (1.2 - 0.5*a)**2/10 + (2.1 - 1.0*a)**2/10 + (3.7 - 1.8*a)**2/10 + (4.2 - 2.2*a)**2/10 + (6.0 - 3.0*a)**2/10 + (7.6 - 3.7*a)**2/10 + (8.4 - 4.1*a)**2/10 + (9.6 - 4.8*a)**2/10 + (10.5 - 5.2*a)**2/10 + (11.8 - 5.9*a)**2/10
dL/da: 26.744*a - 53.892
0 0.5389200000000001 54.315
20 2.012182004906834 0.016161618218347388
40 2.0151003995011494 0.015948550043379812
60 2.01510618056859 0.015948549207302126
80 2.0151061920203457 0.01594854920729878
100 2.01510619204303 0.015948549207298816
120 2.0151061920430746 0.015948549207298855
140 2.0151061920430746 0.015948549207298855
160 2.0151061920430746 0.015948549207298855
180 2.0151061920430746 0.015948549207298855
Final a: 2.0151061920430746 Final loss: 0.015948549207298855


## TODO: Hands-On 2
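Task A. Use the new dataset defined in the code cell below (the arrays `X` and `Y`).  
Task B. Build `L(a) = mean((y - a*x)**2)` for that dataset.  
Task C. Run gradient descent with `eta = 0.005` and `eta = 0.05`, and compare how quickly each converges.  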

In [12]:
import sympy as sp
import numpy as np

# Dataset for this exercise: ten paired observations stored as float64 arrays
X = np.array([0.2, 0.9, 1.3, 1.9, 2.6, 3.1, 3.9, 4.3, 5.0, 5.7], dtype=np.float64)
Y = np.array([0.6, 1.7, 2.4, 3.1, 4.2, 5.1, 6.0, 6.7, 7.9, 9.0], dtype=np.float64)
n = X.shape[0]    # number of data points

In [18]:
 # Part 3: Using scipy.optimize to minimize L(a) = mean((y - a*x)**2)

import numpy as np
from scipy.optimize import minimize_scalar, minimize

# 1. Same dataset as before
X = np.array([0.5, 1.0, 1.8, 2.2, 3.0, 3.7, 4.1, 4.8, 5.2, 5.9], dtype=float)
Y = np.array([1.2, 2.1, 3.7, 4.2, 6.0, 7.6, 8.4, 9.6, 10.5, 11.8], dtype=float)
n = len(X)

# 2. Define the loss function numerically
def loss(a):
    """Return the mean squared error of the no-intercept model y = a*x.

    Parameters
    ----------
    a : float or 1-element array
        Slope to evaluate. `minimize` calls this with a 1-element array,
        which broadcasts against X.

    Returns
    -------
    NumPy float
        mean((Y - a*X)**2) over the module-level arrays X and Y.
    """
    pred = a * X
    return np.mean((Y - pred)**2)   # (truth - prediction)^2

# 3. Minimize using scipy
# The start point is given as a float array so the optimizer never has to
# convert an integer array implicitly; this matches the float x0 used by the
# two-parameter example. (minimize_scalar(loss) is the 1-D alternative.)
result = minimize(loss, x0=np.array([0.0]), method='BFGS')
print("Optimal a:", result.x)
print("Minimum loss:", result.fun)

Optimal a: [2.01510616]
Minimum loss: 0.015948549207311764


In [19]:
# Part 3 (extended): Two-parameter linear regression using scipy.optimize.minimize

import numpy as np
from scipy.optimize import minimize

# 1. Dataset: ten paired observations (x_i, y_i)
X = np.array([0.5, 1.0, 1.8, 2.2, 3.0, 3.7, 4.1, 4.8, 5.2, 5.9], dtype=float)
Y = np.array([1.2, 2.1, 3.7, 4.2, 6.0, 7.6, 8.4, 9.6, 10.5, 11.8], dtype=float)
n = len(X)

# 2. Mean squared error of the straight line y = a*x + b
def loss(params):
    """Return mean((Y - (a*X + b))**2) for params = (a, b).

    `params` is any length-2 sequence or array holding the slope followed by
    the intercept; X and Y are the module-level data arrays.
    """
    slope, intercept = params
    residuals = Y - (slope * X + intercept)   # truth - prediction
    return np.mean(residuals**2)

# 3. Run BFGS starting from slope = 0, intercept = 0
result = minimize(loss, x0=np.zeros(2), method='BFGS')

# 4. Unpack the optimum and report it
a_opt, b_opt = result.x
print("Optimal a:", a_opt)
print("Optimal b:", b_opt)
print("Minimum loss:", result.fun)

Optimal a: 1.992209325064098
Optimal b: 0.09508597009443792
Minimum loss: 0.013917698761487917
