# Applying Function Approximation on Real-World ML Models

## 📚 Learning Objectives

By completing this notebook, you will:
- Apply function approximation on real-world ML models
- Understand power series and function approximations
- Use Taylor series for function approximation
- Apply approximations to optimize ML models

## 🔗 Prerequisites

- ✅ Understanding of derivatives and calculus
- ✅ Understanding of power series
- ✅ Basic understanding of ML models
- ✅ Python and NumPy knowledge

---

## Official Structure Reference

This notebook covers practical activities from **Course 03, Unit 2**:
- Applying function approximation on real-world ML models
- **Source:** `DETAILED_UNIT_DESCRIPTIONS.md` - Unit 2 Practical Content

---

## Introduction

**Function approximation** allows us to approximate complex functions using simpler forms (like power series), which is essential for optimization and understanding model behavior in machine learning.

In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Confirm the imports succeeded and print the notebook title banner.
# The check mark was previously emitted as mis-decoded bytes ("âœ…").
print("✅ Libraries imported!")
print("\nApplying Function Approximation on Real-World ML Models")
print("=" * 60)

## Part 1: Taylor Series Approximation


In [None]:
# Section banner: rule, title, rule -- one line each.
print("=" * 60, "Part 1: Taylor Series Approximation", "=" * 60, sep="\n")
# Taylor series: f(x) ≈ f(a) + f'(a)(x-a) + f''(a)(x-a)²/2! + ...
def exp_taylor(x, a=0, n_terms=5):
    """Approximate exp(x) with a truncated Taylor series centred at ``a``.

    Every derivative of exp evaluated at ``a`` equals exp(a), so term ``n``
    of the series is ``exp(a) * (x - a)**n / n!``.

    Args:
        x: Point at which to evaluate the approximation. Only arithmetic
            operators are applied to it, so a NumPy array is evaluated
            element-wise.
        a: Expansion point of the series.
        n_terms: Number of leading terms to sum (powers 0 .. n_terms - 1).

    Returns:
        The partial sum of the series; 0 when ``n_terms`` is 0 or negative.
    """
    offset = x - a
    # exp(a) is identical for every term, so evaluate it once instead of
    # on each loop iteration.
    exp_at_a = np.exp(a)
    result = 0
    for n in range(n_terms):
        result += (exp_at_a * offset**n) / math.factorial(n)
    return result

def sin_taylor(x, a=0, n_terms=5):
    """Approximate sin(x) with a truncated Taylor series centred at ``a``.

    The derivatives of sin repeat with period four
    (sin, cos, -sin, -cos), so term ``n`` of the series is
    ``d[n % 4] * (x - a)**n / n!`` where ``d`` holds those four values at ``a``.

    Args:
        x: Point at which to evaluate the approximation. Only arithmetic
            operators are applied to it, so a NumPy array is evaluated
            element-wise.
        a: Expansion point of the series.
        n_terms: Number of leading terms to sum (powers 0 .. n_terms - 1).

    Returns:
        The partial sum of the series; 0 when ``n_terms`` is 0 or negative.
    """
    offset = x - a
    # The derivative cycle depends only on ``a``; build it once rather than
    # re-evaluating sin/cos on every loop iteration.
    sin_a, cos_a = np.sin(a), np.cos(a)
    derivatives = (sin_a, cos_a, -sin_a, -cos_a)
    result = 0
    for n in range(n_terms):
        result += (derivatives[n % 4] * offset**n) / math.factorial(n)
    return result

# Visualize approximations
x = np.linspace(-2, 2, 100)
plt.figure(figsize=(14, 5))

# Left panel: exponential approximation
plt.subplot(1, 2, 1)
plt.plot(x, np.exp(x), 'b-', label='exp(x)', linewidth=2)
for n in [1, 2, 3, 5, 10]:
    # exp_taylor applies only arithmetic operators to its argument, so the
    # whole sample array can be evaluated in one call.
    y_approx = exp_taylor(x, a=0, n_terms=n)
    plt.plot(x, y_approx, '--', label=f'Taylor (n={n})', alpha=0.7)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Taylor Series Approximation of exp(x)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xlim(-2, 2)
plt.ylim(-1, 8)

# Right panel: sine approximation
plt.subplot(1, 2, 2)
plt.plot(x, np.sin(x), 'r-', label='sin(x)', linewidth=2)
for n in [1, 3, 5, 7, 9]:
    # Same element-wise evaluation as for the exponential panel.
    y_approx = sin_taylor(x, a=0, n_terms=n)
    plt.plot(x, y_approx, '--', label=f'Taylor (n={n})', alpha=0.7)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Taylor Series Approximation of sin(x)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xlim(-2, 2)
plt.ylim(-1.5, 1.5)

plt.tight_layout()
plt.show()
# The check mark was previously emitted as mis-decoded bytes ("âœ…").
print("\n✅ Taylor series approximations visualized!")


## Part 2: Function Approximation in ML Loss Functions


In [None]:
# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 60,
    "Part 2: Function Approximation in ML Loss Functions",
    "=" * 60,
    sep="\n",
)

# Goal: approximate the loss function around its optimum to understand
# how optimization behaves there.

# Synthetic data: y = 2x + 1 plus Gaussian noise scaled by 0.5 (seeded so the
# run is reproducible).
np.random.seed(42)
X = np.random.randn(100, 1)
y = 2 * X.ravel() + 1 + 0.5 * np.random.randn(100)

# Fit ordinary least squares; its coefficients serve as the expansion point.
model = LinearRegression().fit(X, y)
w_opt, b_opt = model.coef_[0], model.intercept_
print(f"\nOptimal parameters: w={w_opt:.4f}, b={b_opt:.4f}")
# Define loss function
def loss_function(w, b):
    """Return the mean squared error of the line ``w * x + b``.

    The error is measured against the module-level data ``X`` and ``y``.
    """
    predictions = w * X.ravel() + b
    return mean_squared_error(y, predictions)
# Build a 50 x 50 grid spanning +/-1 around the fitted slope and intercept.
w_range = np.linspace(w_opt - 1, w_opt + 1, 50)
b_range = np.linspace(b_opt - 1, b_opt + 1, 50)
W, B = np.meshgrid(w_range, b_range)

# Evaluate the true loss at every grid node (row-major order).
loss_surface = np.zeros_like(W)
for i, j in np.ndindex(*W.shape):
    loss_surface[i, j] = loss_function(W[i, j], B[i, j])
# Quadratic approximation (Taylor series up to 2nd order)
# L(w, b) ≈ L(w_opt, b_opt) + gradient^T * [w-w_opt, b-b_opt] + 0.5 * [w-w_opt, b-b_opt]^T * Hessian * [w-w_opt, b-b_opt]
# Compute gradient numerically
def compute_gradient(w, b, h=1e-5):
    """Estimate the gradient of ``loss_function`` at (w, b).

    Uses a central difference with step ``h`` along each parameter.

    Returns:
        Tuple ``(dL/dw, dL/db)``.
    """
    two_h = 2 * h
    d_loss_dw = (loss_function(w + h, b) - loss_function(w - h, b)) / two_h
    d_loss_db = (loss_function(w, b + h) - loss_function(w, b - h)) / two_h
    return d_loss_dw, d_loss_db
# Compute Hessian numerically
def compute_hessian(w, b, h=1e-3):
    """Estimate the 2x2 Hessian of the loss at (w, b).

    Central-differences the numerical gradient (``compute_gradient``, called
    with the same step ``h``) along ``w`` and along ``b``.

    Returns:
        ``np.ndarray`` of shape (2, 2): row 0 holds the derivatives of dL/dw,
        row 1 those of dL/db; column 0 is the change along ``w``, column 1
        the change along ``b``.
    """
    two_h = 2 * h

    # Gradient samples on either side of the point along each axis.
    grad_w_hi = compute_gradient(w + h, b, h)
    grad_w_lo = compute_gradient(w - h, b, h)
    grad_b_hi = compute_gradient(w, b + h, h)
    grad_b_lo = compute_gradient(w, b - h, h)

    # Each pair gives (d(dL/dw)/d_axis, d(dL/db)/d_axis).
    along_w = [(hi - lo) / two_h for hi, lo in zip(grad_w_hi, grad_w_lo)]
    along_b = [(hi - lo) / two_h for hi, lo in zip(grad_b_hi, grad_b_lo)]

    return np.array([[along_w[0], along_b[0]], [along_w[1], along_b[1]]])
# Zeroth-, first- and second-order information at the fitted optimum; these
# are the ingredients of the quadratic (second-order Taylor) model.
loss_opt = loss_function(w_opt, b_opt)
grad_w, grad_b = compute_gradient(w_opt, b_opt)
hessian = compute_hessian(w_opt, b_opt)

print(
    f"\nOptimal loss: {loss_opt:.4f}",
    f"Gradient at optimum: [{grad_w:.6f}, {grad_b:.6f}]",
    f"Hessian at optimum:\n{hessian}",
    sep="\n",
)
# Quadratic approximation
def quadratic_approximation(w, b):
    """Evaluate the second-order Taylor model of the loss at (w, b).

    The model is expanded around the module-level optimum ``(w_opt, b_opt)``
    using the module-level ``loss_opt``, ``grad_w``, ``grad_b`` and
    ``hessian``.
    """
    delta = np.array([w - w_opt, b - b_opt])
    gradient = np.array([grad_w, grad_b])
    first_order = gradient @ delta
    second_order = 0.5 * delta @ hessian @ delta
    return loss_opt + first_order + second_order
# Visualize: evaluate the quadratic model on the same (W, B) grid as the true loss
loss_approx = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        loss_approx[i, j] = quadratic_approximation(W[i, j], B[i, j])

fig = plt.figure(figsize=(14, 5))

# Left panel: the exact MSE surface, with the fitted optimum marked by a red star
ax1 = fig.add_subplot(1, 2, 1, projection='3d')
surf1 = ax1.plot_surface(W, B, loss_surface, cmap='viridis', alpha=0.8)
ax1.scatter([w_opt], [b_opt], [loss_opt], c='r', s=100, marker='*')
ax1.set_xlabel('w')
ax1.set_ylabel('b')
ax1.set_zlabel('Loss')
ax1.set_title('True Loss Surface')

# Right panel: the second-order Taylor model around the same optimum
ax2 = fig.add_subplot(1, 2, 2, projection='3d')
surf2 = ax2.plot_surface(W, B, loss_approx, cmap='plasma', alpha=0.8)
ax2.scatter([w_opt], [b_opt], [loss_opt], c='r', s=100, marker='*')
ax2.set_xlabel('w')
ax2.set_ylabel('b')
ax2.set_zlabel('Loss')
ax2.set_title('Quadratic Approximation (Taylor)')

plt.tight_layout()
plt.show()
# The check mark was previously emitted as mis-decoded bytes ("âœ…").
print("\n✅ Function approximation applied to ML loss function!")


## Part 3: Linear Approximation for Optimization


In [None]:
# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 60,
    "Part 3: Linear Approximation for Optimization",
    "=" * 60,
    sep="\n",
)

# Linear approximation is used in gradient descent
# f(x) ≈ f(x0) + f'(x0)(x - x0)
# For optimization: x_new = x_old - learning_rate * f'(x_old)

def gradient_descent_with_approximation(f, df, x0, learning_rate=0.1, n_iter=50):
    """Minimise ``f`` by fixed-step gradient descent.

    Each update follows the slope of the local linear model:
    ``x_new = x_old - learning_rate * df(x_old)``.

    Args:
        f: Objective function.
        df: Derivative of ``f``.
        x0: Starting point.
        learning_rate: Step-size multiplier applied to the derivative.
        n_iter: Number of update steps to perform.

    Returns:
        Tuple ``(x_final, history, values)`` where ``history`` lists every
        iterate including ``x0`` and ``values`` the objective at each of them
        (both have ``n_iter + 1`` entries).
    """
    history = [x0]
    values = [f(x0)]
    for _ in range(n_iter):
        current = history[-1]
        updated = current - learning_rate * df(current)
        history.append(updated)
        values.append(f(updated))
    return history[-1], history, values

# Example: Minimize f(x) = (x - 3)^2
def f_example(x):
    """Example objective ``(x - 3)^2``, minimised at ``x = 3``."""
    offset = x - 3
    return offset**2

def df_example(x):
    """Derivative of the example objective: ``2 * (x - 3)``."""
    offset = x - 3
    return 2 * offset

# Run 20 gradient-descent steps on the example objective, starting from 0.
x0 = 0.0
x_opt, history, values = gradient_descent_with_approximation(
    f_example,
    df_example,
    x0,
    learning_rate=0.1,
    n_iter=20,
)

# Report where the run started, where it ended, and the objective there.
print(
    f"\nStarting point: x0 = {x0}",
    f"Optimal point: x* = {x_opt:.4f} (true optimum: 3.0)",
    f"Final function value: f(x*) = {f_example(x_opt):.6f}",
    sep="\n",
)

# Visualize
x_range = np.linspace(-1, 5, 100)
# f_example is plain arithmetic, so it evaluates the whole array at once.
y_range = f_example(x_range)

plt.figure(figsize=(12, 5))

# Left panel: the objective with the path taken by gradient descent
plt.subplot(1, 2, 1)
# The superscript two was previously emitted as mis-decoded bytes ("Â²").
plt.plot(x_range, y_range, 'b-', label='f(x) = (x-3)²', linewidth=2)
plt.plot(history, values, 'ro-', label='Gradient Descent', markersize=8)
plt.axvline(x=3, color='g', linestyle='--', label='True Optimum', alpha=0.7)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Gradient Descent Using Linear Approximation')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: show the linear approximations at the first five iterates
plt.subplot(1, 2, 2)
plt.plot(x_range, y_range, 'b-', label='f(x)', linewidth=2, alpha=0.5)
for x_i in history[:5]:
    # Linear approximation: f(x) ≈ f(x_i) + f'(x_i)(x - x_i)
    y_i = f_example(x_i)
    df_i = df_example(x_i)
    approx = y_i + df_i * (x_range - x_i)
    plt.plot(x_range, approx, '--', alpha=0.7, label=f'Approx at x={x_i:.2f}')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Linear Approximations at Each Step')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xlim(-1, 5)

plt.tight_layout()
plt.show()

# The check mark was previously emitted as mis-decoded bytes ("âœ…").
print("\n✅ Linear approximation used for gradient descent optimization!")

## Summary

### Key Concepts:
1. **Taylor Series**: Approximate functions using power series
2. **Linear Approximation**: f(x) ≈ f(a) + f'(a)(x-a)
3. **Quadratic Approximation**: Includes second-order terms (Hessian)
4. **Optimization**: Use approximations to find optimal points
5. **Gradient Descent**: Uses linear approximation to minimize functions

### Best Practices:
- Use Taylor series for local approximations
- Verify approximations near the expansion point
- Higher-order terms improve accuracy
- A linear approximation is sufficient for gradient descent

### Applications:
- Function optimization
- Gradient descent algorithms
- Understanding loss surfaces
- Model behavior analysis

**Reference:** Course 03, Unit 2: "Calculus for Machine Learning" - Function approximation practical content