## Exercise 1

In [3]:
# a)

import numpy as np

# --- Synthetic data -------------------------------------------------------
# n samples of x drawn uniformly from [-2, 2]; the response is the quadratic
# 2 - x + 5x^2 plus Gaussian noise scaled by sigma.
n = 100
sigma = 0.0  # noise scale; at 0.0 the noise term contributes nothing to y
rng = np.random.default_rng(315)
x = rng.uniform(low=-2.0, high=2.0, size=n)
noise = sigma * rng.normal(size=n)  # the normal draw still happens when sigma is 0
y = 2 - x + 5 * x**2 + noise

# Design matrix with one column per feature: [x, x^2] (no intercept column).
X = np.stack([x, x**2], axis=1)

# --- Feature standardization ----------------------------------------------
# Per-column shift to zero mean and scale to unit (population) std.
X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
# A constant column has std 0; substitute 1 so the division below is a no-op
# for that column instead of a division by zero.
X_std = np.where(X_std == 0, 1.0, X_std)
X_norm = (X - X_mean) / X_std

# --- Target centering -----------------------------------------------------
# Removing the mean of y lets later fits omit an explicit intercept term.
y_mean = np.mean(y)
y_centered = y - y_mean

# --- Quick verification ---------------------------------------------------
for label, value in (
    ("Feature means before standardization:", X_mean),
    ("Feature stds before standardization:", X_std),
    ("Means after standardization (≈0):", X_norm.mean(axis=0)),
    ("Stds after standardization (≈1):", X_norm.std(axis=0)),
    ("y_centered mean (≈0):", y_centered.mean()),
):
    print(label, value)


Feature means before standardization: [0.22725888 1.68133322]
Feature stds before standardization: [1.2765918  1.19760296]
Means after standardization (≈0): [-5.88418203e-17  2.85570179e-16]
Stds after standardization (≈1): [1. 1.]
y_centered mean (≈0): -1.3500311979441904e-15


## Exercise 2

In [4]:
# Exercise 2)

# Inspired by the lecture examples:
# Gradient for OLS:    ∇J(θ) = (2/n) X^T (Xθ − y)
# Hessian for OLS:     H = (2/n) X^T X (PSD)
# Gradient for Ridge:  ∇J_λ(θ) = (2/n) X^T (Xθ − y) + 2λθ
# Hessian for Ridge:   H_λ = (2/n) X^T X + 2λ I (PD for λ>0)

import numpy as np

# Use standardized features X_norm and centered target y_centered from Exercise 1
XT_X = X_norm.T @ X_norm  # Gram matrix of the features; symmetric by construction
n, p = X_norm.shape       # n samples (rows), p features (columns)
I = np.eye(p)

# OLS Hessian and eigenvalues.
# H_ols is a real symmetric matrix, so use the symmetric solver `eigvalsh`:
# it returns real eigenvalues directly and skips the eigenvectors, which this
# cell never used. `eigvalsh` sorts ascending, so reverse the result to list
# the largest eigenvalue first.
H_ols = (2.0 / n) * XT_X
Eig_ols = np.linalg.eigvalsh(H_ols)[::-1]
print(f"OLS Hessian eigenvalues: {Eig_ols}")

# Ridge Hessian and eigenvalues (choose a sample lambda just to illustrate).
# Adding 2*lam*I shifts every eigenvalue of H_ols up by 2*lam.
lam = 1e-3
H_ridge = H_ols + 2.0 * lam * I
Eig_ridge = np.linalg.eigvalsh(H_ridge)[::-1]
print(f"Ridge Hessian eigenvalues (λ={lam}): {Eig_ridge}")

# Gradient functions

def grad_ols(theta, X, y):
    """Evaluate the OLS gradient (2/n) * X^T (X @ theta - y).

    Parameters
    ----------
    theta : parameter vector multiplied by ``X``.
    X : design matrix; its first dimension supplies ``n``.
    y : target vector subtracted from the prediction ``X @ theta``.

    Returns
    -------
    The gradient, with one entry per column of ``X``.
    """
    n_samples = X.shape[0]
    residual = X @ theta - y
    return (2.0 / n_samples) * (X.T @ residual)


def grad_ridge(theta, X, y, lam):
    """Evaluate the ridge gradient (2/n) * X^T (X @ theta - y) + 2 * lam * theta.

    Parameters
    ----------
    theta : parameter vector multiplied by ``X``.
    X : design matrix; its first dimension supplies ``n``.
    y : target vector subtracted from the prediction ``X @ theta``.
    lam : regularization strength scaling the penalty term.

    Returns
    -------
    The sum of the data-fit gradient and the penalty gradient.
    """
    n_samples = X.shape[0]
    residual = X @ theta - y
    data_term = (2.0 / n_samples) * (X.T @ residual)
    penalty_term = 2.0 * lam * theta
    return data_term + penalty_term

# Sanity check at θ = 0: the ridge penalty term 2·lam·θ is zero there, so the
# two gradients printed below should be the same vector.
theta0 = np.zeros(p)
for label, gradient in (
    ("grad_OLS(θ=0):", grad_ols(theta0, X_norm, y_centered)),
    ("grad_Ridge(θ=0):", grad_ridge(theta0, X_norm, y_centered, lam)),
):
    print(label, gradient)


OLS Hessian eigenvalues: [2.32688393 1.67311607]
Ridge Hessian eigenvalues (λ=0.001): [2.32888393 1.67511607]
grad_OLS(θ=0): [  0.5957978  -11.55873229]
grad_Ridge(θ=0): [  0.5957978  -11.55873229]


## Exercise 3

$\textbf{a)}$
$\textbf{b)}$
$\textbf{c)}$
$\textbf{d)}$
$\textbf{e)}$
$\textbf{f)}$

## Exercise 4

$\textbf{a)}$
$\textbf{b)}$
$\textbf{c)}$
$\textbf{d)}$
$\textbf{e)}$
$\textbf{f)}$

$\textbf{a)}$
$\textbf{b)}$
$\textbf{c)}$
$\textbf{d)}$
$\textbf{e)}$
$\textbf{f)}$

## Exercise 5