TP

### 1. Génération des données du problème. <br>
#### a) Générez les données du problème

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cvxpy as cvx
import time
n = 200 # number of examples (you can try with n = 1000 and n = 5000)
p = 2*n # dimensionality of the problem
k = 5 # number of active variables
np.random.seed(0)
X = np.random.randn(n,p) # creating features and normalizing them
X = (X - np.mean(X,axis = 0))/np.std(X,axis = 0)
t = np.arange(0,p)/(p-1); # bulding the variance matrix !
S = np.zeros((p,p))
nn = 0.00001
for i in range(p):
    S[i,:] = np.exp(-(t-t[i])**2/nn);
X = X@(S**.5)
X = X/np.linalg.norm(X,axis=0)
ind = np.random.choice(p, k, replace=False) # generating optimal weights
print(f"index: {ind}")
weights = np.random.randn(k)
weights += 0.1+np.sign(weights) # to get large enough weight
wopt = np.zeros(p)
wopt[ind] = weights
rsnr = 2 # generating output by X@w + noise
z = X[:,ind]@weights
stdnoise = np.std(z)/rsnr
y = z + stdnoise*np.random.randn(n)

index: [309 390  55  82 329]


#### b) Vérifiez que les données ont bien les propriétés attendues

In [4]:
print(f"Taille de X: {X.shape}")
print(f"Taille de Y: {y.shape}")
print(f"Taille de S: {S.shape}")
print(f"weights: {weights}")

Taille de X: (200, 400)
Taille de Y: (200,)
Taille de S: (400, 400)
weights: [-1.12180992  1.31787313 -0.92626818  2.65803182 -1.51223091]


In [5]:
b_ls = np.linalg.solve(X.T@X,X.T@y)
e_ls = np.sum((X@b_ls-z)**2)
print("Test error for the LS regression: {:0.4f}".format(e_ls))

Test error for the LS regression: 2.7231


In [6]:
λ = 1 # le paramètre de régularisation
b_ridge = cvx.Variable(p) # le vecteur des coefficients, il sert de variable d'optimisation
obj = cvx.Minimize(0.5*cvx.sum_squares(X@b_ridge - y) + λ*cvx.sum_squares(b_ridge))
prob = cvx.Problem(obj) # le problème d'optimisation
start_time = time.time()
prob.solve(solver=cvx.SCS, eps=1e-5) # résolution du problème avec le solveur SCS
end_time = time.time()

# b) Vérifiez que la solution obtenue est meilleure que celle des moindres carrés
b_ls = np.linalg.solve(X.T@X,X.T@y) # recalcul des coefficients de la régression des moindres carrés
e_ls = np.sum((X@b_ls-z)**2) 
np.set_printoptions(formatter={'float': '{: 0.3f}'.format}) # pour afficher les nombres avec 3 décimales
print("In sample error for the LS regression: {:0.4f}".format(e_ls))

e_ridge = np.sum((X@b_ridge.value-z)**2)
print("In sample error for the ridge regression: {:0.4f}".format(e_ridge))

In sample error for the LS regression: 2.7231
In sample error for the ridge regression: 2.3094


In [8]:
grad = (X.T @ X) @ b_ridge.value - X.T @ y + 2 * λ * b_ridge.value
np.linalg.norm(grad)

np.float64(2.2185169067776875e-07)

In [47]:
def my_ridge(X, y, lam):
    n, p = X.shape
    A = X.T @ X + 2.0 * lam * np.eye(p) 
    rhs = X.T @ y                         
    b = np.linalg.solve(A, rhs)
    return b

b_ridge = my_ridge(X, y, λ)
e_ridge = np.sum((X @ b_ridge - z) ** 2)
print("ridge regression: {:0.4f}".format(e_ridge))
grad = (X.T @ X) @ b_ridge - X.T @ y + 2 * λ * b_ridge
np.linalg.norm(grad)

ridge regression: 2.3094


np.float64(3.665773238953586e-15)

In [63]:
t = 2.507

b_ridge = cvx.Variable(p)
constraint = [0.5 * cvx.sum_squares(b_ridge) <= t]
obj = cvx.Minimize(cvx.sum_squares(X @ b_ridge - y))
prob = cvx.Problem(obj, constraints=constraint)
prob.solve(solver=cvx.SCS, eps=1e-5)

beta_hat = b_ridge.value
print(f"norme de la solution: {np.linalg.norm(beta_hat)**2:0.4f} (doit être <= {2*t:0.4f})")
print(f"respecte la contrainte: {0.5 * np.linalg.norm(beta_hat)**2 <= t + 1e-4}")

## KKT
grad = X.T @ (X @ beta_hat - y)

# multiplicateur de Lagrange μ associé à la contrainte
lambda_hat = constraint[0].dual_value

# stationnarité
stationnarity = grad + 2 * lambda_hat * beta_hat
print(f"||stationnarité||: {np.linalg.norm(stationnarity):.4e}")
print(f"max violation stationnarité: {np.max(np.abs(stationnarity)):.4e}")

print(f"complementarité: {lambda_hat * (np.linalg.norm(beta_hat)**2 - t)}")
print("lambda_hat >= 0 ?", lambda_hat >= -1e-6)

norme de la solution: 5.0140 (doit être <= 5.0140)
respecte la contrainte: True
||stationnarité||: 1.7448e+00
max violation stationnarité: 7.6730e-01
complementarité: [ 1.302]
lambda_hat >= 0 ? [ True]


In [11]:
# Sous gradient et sous différentiabilité
# On considère le problème de Lasso
lambda_lasso = 0.001*n
b_lasso = cvx.Variable(p)
obj = cvx.Minimize(0.5 * cvx.sum_squares(X @ b_lasso - y) + lambda_lasso * cvx.norm1(b_lasso))
prob = cvx.Problem(obj)

prob.solve(solver=cvx.SCS, eps=1e-5)
print("Optimal value for the Lasso problem: {:.4f}".format(prob.value))

# A quelles conditions la solution est-elle optimale ?
beta_lasso = b_lasso.value
grad = X.T @ (X @ beta_lasso - y)
# On calcule un sous gradient du terme de régularisation
subgrad = lambda_lasso * np.sign(beta_lasso)
subgrad[beta_lasso == 0] = lambda_lasso * np.clip(grad[beta_lasso == 0]/lambda_lasso, -1, 1)
# Condition d'optimalité : 0 ∈ ∇f(β) + λ ∂||β||_1
optimality = grad + subgrad
print("||optimality||: {:.4e}".format(np.linalg.norm(optimality)))

Optimal value for the Lasso problem: 2.7094
||optimality||: 3.9193e+00
