## Imports

In [81]:
# Import libraries and functions:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Lasso

# Import our own implementations
# The local modules are imported as module objects first so they can be passed
# to importlib.reload, which re-executes them and picks up edits made on disk
# without restarting the kernel.
import importlib, gradient_descent, prepare_data, polynomial_features
importlib.reload(gradient_descent)
importlib.reload(prepare_data)
importlib.reload(polynomial_features)


# The from-imports come after the reloads so the names bound here refer to the
# freshly reloaded definitions. Note that `prepare_data` and
# `polynomial_features` are rebound from the module objects to the functions of
# the same name.
from prepare_data import prepare_data
from polynomial_features import polynomial_features
from gradient_descent import gradient_descent_LASSO, momentum_gradient_descent_LASSO, \
    ADAGrad_gradient_descent_LASSO, RMSProp_gradient_descent_LASSO, ADAM_gradient_descent_LASSO


## Testing Functions

### Standard gradient descent function, LASSO 

In [82]:
# Build the data set and its train/test split (n=100 is requested from prepare_data)
x, y, x_train, x_test, y_train, y_test, y_noisy = prepare_data(n=100)

# Hyperparameters for this run
p = 16               # polynomial degree
lam = 0.001          # L1 penalty; reused as sklearn's alpha below
eta = 1e-2           # eta argument handed to the optimiser
num_iters = 100_000  # passed as num_iters and as sklearn's max_iter

# Polynomial design matrices for the full, training and test inputs
# (intercept=False is forwarded to polynomial_features)
X_plot, X_train, X_test = (
    polynomial_features(inputs, p, intercept=False) for inputs in (x, x_train, x_test)
)

# Standardise every matrix with the statistics of the training matrix only
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s, X_plot_s = (
    scaler.transform(matrix) for matrix in (X_train, X_test, X_plot)
)

# Fit with our own plain gradient-descent LASSO
beta, t = gradient_descent_LASSO(X_train_s, y_train, lam=lam, eta=eta, num_iters=num_iters)
print("GD parameters: ", beta)
print(f"Number of iterations: {t}")

# Reference fit: scikit-learn's Lasso with the same penalty and no intercept
lasso = Lasso(alpha=lam, fit_intercept=False, max_iter=num_iters).fit(X_train_s, y_train)
beta_sklearn = lasso.coef_
print("LASSO coefficients, Scikit learn: ", beta_sklearn)

# Element-wise agreement check; tol is used as both relative and absolute tolerance
tol = 1e-3
is_close = np.isclose(beta, beta_sklearn, rtol=tol, atol=tol)
print(f"GD parameters equal to parameters from sklearn: {is_close}.")
print(f"Tolerance = {tol}")


GD parameters:  [-0.         -0.98189229  0.00632461  1.05052906  0.         -0.
  0.         -0.10283976 -0.         -0.21693817 -0.         -0.
 -0.          0.         -0.00734072  0.02453983]
Number of iterations: 100000
LASSO coefficients, Scikit learn:  [-0.         -1.06054438  0.00427094  1.26740651  0.         -0.
 -0.         -0.47057126 -0.         -0.         -0.         -0.
 -0.          0.         -0.00455391  0.03739309]
GD parameters equal to parameters from sklearn: [ True False False False  True  True  True False  True False  True  True
  True  True False False].
Tolerance = 0.001


### Gradient descent with momentum, LASSO

In [83]:
# Build the data set and its train/test split (n=100 is requested from prepare_data)
x, y, x_train, x_test, y_train, y_test, y_noisy = prepare_data(n=100)

# Hyperparameters for this run
p = 4                # polynomial degree
lam = 0.0001         # L1 penalty; reused as sklearn's alpha below
eta = 1e-3           # eta argument handed to the optimiser
num_iters = 100_000  # passed as num_iters and as sklearn's max_iter
mom = 0.1            # forwarded as the optimiser's momentum argument

# Polynomial design matrices for the full, training and test inputs
# (intercept=False is forwarded to polynomial_features)
X_plot, X_train, X_test = (
    polynomial_features(inputs, p, intercept=False) for inputs in (x, x_train, x_test)
)

# Standardise every matrix with the statistics of the training matrix only
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s, X_plot_s = (
    scaler.transform(matrix) for matrix in (X_train, X_test, X_plot)
)

# Fit with our own momentum gradient-descent LASSO
beta, t = momentum_gradient_descent_LASSO(
    X_train_s, y_train, lam=lam, eta=eta, momentum=mom, num_iters=num_iters
)
print("GD parameters: ", beta)
print(f"Number of iterations: {t}")

# Reference fit: scikit-learn's Lasso with the same penalty and no intercept
lasso = Lasso(alpha=lam, fit_intercept=False, max_iter=num_iters).fit(X_train_s, y_train)
beta_sklearn = lasso.coef_
print("LASSO coefficients, Scikit learn: ", beta_sklearn)

# Element-wise agreement check; tol is used as both relative and absolute tolerance
tol = 1e-2
is_close = np.isclose(beta, beta_sklearn, rtol=tol, atol=tol)
print(f"GD parameters equal to parameters from sklearn: {is_close}.")
print(f"Tolerance = {tol}")

GD parameters:  [-0.02760925 -0.64512713  0.02952902  0.450475  ]
Number of iterations: 55472
LASSO coefficients, Scikit learn:  [-0.02142985 -0.69949953  0.02566578  0.50494923]
GD parameters equal to parameters from sklearn: [ True False  True False].
Tolerance = 0.01


### Gradient descent with ADAGrad, LASSO

In [84]:
# Build the data set and its train/test split (n=100 is requested from prepare_data)
x, y, x_train, x_test, y_train, y_test, y_noisy = prepare_data(n=100)

# Hyperparameters for this run
p = 10               # polynomial degree
lam = 0.001          # L1 penalty; reused as sklearn's alpha below
eta = 1e-1           # eta argument handed to the optimiser
num_iters = 100_000  # passed as num_iters and as sklearn's max_iter

# Polynomial design matrices for the full, training and test inputs
# (intercept=False is forwarded to polynomial_features)
X_plot, X_train, X_test = (
    polynomial_features(inputs, p, intercept=False) for inputs in (x, x_train, x_test)
)

# Standardise every matrix with the statistics of the training matrix only
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s, X_plot_s = (
    scaler.transform(matrix) for matrix in (X_train, X_test, X_plot)
)

# Fit with our own ADAGrad gradient-descent LASSO
beta, t = ADAGrad_gradient_descent_LASSO(
    X_train_s, y_train, lam=lam, eta=eta, num_iters=num_iters
)
print("GD parameters: ", beta)
print(f"Number of iterations: {t}")

# Reference fit: scikit-learn's Lasso with the same penalty and no intercept
lasso = Lasso(alpha=lam, fit_intercept=False, max_iter=num_iters).fit(X_train_s, y_train)
beta_sklearn = lasso.coef_
print("LASSO coefficients, Scikit learn: ", beta_sklearn)

# Element-wise agreement check; tol is used as both relative and absolute tolerance
tol = 1e-2
is_close = np.isclose(beta, beta_sklearn, rtol=tol, atol=tol)
print(f"GD parameters equal to parameters from sklearn: {is_close}.")
print(f"Tolerance = {tol}")

GD parameters:  [-0.         -1.03160022  0.00660484  1.18531463  0.         -0.
 -0.         -0.3815634  -0.00679305 -0.        ]
Number of iterations: 17109
LASSO coefficients, Scikit learn:  [-0.         -1.03027552  0.00653717  1.18298956  0.         -0.
 -0.         -0.3804637  -0.00671807 -0.        ]
GD parameters equal to parameters from sklearn: [ True  True  True  True  True  True  True  True  True  True].
Tolerance = 0.01


### Gradient descent with RMSProp, LASSO

In [85]:
# Build the data set and its train/test split (n=100 is requested from prepare_data)
x, y, x_train, x_test, y_train, y_test, y_noisy = prepare_data(n=100)

# Hyperparameters for this run
p = 8                # polynomial degree
lam = 0.00001        # L1 penalty; reused as sklearn's alpha below
eta = 1e-3           # eta argument handed to the optimiser
num_iters = 100_000  # passed as num_iters and as sklearn's max_iter

# Polynomial design matrices for the full, training and test inputs
# (intercept=False is forwarded to polynomial_features)
X_plot, X_train, X_test = (
    polynomial_features(inputs, p, intercept=False) for inputs in (x, x_train, x_test)
)

# Standardise every matrix with the statistics of the training matrix only
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s, X_plot_s = (
    scaler.transform(matrix) for matrix in (X_train, X_test, X_plot)
)

# Fit with our own RMSProp gradient-descent LASSO
beta, t = RMSProp_gradient_descent_LASSO(
    X_train_s, y_train, lam=lam, eta=eta, num_iters=num_iters
)
print("GD parameters: ", beta)
print(f"Number of iterations: {t}")

# Reference fit: scikit-learn's Lasso with the same penalty and no intercept
lasso = Lasso(alpha=lam, fit_intercept=False, max_iter=num_iters).fit(X_train_s, y_train)
beta_sklearn = lasso.coef_
print("LASSO coefficients, Scikit learn: ", beta_sklearn)

# Element-wise agreement check; tol is used as both relative and absolute tolerance
tol = 1e-2
is_close = np.isclose(beta, beta_sklearn, rtol=tol, atol=tol)
print(f"GD parameters equal to parameters from sklearn: {is_close}.")
print(f"Tolerance = {tol}")

GD parameters:  [-0.02672266 -2.09401031  0.17867277  5.81977076 -0.33512042 -6.56941488
  0.18842858  2.61217172]
Number of iterations: 100000
LASSO coefficients, Scikit learn:  [-0.02614834 -2.13960258  0.17734494  6.03823973 -0.34226685 -6.89443157
  0.19468921  2.7667896 ]
GD parameters equal to parameters from sklearn: [ True False  True False  True False  True False].
Tolerance = 0.01


### Gradient descent with ADAM, LASSO

In [89]:
# Build the data set and its train/test split (n=100 is requested from prepare_data)
x, y, x_train, x_test, y_train, y_test, y_noisy = prepare_data(n=100)

# Hyperparameters for this run
p = 14               # polynomial degree
lam = 0.0001         # L1 penalty; reused as sklearn's alpha below
eta = 1e-2           # eta argument handed to the optimiser
num_iters = 100_000  # passed as num_iters and as sklearn's max_iter

# Polynomial design matrices for the full, training and test inputs
# (intercept=False is forwarded to polynomial_features)
X_plot, X_train, X_test = (
    polynomial_features(inputs, p, intercept=False) for inputs in (x, x_train, x_test)
)

# Standardise every matrix with the statistics of the training matrix only
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s, X_plot_s = (
    scaler.transform(matrix) for matrix in (X_train, X_test, X_plot)
)

# Fit with our own ADAM gradient-descent LASSO
beta, t = ADAM_gradient_descent_LASSO(
    X_train_s, y_train, lam=lam, eta=eta, num_iters=num_iters
)
print("GD parameters: ", beta)
print(f"Number of iterations: {t}")

# Reference fit: scikit-learn's Lasso with the same penalty and no intercept
lasso = Lasso(alpha=lam, fit_intercept=False, max_iter=num_iters).fit(X_train_s, y_train)
beta_sklearn = lasso.coef_
print("LASSO coefficients, Scikit learn: ", beta_sklearn)

# Element-wise agreement check; tol is used as both relative and absolute tolerance
tol = 1e-2
is_close = np.isclose(beta, beta_sklearn, rtol=tol, atol=tol)
print(f"GD parameters equal to parameters from sklearn: {is_close}.")
print(f"Tolerance = {tol}")

GD parameters:  [-3.88327325e-03 -1.88094913e+00  1.96135567e-02  4.62968185e+00
 -0.00000000e+00 -4.02064133e+00 -3.82951538e-02  0.00000000e+00
 -0.00000000e+00  1.20724523e+00  0.00000000e+00  0.00000000e+00
  2.55731983e-02 -1.65194526e-01]
Number of iterations: 82160
LASSO coefficients, Scikit learn:  [-4.29621301e-03 -1.87307641e+00  2.11345894e-02  4.58605956e+00
 -0.00000000e+00 -3.95409863e+00 -4.06052861e-02  0.00000000e+00
 -0.00000000e+00  1.15700245e+00  0.00000000e+00  0.00000000e+00
  2.69088221e-02 -1.45530841e-01]
GD parameters equal to parameters from sklearn: [ True  True  True  True  True False  True  True  True False  True  True
  True False].
Tolerance = 0.01
