In [37]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.preprocessing import StandardScaler

In [38]:
# Simulate a regression data set: t observations of p predictors that share a
# common random component, plus a response built from a known coefficient vector.
rg = np.random.default_rng(20201201)  # fixed seed so the draws are reproducible
t, p = 250, 12
# Argument order fixes the draw order: the (t, p) block first, then the (t, 1)
# column that is broadcast across every predictor.
x_values = np.add(rg.standard_normal((t, p)), rg.standard_normal((t, 1)))
x = pd.DataFrame(x_values, columns=["x" + str(i + 1) for i in range(p)])
# True coefficients rise evenly from 0.01 to 0.10 across the p predictors.
beta = np.linspace(0.01, 0.10, num=p)
# Response = linear signal + standard-normal noise.
y = x.dot(beta) + rg.standard_normal(t)


In [39]:
# Manual scaling: divide every predictor column and the response by its own
# standard deviation. Nothing is centred here -- only the scale changes.
x_scale = x.std(axis="index")  # one standard deviation per column of x
y_scale = y.std()
std_x = x.div(x_scale, axis="columns")
std_y = y.div(y_scale)

In [40]:
# Cross-validated LASSO on the scaled data, fitted without an intercept.
# fit() returns the estimator itself, so construction and fitting are chained.
lasso_cv = LassoCV(fit_intercept=False).fit(std_x, std_y)
print(f"Optimal alpha : {lasso_cv.alpha_}")
# Coefficients on the standardized scale (displayed as the cell output).
lasso_cv.coef_

Optimal alpha : 0.03099006709177538


array([ 0.03974028,  0.06074776,  0.        , -0.        ,  0.05089953,
        0.01921271,  0.        ,  0.1502758 ,  0.03909242,  0.0598198 ,
        0.15528124,  0.07940766])

In [41]:
# Map the standardized LASSO coefficients back to the original units:
# std_y = std_x @ b  implies  y = x @ (b * y_scale / x_scale).
lasso_orig = (y_scale / x_scale) * lasso_cv.coef_
lasso_orig

x1     0.033779
x2     0.051995
x3     0.000000
x4    -0.000000
x5     0.045243
x6     0.016822
x7     0.000000
x8     0.125317
x9     0.032415
x10    0.053860
x11    0.127339
x12    0.064297
dtype: float64

In [42]:
# Coarse search for the ridge penalty: 101 evenly spaced candidates from 1 to
# 1001 (step 10), fitted without an intercept on the scaled data. The winner
# only locates the region that the finer grid in the next cell explores.
ridge_cv = RidgeCV(alphas=np.linspace(1.0, 1001, 101), fit_intercept=False).fit(std_x, std_y)
print(f"Optimal alpha : {ridge_cv.alpha_}")

Optimal alpha : 261.0


In [43]:
# Fine search around the coarse optimum: 2001 candidate penalties between 251
# and 271 (step 0.01), again without an intercept.
ridge_cv = RidgeCV(fit_intercept=False, alphas=np.linspace(251, 271, 2001)).fit(std_x, std_y)
print(f"Optimal alpha : {ridge_cv.alpha_}")
# Ridge coefficients on the standardized scale (displayed as the cell output).
ridge_cv.coef_

Optimal alpha : 260.16


array([0.05068027, 0.054899  , 0.02736382, 0.01459514, 0.05279006,
       0.04015488, 0.03447551, 0.08251136, 0.05053303, 0.0547685 ,
       0.08857254, 0.0601494 ])

In [44]:
# Convert the standardized ridge coefficients to the original units of x and y
# by multiplying each one by y_scale / x_scale for its column.
ridge_orig = (y_scale / x_scale) * ridge_cv.coef_
ridge_orig

x1     0.043078
x2     0.046989
x3     0.023167
x4     0.012729
x5     0.046924
x6     0.035159
x7     0.029297
x8     0.068807
x9     0.041902
x10    0.049312
x11    0.072634
x12    0.048704
dtype: float64

In [48]:
# In-sample predictions in original units: unscaled predictors times the
# coefficients that were already converted back to the original scale.
rescaled_pred = x.dot(ridge_orig)
rescaled_pred

0      0.344728
1      0.280837
2      0.299205
3      0.255554
4     -0.575529
         ...   
245   -0.813198
246   -0.520075
247   -0.390404
248    0.164311
249   -0.896477
Length: 250, dtype: float64

## Using scikit-learn to scale


In [46]:
# Repeat the fine ridge search, this time letting scikit-learn scale the
# response instead of dividing by y.std() by hand.
y_scaler = StandardScaler(with_mean=False)  # rescale only; do not centre
y_2d = pd.DataFrame(y)  # the scaler is given y as a single-column table
# NOTE: this rebinds std_y, replacing the manually scaled response computed
# earlier in the notebook; re-running earlier cells afterwards will see this value.
std_y = y_scaler.fit_transform(y_2d)
ridge_cv = RidgeCV(alphas=np.linspace(251, 271, 2001), fit_intercept=False).fit(std_x, std_y)
print(f"Optimal alpha : {ridge_cv.alpha_}")


Optimal alpha : 260.16


In [50]:
# Predict on the standardized scale, then let the fitted scaler undo the
# response scaling so the predictions are back in the original units of y.
pred = ridge_cv.predict(std_x)
rescaled_pred = y_scaler.inverse_transform(pred)
# Drop the length-one axis so the result displays as a flat Series.
pd.Series(np.squeeze(rescaled_pred))

0      0.344728
1      0.280837
2      0.299205
3      0.255554
4     -0.575529
         ...   
245   -0.813198
246   -0.520075
247   -0.390404
248    0.164311
249   -0.896477
Length: 250, dtype: float64