In [1]:
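# Lasso is a linear model that estimates sparse coefficients.
# It tends to prefer solutions with fewer non-zero coefficients, effectively reducing the number of features that the solution depends on.
# For this reason, Lasso and its variants are fundamental to the field of compressed sensing. Under certain conditions, it can recover the exact set of non-zero coefficients (see the scikit-learn example "Compressive sensing: tomography reconstruction").

# The Lasso class is implemented with coordinate descent to fit the coefficients.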


In [2]:
# https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_and_elasticnet.html#sphx-glr-auto-examples-linear-model-plot-lasso-and-elasticnet-py


In [9]:

# Presumably carried over from the scikit-learn gallery example this notebook
# follows. No docstring is defined in this cell, so the call prints whatever
# `__doc__` the interactive session supplies (see the cell output).
print(__doc__)


import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score

# ---------------------------------------------------------------------
# Build a synthetic regression problem with more features than samples.

# Seed NumPy's global generator so every draw below is repeatable.
np.random.seed(42)

n_samples = 50
n_features = 100
X = np.random.randn(n_samples, n_features)

idx = np.arange(n_features)

# Coefficients alternate in sign and shrink exponentially with the index,
# which keeps them easy to tell apart when visualised.
alternating_sign = (-1) ** idx
decaying_magnitude = np.exp(-idx / 10)
coef = alternating_sign * decaying_magnitude


Automatically created module for IPython interactive environment


In [12]:
# Zero every coefficient from index 10 onward so the true model is sparse.
coef[10:] = 0

# Noise-free targets: each one is the dot product of a row of X with coef.
y = X.dot(coef)

# Add noise: one normal draw per sample, scaled by 0.01.
noise = np.random.normal(size=n_samples)
y += 0.01 * noise

[ 0.05868774  0.01573947 -1.72365514  1.0720574  -2.42506067 -2.05349311
 -0.76568952 -5.61060388  3.0029248  -1.80683056  2.20123171  6.61372855
 -1.17800038  0.06802314 -1.53573425  0.1122106  -1.62186833 -0.1906823
  1.0707779   0.44898261 -2.04287716 -3.35829499  3.09247636  1.19401381
 -1.09040506  0.56876276 -1.74495231 -0.90828142  0.80771497 -2.22494833
  0.01749412 -1.65066486 -1.8008078   3.1278658   1.42396057  2.91664859
  1.12743261 -1.45182882 -0.98781938  4.89674593 -4.08959078 -1.57164298
  3.35542511 -1.03076004 -2.03530214 -1.12951198 -1.07915963  2.35449225
  0.82627719  0.58702813]


In [14]:
# Split into training and test sets: the first half of the rows is used for
# training, the remaining rows for testing (no shuffling).
n_samples = X.shape[0]
half = n_samples // 2
X_train, X_test = X[:half], X[half:]
y_train, y_test = y[:half], y[half:]
print(X_train.shape, y_train.shape)

(25, 100) (25,)


In [16]:
# Lasso model

# Value passed to the estimator as its `alpha` argument (the weight of the
# L1 penalty in scikit-learn's Lasso).
alpha = 0.1
lasso = Lasso(alpha=alpha)

# Fit on the training half, then predict the held-out half.
lasso.fit(X_train, y_train)
y_predict_lasso = lasso.predict(X_test)

# R^2 of the predictions against the held-out targets.
r2_score_lasso = r2_score(y_test, y_predict_lasso)

print(lasso)
print(y_predict_lasso)
print(r2_score_lasso)


Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)
[ 1.56029941 -2.57043176 -2.33043045 -1.16486059 -1.86049068 -1.36108086
 -3.05755928 -0.22077219  2.02143307  0.8140565   1.44354875 -1.11347963
 -0.01695535 -1.9757142   2.3071442  -4.83123348 -1.4765326   2.26444459
 -0.04907148 -3.17554979 -0.63821789 -0.65728607  1.89599936  0.82371127
  0.78707098]
0.6580638417732383
