In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Linear model
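$$\theta = (XX^T)^{-1}Xy$$
$$y = \theta_0 + x_1\theta_1 + \cdots + x_n\theta_n = X^T \theta$$

Here each column of $X$ is one sample with a leading $x_0 = 1$ for the bias term. With samples stored as rows instead (as `X_b` is in the code below), the same normal equation reads $\theta = (X^TX)^{-1}X^Ty$ and the prediction is $y = X\theta$.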

In [None]:
# Synthetic linear data: y = 2 + 3*x1 + 4*x2 + standard-normal noise
num = 1000
X = np.random.rand(num, 2)
# Build the target as a column vector of shape (num, 1) in one step.
y = (2 + 3 * X[:, 0] + 4 * X[:, 1] + np.random.randn(num)).reshape(-1, 1)
# Prepend a column of ones so the intercept is learned as theta[0].
X_b = np.hstack((np.ones((num, 1)), X))

In [None]:
# Ordinary least squares with scikit-learn
from sklearn.linear_model import LinearRegression
# Solves the least-squares problem directly (no iterative optimisation);
# fit() returns the estimator itself, so construction and fitting are chained.
lin_reg = LinearRegression().fit(X, y.ravel())
print(lin_reg.intercept_, lin_reg.coef_)

In [None]:
# Batch Gradient Descent (BGD): every update uses the gradient over all samples
eta = 0.1                      # learning rate
n_iter = 1000                  # number of full-batch updates
theta = np.random.randn(3, 1)  # random start: one weight per column of X_b
for _ in range(n_iter):
    # Residual-weighted feature sums, averaged over the num samples.
    gradient = (1 / num) * (X_b.T @ (X_b @ theta - y))
    theta = theta - eta * gradient
print(theta)

# Stochastic Gradient Descent: SGD
# (A trailing comma after the last imported name is a SyntaxError unless the
# names are wrapped in parentheses, so the import list ends without one.)
from sklearn.linear_model import SGDRegressor, ARDRegression
# penalty: regularisation term added to the loss; eta0: initial learning rate;
# tol: stopping tolerance; max_iter: maximum number of iterations
sgd_reg = SGDRegressor(penalty='l2', eta0=0.01, max_iter=1000, tol=1e-3)
sgd_reg.fit(X, y.ravel())
# Report the parameters of the SGD model that was just fitted (not lin_reg's).
print(sgd_reg.intercept_, sgd_reg.coef_)

# Polynomial Model

In [None]:
# Quadratic toy data: y = 0.5*x^2 + x + 1 + standard-normal noise
num = 1000
X = np.random.rand(num, 1) * 6 - 3  # rescale the [0, 1) draws to [-3, 3)
# X = np.random.rand(num, 1)  # alternative range; the resulting plot looks different
y = 0.5 * np.square(X) + X + 1 + np.random.randn(num, 1)

In [None]:
# Polynomial regression = polynomial feature expansion + a linear model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# include_bias=False: do not add the constant (bias) column; LinearRegression
# fits its own intercept.
poly_features = PolynomialFeatures(include_bias=False, degree=2)
X_poly = poly_features.fit_transform(X)
# Fit the linear model on the expanded features (fit() returns the estimator).
lin_reg = LinearRegression().fit(X_poly, y.ravel())
print(lin_reg.intercept_, lin_reg.coef_)

# Learning Curves

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

def plot_learning_curves(reg, X, y):
    """Plot train and test RMSE of ``reg`` as the training subset grows.

    The data is split once with ``train_test_split`` (20% held out as the
    test set). For every size ``m`` from 1 up to and including the full
    training set, ``reg`` is refit on the first ``m`` training samples and its
    mean squared error is measured on those ``m`` samples and on the held-out
    test set. The square roots of both error series are plotted against ``m``.

    Args:
        reg: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is refit
            in place on every iteration.
        X: Feature array, sliceable along its first axis.
        y: Target array aligned with ``X``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    train_error, test_error = [], []
    # "+ 1" so the last point is the model trained on the whole training set.
    sizes = list(range(1, len(X_train) + 1))
    for m in sizes:
        reg.fit(X_train[:m], y_train[:m])
        y_train_predict = reg.predict(X_train[:m])
        y_test_predict = reg.predict(X_test)
        # scikit-learn's documented argument order is (y_true, y_pred).
        train_error.append(mean_squared_error(y_train[:m], y_train_predict))
        test_error.append(mean_squared_error(y_test, y_test_predict))
    # Plot against the actual training-set size instead of the list index,
    # which would shift the x axis by one.
    plt.plot(sizes, np.sqrt(train_error), "r-", linewidth=3, label="train")
    plt.plot(sizes, np.sqrt(test_error), "b--", linewidth=3, label="test")
    plt.xlabel("training set size")
    plt.ylabel("RMSE")
    plt.ylim(0, 2)
    plt.legend(loc="best")
    plt.show()
# plot_learning_curves(LinearRegression(), X, y)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Chain the degree-2 feature expansion and the linear model into one estimator
# so the learning-curve helper can refit both steps together.
polynomial_regression = Pipeline(steps=[
    ("poly_features", PolynomialFeatures(include_bias=False, degree=2)),
    ("lin_reg", LinearRegression(n_jobs=-1)),
])
plot_learning_curves(polynomial_regression, X, y)
# polynomial_regression.fit(X, y)
# polynomial_regression.named_steps["lin_reg"].intercept_,  # .coef_

# Regularized Linear Models 正则化线性模型

In [None]:
# Ridge Regression (L2 regularisation), fitted two ways
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor

# Direct solve using the "cholesky" solver.
ridge_reg = Ridge(solver="cholesky", alpha=1).fit(X, y.ravel())

# L2-penalised linear model trained by SGD; tol=None with max_iter=5 means
# the tolerance-based stopping check is not used.
sgd_reg = SGDRegressor(max_iter=5, tol=None, penalty="l2")
sgd_reg.fit(X, y.ravel())

In [None]:
# Lasso Regression (L1 regularisation), fitted two ways
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor

# Dedicated Lasso estimator.
lasso_reg = Lasso(alpha=0.1).fit(X, y.ravel())

# L1-penalised linear model trained by SGD; tol=None with max_iter=5 means
# the tolerance-based stopping check is not used.
sgd_reg = SGDRegressor(max_iter=5, tol=None, penalty="l1")
sgd_reg.fit(X, y.ravel())

In [None]:
# Elastic Net (mix of L1 and L2 penalties), fitted two ways
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import SGDRegressor

# Dedicated ElasticNet estimator; l1_ratio=0.5 weights L1 and L2 equally.
elastic_net = ElasticNet(l1_ratio=0.5, alpha=0.1).fit(X, y.ravel())

# The same penalty settings passed to the SGD-based regressor.
sgd_reg = SGDRegressor(alpha=0.1, l1_ratio=0.5, penalty="elasticnet")
sgd_reg.fit(X, y.ravel())

In [None]:
# Early stopping with warm_start: train one epoch at a time and remember the
# model that achieved the lowest validation error.
from copy import deepcopy
from sklearn.base import clone  # no longer used in this cell; retained in case other cells rely on the name

# NOTE(review): X_train_poly_scaled, y_train, X_val_poly_scaled and y_val are
# not defined anywhere in the visible notebook. They must be created before
# this cell runs (presumably a train/validation split followed by polynomial
# expansion and scaling) - confirm where they come from.

# max_iter=1 with tol=None performs one pass per fit() call; warm_start=True
# makes each call continue from the previous coefficients instead of
# restarting. (`n_iter` is not accepted by current SGDRegressor; the rest of
# this notebook already uses max_iter/tol.)
sgd_reg = SGDRegressor(max_iter=1, tol=None, warm_start=True, penalty=None,
                       learning_rate="constant", eta0=0.0005)
minimum_val_error = float("inf")  # positive infinity: any real error beats it
best_epoch, best_model = None, None
for epoch in range(1000):
    sgd_reg.fit(X_train_poly_scaled, y_train)  # continue training
    y_val_predict = sgd_reg.predict(X_val_poly_scaled)
    # scikit-learn's documented argument order is (y_true, y_pred).
    val_error = mean_squared_error(y_val, y_val_predict)
    if val_error < minimum_val_error:
        minimum_val_error = val_error
        best_epoch = epoch
        # Save the model. clone() would return a new *unfitted* estimator with
        # the same hyper-parameters, so deepcopy is used to keep the weights.
        best_model = deepcopy(sgd_reg)

# LogisticRegression

In [None]:
# Logistic Regression
# Adds a sigmoid on top of the linear model; the loss is the log-likelihood
# (log) loss.
from sklearn.linear_model import LogisticRegression
logistic_reg = LogisticRegression()
# logistic_reg.fit(X, y)

# Softmax Regression
# Computes one probability per class and predicts the most probable class;
# the loss is cross-entropy.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# `multi_class` argument - confirm against the installed version.
softmax_reg = LogisticRegression(C=10, solver="lbfgs", multi_class="multinomial")
# softmax_reg.fit()