## sklearn.preprocessing.PolynomialFeatures

- **degree**: int or tuple (min_degree, max_degree), default=2  多项式特征的次数；若为元组，则分别指定最低和最高次数


- **interaction_only**: bool, default=False  是否仅保留交互项：为 True 时只生成由至多 degree 个不同特征相乘得到的项（如 x0、x1、x0*x1），不含 x0^2 这类同一特征的高次幂


- **include_bias**: bool, default=True  是否包含偏置列（各项次数均为 0 的全 1 列，相当于线性模型中的截距项）

In [27]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
# Toy input: the integers 0..5 laid out row by row as 3 samples x 2 features.
X = np.arange(6).reshape(-1, 2)
X

array([[0, 1],
       [2, 3],
       [4, 5]])

In [28]:
# Interaction terms only, without the bias column: the output columns are
# x0, x1 and x0*x1 (no x0^2 or x1^2).
poly = PolynomialFeatures(
    degree=2,
    interaction_only=True,
    include_bias=False,
)
poly.fit_transform(X)

array([[ 0.,  1.,  0.],
       [ 2.,  3.,  6.],
       [ 4.,  5., 20.]])

## 转换为多元线性回归

In [29]:
# Option 1: fit the polynomial features with scikit-learn's LinearRegression.

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# The ground truth is purely linear: y = 3 + 1 * x_0 + 2 * x_1
y = 3 + X.dot(np.array([1, 2]))

# The bias column is left out here; the intercept is reported separately by
# the regressor (reg.intercept_ below).
poly = PolynomialFeatures(degree=2, include_bias=False)
XX = poly.fit_transform(X)
print(XX)
print('-' * 30)

# NOTE: XX has 4 rows but 5 columns, so the fit reproduces the training data
# exactly (score 1.0) without being unique. The prediction for [3, 5] therefore
# need not equal the true value 3 + 1*3 + 2*5 = 16.
reg = LinearRegression()
reg.fit(XX, y)

print(reg.score(XX, y))
print(reg.intercept_, reg.coef_)

# New samples must go through the same fitted transformer before predicting.
print(reg.predict(poly.transform([[3, 5]])))

[[1. 1. 1. 1. 1.]
 [1. 2. 1. 2. 4.]
 [2. 2. 4. 4. 4.]
 [2. 3. 4. 6. 9.]]
------------------------------
1.0
3.6842105263157885 [ 0.18421053  1.78947368  0.55263158 -0.42105263  0.21052632]
[17.10526316]


In [1]:
# Option 2: fit the same polynomial features with statsmodels OLS.

import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# The ground truth is purely linear: y = 3 + 1 * x_1 + 2 * x_2
y = 3 + X.dot(np.array([1, 2]))

# include_bias=True keeps the all-ones column in the design matrix; sm.OLS
# does not add a constant on its own, so this column acts as the intercept.
poly = PolynomialFeatures(degree=2, include_bias=True)
XX = poly.fit_transform(X)
print(XX)
print('-' * 30)

# statsmodels names the regressors automatically: const, x1, x2, ...
# NOTE: 4 observations against 6 columns leaves 0 residual degrees of freedom,
# so standard errors and adjusted R-squared in the summary are inf / nan.
res = sm.OLS(endog=y, exog=XX).fit()
print(res.params)
print(res.summary2())
# print(res.summary())

# New samples must go through the same fitted transformer before predicting.
print(res.predict(poly.transform([[3, 5]])))


[[1. 1. 1. 1. 1. 1.]
 [1. 1. 2. 1. 2. 4.]
 [1. 2. 2. 4. 4. 4.]
 [1. 2. 3. 4. 6. 9.]]
------------------------------
[ 2.59259259  1.68518519  1.92592593 -0.12962963 -0.14814815  0.07407407]
                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     nan      
Dependent Variable: y                AIC:                -247.0090
Date:               2022-08-26 22:43 BIC:                -249.4639
No. Observations:   4                Log-Likelihood:     127.50   
Df Model:           3                F-statistic:        nan      
Df Residuals:       0                Prob (F-statistic): nan      
R-squared:          1.000            Scale:              inf      
---------------------------------------------------------------------
           Coef.     Std.Err.       t       P>|t|    [0.025    0.975]
---------------------------------------------------------------------
const      2.5926         inf     0.0000      nan       nan       nan
x1         1

  return np.dot(wresid, wresid) / self.df_resid
  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  * (1 - self.rsquared))
