In [1]:
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor, BaggingRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

  from numpy.core.umath_tests import inner1d


In [43]:
import pickle
def getData(dataFilePath):
    """读取数据"""
    data = pickle.load(open(dataFilePath, 'rb'))

    return data 

In [17]:
import numpy as np
def cv_rmse(model, X, y):
    # cross_val_score函数用法：https://www.cnblogs.com/lzhc/p/9175707.html
    rmse = np.sqrt(-cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5))
    return rmse

In [13]:
dataFilePath = r"C:\Study\github\Lookoops\tool\毕设代码\data\samples-data.data"
labelsFilePath = r"C:\Study\github\Lookoops\tool\毕设代码\data\samples-data-labels.data"
X = getData(dataFilePath)
Y = getData(labelsFilePath)
print(X)
print("*"*20)
print(Y)

[[ 6.31954246e+01  1.16578449e+02  6.98894843e+01]
 [ 8.96001652e+00  1.41215285e+02 -1.12979424e+02]
 [ 7.56421979e+01  1.03431322e+02  4.41317015e+01]
 [ 9.09808083e+01  7.79844485e+01 -2.73052881e+02]
 [ 9.47943781e+01 -1.97866533e+02 -1.70865126e+02]
 [ 5.38311535e+01 -3.54532890e+00 -1.84232292e+02]
 [ 6.42194047e+01 -1.52638292e+02  9.15704278e+01]
 [ 7.87060818e+01 -9.34009118e+01 -1.52100043e+02]
 [ 8.93757537e+01  5.41237730e+01 -1.96527505e+00]
 [ 9.81984501e+01 -1.52191871e+01  5.97763387e+01]
 [ 3.13582137e+01  4.59956256e+00 -2.55640400e+02]
 [ 8.14820849e+01 -5.89837955e+01  2.27786406e+01]
 [ 1.42010773e+01  3.46486834e+01 -8.74078267e+01]
 [ 8.58754111e+01 -1.76506074e+02 -1.35603768e+02]
 [ 3.62540432e+01  4.65869013e+01 -5.54268630e+01]
 [ 4.56887366e+01  8.10900244e+01 -2.16366559e+02]
 [ 2.36450057e+01  1.96170486e+02 -2.81379069e+01]
 [ 5.23303370e+01  1.78331295e+01 -7.03501181e+01]
 [ 1.85819214e+01 -1.70765243e+02 -1.53894253e+02]
 [ 2.10091917e+01  1.76286870e+

In [14]:
models = [LinearRegression(),
          Ridge(), # http://www.cnblogs.com/pinard/p/6023000.html
          Lasso(alpha=0.01,max_iter=10000), # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html
          RandomForestRegressor(), # https://scikit-learn.org/dev/modules/generated/sklearn.ensemble.RandomForestRegressor.html
          GradientBoostingRegressor(), # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
          SVR(), # https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR
          LinearSVR(), # https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html
          ElasticNet(alpha=0.001,max_iter=10000), # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html
          SGDRegressor(max_iter=10000,tol=1e-3), # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html
          BayesianRidge(), # 
          KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5), # https://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html
         ExtraTreesRegressor(), # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html
          XGBRegressor(), 
          AdaBoostRegressor(n_estimators=50), # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html
          BaggingRegressor(), # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingRegressor.html
          DecisionTreeRegressor(), #https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
          KNeighborsRegressor()] # https://scikit-learn.org/0.18/modules/generated/sklearn.neighbors.KNeighborsRegressor.html

In [18]:
names = ["LR", "Ridge", "Lasso", "RF", "GBR", "SVR", "LinSVR", "Ela","SGD","Bay","Ker","Extra","Xgb", "AdaBoost", "Bagging", "DT", "KN"]
for name, model in zip(names, models):
    score = cv_rmse(model, X, Y)
    print("{}: {:.6f}, {:.4f}".format(name,score.mean(),score.std()))

LR: 0.301518, 0.0821
Ridge: 0.301516, 0.0821
Lasso: 0.301532, 0.0822
RF: 0.287433, 0.0863
GBR: 0.317927, 0.1000
SVR: 0.302752, 0.0593
LinSVR: 0.475997, 0.1870
Ela: 0.301519, 0.0821
SGD: 60536153132001.898438, 38540856799771.3672
Bay: 0.300911, 0.0802
Ker: 0.281050, 0.0960
Extra: 0.297462, 0.0690
Xgb: 0.315075, 0.0811
AdaBoost: 0.270952, 0.0751
Bagging: 0.305953, 0.0889
DT: 0.395787, 0.1398
KN: 0.285826, 0.0911


In [31]:
import pandas as pd
class grid():
    def __init__(self, model):
        self.model = model
        
    def grid_train(self, X, y, train_para):
        grid_search = GridSearchCV(self.model, train_para, cv=5, scoring="neg_mean_squared_error", return_train_score=True)
        grid_search.fit(X, y)
        print(grid_search.best_params_, np.sqrt(-grid_search.best_score_)) # 打印最好的结果
        grid_search.cv_results_['mean_test_score'] = np.sqrt(-grid_search.cv_results_['mean_test_score'])
        print(pd.DataFrame(grid_search.cv_results_)[['params','mean_test_score','std_test_score']])

In [32]:
grid(Lasso()).grid_train(X,Y,{'alpha': [0.002, 0.0003, 0.00035, 0.0004,0.0005,0.0007,0.0006,0.0009,0.0008], 'max_iter':[10000]})

{'alpha': 0.0003, 'max_iter': 10000} 0.3125002305202548
                                  params  mean_test_score  std_test_score
0    {'alpha': 0.002, 'max_iter': 10000}         0.312507        0.056538
1   {'alpha': 0.0003, 'max_iter': 10000}         0.312500        0.056514
2  {'alpha': 0.00035, 'max_iter': 10000}         0.312500        0.056514
3   {'alpha': 0.0004, 'max_iter': 10000}         0.312501        0.056515
4   {'alpha': 0.0005, 'max_iter': 10000}         0.312501        0.056517
5   {'alpha': 0.0007, 'max_iter': 10000}         0.312502        0.056520
6   {'alpha': 0.0006, 'max_iter': 10000}         0.312501        0.056518
7   {'alpha': 0.0009, 'max_iter': 10000}         0.312503        0.056522
8   {'alpha': 0.0008, 'max_iter': 10000}         0.312502        0.056521


In [33]:
grid(Ridge()).grid_train(X,Y,{'alpha':[i for i in range(10, 20)]})

{'alpha': 19} 0.3124639542319715
          params  mean_test_score  std_test_score
0  {'alpha': 10}         0.312481        0.056515
1  {'alpha': 11}         0.312479        0.056515
2  {'alpha': 12}         0.312477        0.056516
3  {'alpha': 13}         0.312475        0.056516
4  {'alpha': 14}         0.312473        0.056517
5  {'alpha': 15}         0.312471        0.056517
6  {'alpha': 16}         0.312469        0.056518
7  {'alpha': 17}         0.312468        0.056519
8  {'alpha': 18}         0.312466        0.056519
9  {'alpha': 19}         0.312464        0.056520


In [35]:
grid(GradientBoostingRegressor()).grid_train(X,Y,{'learning_rate':[float(i/10) for i in range(1, 10)]})

{'learning_rate': 0.7} 0.31080565899217655
                   params  mean_test_score  std_test_score
0  {'learning_rate': 0.1}         0.333570        0.060029
1  {'learning_rate': 0.2}         0.343882        0.067969
2  {'learning_rate': 0.3}         0.326863        0.057680
3  {'learning_rate': 0.4}         0.350251        0.052702
4  {'learning_rate': 0.5}         0.318825        0.050991
5  {'learning_rate': 0.6}         0.321759        0.045494
6  {'learning_rate': 0.7}         0.310806        0.054882
7  {'learning_rate': 0.8}         0.362453        0.055659
8  {'learning_rate': 0.9}         0.333724        0.068913


In [36]:
grid(SVR()).grid_train(X,Y,
                       {
                           'kernel':['rbf'], 
                           'gamma':[0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5],
                           'epsilon':[0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5, 1, 10]
                       })

{'epsilon': 0.0005, 'gamma': 0.0005, 'kernel': 'rbf'} 0.2912746008275748
                                               params  mean_test_score  \
0   {'epsilon': 0.0005, 'gamma': 0.0005, 'kernel':...         0.291275   
1   {'epsilon': 0.0005, 'gamma': 0.001, 'kernel': ...         0.299044   
2   {'epsilon': 0.0005, 'gamma': 0.005, 'kernel': ...         0.307046   
3   {'epsilon': 0.0005, 'gamma': 0.01, 'kernel': '...         0.308853   
4   {'epsilon': 0.0005, 'gamma': 0.05, 'kernel': '...         0.309139   
5   {'epsilon': 0.0005, 'gamma': 0.1, 'kernel': 'r...         0.309139   
6   {'epsilon': 0.0005, 'gamma': 0.5, 'kernel': 'r...         0.309139   
7   {'epsilon': 0.001, 'gamma': 0.0005, 'kernel': ...         0.291283   
8   {'epsilon': 0.001, 'gamma': 0.001, 'kernel': '...         0.299068   
9   {'epsilon': 0.001, 'gamma': 0.005, 'kernel': '...         0.307042   
10  {'epsilon': 0.001, 'gamma': 0.01, 'kernel': 'r...         0.308847   
11  {'epsilon': 0.001, 'gamma': 0.05, '

In [37]:
grid(LinearSVR()).grid_train(X,Y,{'epsilon':[0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5], 'loss':['epsilon_insensitive', 'squared_epsilon_insensitive']})

{'epsilon': 0.1, 'loss': 'epsilon_insensitive'} 0.3123885476004754
                                               params  mean_test_score  \
0   {'epsilon': 0.0005, 'loss': 'epsilon_insensiti...         0.386303   
1   {'epsilon': 0.0005, 'loss': 'squared_epsilon_i...         0.445687   
2   {'epsilon': 0.001, 'loss': 'epsilon_insensitive'}         0.529021   
3   {'epsilon': 0.001, 'loss': 'squared_epsilon_in...         0.460220   
4   {'epsilon': 0.005, 'loss': 'epsilon_insensitive'}         0.430560   
5   {'epsilon': 0.005, 'loss': 'squared_epsilon_in...         0.423765   
6    {'epsilon': 0.01, 'loss': 'epsilon_insensitive'}         0.327939   
7   {'epsilon': 0.01, 'loss': 'squared_epsilon_ins...         0.356326   
8    {'epsilon': 0.05, 'loss': 'epsilon_insensitive'}         0.385905   
9   {'epsilon': 0.05, 'loss': 'squared_epsilon_ins...         0.406980   
10    {'epsilon': 0.1, 'loss': 'epsilon_insensitive'}         0.312389   
11  {'epsilon': 0.1, 'loss': 'squared_epsilon

In [38]:
grid(GradientBoostingRegressor()).grid_train(X,Y,{'learning_rate':[float(i/10) for i in range(1, 10)]})

{'learning_rate': 0.7} 0.32115863826646757
                   params  mean_test_score  std_test_score
0  {'learning_rate': 0.1}         0.334973        0.059929
1  {'learning_rate': 0.2}         0.340356        0.066444
2  {'learning_rate': 0.3}         0.335229        0.064866
3  {'learning_rate': 0.4}         0.346177        0.054074
4  {'learning_rate': 0.5}         0.322688        0.049329
5  {'learning_rate': 0.6}         0.329053        0.051246
6  {'learning_rate': 0.7}         0.321159        0.057205
7  {'learning_rate': 0.8}         0.360820        0.056111
8  {'learning_rate': 0.9}         0.355561        0.075243


In [39]:
grid(LinearSVR()).grid_train(X,Y,{'epsilon':[0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5], 'loss':['epsilon_insensitive', 'squared_epsilon_insensitive']})

{'epsilon': 0.05, 'loss': 'squared_epsilon_insensitive'} 0.32893321342903825
                                               params  mean_test_score  \
0   {'epsilon': 0.0005, 'loss': 'epsilon_insensiti...         0.422425   
1   {'epsilon': 0.0005, 'loss': 'squared_epsilon_i...         0.483114   
2   {'epsilon': 0.001, 'loss': 'epsilon_insensitive'}         0.412976   
3   {'epsilon': 0.001, 'loss': 'squared_epsilon_in...         0.486687   
4   {'epsilon': 0.005, 'loss': 'epsilon_insensitive'}         0.373233   
5   {'epsilon': 0.005, 'loss': 'squared_epsilon_in...         0.362462   
6    {'epsilon': 0.01, 'loss': 'epsilon_insensitive'}         0.344477   
7   {'epsilon': 0.01, 'loss': 'squared_epsilon_ins...         0.440366   
8    {'epsilon': 0.05, 'loss': 'epsilon_insensitive'}         0.421990   
9   {'epsilon': 0.05, 'loss': 'squared_epsilon_ins...         0.328933   
10    {'epsilon': 0.1, 'loss': 'epsilon_insensitive'}         0.410859   
11  {'epsilon': 0.1, 'loss': 'squar

In [40]:
grid(ElasticNet()).grid_train(X,Y,{'alpha':[0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5],'l1_ratio':[0.08,0.1,0.3,0.5,0.7],'max_iter':[10000]})

{'alpha': 0.0005, 'l1_ratio': 0.08, 'max_iter': 10000} 0.31249914377335236
                                               params  mean_test_score  \
0   {'alpha': 0.0005, 'l1_ratio': 0.08, 'max_iter'...         0.312499   
1   {'alpha': 0.0005, 'l1_ratio': 0.1, 'max_iter':...         0.312499   
2   {'alpha': 0.0005, 'l1_ratio': 0.3, 'max_iter':...         0.312500   
3   {'alpha': 0.0005, 'l1_ratio': 0.5, 'max_iter':...         0.312500   
4   {'alpha': 0.0005, 'l1_ratio': 0.7, 'max_iter':...         0.312500   
5   {'alpha': 0.001, 'l1_ratio': 0.08, 'max_iter':...         0.312499   
6   {'alpha': 0.001, 'l1_ratio': 0.1, 'max_iter': ...         0.312499   
7   {'alpha': 0.001, 'l1_ratio': 0.3, 'max_iter': ...         0.312500   
8   {'alpha': 0.001, 'l1_ratio': 0.5, 'max_iter': ...         0.312501   
9   {'alpha': 0.001, 'l1_ratio': 0.7, 'max_iter': ...         0.312502   
10  {'alpha': 0.005, 'l1_ratio': 0.08, 'max_iter':...         0.312500   
11  {'alpha': 0.005, 'l1_ratio': 0.1,

In [44]:
# grid(SGDRegressor()).grid_train(X,Y,{'alpha':[0.005, 0.01, 0.05, 0.1,0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 5],'l1_ratio':[0.08,0.1,0.3,0.5,0.7, 0.8, 0.9, 1]})

In [42]:
grid(KernelRidge()).grid_train(X,Y,{'alpha':[0.05, 0.1, 0.3,0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 5], 'kernel':['polynomial'], 'coef0':[1, 1.2, 1.5, 1.6, 1.8, 1.9, 2, 2.2, 2.5, 3]})

{'alpha': 5, 'coef0': 1, 'kernel': 'polynomial'} 1.3482243983736875
                                                params  mean_test_score  \
0    {'alpha': 0.05, 'coef0': 1, 'kernel': 'polynom...         1.393141   
1    {'alpha': 0.05, 'coef0': 1.2, 'kernel': 'polyn...         1.406248   
2    {'alpha': 0.05, 'coef0': 1.5, 'kernel': 'polyn...         1.436718   
3    {'alpha': 0.05, 'coef0': 1.6, 'kernel': 'polyn...         1.444667   
4    {'alpha': 0.05, 'coef0': 1.8, 'kernel': 'polyn...         1.447823   
5    {'alpha': 0.05, 'coef0': 1.9, 'kernel': 'polyn...         1.462174   
6    {'alpha': 0.05, 'coef0': 2, 'kernel': 'polynom...         1.473303   
7    {'alpha': 0.05, 'coef0': 2.2, 'kernel': 'polyn...         1.478813   
8    {'alpha': 0.05, 'coef0': 2.5, 'kernel': 'polyn...         1.494026   
9    {'alpha': 0.05, 'coef0': 3, 'kernel': 'polynom...         1.499100   
10   {'alpha': 0.1, 'coef0': 1, 'kernel': 'polynomi...         1.377583   
11   {'alpha': 0.1, 'coef0': 1.2