## 1 网格搜索法

In [2]:
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn import svm, datasets

In [4]:
iris = datasets.load_iris()
svc = svm.SVC()

# search space
param_grid = [
    {'C': [1, 10, 100, 100], 'kernel': ['linear', 'rbf']},
    {'C': [1, 10, 100, 100], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
# cross verify
cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)

#scoring
scoring = 'accuracy'

clf = GridSearchCV(svc, param_grid, cv=cv, scoring=scoring)
clf.fit(iris.data, iris.target) # get the clf which had optimized 

print(clf.predict(iris.data))

print(clf.get_params())
print(clf.best_params_)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
{'cv': ShuffleSplit(n_splits=3, random_state=0, test_size=0.3, train_size=None), 'error_score': 'raise-deprecating', 'estimator__C': 1.0, 'estimator__cache_size': 200, 'estimator__class_weight': None, 'estimator__coef0': 0.0, 'estimator__decision_function_shape': 'ovr', 'estimator__degree': 3, 'estimator__gamma': 'auto_deprecated', 'estimator__kernel': 'rbf', 'estimator__max_iter': -1, 'estimator__probability': False, 'estimator__random_state': None, 'estimator__shrinking': True, 'estimator__tol': 0.001, 'estimator__verbose': False, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', m



## 2 随机搜索法

In [5]:
from sklearn.model_selection import ParameterSampler
from scipy.stats.distributions import expon
import numpy as np
np.random.seed(0)
param_grid = {'a':[1, 2], 'b': expon()}
param_list = list(ParameterSampler(param_grid, n_iter=4))
param_list

[{'a': 1, 'b': 0.8985603929935616},
 {'a': 1, 'b': 0.9232231458040688},
 {'a': 2, 'b': 1.8789640641973517},
 {'a': 2, 'b': 1.0381592949436094}]

In [7]:
from sklearn.model_selection import RandomizedSearchCV

param_distribution = {'C': [1, 10, 100, 100], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}
clf2 = RandomizedSearchCV(svc, param_distributions=param_distribution,
                          cv=cv, scoring=scoring)
clf2.fit(iris.data, iris.target)
print(clf2.predict(iris.data))
print(clf2.best_params_)
print(clf2.best_score_)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
{'kernel': 'rbf', 'gamma': 0.001, 'C': 100}
0.9703703703703703




## 3 模型特定交叉验证

自带cv的模型

In [3]:
from sklearn.model_selection import train_test_split
boston = datasets.load_boston()


In [6]:
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target,
                                                    test_size=1/3)
len(X_train),len(X_test)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(boston.data)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


1.0


In [10]:
from sklearn.linear_model import Lasso
lareg = Lasso()
lareg.fit(X_train, y_train)
lareg.score(X_test, y_test)

0.2163419644325999

In [11]:
from sklearn.linear_model import LassoCV
lacvreg = LassoCV()
lacvreg.fit(X_train, y_train)
lacvreg.score(X_test, y_test)



0.7215398926562696

## 4 信息准则化