In [2]:

import seaborn as sns

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.datasets import load_breast_cancer, load_boston
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline


In [3]:
# Load the breast-cancer dataset and split it into train and test parts.
# random_state=0 is passed so the split is seeded rather than left random.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0
)


#%% ------------------------------------
# 6.0 Plain implementation (scaling done by hand, without a Pipeline)
# Learn each feature's minimum and maximum from the training data only.
# Note: this is min-max scaling, not standardization.
scaler = MinMaxScaler()
scaler.fit(X_train)
# Apply the training-derived scaling to both the train and the test split.
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)


In [4]:
# Fit an SVC with default hyperparameters on the manually scaled training data.
svm = SVC().fit(X_train_scaled, y_train)

# Report the score on the scaled test split, rounded to two decimals.
print(
    'Test score:  {:.2f}'.format(
        svm.score(X_test_scaled, y_test)
    )
)



Test score:  0.95


In [10]:
# Chain the scaler and the classifier into one estimator, so fit/score can be
# called on the unscaled data directly.
# The step names ('scaler', 'svm') are what parameter grids refer to later.
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('svm', SVC()),
])
pipe.fit(X_train, y_train)

# Score of the whole pipeline on the raw (unscaled) test split.
print('Test score:', pipe.score(X_test, y_test))


Test score: 0.951048951048951


In [11]:
# Candidate values for the SVC step's C and gamma. Keys are written as
# '<step name>__<parameter>', where 'svm' is the pipeline step name.
param_grid = {
    'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

# The pipeline is passed to the grid search like any other estimator;
# it is searched with 5-fold cross-validation on the raw training data.
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

# Summary: best CV score, score on the held-out test split, and the winning
# parameter combination (one line each).
print(
    "Best cross-validation accuracy: {:.2f}".format(grid.best_score_),
    "Test set score: {:.2f}".format(grid.score(X_test, y_test)),
    "Best parameters: {}".format(grid.best_params_),
    sep="\n",
)


Best cross-validation accuracy: 0.98
Test set score: 0.97
Best parameters: {'svm__C': 1, 'svm__gamma': 1}
