In [17]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

cancer = load_breast_cancer()

In [18]:
scaler = StandardScaler()
data_scaled = scaler.fit_transform(cancer.data)

X_train,X_test, y_train, y_test = train_test_split(data_scaled, cancer.target,
                                                  test_size=0.3, random_state=0)

In [21]:
from sklearn.metrics import accuracy_score, roc_auc_score

lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
lr_preds = lr_clf.predict(X_test)

print('accuracy: {:0.3f}'.format(accuracy_score(y_test, lr_preds)))
print('roc_auc: {:0.3f}'.format(roc_auc_score(y_test, lr_preds)))

accuracy: 0.977
roc_auc: 0.972


In [28]:
from sklearn.model_selection import GridSearchCV

params = {'penalty': ['l2','l1'],
         'C':[0.01,0.1,1,1,5,10]}

grid_clf = GridSearchCV(lr_clf, param_grid=params, scoring='accuracy', cv=3)
grid_clf.fit(data_scaled, cancer.target)
print(f'최적 하이퍼 파라미터: {grid_clf.best_params_}')
print(f'최적 평균 정확도: {grid_clf.best_score_}')

pred = grid_clf.best_estimator_.predict(X_test)
accuracy_test = accuracy_score(y_test,pred)
print(f'테스트 데이터 정확도: {accuracy_test:.4f}')

최적 하이퍼 파라미터: {'C': 1, 'penalty': 'l2'}
최적 평균 정확도: 0.975392184164114
테스트 데이터 정확도: 0.9883
