https://hleecaster.com/ml-logistic-regression-concept/

In [39]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state=0)


In [34]:
params = {
    'penalty':['l1','l2'],
    'C':[0.01,0.1,1,3,5,7,10]
}

lr_clf = LogisticRegression()
grid_clf = GridSearchCV(lr_clf,param_grid=params,cv=3,refit=True,scoring='accuracy')
grid_clf.fit(X_train,y_train)

GridSearchCV(cv=3, estimator=LogisticRegression(),
             param_grid={'C': [0.01, 0.1, 1, 3, 5, 7, 10],
                         'penalty': ['l1', 'l2']},
             scoring='accuracy')

In [35]:
def scores(y_test,pred=None,pred_proba=None):
    con_mat = confusion_matrix(y_test,pred)
    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test,pred)
    rec = recall_score(y_test,pred)
    f1 = f1_score(y_test,pred)
    roc_auc = roc_auc_score(y_test,pred_proba)
    print(f'오차행렬 : \n{con_mat}\n정확도 : {acc:.4f}, 정밀도 : {pre:.4f}, 재현율 : {rec:.4f}, f1 점수 : {f1:.4f}, roc_auc : {roc_auc:.4f}')


In [44]:
lr_clf.fit(X_train,y_train)
pred = lr_clf.predict(X_test)
pred_proba = lr_clf.predict_proba(X_test)[:,1]
print('테스트데이터 적용 점수')
scores(y_test,pred,pred_proba)

테스트데이터 적용 점수
오차행렬 : 
[[45  2]
 [ 4 63]]
정확도 : 0.9474, 정밀도 : 0.9692, 재현율 : 0.9403, f1 점수 : 0.9545, roc_auc : 0.9940


In [36]:
print('최적 파라미터 : ',grid_clf.best_params_)
print('최적 평균 정확도',grid_clf.best_score_,'\n')

best_grid = grid_clf.best_estimator_
pred = best_grid.predict(X_test)
pred_proba = best_grid.predict_proba(X_test)[:,1]

print('테스트데이터 적용 점수')
scores(y_test,pred,pred_proba)

최적 파라미터 :  {'C': 5, 'penalty': 'l2'}
최적 평균 정확도 0.9494742651330311 

테스트데이터 적용 점수
오차행렬 : 
[[44  3]
 [ 3 64]]
정확도 : 0.9474, 정밀도 : 0.9552, 재현율 : 0.9552, f1 점수 : 0.9552, roc_auc : 0.9898
