### 라이브러리 불러오기

In [30]:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.preprocessing import StandardScaler

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


import warnings
warnings.filterwarnings('ignore')
dataset = load_breast_cancer()

In [11]:
scaler = StandardScaler()
dataset_scaled = scaler.fit_transform(dataset.data)

X_train, X_test, y_train, y_test = \
train_test_split(dataset_scaled
                 , dataset.target
                 , test_size=0.3
                 , random_state=0)

In [19]:
lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
lr_pred = lr_clf.predict(X_test)
lr_pred_proba = lr_clf.predict_proba(X_test)[:, 1]

print(f'''
▶ accuracy
{accuracy_score(y_test, lr_pred):.4f}

▶ roc_auc
{roc_auc_score(y_test, lr_pred):.4f} 
''')



▶ accuracy
0.9766

▶ roc_auc
0.9716 



In [29]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']

for solver in solvers:
    lr_clf = LogisticRegression(solver = solver, max_iter = 600)
    lr_clf.fit(X_train, y_train)
    lr_preds = lr_clf.predict(X_test)
    lr_preds_proba = lr_clf.predict_log_proba(X_test)[:, 1]

    # accuracy와 roc-auc 측정
    print(
f'''
▶ solver : {solver}          
    ▶ accuracy_pred
        {accuracy_score(y_test, lr_preds):.4f}

    ▶ roc_auc_pred
        {roc_auc_score(y_test, lr_preds):.4f}

    ▶ roc_auc_pred_proba
        {roc_auc_score(y_test, lr_preds_proba):.4f}

'''
)




▶ solver : lbfgs          
    ▶ accuracy_pred
        0.9766

    ▶ roc_auc_pred
        0.9716

    ▶ roc_auc_pred_proba
        0.9947



▶ solver : liblinear          
    ▶ accuracy_pred
        0.9825

    ▶ roc_auc_pred
        0.9795

    ▶ roc_auc_pred_proba
        0.9947



▶ solver : newton-cg          
    ▶ accuracy_pred
        0.9766

    ▶ roc_auc_pred
        0.9716

    ▶ roc_auc_pred_proba
        0.9947



▶ solver : sag          
    ▶ accuracy_pred
        0.9825

    ▶ roc_auc_pred
        0.9795

    ▶ roc_auc_pred_proba
        0.9947



▶ solver : saga          
    ▶ accuracy_pred
        0.9825

    ▶ roc_auc_pred
        0.9795

    ▶ roc_auc_pred_proba
        0.9947




In [58]:
# c = alpha의 역수 (작은수록 규제가 크다)
# liblinear, saga : l1, l2 가능
# lbfgs, newton-cg, sag : l2만 가능
params = [{
            'solver' : ['liblinear']
            , 'penalty' : ['l2', 'l1']
            , 'C' : [0.01, 0.1, 1, 5, 10, 100]
            }
        , {
            'solver' : ['lbfgs', 'newton-cg']
            , 'penalty' : ['l2']
            , 'C' : [0.1, 1, 5 ,10, 100]
        }]

lr_clf = LogisticRegression()

grid_clf = GridSearchCV(lr_clf
                        , param_grid = params
                        , scoring = 'accuracy'
                        , cv = 3)

grid_clf.fit(dataset_scaled, dataset.target)
print(
f'''
best_params : {grid_clf.best_params_}
best_score : {grid_clf.best_score_:.4f}
'''
)


best_params : {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}
best_score : 0.9789



In [59]:
best_param = grid_clf.best_params_

best_lr_clf = LogisticRegression(**best_param)
best_lr_clf.fit(X_train, y_train)

lr_preds = best_lr_clf.predict(X_test)
lr_preds_proba = best_lr_clf.predict_log_proba(X_test)[:, 1]

print(f'''
accuracy : {accuracy_score(y_test, lr_pred):.4f}
roc-auc : {roc_auc_score(y_test, lr_pred):.4f}
roc-auc_proba : {roc_auc_score(y_test, lr_pred_proba):.4f}
      ''')



accuracy : 0.9766
roc-auc : 0.9716
roc-auc_proba : 0.9947
      
