# 로지스틱 회귀분석 
- 종속변수가 범주형 자료일 경우 적용하는 회귀모델
- sklearn.linear_model.LogisticRegression

# 데이터 불러오기

In [2]:
import warnings
# Install a global "ignore" filter before the remaining imports so that every
# warning (including any raised while importing pandas) is suppressed.
warnings.filterwarnings('ignore')
import pandas as pd

# Load the breast-cancer CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv(
    '../datasets/빅분기 실기 데이터/breast-cancer-wisconsin.csv',
    encoding='utf-8',
)

In [3]:
# Feature matrix: every column except the label column 'Class'.
X = df.drop('Class', axis=1)
# Target labels as a 1-D Series (single brackets). scikit-learn estimators
# expect y with shape (n_samples,); the previous one-column DataFrame
# (df[['Class']]) is a column vector and makes estimators emit a
# DataConversionWarning, which the global warnings filter was hiding.
y = df['Class']

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as a test split. Stratifying on y keeps the class
# proportions alike in both partitions, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=11,
)

# MinMaxScaler

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training split only, then apply the
# same fitted scaler to the test split so test data never influences scaling.
scaler = MinMaxScaler()
X_scaled_train = scaler.fit_transform(X_train)
X_scaled_test = scaler.transform(X_test)

# 모델 적용

In [6]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with scikit-learn's default
# hyperparameters on the scaled training features.
lr = LogisticRegression()
# Kept as a bare trailing expression so the notebook displays the fitted model.
lr.fit(X=X_scaled_train, y=y_train)

# 예측 & 결과 확인

In [7]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict class labels for the held-out (scaled) test features.
pred = lr.predict(X_scaled_test)

# Fraction of test rows classified correctly; left as a bare trailing
# expression so the notebook displays the value.
accuracy_score(y_true=y_test, y_pred=pred)

0.9658536585365853

In [8]:
# Confusion matrix for the test split: rows are the true classes, columns are
# the predicted classes.
confusion = confusion_matrix(y_true=y_test, y_pred=pred)
# Bare trailing expression so the notebook displays the matrix.
confusion

array([[130,   3],
       [  4,  68]], dtype=int64)

In [9]:
# Text report of per-class precision, recall, F1 and support on the test split.
cfreport_test = classification_report(y_true=y_test, y_pred=pred)
print('분류예측 레포트:\n', cfreport_test)

분류예측 레포트:
               precision    recall  f1-score   support

           0       0.97      0.98      0.97       133
           1       0.96      0.94      0.95        72

    accuracy                           0.97       205
   macro avg       0.96      0.96      0.96       205
weighted avg       0.97      0.97      0.97       205



# Grid Search
- 기본 모델도 성능이 좋은 편이었지만(테스트 정확도 0.9659), 그리드 서치로 C=10을 선택한 뒤 테스트 정확도가 0.9756으로 조금 오름

In [10]:
from sklearn.model_selection import GridSearchCV

# Candidate values for C (the inverse regularisation strength), from 0.001 to
# 100 in powers of ten.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated search over C, using `lr` as the base estimator.
grid_search = GridSearchCV(estimator=lr, param_grid=param_grid, cv=5)
# Bare trailing expression so the notebook displays the fitted search object.
grid_search.fit(X_scaled_train, y_train)

In [12]:
# Pair each output template with the value it reports: the winning parameter
# set, its mean cross-validation score, and the refit model's score on the
# held-out test split.
summary = (
    ('Best Parameter: {}', grid_search.best_params_),
    ('Best Score: {:.4f}', grid_search.best_score_),
    ('TestSet Score: {:.4f}', grid_search.score(X_scaled_test, y_test)),
)
for template, value in summary:
    print(template.format(value))

Best Parameter: {'C': 10}
Best Score: 0.9686
TestSet Score: 0.9756
