In [1]:
import pickle
from pathlib import Path

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset bundle returned by scikit-learn's load_breast_cancer().
cancer = load_breast_cancer()

# Wrap the feature matrix in a DataFrame, labelling each column with its
# feature name so the preview below is readable.
data_df = pd.DataFrame(
    data=cancer.data,
    columns=cancer.feature_names,
)

# Preview the first three rows (last expression -> rich notebook display).
data_df.head(n=3)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758


In [15]:
# Hold out 20% of the samples as a test split; random_state is fixed so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.2,
    random_state=100,
)

# Individual models: logistic regression and k-nearest neighbours.
lr_clf = LogisticRegression(max_iter=5000)
knn_clf = KNeighborsClassifier(n_neighbors=8)

# Ensemble classifier that combines the two individual models by soft voting.
vo_clf = VotingClassifier(
    estimators=[('LR', lr_clf), ('KNN', knn_clf)],
    voting='soft',
)

# Show the held-out feature matrix.
# The estimators are only constructed here; no fit/predict call runs in this cell.
print(X_test)

[[1.791e+01 2.102e+01 1.244e+02 ... 1.964e-01 3.245e-01 1.198e-01]
 [1.434e+01 1.347e+01 9.251e+01 ... 1.087e-01 3.062e-01 6.072e-02]
 [2.016e+01 1.966e+01 1.311e+02 ... 1.425e-01 3.055e-01 5.933e-02]
 ...
 [1.303e+01 1.842e+01 8.261e+01 ... 5.013e-02 1.987e-01 6.169e-02]
 [1.825e+01 1.998e+01 1.196e+02 ... 1.932e-01 3.063e-01 8.368e-02]
 [9.847e+00 1.568e+01 6.300e+01 ... 6.528e-02 2.502e-01 9.209e-02]]


In [11]:
# Fit the voting ensemble and both individual classifiers on the training split.
vo_clf.fit(X_train, y_train)
lr_clf.fit(X_train, y_train)
knn_clf.fit(X_train, y_train)

# Make sure the output directory exists; otherwise open(..., 'wb') raises
# FileNotFoundError on a fresh checkout.
Path('models').mkdir(parents=True, exist_ok=True)

# Persist each fitted estimator. The `with` block guarantees every file handle
# is flushed and closed, which the bare `open(...)` calls did not.
for model_path, fitted_model in (
    ('models/lr_model.pkl', lr_clf),
    ('models/knn_model.pkl', knn_clf),
    ('models/voting_model.pkl', vo_clf),
):
    with open(model_path, 'wb') as f:
        pickle.dump(fitted_model, f)

In [12]:
# Reload the pickled logistic-regression model from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('models/lr_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Score the reloaded model on the held-out split.
pred = model.predict(X_test)
# Label corrected from the misspelled "Rogistic" to "Logistic".
print(f'Logistic Regression Classifier 정확도: {accuracy_score(y_test, pred)}')

Rogistic Regression Classifier 정확도: 0.956140350877193


In [14]:
# Paths of the pickled estimators to evaluate on the held-out split.
lr_model = 'models/lr_model.pkl'
knn_model = 'models/knn_model.pkl'
voting_model = 'models/voting_model.pkl'
models_list = [lr_model, knn_model, voting_model]

for model_file in models_list:
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    with open(model_file, 'rb') as f:
        model = pickle.load(f)

    # The file is only needed for deserialization, so predict after it is closed.
    pred = model.predict(X_test)

    # Path.stem gives the file name without directory or extension, replacing
    # the hard-coded slice [7:-4] that silently broke if the path layout changed.
    print(f'{Path(model_file).stem}의 정확도는 {accuracy_score(y_test, pred)}')

lr_model의 정확도는 0.956140350877193
knn_model의 정확도는 0.9473684210526315
voting_model의 정확도는 0.9649122807017544
