In [17]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

## 0. Load the moons dataset
# Generate 500 samples with noise=0.30; the seed keeps the data reproducible.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
# Seeded train/test split; the split proportion is left at the library default.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [18]:
## 1. logistic Regression, Random Forest #50, #100, #200, SVM 직접 투표 분류기
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base learners: one logistic regression, three random forests that differ
# only in the number of trees, and one SVC. Every model shares the same seed.
log_clf = LogisticRegression(random_state=42, solver="lbfgs")
rnd_clf_50, rnd_clf_100, rnd_clf_200 = (
    RandomForestClassifier(n_estimators=n_trees, random_state=42)
    for n_trees in (50, 100, 200)
)
svm_clf = SVC(random_state=42, gamma="scale")

# Ensemble over the five base learners, configured with voting='hard'.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf50', rnd_clf_50),
        ('rf100', rnd_clf_100),
        ('rf200', rnd_clf_200),
        ('svc', svm_clf),
    ],
    voting='hard',
)

In [19]:
# Train the ensemble on the training split. The call is the cell's final
# expression, so the returned estimator is echoed as the cell output.
voting_clf.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf50',
                              RandomForestClassifier(n_estimators=50,
                                                     random_state=42)),
                             ('rf100', RandomForestClassifier(random_state=42)),
                             ('rf200',
                              RandomForestClassifier(n_estimators=200,
                                                     random_state=42)),
                             ('svc', SVC(random_state=42))])

In [21]:
## 1.2 각 분류기 성능 확인
from sklearn.metrics import accuracy_score

# Each row pairs a model with the literal text printed before and after its
# class name, so a single print call reproduces every report line.
report_rows = (
    ('Accuracy of ', log_clf, ' : '),
    ('Accuracy of ', rnd_clf_50, ' with 50 trees : '),
    ('Accuracy of ', rnd_clf_100, ' with 100 trees : '),
    ('Accuracy of ', rnd_clf_200, ' with 200 trees : '),
    ('Accuracy of ', svm_clf, ' : '),
    ('Accuracy of Direct', voting_clf, ' : '),
)

for lead, clf, tail in report_rows:
    # Fit on the training split, then score predictions on the held-out split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(lead, clf.__class__.__name__, tail, accuracy_score(y_test, y_pred))

Accuracy of  LogisticRegression  :  0.864
Accuracy of  RandomForestClassifier  with 50 trees :  0.896
Accuracy of  RandomForestClassifier  with 100 trees :  0.896
Accuracy of  RandomForestClassifier  with 200 trees :  0.896
Accuracy of  SVC  :  0.896
Accuracy of Direct VotingClassifier  :  0.896


In [22]:
## 2. logistic Regression, Random Forest #50, #100, #200, SVM 간접 투표 분류기
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Fresh base learners for the second ensemble: one logistic regression, three
# random forests that differ only in the number of trees, and one SVC.
log_clf = LogisticRegression(random_state=42, solver="lbfgs")
rnd_clf_50, rnd_clf_100, rnd_clf_200 = (
    RandomForestClassifier(n_estimators=n_trees, random_state=42)
    for n_trees in (50, 100, 200)
)
# probability=True is set on the SVC here.
# NOTE(review): presumably so it can supply class probabilities to the
# voting='soft' ensemble below -- confirm against the scikit-learn docs.
svm_clf = SVC(random_state=42, gamma="scale", probability=True)

# Ensemble over the five base learners, configured with voting='soft'.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf50', rnd_clf_50),
        ('rf100', rnd_clf_100),
        ('rf200', rnd_clf_200),
        ('svc', svm_clf),
    ],
    voting='soft',
)

In [23]:
# Train the ensemble on the training split. The call is the cell's final
# expression, so the returned estimator is echoed as the cell output.
voting_clf.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf50',
                              RandomForestClassifier(n_estimators=50,
                                                     random_state=42)),
                             ('rf100', RandomForestClassifier(random_state=42)),
                             ('rf200',
                              RandomForestClassifier(n_estimators=200,
                                                     random_state=42)),
                             ('svc', SVC(probability=True, random_state=42))],
                 voting='soft')

In [24]:
## 2.2 각 분류기 성능 확인
from sklearn.metrics import accuracy_score

# Each row pairs a model with the literal text printed before and after its
# class name, so a single print call reproduces every report line.
report_rows = (
    ('Accuracy of ', log_clf, ' : '),
    ('Accuracy of ', rnd_clf_50, ' with 50 trees : '),
    ('Accuracy of ', rnd_clf_100, ' with 100 trees : '),
    ('Accuracy of ', rnd_clf_200, ' with 200 trees : '),
    ('Accuracy of ', svm_clf, ' : '),
    ('Accuracy of Indirect', voting_clf, ' : '),
)

for lead, clf, tail in report_rows:
    # Fit on the training split, then score predictions on the held-out split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(lead, clf.__class__.__name__, tail, accuracy_score(y_test, y_pred))

Accuracy of  LogisticRegression  :  0.864
Accuracy of  RandomForestClassifier  with 50 trees :  0.896
Accuracy of  RandomForestClassifier  with 100 trees :  0.896
Accuracy of  RandomForestClassifier  with 200 trees :  0.896
Accuracy of  SVC  :  0.896
Accuracy of Indirect VotingClassifier  :  0.92
