In [9]:
#required imports

import os
import sys
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.utils.data import evaluate_print

import warnings

In [3]:
warnings.filterwarnings("ignore")

In [4]:
# Load the breast-cancer dataset bundled with scikit-learn and split it into
# training and test partitions.
if __name__ == "__main__":
    # Single seed reused by the split and by every seeded estimator later on.
    random_state = 42

    # return_X_y=True hands back the (features, labels) pair directly.
    X, y = load_breast_cancer(return_X_y=True)

    # Hold out 40% of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.4,
        random_state=random_state,
    )

In [17]:
    # ------------------------------------------------------------------
    # Baselines: fit every model on its own and print its test metrics.
    # ------------------------------------------------------------------
    # Each entry is (row label, unfitted estimator). The list order is the
    # order in which the rows are printed. Labels are padded so the '|'
    # separators line up in the output.
    baseline_models = [
        ('Decision Tree        |', DecisionTreeClassifier(random_state=random_state)),
        ('Logistic Regression  |', LogisticRegression(random_state=random_state)),
        ('K Neighbors          |', KNeighborsClassifier()),
        ('Gradient Boosting    |', GradientBoostingClassifier(random_state=random_state)),
        ('Random Forest        |', RandomForestClassifier(random_state=random_state)),
    ]
    for label, clf in baseline_models:
        clf.fit(X_train, y_train)
        evaluate_print(label, y_test, clf.predict(X_test))

    # Blank line between the baseline rows and the combination rows.
    print()

    # ------------------------------------------------------------------
    # Combinations: aggregate a fresh group of classifiers in several ways.
    # ------------------------------------------------------------------
    # The same list of estimator objects is handed to every aggregator below.
    classifiers = [DecisionTreeClassifier(random_state=random_state),
                   LogisticRegression(random_state=random_state),
                   KNeighborsClassifier(),
                   RandomForestClassifier(random_state=random_state),
                   GradientBoostingClassifier(random_state=random_state)]

    # One weight per entry of `classifiers`.
    # NOTE(review): presumably matched positionally to `classifiers` (note the
    # order differs from the baseline table above) -- confirm against combo docs.
    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])

    # Each entry is (row label, keyword arguments for SimpleClassifierAggregator).
    # Keeping the settings in one table replaces five copy-pasted
    # build/fit/predict/print stanzas with a single loop.
    combinations = [
        ('Combination by avg   |', {'method': 'average'}),
        ('Combination by w_avg |', {'method': 'average', 'weights': clf_weights}),
        ('Combination by max   |', {'method': 'maximization'}),
        ('Combination by w_vote|', {'method': 'majority_vote', 'weights': clf_weights}),
        ('Combination by median|', {'method': 'median'}),
    ]
    for label, agg_kwargs in combinations:
        # Build the combined model from all estimators in `classifiers`.
        clf = SimpleClassifierAggregator(classifiers, **agg_kwargs)
        # Train the combined model, then predict on the held-out samples.
        clf.fit(X_train, y_train)
        y_test_predicted = clf.predict(X_test)
        # Report the same metrics as for the baselines.
        evaluate_print(label, y_test, y_test_predicted)

Decision Tree        | Accuracy:0.9386, ROC:0.9383, F1:0.9521
Logistic Regression  | Accuracy:0.9649, ROC:0.9586, F1:0.9732
K Neighbors          | Accuracy:0.9561, ROC:0.9519, F1:0.9662
Gradient Boosting    | Accuracy:0.9605, ROC:0.9524, F1:0.9699
Random Forest        | Accuracy:0.9781, ROC:0.9716, F1:0.9833

Combination by avg   | Accuracy:0.9825, ROC:0.9779, F1:0.9866
[[0.5 2.  0.5 1.  1. ]]
Combination by w_avg | Accuracy:0.9781, ROC:0.9688, F1:0.9834
Combination by max   | Accuracy:0.9474, ROC:0.925, F1:0.961
[[0.5 2.  0.5 1.  1. ]]
Combination by w_vote| Accuracy:0.9781, ROC:0.9774, F1:0.9831
Combination by median| Accuracy:0.9825, ROC:0.9779, F1:0.9866


Combination by avg   | Accuracy:0.9825, ROC:0.9779, F1:0.9866
[[0.5 2.  0.5 1.  1. ]]
Combination by w_avg | Accuracy:0.9781, ROC:0.9688, F1:0.9834
Combination by max   | Accuracy:0.9474, ROC:0.925, F1:0.961
[[0.5 2.  0.5 1.  1. ]]
Combination by w_vote| Accuracy:0.9781, ROC:0.9774, F1:0.9831
Combination by median| Accuracy:0.9825, ROC:0.9779, F1:0.9866
