In [87]:
import random

import numpy as np
from sklearn.datasets import load_digits, load_iris
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

try:
    # Legacy module: deprecated in scikit-learn 0.18 and removed in 0.20.
    from sklearn import cross_validation
except ImportError:
    cross_validation = None

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # scikit-learn < 0.18 only ships the legacy location.
    train_test_split = cross_validation.train_test_split

In [90]:
class compare_classifiers:
    """Benchmark five tree-based classifiers on repeated random train/test splits.

    Constructing an instance runs the whole experiment (``run()``) and then
    prints summary statistics (``print_scores()``), so a single call such as
    ``compare_classifiers(dataset=load_iris())`` is a complete benchmark.

    Results differ from run to run: every split uses a freshly drawn random
    seed and the classifiers are created without a fixed ``random_state``.

    Attributes:
        dataset: the dataset object whose ``data`` / ``target`` are split.
        clf_scores: test accuracies of a single ``DecisionTreeClassifier``.
        bclf_scores: test accuracies of ``BaggingClassifier``.
        rfclf_scores: test accuracies of ``RandomForestClassifier``.
        adaclf_scores: test accuracies of ``AdaBoostClassifier`` (depth-1 trees).
        gbclf_scores: test accuracies of ``GradientBoostingClassifier``.
        named_score_list: ``(display name, score list)`` pairs; each entry
            references the very list object stored in the attribute above.
    """

    def __init__(self, dataset=None):
        """Run the benchmark on ``dataset`` and print the summary.

        Args:
            dataset: object exposing ``data`` and ``target`` attributes.
                When omitted, ``load_digits(n_class=10)`` is loaded here, at
                call time, instead of once when the class is defined, so
                instances never share one default dataset object.
        """
        if dataset is None:
            dataset = load_digits(n_class=10)

        self.clf_scores = []
        self.bclf_scores = []
        self.rfclf_scores = []
        self.adaclf_scores = []
        self.gbclf_scores = []

        # Display names are part of the printed output and are kept verbatim;
        # "Random Tree" labels the plain DecisionTreeClassifier scores.
        self.named_score_list = [
            ("Random Tree", self.clf_scores),
            ("Bagging", self.bclf_scores),
            ("Random Forest", self.rfclf_scores),
            ("AdaBoost", self.adaclf_scores),
            ("Gradient Boosting", self.gbclf_scores),
        ]

        self.dataset = dataset

        self.run()
        self.print_scores()

    def _classifier_factories(self):
        """Pair each zero-argument model factory with the list collecting its scores."""
        return [
            (DecisionTreeClassifier, self.clf_scores),
            (BaggingClassifier, self.bclf_scores),
            (RandomForestClassifier, self.rfclf_scores),
            # NOTE(review): the `algorithm` argument appears to be deprecated in
            # recent scikit-learn releases -- confirm against the installed version.
            (lambda: AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                        algorithm="SAMME",
                                        n_estimators=200),
             self.adaclf_scores),
            (GradientBoostingClassifier, self.gbclf_scores),
        ]

    def run(self, times=50):
        """Fit and score every classifier on ``times`` random 60/40 splits.

        Each iteration draws a new split seed, splits the dataset with
        ``test_size=0.4``, fits a fresh instance of every classifier on the
        training part and appends its test-set accuracy to the matching
        ``*_scores`` list. Calling ``run`` again appends further scores.

        Args:
            times: number of random splits to evaluate.
        """
        factories = self._classifier_factories()

        for _ in range(times):
            # A separate name for the seed; the loop counter itself is unused.
            split_seed = random.randint(1, 100000)
            X_train, X_test, y_train, y_test = train_test_split(
                self.dataset.data, self.dataset.target,
                test_size=0.4, random_state=split_seed)

            # Same evaluation order as the score lists are declared in.
            for make_classifier, scores in factories:
                model = make_classifier().fit(X_train, y_train)
                scores.append(model.score(X_test, y_test))

    def print_scores(self):
        """Print mean, median and standard deviation of each classifier's accuracies."""
        template = ("{0} accuracy mean: {1}\n"
                    "{0} accuracy median: {2}\n"
                    "{0} accuracy stdev: {3}\n")
        for name, scores in self.named_score_list:
            print(template.format(name, np.mean(scores), np.median(scores), np.std(scores)))

In [91]:
# Benchmark on the 10-class digits dataset; constructing the object runs the
# experiment and prints the per-classifier accuracy summary.
digit = compare_classifiers(dataset=load_digits(n_class=10))

Random Tree accuracy mean: 0.8406397774687067
Random Tree accuracy median: 0.8414464534075105
Random Tree accuracy stdev: 0.014129231244981898

Bagging accuracy mean: 0.9208623087621696
Bagging accuracy median: 0.9235048678720446
Bagging accuracy stdev: 0.012656467315716274

Random Forest accuracy mean: 0.9376077885952712
Random Forest accuracy median: 0.9381084840055632
Random Forest accuracy stdev: 0.009350659629664393

AdaBoost accuracy mean: 0.8442837273991654
AdaBoost accuracy median: 0.844923504867872
AdaBoost accuracy stdev: 0.024737784096652803

Gradient Boosting accuracy mean: 0.9573574408901251
Gradient Boosting accuracy median: 0.9568845618915159
Gradient Boosting accuracy stdev: 0.006924051898632598



In [92]:
# Same benchmark on the iris dataset; the summary is printed on construction.
iris = compare_classifiers(dataset=load_iris())

Random Tree accuracy mean: 0.9463333333333334
Random Tree accuracy median: 0.95
Random Tree accuracy stdev: 0.02774687169554235

Bagging accuracy mean: 0.9520000000000002
Bagging accuracy median: 0.95
Bagging accuracy stdev: 0.028990419873545198

Random Forest accuracy mean: 0.9493333333333334
Random Forest accuracy median: 0.95
Random Forest accuracy stdev: 0.02707192067232927

AdaBoost accuracy mean: 0.9410000000000001
AdaBoost accuracy median: 0.95
AdaBoost accuracy stdev: 0.022659312532476456

Gradient Boosting accuracy mean: 0.9496666666666667
Gradient Boosting accuracy median: 0.95
Gradient Boosting accuracy stdev: 0.030090603922893342

