In [1]:
import functools
import time

import numpy as np
from sklearn import metrics
from sklearn.datasets import load_digits
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
def timeit(method):
    """Decorator that measures how long each call to ``method`` takes.

    The wrapped function returns whatever ``method`` returns. How the
    timing is reported depends on the keyword arguments of the call:

    * If ``log_time`` is passed, it is used as a dict-like object and the
      elapsed time, truncated to whole milliseconds, is stored in it under
      ``log_name`` (default: the upper-cased name of ``method``).
    * Otherwise the elapsed time in milliseconds is printed.

    Note that ``log_time`` and ``log_name`` are forwarded to ``method``
    together with every other argument, so ``method`` must accept them
    (for example through ``**kwargs``) whenever they are used.
    """
    @functools.wraps(method)  # keep the decorated function's __name__/__doc__
    def timed(*args, **kw):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump if the system clock is
        # adjusted while the call is running.
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        elapsed_ms = (te - ts) * 1000
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int(elapsed_ms)
        else:
            print('%r  %2.2f ms' % (method.__name__, elapsed_ms))
        return result
    return timed

In [3]:
# Seed NumPy's global random number generator so that code drawing from it
# produces the same stream when the notebook is run top to bottom.
np.random.seed(8)

In [4]:
# Load the digits dataset bundled with scikit-learn; the cells below read the
# feature matrix from `digits.data` and the labels from `digits.target`.
digits = load_digits()

In [5]:
# Show the dimensions of the feature matrix (rows are samples).
digits.data.shape

(1797, 64)

In [6]:
# Show the length of the label vector; it should match the number of rows
# in `digits.data`.
digits.target.shape

(1797,)

In [7]:
# Number of leading rows used for training; every row after that is held out
# for testing.
train_size = 1400
# Passed as `n_estimators` to both forest classifiers so they are compared
# with the same number of trees.
no_of_estimators = 500

In [8]:
# Ordered hold-out split (no shuffling): the first `train_size` rows form the
# training set and all remaining rows form the test set.
train_x, test_x = digits.data[:train_size], digits.data[train_size:]
train_y, test_y = digits.target[:train_size], digits.target[train_size:]

In [9]:
@timeit
def compute_accuracy(clf, train_x, train_y, test_x, test_y):
    """Fit ``clf`` on the training data and score it on the test data.

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: True labels for ``test_x``.

    Returns:
        The value of ``metrics.accuracy_score`` for the predictions made on
        ``test_x``. Because of the ``timeit`` decorator, the measured time
        covers both fitting and predicting.
    """
    clf.fit(train_x, train_y)
    return metrics.accuracy_score(test_y, clf.predict(test_x))

In [10]:
# Extra-Trees ensemble. random_state is fixed so that re-running this cell on
# its own rebuilds the same forest, instead of depending on how much of
# NumPy's global random stream earlier cells have already consumed.
extra_clf = ExtraTreesClassifier(n_estimators=no_of_estimators, n_jobs=4,
                                 random_state=8)
# Fit on the training split and score on the held-out split (timed).
extra_clf_acc = compute_accuracy(extra_clf, train_x, train_y, test_x, test_y)
print('Extra Tree Forest Accuracy : %.2f' % extra_clf_acc)

'compute_accuracy'  937.16 ms
Extra Tree Forest Accuracy : 0.94


In [11]:
# Random-forest ensemble with the same tree count as the Extra-Trees model.
# random_state is fixed so the result does not depend on which cells ran
# before this one or on how often this cell is re-run.
rf_clf = RandomForestClassifier(n_estimators=no_of_estimators, n_jobs=4,
                                random_state=8)
# Fit on the training split and score on the held-out split (timed).
rf_acc = compute_accuracy(rf_clf, train_x, train_y, test_x, test_y)
print('Random Forest Accuracy : %.2f' % rf_acc)

'compute_accuracy'  1056.30 ms
Random Forest Accuracy : 0.93
