In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import _tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
import numpy as np
import json
import dump

In [27]:
def tree_to_json(tree):
    """Convert a fitted decision tree into a nested, JSON-serializable dict.

    Parameters
    ----------
    tree : object
        A fitted estimator exposing a ``tree_`` attribute with the parallel
        per-node arrays ``feature``, ``threshold``, ``children_left``,
        ``children_right`` and ``value``.

    Returns
    -------
    dict
        For a split node::

            {'type': 'split',
             'threshold': '<feature index> <= <threshold>',
             'left': <subtree>, 'right': <subtree>}

        For a leaf node::

            {'type': 'leaf', 'value': tree_.value[node].tolist()}

    Notes
    -----
    The feature index of each split is formatted directly, so the function
    works for any number of input features (it is not capped at 30).
    """
    tree_ = tree.tree_

    def recurse(node):
        feature = tree_.feature[node]
        # Leaves carry the TREE_UNDEFINED sentinel instead of a feature index.
        if feature == _tree.TREE_UNDEFINED:
            return {
                'type': 'leaf',
                'value': tree_.value[node].tolist(),
            }
        # int(...) gives a plain integer label regardless of the array dtype.
        return {
            'type': 'split',
            'threshold': "{} <= {}".format(int(feature), tree_.threshold[node]),
            'left': recurse(tree_.children_left[node]),
            'right': recurse(tree_.children_right[node]),
        }

    # Node 0 is the root of the tree.
    return recurse(0)
def forest_to_json(forest):
    """Summarize a fitted forest classifier as a JSON-serializable dict.

    Reads ``n_classes_``, ``classes_`` (converted with ``.tolist()``),
    ``n_outputs_``, ``n_estimators`` and ``estimators_`` from ``forest``.
    Each member of ``estimators_`` is converted with ``tree_to_json``.

    Returns a dict with the keys ``'n_classes'``, ``'classes'``,
    ``'n_outputs'``, ``'n_estimators'`` and ``'estimators'``.
    """
    # Note: the forest's feature count is not included in the export.
    summary = {
        'n_classes': forest.n_classes_,
        'classes': forest.classes_.tolist(),
        'n_outputs': forest.n_outputs_,
        'n_estimators': forest.n_estimators,
    }

    # Serialize the member trees one by one, preserving their order.
    serialized_trees = []
    for member in forest.estimators_:
        serialized_trees.append(tree_to_json(member))
    summary['estimators'] = serialized_trees

    return summary

In [28]:
# Load the training features and labels from .npy files (paths are relative
# to the notebook's working directory), then print both array shapes.
X_train = np.load('../dataset/X_train.npy')
y_train = np.load('../dataset/y_train.npy')
print('X_train:{0}, y_train:{1}'.format(X_train.shape, y_train.shape))

X_train:(7738, 17), y_train:(7738,)


In [29]:
# A fixed random_state makes the forest deterministic, so the cross-validation
# score below, the later test accuracy, and the exported model are repeatable
# under Restart Kernel -> Run All.
clf = RandomForestClassifier(random_state=0)
# Report the mean score over 10 cross-validation folds of the training data.
print('Cross Validation Score: {0}'.format(np.mean(cross_val_score(clf, X_train, y_train, cv=10))))

Cross Validation Score: 0.947401145241032


In [30]:
# Fit the classifier on the whole training set; this fitted `clf` is what the
# later cells evaluate and export.
clf.fit(X_train, y_train)

In [31]:
# Load the test features and labels from .npy files.
X_test = np.load('../dataset/X_test.npy')
y_test = np.load('../dataset/y_test.npy')

In [32]:
# Predict labels for the test features and print the accuracy against y_test.
pred = clf.predict(X_test)
print('Accuracy: {}'.format(accuracy_score(y_test, pred)))

Accuracy: 0.9469400060295448


In [33]:
# Convert the fitted forest first, so a failure during conversion cannot
# truncate an existing classifier.json.
classifier_json = forest_to_json(clf)
# The context manager guarantees the file is flushed and closed, even if
# json.dump raises part-way through.
with open('../../static/classifier.json', 'w') as classifier_file:
    json.dump(classifier_json, classifier_file)