In [1]:
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [18]:
with open('../data/em1/all.json') as data_file:    
    em1 = json.load(data_file)
with open('../data/sl1/all.json') as data_file:    
    sl1 = json.load(data_file)
with open('../data/si1/all.json') as data_file:    
    si1 = json.load(data_file)
with open('../data/st1/all.json') as data_file:    
    st1 = json.load(data_file)
    
with open('../data/em2/all.json') as data_file:    
    em2 = json.load(data_file)
with open('../data/sl2/all.json') as data_file:    
    sl2 = json.load(data_file)
with open('../data/si2/all.json') as data_file:    
    si2 = json.load(data_file)
with open('../data/st2/all.json') as data_file:    
    st2 = json.load(data_file)
    
with open('../data/em3/all.json') as data_file:    
    em3 = json.load(data_file)
with open('../data/sl3/all.json') as data_file:    
    sl3 = json.load(data_file)
with open('../data/si3/all.json') as data_file:    
    si3 = json.load(data_file)
with open('../data/st3/all.json') as data_file:    
    st3 = json.load(data_file)

In [19]:
data = np.concatenate((em1, sl1, si1, st1, em2, sl2, si2, st2, em3, sl3, si3, st3), axis=0)

In [20]:
np.set_printoptions(threshold=np.nan)
np.set_printoptions(precision=2,suppress=True)
print (data)

[[ -13.94   -4.78  -14.3   -18.99  -22.57  -17.87  -15.2   -11.62   -6.93
    -4.     -5.19   -2.88    0.     10.      9.      0.  ]
 [ -16.41   -4.58  -12.06  -15.64  -18.65  -15.08  -11.28   -8.27   -4.69
    -3.44   -5.19   -2.88    0.     10.      9.      0.  ]
 [ -16.41   -6.48  -17.22  -24.59  -32.74  -26.19  -22.15  -15.62   -8.25
    -1.51   -5.35   -0.67    0.     10.      9.      0.  ]
 [ -13.94   -4.22  -11.52  -15.37  -18.65  -16.45  -13.74  -10.45   -6.6
    -0.89   -2.67   -0.33    0.     10.      9.      0.  ]
 [ -13.94   -4.78  -13.21  -17.63  -20.93  -16.51  -14.38  -11.07   -6.66
    -1.79   -5.35   -0.67    0.     10.      9.      0.  ]
 [ -10.1    -3.07   -8.07  -11.97  -14.19  -11.11  -14.19  -11.97   -8.07
    -2.7    -7.82   -1.02    0.     10.      9.      0.  ]
 [ -19.09   -5.47  -14.3   -18.99  -22.57  -17.87  -15.2   -11.62   -6.93
    -1.12    0.      0.      0.     10.      9.      0.  ]
 [ -16.41   -5.14  -13.21  -17.63  -20.93  -16.51  -14.38  -11.07   -6

In [21]:
X = data[:, :-1]

In [22]:
y = data[:, -1]

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Random Forest Classifier 

In [40]:
rfc = RandomForestClassifier()
parameters = {'n_estimators':[80, 90, 100, 110, 120], 'max_features': (None, 'auto', 'log2'), 'random_state': [42], 'n_jobs': [-1], 'min_samples_leaf': [1, 2, 3, 4]}
clf = GridSearchCV(rfc, parameters)
clf.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_estimators': [80, 90, 100, 110, 120], 'max_features': (None, 'auto', 'log2'), 'random_state': [42], 'n_jobs': [-1], 'min_samples_leaf': [1, 2, 3, 4]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

# Model Evaluation Metrics

In [58]:
y_predrfc = clf.best_estimator_.predict(X_test)

In [59]:
print("RFC :", accuracy_score(y_test, y_predrfc))

('RFC :', 0.8666666666666667)


In [60]:
confusion_matrix(y_test, y_predrfc)

array([[30,  0,  0,  1],
       [ 2, 22,  3,  1],
       [ 1,  0, 27,  1],
       [ 3,  0,  4, 25]])

In [61]:
clf.best_estimator_.feature_importances_ 

array([ 0.18,  0.1 ,  0.07,  0.1 ,  0.11,  0.05,  0.07,  0.05,  0.06,
        0.06,  0.09,  0.05,  0.  ,  0.01,  0.01])

In [62]:
clf.best_params_

{'max_features': 'auto',
 'min_samples_leaf': 1,
 'n_estimators': 110,
 'n_jobs': -1,
 'random_state': 42}

# Transpile Model

In [63]:
from sklearn_porter import Porter

In [64]:
porter = Porter(clf.best_estimator_, language='js')
output = porter.export()
print(output)

var Brain = function(atts) {

    var predict_000 = function(atts) {
        var classes = new Array(4);
        
        if (atts[7] <= -58.114341735839844) {
            if (atts[11] <= -196.67904663085937) {
                if (atts[4] <= -252.12141418457031) {
                    if (atts[9] <= -470.66082763671875) {
                        classes[0] = 0; 
                        classes[1] = 0; 
                        classes[2] = 2; 
                        classes[3] = 0; 
                    } else {
                        if (atts[2] <= -187.76922607421875) {
                            classes[0] = 0; 
                            classes[1] = 0; 
                            classes[2] = 1; 
                            classes[3] = 0; 
                        } else {
                            classes[0] = 0; 
                            classes[1] = 0; 
                            classes[2] = 0; 
                            classes[3] = 1; 
                        }
   