In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn import cross_validation, grid_search
from sklearn.ensemble import ExtraTreesClassifier
from utilities import visualize_classifier



In [2]:
input_file = 'data_random_forests.txt'
data = np.loadtxt('./data/' + input_file, delimiter = ',')
X, y = data[:, :-1], data[:, -1]

In [3]:
class_0 = np.array(X[y == 0])
class_1 = np.array(X[y == 1])
class_2 = np.array(X[y == 2])

In [4]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size = 0.25, random_state = 5)

In [5]:
parameter_grid = [ {'n_estimators' : [100], 'max_depth' : [2, 4, 7, 12, 16]},
                    {'max_depth' : [4], 'n_estimators' : [25, 50, 100, 250]}]

In [6]:
metrics = ['precision_weighted', 'recall_weighted']

In [7]:
for metric in metrics:
    print('#### Searching optimal parameters for', metric)
    
    classifier = grid_search.GridSearchCV(ExtraTreesClassifier(random_state = 0),
                                         parameter_grid, cv = 5, scoring = metric)
    classifier.fit(X_train, y_train)

#### Searching optimal parameters for precision_weighted
#### Searching optimal parameters for recall_weighted


In [8]:
print('Grid scores for the parameters grid :')
for params, avg_score, _ in classifier.grid_scores_:
    print(params, '--->', round(avg_score, 3))

print('Best parameters : ', classifier.best_params_)

Grid scores for the parameters grid :
{'max_depth': 2, 'n_estimators': 100} ---> 0.84
{'max_depth': 4, 'n_estimators': 100} ---> 0.837
{'max_depth': 7, 'n_estimators': 100} ---> 0.841
{'max_depth': 12, 'n_estimators': 100} ---> 0.834
{'max_depth': 16, 'n_estimators': 100} ---> 0.816
{'max_depth': 4, 'n_estimators': 25} ---> 0.843
{'max_depth': 4, 'n_estimators': 50} ---> 0.836
{'max_depth': 4, 'n_estimators': 100} ---> 0.837
{'max_depth': 4, 'n_estimators': 250} ---> 0.841
Best parameters :  {'max_depth': 4, 'n_estimators': 25}


In [9]:
y_pred = classifier.predict(X_test)
print('Performance report :' )
print(classification_report(y_test, y_pred))

Performance report :
             precision    recall  f1-score   support

        0.0       0.93      0.84      0.88        79
        1.0       0.85      0.86      0.85        70
        2.0       0.84      0.92      0.88        76

avg / total       0.87      0.87      0.87       225

