# Import Libraries

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV # from sklearn import model_selection, grid_search
from sklearn.ensemble import ExtraTreesClassifier

In [3]:
from utilities import visualize_classifier

# Load input data

In [4]:
# Read the comma-separated dataset into a 2-D array: the last column is
# used as the class label, every column before it as a feature.
input_file = 'data_random_forests.txt'
data = np.loadtxt(input_file, delimiter=',')
X = data[:, :-1]
y = data[:, -1]

# Separate input data into three classes based on labels

In [5]:
# Collect the feature rows belonging to each of the three labels (0, 1, 2)
# into their own arrays.
class_0, class_1, class_2 = (
    np.array(X[y == label]) for label in (0, 1, 2)
)

# Split the data into training and testing datasets

In [11]:
# Hold out 25% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=5,
)

# Define the parameter grid 

In [7]:
# Two independent sub-grids, each varying a single hyperparameter while the
# other is held fixed:
#   1. tree depth, with the ensemble size fixed at 100 trees
#   2. ensemble size, with the tree depth fixed at 4
parameter_grid = [
    {
        'n_estimators': [100],
        'max_depth': [2, 4, 7, 12, 16],
    },
    {
        'n_estimators': [25, 50, 100, 250],
        'max_depth': [4],
    },
]

In [8]:
# Scoring names passed to GridSearchCV; one full search is run per entry.
metrics = [
    'precision_weighted',
    'recall_weighted',
]

In [15]:
# Run one exhaustive grid search per scoring metric, print the score of
# every candidate, and report how the selected model does on the test set.
for metric in metrics:
    print("\n##### Searching optimal parameters for", metric)

    # 5-fold cross-validation over every combination in parameter_grid;
    # random_state=0 pins the randomness of the extra-trees ensemble.
    classifier = model_selection.GridSearchCV(
        estimator=ExtraTreesClassifier(random_state=0),
        param_grid=parameter_grid,
        scoring=metric,
        cv=5,
    )
    classifier.fit(X_train, y_train)

    # Mean cross-validated score of each candidate, paired with its settings.
    print("\nGrid scores for the parameter grid:")
    cv_results = classifier.cv_results_
    for mean, params in zip(cv_results['mean_test_score'], cv_results['params']):
        print(f"{mean:.3f} for {params}")
    print("\nBest parameters:", classifier.best_params_)

    # With the default refit=True, predict() uses the best estimator refit
    # on all of X_train.
    y_pred = classifier.predict(X_test)
    print("\nPerformance report:\n")
    print(classification_report(y_test, y_pred))


##### Searching optimal parameters for precision_weighted

Grid scores for the parameter grid:
0.850 for {'max_depth': 2, 'n_estimators': 100}
0.841 for {'max_depth': 4, 'n_estimators': 100}
0.844 for {'max_depth': 7, 'n_estimators': 100}
0.832 for {'max_depth': 12, 'n_estimators': 100}
0.816 for {'max_depth': 16, 'n_estimators': 100}
0.846 for {'max_depth': 4, 'n_estimators': 25}
0.840 for {'max_depth': 4, 'n_estimators': 50}
0.841 for {'max_depth': 4, 'n_estimators': 100}
0.845 for {'max_depth': 4, 'n_estimators': 250}

Best parameters: {'max_depth': 2, 'n_estimators': 100}

Performance report:

              precision    recall  f1-score   support

         0.0       0.94      0.81      0.87        79
         1.0       0.81      0.86      0.83        70
         2.0       0.83      0.91      0.87        76

    accuracy                           0.86       225
   macro avg       0.86      0.86      0.86       225
weighted avg       0.86      0.86      0.86       225


##### Search