In [38]:
from datasets.mushrooms import MushroomDataset
from tree import DecisionTreeClassifier
from random_forest import RandomForestClassifier, TournamentRandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay,f1_score
import math, time

# Load dataset

In [5]:
# Path to the raw data file, relative to the notebook's working directory.
path = "../data/mushroom/agaricus-lepiota.data"

# Wrap the file in the project dataset class and run its cleaning step
# before any splitting happens.
dataset = MushroomDataset(path=path)
dataset.clean()

# test_size=0.2 and random_state=42 are forwarded to the project's split();
# presumably a 20% hold-out with a fixed seed — confirm in MushroomDataset.split.
splits = dataset.split(test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = splits

In [6]:
# Square root of the number of columns in X_train, rounded to the nearest
# integer; used below as the single candidate for "max_features".
n_columns = X_train.shape[1]
n_features = round(math.sqrt(n_columns))

# Random forest classifier

In [48]:
# Candidate hyper-parameter values passed to grid_search for the random forest.
# "max_features" has a single candidate: the n_features value computed above.
params_matrix = dict(
    n_trees=[100, 200, 300],
    max_depth=[3, 5, 7],
    max_features=[n_features],
)

In [49]:
# Reload utils.experiments before importing grid_search so that edits made to
# the module during this kernel session are picked up.
import importlib
from utils import experiments

importlib.reload(experiments)
from utils.experiments import grid_search

# grid_search returns a (best parameters, score) pair for the given estimator
# class, data splits and scoring function.
search_result = grid_search(
    params_matrix,
    RandomForestClassifier,
    X_train,
    X_val,
    y_train,
    y_val,
    accuracy_score,
)
best_params, score = search_result

print(f"Best params: {best_params}")
print(f"Accuracy: {score}")


INFO:root:DecisionTreeClassifier(max_depth=3) created


INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=21, split_val=1.0, depth=2) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=4, split_val=4.25, depth=2) created
INFO:root:Node(split_feature=8, split_val=7.0, depth=1) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=0, split_val=1.25, depth=2) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=None, split_val=None, depth=3) created
INFO:root:Node(split_feature=20, split_val=1.25, depth=2) created
INFO:root:Node(split_feature=10, split_val=2.25, depth=1) created
INFO:root:Node(split_feature=11, split_val=1.0, depth=0) created


In [14]:
# Refit a random forest with the best parameters returned by the grid search
# and score it on the validation split.
rf = RandomForestClassifier(**best_params)
rf.fit(X_train, y_train)

# predict() is called once per validation row, matching how this notebook
# uses the project's classifier API.
y_preds = [rf.predict(x) for x in X_val]

accuracy = accuracy_score(y_val, y_preds)

print("Random forest:\n")
print("Num of test samples: " + str(len(X_val)))
print(f"Accuracy: {accuracy}")

# Constructing ConfusionMatrixDisplay alone draws nothing; .plot() renders the
# figure and returns the display object, so conf_matix keeps its type.
conf_matix = ConfusionMatrixDisplay(confusion_matrix(y_val, y_preds)).plot()

Random forest:

Num of test samples: 1625
Accuracy: 0.808


# Tournament random forest classifier

In [None]:
# Candidate hyper-parameter values passed to grid_search for the tournament
# random forest. "max_features" has a single candidate: n_features.
params_matrix = dict(
    n_trees=[100, 200, 300],
    max_depth=[3, 5, 7],
    max_features=[n_features],
)

In [None]:
# Reload utils.experiments before importing grid_search so that edits made to
# the module during this kernel session are picked up.
import importlib
from utils import experiments

importlib.reload(experiments)
from utils.experiments import grid_search

# Same search as for the plain random forest, but over the tournament variant.
# Note that best_params and score are overwritten here.
search_result = grid_search(
    params_matrix,
    TournamentRandomForestClassifier,
    X_train,
    X_val,
    y_train,
    y_val,
    accuracy_score,
)
best_params, score = search_result

print(f"Best params: {best_params}")
print(f"Accuracy: {score}")

In [None]:
# Refit a tournament random forest with the best parameters returned by the
# preceding grid search and score it on the validation split.
trf = TournamentRandomForestClassifier(**best_params)
trf.fit(X_train, y_train)

# Predict with the model fitted above (trf). The earlier version called
# rf.predict here, which silently re-scored the plain random forest.
# NOTE(review): assumes trf.predict takes one sample at a time like rf.predict — confirm.
y_preds = [trf.predict(x) for x in X_val]

accuracy = accuracy_score(y_val, y_preds)

# Label the report with the model actually being evaluated.
print("Tournament random forest:\n")
print("Num of test samples: " + str(len(X_val)))
print(f"Accuracy: {accuracy}")

# Constructing ConfusionMatrixDisplay alone draws nothing; .plot() renders the
# figure and returns the display object, so conf_matix keeps its type.
conf_matix = ConfusionMatrixDisplay(confusion_matrix(y_val, y_preds)).plot()