**Random Forest Classifier**

Library imports

In [None]:
import os
import sys
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

# Put the parent of the current working directory on the import path so the
# project-local packages imported further down (utils, configs) can be resolved.
# NOTE(review): assumes the notebook is launched from a direct subfolder of the
# project root — confirm.
# The membership check keeps the cell idempotent: re-running it no longer stacks
# duplicate entries at the front of sys.path.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

Project imports (helper functions and dataset path configuration)

In [None]:
from utils.utils import save_experiment, train_and_evaluate_random_forest, grid_search_joblib
from configs.config_local import DATASET_PATH, ITW_DATASET_PATH, FEATURES_DIR

Training / Validation / Test features:
Using mean-aggregated features extracted with N_MFCC = 20, N_FFT = 128, HOP_LENGTH = 256, N_MELS = 128

In [None]:
# Mean-aggregated feature tables, one parquet file per split, all located in FEATURES_DIR.
train_path, val_path, test_path = (
    os.path.join(FEATURES_DIR, parquet_name)
    for parquet_name in (
        "training_features_mean_20_128_256_128.parquet",
        "validation_features_mean_20_128_256_128.parquet",
        "testing_features_mean_20_128_256_128.parquet",
    )
)

Training with default hyperparameters (scored first on the validation features, then on the test features)

In [None]:
# Baseline run: no rf_params or criterion override is passed, the validation
# parquet is supplied as val_path, and no test set is given.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    train_path=train_path, val_path=val_path, test_path=None
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

In [None]:
# Same baseline configuration, but the testing parquet is handed over through
# the val_path slot while test_path stays None.
# NOTE(review): the weighted-class cell further down passes this same file as
# test_path instead — confirm which slot the helper expects for test-set scoring.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    train_path=train_path, val_path=test_path, test_path=None
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

Training with default hyperparameters, using the entropy split criterion

In [None]:
# Baseline run on the validation parquet with the split criterion switched to
# "entropy"; no other forest settings are overridden.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    criterion="entropy",
    train_path=train_path,
    val_path=val_path,
    test_path=None,
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

Training with the fake class (label 1) weighted 5:1 against label 0, scored on the test features

In [None]:
# Explicit forest settings: depth-limited trees, a fixed seed, and class 1
# weighted five times as heavily as class 0.
params = dict(
    n_estimators=100,
    max_depth=11,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features="sqrt",
    random_state=42,
    class_weight={0: 1, 1: 5},
)

# Here the testing parquet goes in through test_path and no validation set is given.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    train_path=train_path,
    test_path=test_path,
    val_path=None,
    rf_params=params,
    criterion="gini",
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

**Validate using the ITW Dataset**

In [None]:
# ITW feature table; per its file name, the trimmed and loudness-normalized variant.
itw_val_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_20_128_256_128_trimmed_loudness_normalized.parquet",
)

In [None]:
# Train on the regular training parquet and supply the ITW feature table as
# val_path; only the criterion ("gini") is set explicitly.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    criterion="gini",
    train_path=train_path,
    val_path=itw_val_path,
    test_path=None,
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

Validation using weighted fake class on ITW

In [None]:
# Explicit forest settings for the ITW check: max_depth is 13 here, and class 1
# is weighted five times as heavily as class 0.
params = dict(
    n_estimators=100,
    max_depth=13,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features="sqrt",
    random_state=42,
    class_weight={0: 1, 1: 5},
)

# The ITW feature table is supplied as val_path; no test set is given.
clf, metrics, rf_params, feature_names, metadata_extra = train_and_evaluate_random_forest(
    train_path=train_path,
    val_path=itw_val_path,
    test_path=None,
    rf_params=params,
    criterion="gini",
)

# Show the run metadata first, then the metrics, one per line.
print(metadata_extra, metrics, sep="\n")

**Hyperparameter Grid Search**

In [None]:
# Seeded base estimator handed to the grid-search helper.
model = RandomForestClassifier(random_state=42)

# Candidate values per hyperparameter. The full Cartesian grid has
# 3 * 4 * 3 * 3 * 2 * 2 * 2 = 864 combinations.
params = dict(
    n_estimators=[50, 100, 200],
    max_depth=[8, 10, 12, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    class_weight=[{0: 1, 1: 5}, None],
    criterion=["gini", "entropy"],
    max_features=["sqrt", "log2"],
)

# All three splits are passed in; candidates are scored with macro-averaged F1
# and n_jobs=-1 is forwarded to the helper.
final_model, test_metrics, val_metrics, best_params, val_results, metadata, feature_names = (
    grid_search_joblib(
        model=model,
        param_grid=params,
        scoring="f1_macro",
        train_path=train_path,
        val_path=val_path,
        test_path=test_path,
        n_jobs=-1,
        verbose=1,
    )
)

In [None]:
# Persist the grid-search outcome: the final model and its test metrics go in
# positionally, with the best parameters, feature names, run metadata and
# per-candidate validation results attached as keyword arguments.
# The target directory is relative ("experiments/RF_grid").
save_experiment(
    final_model,
    test_metrics,
    model_params=best_params,
    val_results=val_results,
    feature_names=feature_names,
    metadata_extra=metadata,
    experiment_dir=os.path.join("experiments", "RF_grid"),
)