**Decision Tree Classifier**

Library imports

In [2]:
import os
import sys
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Put the parent of the current working directory at the front of sys.path so
# the project-local `utils` and `configs` packages imported in the next cell
# can be resolved (assumes the notebook is run from a directory directly under
# the repository root -- TODO confirm).
# The guard keeps the cell idempotent: re-running it no longer stacks a
# duplicate entry at the front of sys.path, while sys.path[0] still ends up
# being the project root exactly as before.
_project_root = str(Path.cwd().parent)
if sys.path[:1] != [_project_root]:
    sys.path.insert(0, _project_root)

Project imports (helper functions and dataset path configuration)

In [3]:
from utils.utils import save_experiment, train_and_evaluate_decision_tree
from configs.config_local import DATASET_PATH, ITW_DATASET_PATH, FEATURES_DIR

Training / validation / testing features:
Using mean-aggregated features with N_MFCC = 20, N_FFT = 128, HOP_LENGTH = 256, N_MELS = 128

In [4]:
# Build the path of each split's feature file inside FEATURES_DIR. All three
# filenames carry the same "mean_20_128_256_128" configuration tag.
train_path, val_path, test_path = (
    os.path.join(FEATURES_DIR, filename)
    for filename in (
        "training_features_mean_20_128_256_128.parquet",
        "validation_features_mean_20_128_256_128.parquet",
        "testing_features_mean_20_128_256_128.parquet",
    )
)

Training with default hyperparameters

In [5]:
# Baseline run: no hyperparameter is overridden here, so presumably the
# helper's own defaults apply (verify against utils.utils).
# test_path=None: no test-set file is handed to the helper in this run.
clf, metrics, dt_params, feature_names, metadata_extra = (
    train_and_evaluate_decision_tree(train_path=train_path, val_path=val_path, test_path=None)
)

# Show the run metadata first, then the evaluation metrics, one per line.
print(metadata_extra, metrics, sep="\n")

{'train_samples': 53864, 'val_samples': 10797}
{'accuracy': 0.9623969621191072, 'precision': 0.9624331701550768, 'recall': 0.9623973735478988, 'f1': 0.9623962495603613, 'roc_auc': 0.9800186674339881}


Training with the entropy criterion (all other hyperparameters left at their defaults)

In [6]:
# Same train/validation files as the baseline run; the only argument that
# differs is criterion="entropy".
# NOTE: this rebinds clf, metrics, dt_params, feature_names and metadata_extra,
# so the previous run's objects are no longer reachable under those names.
clf, metrics, dt_params, feature_names, metadata_extra = (
    train_and_evaluate_decision_tree(
        train_path=train_path, val_path=val_path, test_path=None, criterion="entropy"
    )
)

# Show the run metadata first, then the evaluation metrics, one per line.
print(metadata_extra, metrics, sep="\n")

{'train_samples': 53864, 'val_samples': 10797}
{'accuracy': 0.9656386033157358, 'precision': 0.9656390394489773, 'recall': 0.9656385601302122, 'f1': 0.9656385927044688, 'roc_auc': 0.9883596862207614}


Validate using the ITW Dataset

In [7]:

# Feature file of the ITW dataset, located in the "normalized_features"
# sub-directory of ITW_DATASET_PATH.
# NOTE(review): unlike the training feature file, this filename carries no
# "mean" tag -- confirm that the aggregation matches the training features.
itw_val_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_20_128_256_128_trimmed_loudness_normalized.parquet",
)

In [None]:
# Train on the regular training features but pass the ITW feature file as the
# validation set. criterion="gini" is given explicitly here.
# NOTE: this rebinds clf, metrics, dt_params, feature_names and metadata_extra,
# replacing the objects produced by the previous run.
clf, metrics, dt_params, feature_names, metadata_extra = (
    train_and_evaluate_decision_tree(
        train_path=train_path, val_path=itw_val_path, test_path=None, criterion="gini"
    )
)

# Show the run metadata first, then the evaluation metrics, one per line.
print(metadata_extra, metrics, sep="\n")