# HHCART(D) Evaluation on Rectangle Dataset
 
This notebook evaluates the HHCART(D) decision tree algorithm on the 2D Rectangle dataset with 5% label noise. It demonstrates the full pipeline: dataset loading, model training, inspection, and performance visualisation. Structural regularisation parameters are used to reduce over-fragmentation of the input space.

In [1]:
from HHCART_SD import HHCartD, load_model
from src.load_shapes import load_shape_dataset

In [2]:
# Pick one dataset by name from the collection returned by the loader,
# then unpack the selected pair into X and y.
dataset_name = "rectangle_2d_label_noise_005"
selected_pair = load_shape_dataset(folder_name="shapes")[dataset_name]
X, y = selected_pair


Loaded 32 paired datasets: ['barbell_2d_label_noise_000', 'barbell_3d_label_noise_000', 'radial_segment_2d_label_noise_000', 'radial_segment_3d_label_noise_000', 'rectangle_2d_label_noise_000', 'saddle_3d_label_noise_000', 'sine_wave_2d_label_noise_000', 'star_2d_label_noise_000', 'barbell_2d_label_noise_003', 'barbell_3d_label_noise_003', 'radial_segment_2d_label_noise_003', 'radial_segment_3d_label_noise_003', 'rectangle_2d_label_noise_003', 'saddle_3d_label_noise_003', 'sine_wave_2d_label_noise_003', 'star_2d_label_noise_003', 'barbell_2d_label_noise_005', 'barbell_3d_label_noise_005', 'radial_segment_2d_label_noise_005', 'radial_segment_3d_label_noise_005', 'rectangle_2d_label_noise_005', 'saddle_3d_label_noise_005', 'sine_wave_2d_label_noise_005', 'star_2d_label_noise_005', 'barbell_2d_label_noise_007', 'barbell_3d_label_noise_007', 'radial_segment_2d_label_noise_007', 'radial_segment_3d_label_noise_007', 'rectangle_2d_label_noise_007', 'saddle_3d_label_noise_007', 'sine_wave_2d_

In [None]:
# Tree parameters passed to HHCartD below.
max_depth = 8
min_purity = 0.8
mass_min = 0.05

# Assemble the model name from the dataset and parameters; dots in the
# float values are replaced with underscores (0.05 -> "0_05").
purity_tag = str(min_purity).replace(".", "_")
mass_tag = str(mass_min).replace(".", "_")
model_name = f"{dataset_name}_max_depth_{max_depth}_pur_{purity_tag}_mass_{mass_tag}"

# Instantiate HHCART, then build and save the tree under that name.
# NOTE: the recorded progress output of this cell estimates several hours for a full build.
hh = HHCartD(X, y, min_purity=min_purity, mass_min=mass_min, max_depth=max_depth)
hh.build_tree(model_name)

[INFO] Building HHCartD oblique decision tree...
[INFO] Max number of nodes allowed by maximum depth constraint: 511 (used as progress bar target; actual number of splits unknown in advance).


Building tree nodes:   1%|          | 4/511 [01:44<3:31:19, 25.01s/it]

In [None]:
# Reload the tree under the same name the training cell passes to build_tree().
# The name is derived from dataset_name, max_depth, min_purity and mass_min
# (defined in the cells above) instead of being hardcoded: the earlier literal
# contained "pur_0_85" while min_purity is 0.8, so it referred to a different
# model than the one this notebook builds.
purity_tag = str(min_purity).replace(".", "_")
mass_tag = str(mass_min).replace(".", "_")
hh = load_model(f"{dataset_name}_max_depth_{max_depth}_pur_{purity_tag}_mass_{mass_tag}")

In [None]:
# Select depth 3, then inspect the model.
# NOTE(review): presumably select() fixes which pruned tree inspect() reports on —
# confirm the semantics of both methods in HHCART_SD.
hh.select(depth=3)
hh.inspect()

In [None]:
# Plot the tree structure with depth=5 and save=True.
# NOTE(review): presumably `depth` limits the drawn levels and `save` writes the figure to disk — confirm in HHCART_SD.
hh.plot_tree_structure(depth=5, save=True)

In [None]:
# Plot metrics against tree structure using the method's default x-axis.
# NOTE(review): which metrics and which default axis are defined in HHCART_SD — confirm there.
hh.plot_metrics_vs_structure(save=True)

In [None]:
# Same metrics plot, with the x-axis set to "class1_leaf_count".
# NOTE(review): presumably the number of leaves predicting class 1 — confirm in HHCART_SD.
hh.plot_metrics_vs_structure(save=True, x_axis="class1_leaf_count")

In [None]:
# Plot the trade-off path with the method's default colouring.
# NOTE(review): the quantities being traded off are defined in HHCART_SD — confirm there.
hh.plot_tradeoff_path(save=True)

In [None]:
# Same trade-off path plot, coloured by "class1_leaf_count".
hh.plot_tradeoff_path(save=True, color_by="class1_leaf_count")

In [None]:
# Plot the node size distribution.
# NOTE(review): presumably "size" is the number of samples per node — confirm in HHCART_SD.
hh.plot_node_size_distribution(save=True)

In [None]:
# Plot the 2D splits as a grid.
# NOTE(review): presumably one panel per depth — confirm in HHCART_SD.
hh.plot_splits_2d_grid(save=True)

In [None]:
# Plot the 2D splits as an overlay.
# NOTE(review): presumably all splits drawn on a single axes — confirm in HHCART_SD.
hh.plot_splits_2d_overlay(save=True)

In [None]:
# Plot the 2D regions as a grid.
# NOTE(review): presumably the leaf decision regions, one panel per depth — confirm in HHCART_SD.
hh.plot_regions_2d_grid(save=True)