## Load Bagging Model Artifact

In [2]:
from pathlib import Path
import joblib
import pandas as pd

# Resolve a BaggingClassifier artifact automatically so the notebook stays portable.
MODEL_DIR = Path("../../../complete_dataset/second_exp_alcohol_dataset/pk_files_saved")
if not MODEL_DIR.exists():
    raise FileNotFoundError(f"Expected model directory missing: {MODEL_DIR}")

bagging_paths = sorted(MODEL_DIR.glob("*bagging*.pkl"))
if not bagging_paths:
    raise FileNotFoundError(
        f"No BaggingClassifier artifacts found in {MODEL_DIR}. "
        "Please train/save the model before running this notebook."
    )

# sorted() makes the pick deterministic; be explicit when more than one artifact matches
# so the reader knows which file the rest of the notebook describes.
BAGGING_MODEL_PATH = bagging_paths[0]
if len(bagging_paths) > 1:
    ignored_artifacts = ", ".join(path.name for path in bagging_paths[1:])
    print(
        f"Multiple Bagging artifacts found; using {BAGGING_MODEL_PATH.name} "
        f"(ignored: {ignored_artifacts})"
    )

# SECURITY NOTE: joblib.load unpickles arbitrary Python objects and can execute code.
# Only load artifacts that come from a trusted training run.
bagging_pipeline = joblib.load(BAGGING_MODEL_PATH)
print(f"Loaded Bagging pipeline from: {BAGGING_MODEL_PATH}")

# Some training runs persist the Bagging estimator inside an imblearn pipeline.
if hasattr(bagging_pipeline, "named_steps"):
    if "classifier" in bagging_pipeline.named_steps:
        bagging_classifier = bagging_pipeline.named_steps["classifier"]
    else:
        # No step is called "classifier": use the final step rather than the pipeline
        # object itself, which never exposes `estimators_`.
        pipeline_steps = getattr(bagging_pipeline, "steps", None)
        bagging_classifier = pipeline_steps[-1][1] if pipeline_steps else bagging_pipeline
else:
    bagging_classifier = bagging_pipeline

# Names recorded on the loaded object (pipeline input names when a pipeline was saved).
pipeline_feature_names = getattr(bagging_pipeline, "feature_names_in_", None)

if not hasattr(bagging_classifier, "estimators_"):
    raise AttributeError("Loaded object does not expose base estimators; expected a BaggingClassifier.")

# Feature names: trust what the model recorded at fit time before guessing from the CSV.
# The CSV header can contain identifier/metadata columns that were never model inputs,
# which would shift every label in the rendered trees.
classifier_feature_names = getattr(bagging_classifier, "feature_names_in_", None)
expected_n_features = getattr(bagging_classifier, "n_features_in_", None)
combined_dataset = Path("../../../complete_dataset/second_exp_alcohol_dataset/combined_retraining_dataset.csv")

if classifier_feature_names is not None:
    feature_names = list(classifier_feature_names)
elif pipeline_feature_names is not None and expected_n_features in (None, len(pipeline_feature_names)):
    # Pipeline input names are only usable when no step changed the column count.
    feature_names = list(pipeline_feature_names)
elif combined_dataset.exists():
    # Recover feature names from the training dataset; nrows=0 reads the header only.
    all_columns = pd.read_csv(combined_dataset, nrows=0).columns.tolist()
    # Drop target columns that should not appear in the tree features.
    target_like = {"influence_encoded_max", "label", "target"}
    feature_names = [col for col in all_columns if col not in target_like]
else:
    feature_names = list(pipeline_feature_names) if pipeline_feature_names is not None else None

if (
    feature_names is not None
    and expected_n_features is not None
    and len(feature_names) != expected_n_features
):
    print(
        f"WARNING: recovered {len(feature_names)} feature names but the classifier was fitted on "
        f"{expected_n_features} features; tree labels may be misaligned."
    )

class_labels = getattr(bagging_classifier, "classes_", [0, 1])
print(f"Detected {len(bagging_classifier.estimators_)} base estimators.")
print(f"Feature names inferred: {feature_names if feature_names is not None else 'unknown'}")
print(f"Class labels: {class_labels}")

Loaded Bagging pipeline from: ..\..\..\complete_dataset\second_exp_alcohol_dataset\pk_files_saved\bagging.pkl
Detected 200 base estimators.
Feature names inferred: ['track_id', 'driver_type', 'totalMeters_mean', 'totalMeters_max', 'totalMeters_min', 'totalMeters_std', 'speed_mean', 'speed_max', 'speed_min', 'speed_std', 'midSpeed_mean', 'midSpeed_max', 'midSpeed_min', 'midSpeed_std', 'latitude_mean', 'latitude_min', 'latitude_max', 'latitude_std', 'longitude_mean', 'longitude_min', 'longitude_max', 'longitude_std', 'height_mean', 'height_max', 'height_min', 'height_std', 'course_mean', 'course_max', 'course_min', 'course_std', 'acceleration_mean', 'acceleration_max', 'acceleration_min', 'acceleration_std', 'deceleration_mean', 'deceleration_max', 'deceleration_min', 'deceleration_std', 'tickTimestamp_mean', 'tickTimestamp_max', 'tickTimestamp_min', 'tickTimestamp_std', 'accelerationX_mean', 'accelerationX_max', 'accelerationX_min', 'accelerationX_std', 'accelerationY_mean', 'accelerati

## Prepare Tree Visualization Helpers

In [5]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree, export_graphviz
from datetime import datetime

# Output locations: rendered PNGs go to FIGURE_DIR, DOT/PDF exports go to DOT_DIR.
FIGURE_DIR = Path("../../../complete_dataset/second_exp_alcohol_dataset/figures")
DOT_DIR = Path("../../../complete_dataset/second_exp_alcohol_dataset/results/tree_structures")
for _output_dir in (FIGURE_DIR, DOT_DIR):
    # Create missing parents too; an already-existing directory is not an error.
    _output_dir.mkdir(parents=True, exist_ok=True)

# The graphviz Python package is optional; record whether it can be imported.
try:
    import graphviz  # noqa: F401
except ImportError:
    GRAPHVIZ_AVAILABLE = False
else:
    GRAPHVIZ_AVAILABLE = True


def _resolve_feature_names(estimator, fallback_names):
    n_features = getattr(estimator, "n_features_in_", None)

    if hasattr(estimator, "feature_names_in_"):
        names = list(estimator.feature_names_in_)
        if n_features is not None and len(names) != n_features:
            names = names[:n_features]
        return names

    if fallback_names is not None:
        names = list(fallback_names)
        if n_features is not None and len(names) != n_features:
            names = names[:n_features]
        return names

    if n_features is not None:
        return [f"feature_{idx}" for idx in range(n_features)]

    raise ValueError("Could not determine feature names for plotting.")


def render_tree(estimator, *, tree_index, feature_names=None, class_names=None,
                output_prefix="bagging_dt_tree", annotate_thresholds=True,
                save_png=True, save_dot=True):
    """Render a single DecisionTreeClassifier and persist a visual snapshot.

    Parameters
    ----------
    estimator : fitted tree estimator
        Passed to ``plot_tree`` and ``export_graphviz``.
    tree_index : int
        Index used in the plot title and, zero-padded to three digits, in file names.
    feature_names : iterable of str, optional
        Fallback names handed to ``_resolve_feature_names``.
    class_names : iterable, optional
        Class labels; each is converted with ``str``. When omitted, ``Class <i>``
        labels are generated, one per class reported by ``estimator.n_classes_``
        (two when that attribute is missing or not a single integer).
    output_prefix : str
        Leading part of every output file name.
    annotate_thresholds : bool
        Add an explanatory caption below the plot.
    save_png : bool
        Save the matplotlib figure to ``FIGURE_DIR``.
    save_dot : bool
        Export a DOT file to ``DOT_DIR`` and, when the graphviz package is
        importable, also try to render a PDF next to it.

    Returns
    -------
    dict
        Maps ``"png"``, ``"dot"`` and ``"pdf"`` to the paths that were written.
    """
    resolved_features = _resolve_feature_names(estimator, feature_names)

    if class_names is not None:
        class_texts = [str(label) for label in class_names]
    else:
        # Size the default labels to the tree instead of assuming a binary problem.
        try:
            n_classes = int(getattr(estimator, "n_classes_", 2))
        except (TypeError, ValueError):
            n_classes = 2
        class_texts = [f"Class {idx}" for idx in range(n_classes)]

    # Timestamped names: every call writes new files rather than overwriting old ones.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{output_prefix}_{tree_index:03d}_{timestamp}"
    outputs = {}

    fig, ax = plt.subplots(figsize=(18, 12), dpi=150)
    try:
        plot_tree(
            estimator,
            feature_names=resolved_features,
            class_names=class_texts,
            filled=True,
            rounded=True,
            impurity=True,
            proportion=True,
            ax=ax
        )
        title = f"Bagging DT Base Estimator #{tree_index}"
        ax.set_title(title, fontsize=16, fontweight="bold")

        if annotate_thresholds:
            ax.text(0.01, -0.05,
                    "Each node shows split feature, threshold, class distribution, and impurity.",
                    transform=ax.transAxes,
                    fontsize=10,
                    ha="left",
                    va="top")

        if save_png:
            png_path = FIGURE_DIR / f"{filename}.png"
            fig.tight_layout()
            fig.savefig(png_path, bbox_inches="tight")
            outputs["png"] = png_path
    finally:
        # Always release the figure, even when plotting or saving raises.
        plt.close(fig)

    if save_dot:
        dot_path = DOT_DIR / f"{filename}.dot"
        export_graphviz(
            estimator,
            out_file=str(dot_path),
            feature_names=resolved_features,
            class_names=class_texts,
            filled=True,
            rounded=True,
            impurity=True,
            proportion=True
        )
        outputs["dot"] = dot_path

        if GRAPHVIZ_AVAILABLE:
            # Best-effort PDF export: a failure is reported and the DOT file is kept.
            try:
                graph = graphviz.Source.from_file(str(dot_path))
                pdf_path = DOT_DIR / f"{filename}.pdf"
                graph.render(filename=str(pdf_path.with_suffix("")), format="pdf", cleanup=True)
                outputs["pdf"] = pdf_path
            except Exception as exc:
                print(f"⚠️  Graphviz PDF export failed: {exc}")
    return outputs

## Render a Single Base Estimator

In [6]:
if not hasattr(bagging_classifier, "estimators_") or not bagging_classifier.estimators_:
    raise ValueError("Bagging classifier does not have fitted base estimators to plot.")

representative_index = 0
representative_tree = bagging_classifier.estimators_[representative_index]

# Bagging can train each tree on a subset of the columns (recorded in
# `estimators_features_`). Select the names of the columns this tree saw, instead of
# letting the first N global names be applied to it. Only remap when the name list
# lines up one-to-one with the classifier's inputs; otherwise pass it through unchanged.
representative_feature_names = feature_names
drawn_features = getattr(bagging_classifier, "estimators_features_", None)
n_bagging_features = getattr(bagging_classifier, "n_features_in_", None)
if (
    feature_names is not None
    and drawn_features is not None
    and n_bagging_features == len(feature_names)
):
    representative_feature_names = [
        feature_names[int(col)] for col in drawn_features[representative_index]
    ]

print(f"Rendering base estimator #{representative_index}…")
outputs_single = render_tree(
    representative_tree,
    tree_index=representative_index,
    feature_names=representative_feature_names,
    class_names=class_labels,
    output_prefix="bagging_dt_single"
)
print("Saved outputs:")
for kind, path in outputs_single.items():
    print(f"  {kind.upper()}: {path}")

Rendering base estimator #0…
Saved outputs:
  PNG: ..\..\..\complete_dataset\second_exp_alcohol_dataset\figures\bagging_dt_single_000_20251028_010022.png
  DOT: ..\..\..\complete_dataset\second_exp_alcohol_dataset\results\tree_structures\bagging_dt_single_000_20251028_010022.dot
  PDF: ..\..\..\complete_dataset\second_exp_alcohol_dataset\results\tree_structures\bagging_dt_single_000_20251028_010022.pdf
Saved outputs:
  PNG: ..\..\..\complete_dataset\second_exp_alcohol_dataset\figures\bagging_dt_single_000_20251028_010022.png
  DOT: ..\..\..\complete_dataset\second_exp_alcohol_dataset\results\tree_structures\bagging_dt_single_000_20251028_010022.dot
  PDF: ..\..\..\complete_dataset\second_exp_alcohol_dataset\results\tree_structures\bagging_dt_single_000_20251028_010022.pdf


## Iterate and Persist Multiple Trees

In [7]:
max_trees_to_render = min(5, len(bagging_classifier.estimators_))
print(f"Batch rendering {max_trees_to_render} base estimators…")

# Bagging can train each tree on its own subset of columns (`estimators_features_`).
# Remap the names per tree, but only when the name list lines up one-to-one with the
# classifier's inputs; otherwise the list is passed through unchanged.
drawn_features_per_tree = getattr(bagging_classifier, "estimators_features_", None)
n_bagging_features = getattr(bagging_classifier, "n_features_in_", None)
can_remap_names = (
    feature_names is not None
    and drawn_features_per_tree is not None
    and n_bagging_features == len(feature_names)
)

batch_records = []
for tree_index in range(max_trees_to_render):
    estimator = bagging_classifier.estimators_[tree_index]

    if can_remap_names:
        tree_feature_names = [feature_names[int(col)] for col in drawn_features_per_tree[tree_index]]
    else:
        tree_feature_names = feature_names

    outputs = render_tree(
        estimator,
        tree_index=tree_index,
        feature_names=tree_feature_names,
        class_names=class_labels,
        output_prefix="bagging_dt_batch"
    )
    record = {
        "tree_index": tree_index,
        "depth": estimator.get_depth(),
        "n_leaves": estimator.get_n_leaves(),
        "outputs": outputs,
    }
    batch_records.append(record)

    # Show just the file name for path-like values, the raw value otherwise.
    saved_summary = ", ".join(f"{k}:{getattr(v, 'name', v)}" for k, v in outputs.items())
    print(f"Tree {tree_index}: depth={record['depth']}, leaves={record['n_leaves']} → {saved_summary}")

print("Finished batch export. Metadata preview:")
for record in batch_records:
    print(record)

Batch rendering 5 base estimators…
Tree 0: depth=7, leaves=19 → png:bagging_dt_batch_000_20251028_010024.png, dot:bagging_dt_batch_000_20251028_010024.dot, pdf:bagging_dt_batch_000_20251028_010024.pdf
Tree 0: depth=7, leaves=19 → png:bagging_dt_batch_000_20251028_010024.png, dot:bagging_dt_batch_000_20251028_010024.dot, pdf:bagging_dt_batch_000_20251028_010024.pdf
Tree 1: depth=7, leaves=16 → png:bagging_dt_batch_001_20251028_010025.png, dot:bagging_dt_batch_001_20251028_010025.dot, pdf:bagging_dt_batch_001_20251028_010025.pdf
Tree 1: depth=7, leaves=16 → png:bagging_dt_batch_001_20251028_010025.png, dot:bagging_dt_batch_001_20251028_010025.dot, pdf:bagging_dt_batch_001_20251028_010025.pdf
Tree 2: depth=6, leaves=18 → png:bagging_dt_batch_002_20251028_010026.png, dot:bagging_dt_batch_002_20251028_010026.dot, pdf:bagging_dt_batch_002_20251028_010026.pdf
Tree 2: depth=6, leaves=18 → png:bagging_dt_batch_002_20251028_010026.png, dot:bagging_dt_batch_002_20251028_010026.dot, pdf:bagging_dt