Run with the following conda environment: `../../conda_envs/training_env`

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import average_precision_score
from sklearn.tree import plot_tree 
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectFromModel, VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer
from sklearn import tree

import graphviz
from IPython.display import display

from trustee import ClassificationTrustee
from trustee.report.trust import TrustReport

import matplotlib.pyplot as plt
import pandas as pd
import os
import sys
import pickle
from copy import deepcopy
import numpy as np
from multiprocess import Pool
from functools import partial

module_path = os.path.abspath(os.path.join('../../models/'))
if module_path not in sys.path:
    sys.path.append(module_path)
from utils.helpers import get_X_y_binary
from training.voting_rf_classifier import VotingRandomForestClassifier
from utils.model_sizing import size, get_tree, get_tree_estimator, get_subforest_estimators
from utils.model_sizing import get_best_trees_idx
from utils.plots import plot_tree_thr

In [None]:
# Relative locations of the stored results and of the input data.
# Both end with "/" and are later interpolated as f"{folder}/...", which yields a harmless "//".
results_folder = "../../results/"
data_folder = "../../data/"

In [None]:
# --- Run control -------------------------------------------------------------
SEED = 42
force_redo = True  # truthy: refit the explainers instead of loading cached pickles

# --- Expert model and cache locations ----------------------------------------
n_pk = 5
model_path = f"{results_folder}/5_pk/tcp_udp/initial5min_none_0dryrun_caida/cl_pipeline_134500.pkl"
# Explainer caches live next to the model: same path, ".pkl" swapped for a suffix.
trustee_perf_path = f"{os.path.splitext(model_path)[0]}_trustee_perf.pkl"
trustee_fidelity_path = f"{os.path.splitext(model_path)[0]}_trustee_fidelity.pkl"

# --- Trustee hyper-parameters ------------------------------------------------
top_k = 3
max_impurity = 0.8
num_iter = 50
num_stability_iter = 10

# --- Decision cutoff on the positive-class probability -----------------------
# proba_thr = 0.04296875
proba_thr = 0.00048828125 # From log.txt

In [None]:
# Training traces: five consecutive captures, identified by their timestamp.
traces_train = [
    f"{data_folder}/caida/preprocessed_5-20pk_tcpudpicmp/{stamp}_tcp_udp.csv"
    for stamp in ("130000", "130100", "130200", "130300", "130400")
]
# Test trace: a single capture not used for training.
traces_test = [f"{data_folder}/caida/preprocessed_5-20pk_tcpudpicmp/135000_tcp_udp.csv"]

# One feature name per line; trailing whitespace (including the newline) is dropped.
with open("../../models/training/params/feature_names_5pk.txt") as f:
    features = list(map(str.rstrip, f))

# Load data

In [None]:
def get_data(train, features, n_pk):
    """Load trace files and return ``(X_df, y)``.

    ``train`` is the list of trace files handed to ``get_X_y_binary`` and
    ``n_pk`` is forwarded as its ``min_packets`` argument. The returned
    ``X_df`` is a DataFrame whose columns are named after ``features``;
    ``y`` is returned exactly as produced by ``get_X_y_binary``.
    """
    loader_kwargs = dict(
        filenames=train,
        feature_names=features,
        min_packets=n_pk,
        percentile=99,
        verbose=True,
    )
    X_preproc, y = get_X_y_binary(**loader_kwargs)

    # Wrap the preprocessed matrix so that columns carry the feature names.
    return pd.DataFrame(data=X_preproc, columns=features), y
    
# Same feature set and minimum packet count for the train and test splits.
X_train, y_train = get_data(train=traces_train, features=features, n_pk=n_pk)
X_test, y_test = get_data(train=traces_test, features=features, n_pk=n_pk)

# Load model

In [None]:
# NOTE: unpickling can execute arbitrary code; only load model files you produced yourself.
with open(model_path, "rb") as f:
    model = pickle.Unpickler(f).load()

# Bare expression so the notebook renders the loaded pipeline.
model

# Evaluate

In [None]:
# Average precision of the expert model on the test trace,
# computed from its positive-class (column 1) probabilities.
model_ap = average_precision_score(
    y_test,
    model.predict_proba(X_test)[:, 1],
)
print(f"{model_ap=}")

# Trustee

In [None]:
# Create our own Trustee, in order to tune pruning (max_impurity) and fitting (average_precision, proba_thresh)

from trustee.main import _check_if_trained
from trustee.utils.tree import top_k_prune
from trustee.utils.dataset import convert_to_df, convert_to_series
from sklearn.metrics import f1_score

class MyClassificationTrustee(ClassificationTrustee):
    """ClassificationTrustee with tunable pruning, scoring and probability thresholding.

    On top of the parent class, this subclass lets the caller:

    - choose the scoring function used for rewards and agreement
      (``scoring_func`` constructor argument, ``f1_score`` when omitted);
    - tune ``max_impurity`` of the top-k pruning applied to the students;
    - compare models on positive-class probabilities (``predict_proba``),
      optionally binarized with a fixed cutoff (``proba_thr``).
    """

    def __init__(self, **kwargs):
        """
        Parameters
        ----------
        scoring_func: callable, optional
            Called as ``scoring_func(y_true, y_pred, **kwargs)`` by `_score`.
            Falls back to ``f1_score`` when missing or falsy.

        **kwargs:
            Every other keyword argument is forwarded to ``ClassificationTrustee.__init__``.
        """
        print("Init MyClassificationTrustee")
        # Always pop the key, so that an explicit scoring_func=None is not forwarded to the parent.
        scoring_func = kwargs.pop("scoring_func", None)
        if scoring_func:
            self.scoring_func = scoring_func
            print(f"Using scoring func: {self.scoring_func}")
        else:
            self.scoring_func = f1_score
        super().__init__(**kwargs)

    @_check_if_trained
    def get_stable(self, top_k=10, threshold=0.9, sort=True, max_impurity=0.1, proba_thr=None, predict_proba=False):
        """
        Filters out explanations from Trustee stability analysis with less than threshold agreement.

        The pruned students and their agreement are computed on the first call only and cached in
        ``self._stable_students``: later calls reuse that cache, whatever ``top_k``, ``max_impurity``,
        ``proba_thr`` or ``predict_proba`` they are given.

        Parameters
        ----------
        top_k: int, default=10
            Number of top-k branches, sorted by number of samples per branch, to keep after finding
            decision tree with highest fidelity.

        threshold: float, default=0.9
            Remove any student decision tree explanation if their mean agreement goes below given threshold.
            To keep all students regardless of mean agreement, pass 0.

        sort: bool, default=True
            Boolean indicating whether to sort returned stable student explanation based on mean agreement.

        max_impurity: float, default=0.1
            Float indicating the maximum impurity allowed in a branch. Branch below with impurity will be pruned.
            (So it acts more as a *min_impurity* actually)

        proba_thr: float, default=None
            If truthy, agreement is computed on positive-class probabilities binarized with this cutoff
            (probability < proba_thr -> 0, otherwise 1).

        predict_proba: bool, default=False
            If True (and no proba_thr), agreement is computed on raw positive-class probabilities.

        Returns
        -------
        stable_explanations: list of tuple
            [(dt, pruned_dt, agreement, reward), ...]

            - dt: {DecisionTreeClassifier, DecisionTreeRegressor}
                Unconstrained fitted student model.

            - pruned_dt: {DecisionTreeClassifier, DecisionTreeRegressor}
                Top-k pruned fitted student model.

            - agreement: float
                Mean agreement of pruned student model with respect to others.

            - reward: float
                Fidelity of student model to the expert model.
        """
        if len(self._stable_students) == 0:
            use_proba = bool(proba_thr or predict_proba)
            X_eval = self._X_test.values

            # Prune each top student and predict with it exactly once; the pair-wise
            # agreement below then only has to compare cached prediction vectors.
            pruned_trees = []
            predictions = []
            for entry in self._top_students:
                pruned = top_k_prune(entry[0], top_k=top_k, max_impurity=max_impurity)
                if use_proba:
                    y_hat = pruned.predict_proba(X_eval)[:, 1]
                    if proba_thr:
                        y_hat = np.where(y_hat < proba_thr, 0, 1)
                else:
                    y_hat = pruned.predict(X_eval)
                pruned_trees.append(pruned)
                predictions.append(y_hat)

            for i, entry in enumerate(self._top_students):
                # Student i acts as "prediction", every student (itself included) as "truth".
                agreement = [self._score(other, predictions[i]) for other in predictions]

                # Save complete dt, top-k prune dt, mean agreement and fidelity
                self._stable_students.append((entry[0], pruned_trees[i], np.mean(agreement), entry[1]))

        stable = self._stable_students
        if threshold > 0:
            # Materialize the selection so callers always get a list, never a one-shot iterator.
            stable = [item for item in stable if item[2] >= threshold]

        if sort:
            return sorted(stable, key=lambda item: item[2], reverse=True)

        return stable

    @_check_if_trained
    def explain(self, top_k=10, max_impurity=0.1, proba_thr=None, predict_proba=False):
        """
        Returns explainable model that best imitates Expert model, based on highest mean agreement.

        All arguments are forwarded to `get_stable` (with ``threshold=0`` and ``sort=False``).

        Parameters
        ----------
        top_k: int, default=10
            Number of top-k branches kept by the pruning.

        max_impurity: float, default=0.1
            Float indicating the maximum impurity allowed in a branch. Branch below with impurity will be pruned.
            (So it acts more as a *min_impurity* actually)

        proba_thr: float, default=None
            Cutoff applied to the positive-class probability before computing agreement.

        predict_proba: bool, default=False
            Whether agreement is computed on positive-class probabilities.

        Returns
        -------
        top_student: tuple
            (dt, pruned_dt, agreement, reward)

            - dt: {DecisionTreeClassifier, DecisionTreeRegressor}
                Unconstrained fitted student model.

            - pruned_dt: {DecisionTreeClassifier, DecisionTreeRegressor}
                Top-k pruned fitted student model.

            - agreement: float
                Mean agreement of pruned student model with respect to others.

            - reward: float
                Fidelity of student model to the expert model.
        """
        stable = self.get_stable(
            top_k=top_k,
            threshold=0,
            sort=False,
            max_impurity=max_impurity,
            proba_thr=proba_thr,
            predict_proba=predict_proba,
        )
        return max(stable, key=lambda item: item[2])

    def fit(
        self,
        X,
        y,
        top_k=10,
        max_leaf_nodes=None,
        max_depth=None,
        ccp_alpha=0.0,
        train_size=0.7,
        num_iter=50,
        num_stability_iter=5,
        num_samples=2000,
        samples_size=None,
        use_features=None,
        predict_method_name="predict",
        optimization="fidelity",  # for comparative purposes only
        aggregate=True,  # for comparative purposes only
        verbose=False,
        max_impurity=0.1,
        predict_proba=False,
        proba_thr=None,
    ):
        """
        Trains student decision trees to imitate the Expert model.

        Parameters
        ----------
        X: {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, it will be converted to a pandas DataFrame.

        y: array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values for X (class labels in classification, real numbers in regression).
            Internally, it will be converted to a pandas Series.

        top_k: int, default=10
            Number of top-k branches, sorted by number of samples per branch, to keep after finding
            decision tree with highest fidelity.

        max_leaf_nodes: int, default=None
            Grow a tree with max_leaf_nodes in best-first fashion. Best nodes are defined as
            relative reduction in impurity. If None then unlimited number of leaf nodes.

        max_depth: int, default=None
            The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure.

        ccp_alpha: float, default=0.0
            Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the
            largest cost complexity that is smaller than ccp_alpha will be chosen. By default,
            no pruning is performed. See Minimal Cost-Complexity Pruning here for details:
            https://scikit-learn.org/stable/modules/tree.html#minimal-cost-complexity-pruning

        train_size: float or int, default=0.7
            If float, should be between 0.0 and 1.0 and represent the proportion of the dataset
            to include in the train split. If int, represents the absolute number of train samples.

        num_iter: int, default=50
            Number of iterations to repeat Trustee inner-loop for.

        num_stability_iter: int, default=5
            Number of iterations to repeat Trustee stabilization outer-loop for.

        num_samples: int, default=2000
            The absolute number of samples to fetch from the training dataset split to train the
            student decision tree model. If the `samples_size` argument is provided, this arg is
            ignored.

        samples_size: float, default=None
            The fraction of the training dataset split to use to train the student decision tree model.
            If None, `num_samples` is used instead.

        use_features: array-like, default=None
            Array-like of integers representing the indexes of features from the `X` training samples.
            If not None, only the features indicated by the provided indexes will be used to train the
            student decision tree model.

        predict_method_name: str, default="predict"
            The method interface to use to get predictions from the expert model.
            The initial call on the training split always passes ``thresh=proba_thr``,
            so the expert's method must accept a ``thresh`` keyword argument.

        optimization: {"fidelity", "accuracy"}, default="fidelity"
            The comparison criteria to optimize the decision tree students in Trustee inner-loop.
            Used for ablation study only. Note that the "accuracy" reference labels are sampled
            from the expert's predictions on the training split, not from `y`.

        aggregate: bool, default=True
            Boolean indicating whether dataset aggregation should be used in Trustee inner-loop.
            Used for ablation study only.

        verbose: bool, default=False
            Boolean indicating whether to log messages.

        max_impurity: float, default=0.1
            Float indicating the maximum impurity allowed in a branch. Branch below with impurity will be pruned.
            (So it acts more as a *min_impurity* actually)

        predict_proba: bool, default=False
            Whether to use the positive class predicted probability or the class prediction

        proba_thr: float, default=None
            Fixed cutoff applied to the positive class predicted probability to get the final class
            assignment (probability < proba_thr -> 0, otherwise 1). A truthy value implies the use
            of predicted probabilities, whatever `predict_proba` is.

        Raises
        ------
        ValueError
            If `X` and `y` do not have the same length.
        """
        if verbose:
            self.log(f"Initializing training dataset using {self.expert} as expert model")

        if len(X) != len(y):
            raise ValueError("Features (X) and target (y) values should have the same length.")

        # convert data to pandas objects to facilitate processing
        X = convert_to_df(X)
        y = convert_to_series(y)

        # split input array to train DTs and evaluate agreement
        self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(X, y, train_size=train_size)

        # The students learn from the expert's outputs, not from the ground truth.
        features = self._X_train
        targets = convert_to_series(getattr(self.expert, predict_method_name)(self._X_train, thresh=proba_thr))

        if hasattr(targets, "shape") and len(targets.shape) >= 2:
            targets = targets.ravel()

        student = self.student_class(
            random_state=0, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, ccp_alpha=ccp_alpha
        )

        # The sample size only depends on the (fixed) training split: compute it once.
        sample_count = int(len(self._X_train) * samples_size) if samples_size else num_samples

        if verbose:
            self.log(f"Expert model score: {self._score(self._y_train, targets)}")
            self.log(f"Initializing Trustee outer-loop with {num_stability_iter} iterations")

        # Trustee outer-loop
        for i in range(num_stability_iter):
            self._students_by_iter.append([])
            if verbose:
                self.log("#" * 10, f"Outer-loop Iteration {i}/{num_stability_iter}", "#" * 10)
                self.log(f"Initializing Trustee inner-loop with {num_iter} iterations")

            # Trustee inner-loop
            for j in range(num_iter):
                if verbose:
                    self.log("#" * 10, f"Inner-loop Iteration {j}/{num_iter}", "#" * 10)

                # `features` grows when aggregate=True, so its length is re-read at every iteration.
                dataset_size = len(features)
                # Step 1: Sample predictions from training dataset
                if verbose:
                    self.log(
                        f"Sampling {sample_count} points from training dataset with ({len(features)}, {len(targets)}) entries"
                    )

                samples_idxs = np.random.choice(dataset_size, size=sample_count, replace=False)
                X_iter, y_iter = features.iloc[samples_idxs], targets.iloc[samples_idxs]
                X_iter_train, X_iter_test, y_iter_train, y_iter_test = train_test_split(
                    X_iter, y_iter, train_size=train_size
                )

                X_train_student = X_iter_train
                X_test_student = X_iter_test
                if use_features is not None:
                    X_train_student = X_iter_train.iloc[:, use_features]
                    X_test_student = X_iter_test.iloc[:, use_features]

                # Step 2: Training the student decision tree with sampled data
                student.fit(X_train_student.values, y_iter_train.values)
                if predict_proba or proba_thr:
                    student_pred = student.predict_proba(X_test_student.values)[:, 1]
                    if proba_thr:
                        student_pred = np.where(student_pred < proba_thr, 0, 1)
                else:
                    student_pred = student.predict(X_test_student.values)

                if verbose:
                    self.log(
                        f"Student model {i}-{j} trained with depth {student.get_depth()} "
                        f"and {student.get_n_leaves()} leaves:"
                    )
                    self.log(f"Student model score: {self._score(y_iter_test, student_pred)}")

                # Step 3: Use expert model predictions to aggregate original dataset
                if proba_thr:
                    # Same binarization rule as for the student. (Chaining two Series.where calls
                    # here would turn every prediction into 1, because `where` replaces the
                    # entries for which the condition is False.)
                    expert_proba = self.expert.predict_proba(X_iter_test)[:, 1]
                    expert_pred = pd.Series(np.where(expert_proba < proba_thr, 0, 1))
                else:
                    expert_pred = pd.Series(getattr(self.expert, predict_method_name)(X_iter_test))

                if aggregate:
                    features = pd.concat([features, X_iter_test])
                    targets = pd.concat([targets, expert_pred])

                if optimization == "accuracy":
                    # Step 4: Calculate reward against the labels sampled for this iteration
                    reward = self._score(y_iter_test, student_pred)
                else:
                    # Step 4: Calculate reward based on Decision Tree Classifier fidelity to the Expert model
                    reward = self._score(expert_pred, student_pred)

                if verbose:
                    self.log(f"Student model {i}-{j} fidelity: {reward}")

                # Save a snapshot: `student` itself is refit at the next iteration
                self._students_by_iter[i].append((deepcopy(student), reward))

            # Save student with highest reward to list of top students by iteration
            self._top_students.append(max(self._students_by_iter[i], key=lambda item: item[1]))

        # Get best overall student based on mean agreement
        self._best_student = self.explain(
            top_k=top_k, max_impurity=max_impurity, proba_thr=proba_thr, predict_proba=predict_proba
        )[0]

    def _score(self, y_true, y_pred, kwargs=None):
        """
        Score function for student models. Compares reference values with predictions
        using self.scoring_func (cf. constructor args).

        Parameters
        ----------
        y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
            The reference target values.

        y_pred: array-like of shape (n_samples,) or (n_samples, n_outputs)
            The predicted target values.

        kwargs: dict, default=None
            Extra keyword arguments for self.scoring_func. None means ``{"average": "macro"}``.

        Returns
        -------
        score: float
            Value returned by ``self.scoring_func(y_true, y_pred, **kwargs)``.
        """
        if kwargs is None:
            # Built per call, so that no dict is shared between invocations.
            kwargs = {"average": "macro"}
        return self.scoring_func(y_true, y_pred, **kwargs)

### Get the most performant student

Based on the F1 score computed on thresholded predicted probabilities

In [None]:
# Load the cached explainer when allowed; train a new one when a refit is
# forced or when no cache file exists yet.
needs_fit = force_redo
if not needs_fit:
    try:
        with open(trustee_perf_path, "rb") as f:
            trustee_perf = pickle.load(f)
    except FileNotFoundError:
        needs_fit = True

if needs_fit:
    trustee_perf = MyClassificationTrustee(expert=model, scoring_func=f1_score)
    trustee_perf.fit(
        X_train,
        y_train,
        top_k=top_k,
        num_iter=num_iter,
        num_stability_iter=num_stability_iter,
        samples_size=0.7,
        max_impurity=max_impurity,
        proba_thr=proba_thr,
        verbose=False,
        aggregate=False,
        optimization="accuracy",
    )
    print()

# Best student: (full tree, pruned tree, mean agreement, reward); the pruned tree is unused here.
dt, _, agreement, reward = trustee_perf.explain()
print(f"Model explanation training (agreement, reward): ({agreement}, {reward})")
print(f"Model Explanation size: {dt.tree_.node_count}")
pruned_dt = trustee_perf.prune(top_k=10, max_impurity=0.01)
print(f"Top-k Prunned Model explanation size: {pruned_dt.tree_.node_count}")

# Evaluate perf of the explainers.
# Expert: probabilities plus its own thresholded prediction.
y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test, thresh=proba_thr)
# Students: a sample is positive unless its probability is strictly below the cutoff.
dt_y_pred_proba = dt.predict_proba(X_test.values)[:, 1]
dt_y_pred = np.logical_not(dt_y_pred_proba < proba_thr).astype(int)
pruned_dt_y_pred_proba = pruned_dt.predict_proba(X_test.values)[:, 1]
pruned_dt_y_pred = np.logical_not(pruned_dt_y_pred_proba < proba_thr).astype(int)

# F1 on the thresholded predictions.
f1 = f1_score(y_test, y_pred)
print(f"F1 initial model: {f1}")
f1_student = f1_score(y_test, dt_y_pred)
print(f"F1 student model: {f1_student}")
f1_student_pruned = f1_score(y_test, pruned_dt_y_pred)
print(f"F1 pruned student model: {f1_student_pruned}")

# Average precision on the raw probabilities.
ap = average_precision_score(y_test, y_pred_proba)
print(f"AP initial model: {ap}")
ap_student = average_precision_score(y_test, dt_y_pred_proba)
print(f"AP student model: {ap_student}")
ap_student_pruned = average_precision_score(y_test, pruned_dt_y_pred_proba)
print(f"AP pruned student model: {ap_student_pruned}")

In [None]:
# Cache the explainer next to the expert model so that later runs can reload it.
with open(trustee_perf_path, "wb") as f:
    pickle.Pickler(f).dump(trustee_perf)

In [None]:
# Node importance
top_nodes = trustee_perf.get_top_nodes(top_k=3)
top_nodes

In [None]:
# Feature importance
top_feat = trustee_perf.get_top_features(top_k=5)
top_feat_named = [
    (features[top_f[0]], top_f[1])
    for top_f 
    in top_feat
]
top_feat_named

In [None]:
# Prune the performance-oriented student with top_k=25 and render it to a PDF.
a = trustee_perf.prune(top_k=25, max_impurity=0.01)
print(f"Proba thr: {proba_thr}")
plot_tree_thr(a, proba_thr, 6, features, "./out/11app_trustee_perf.pdf")

# Evaluate perf of the explainer.
# A sample is positive unless its probability is strictly below the cutoff.
pruned_dt_y_pred_proba = a.predict_proba(X_test.values)[:, 1]
pruned_dt_y_pred = np.logical_not(pruned_dt_y_pred_proba < proba_thr).astype(int)

# `y_pred` and `y_pred_proba` (expert outputs) come from the training cell above.
f1 = f1_score(y_test, y_pred)
print(f"F1 initial model: {f1}")
f1_student_pruned = f1_score(y_test, pruned_dt_y_pred)
print(f"F1 pruned student model: {f1_student_pruned}")

ap = average_precision_score(y_test, y_pred_proba)
print(f"AP initial model: {ap}")
ap_student_pruned = average_precision_score(y_test, pruned_dt_y_pred_proba)
print(f"AP pruned student model: {ap_student_pruned}")

### Get the most faithful student (highest fidelity to the expert)

Based on the F1 score computed on thresholded predicted probabilities

In [None]:
# Load the cached explainer when allowed; train a new one when a refit is
# forced or when no cache file exists yet.
needs_fit = force_redo
if not needs_fit:
    try:
        with open(trustee_fidelity_path, "rb") as f:
            trustee_fid = pickle.load(f)
    except FileNotFoundError:
        needs_fit = True

if needs_fit:
    # Single training path, so that the resulting explainer does not depend on
    # *why* it is being trained (cache miss vs. forced refit).
    # NOTE(review): the cache-miss path used to train with samples_size=0.7 and
    # verbose=True; the values of the forced-refit path are kept here - confirm.
    trustee_fid = MyClassificationTrustee(expert=model, scoring_func=f1_score)
    trustee_fid.fit(
        X_train, y_train,
        top_k=top_k,
        num_iter=num_iter,
        num_stability_iter=num_stability_iter,
        samples_size=0.5,
        max_impurity=max_impurity,
        proba_thr=proba_thr,
        verbose=False,
        aggregate=False,
        optimization="fidelity",
    )
    print()

# Best student: (full tree, pruned tree, mean agreement, reward); the pruned tree is unused here.
dt, _, agreement, reward = trustee_fid.explain()
print(f"Model explanation training (agreement, reward): ({agreement}, {reward})")
print(f"Model Explanation size: {dt.tree_.node_count}")
pruned_dt = trustee_fid.prune(top_k=10, max_impurity=0.01)
print(f"Top-k Prunned Model explanation size: {pruned_dt.tree_.node_count}")

# Evaluate perf of the explainers
# Expert: probabilities plus its own thresholded prediction.
y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test, thresh=proba_thr)
# Students: a sample is positive unless its probability is strictly below the cutoff.
dt_y_pred_proba = dt.predict_proba(X_test.values)[:, 1]
dt_y_pred = np.where(dt_y_pred_proba < proba_thr, 0, 1)
pruned_dt_y_pred_proba = pruned_dt.predict_proba(X_test.values)[:, 1]
pruned_dt_y_pred = np.where(pruned_dt_y_pred_proba < proba_thr, 0, 1)

# F1 on the thresholded predictions.
f1 = f1_score(y_test, y_pred)
print(f"F1 initial model: {f1}")
f1_student = f1_score(y_test, dt_y_pred)
print(f"F1 student model: {f1_student}")
f1_student_pruned = f1_score(y_test, pruned_dt_y_pred)
print(f"F1 pruned student model: {f1_student_pruned}")

# Average precision on the raw probabilities.
ap = average_precision_score(y_test, y_pred_proba)
print(f"AP initial model: {ap}")
ap_student = average_precision_score(y_test, dt_y_pred_proba)
print(f"AP student model: {ap_student}")
ap_student_pruned = average_precision_score(y_test, pruned_dt_y_pred_proba)
print(f"AP pruned student model: {ap_student_pruned}")

In [None]:
# Cache the explainer next to the expert model so that later runs can reload it.
with open(trustee_fidelity_path, "wb") as f:
    pickle.Pickler(f).dump(trustee_fid)

In [None]:
# Node importance
top_nodes = trustee_fid.get_top_nodes(top_k=3)
top_nodes

In [None]:
# Feature importance
top_feat = trustee_fid.get_top_features(top_k=5)
top_feat_named = [
    (features[top_f[0]], top_f[1])
    for top_f 
    in top_feat
]
top_feat_named

In [None]:
# Prune the fidelity-oriented student with top_k=7 and render it to a PDF.
a = trustee_fid.prune(top_k=7, max_impurity=0.01)
# NOTE(review): this rebinds the notebook-wide `proba_thr`; cells re-run after this
# one will see 0.043 instead of the configured cutoff - confirm this is intended.
proba_thr = 0.043
print(f"Proba thr: {proba_thr}")
plot_tree_thr(a, proba_thr, 15, features, "./out/11app_trustee_fidelity.pdf")

# Evaluate perf of the explainer.
# A sample is positive unless its probability is strictly below the cutoff.
pruned_dt_y_pred_proba = a.predict_proba(X_test.values)[:, 1]
pruned_dt_y_pred = np.logical_not(pruned_dt_y_pred_proba < proba_thr).astype(int)

# NOTE(review): `y_pred` was computed in an earlier cell, presumably with the previous
# cutoff, so the two F1 values below may not use the same threshold - verify.
f1 = f1_score(y_test, y_pred)
print(f"F1 initial model: {f1}")
f1_student_pruned = f1_score(y_test, pruned_dt_y_pred)
print(f"F1 pruned student model: {f1_student_pruned}")

ap = average_precision_score(y_test, y_pred_proba)
print(f"AP initial model: {ap}")
ap_student_pruned = average_precision_score(y_test, pruned_dt_y_pred_proba)
print(f"AP pruned student model: {ap_student_pruned}")