In [2]:
######################################################################
# 0 .  Imports & dataset ---------------------------------------------
######################################################################
import pandas as pd
from treefarms import TREEFARMS
import time

# your dataframe with columns  ["Age","BMI","Glucose","Outcome"]
df = pd.read_csv("../data/pima/input/diabetes-treefarms.csv")
#X, y          = df.iloc[:, :-1], df.iloc[:, -1]
X, y          = df.drop(columns=["Outcome"]), df["Outcome"]
feature_names = list(X.columns)

In [3]:

######################################################################
# 1 .  Configure & fit TreeFARMS -------------------------------------
######################################################################
cfg = {
    "regularization"            : 0.1,   # ≤ sparsity penalty
    "rashomon_bound_multiplier" : 0.1,   # enumerate near-optimal trees
    "depth_budget"                 : 5       # keep trees interpretable
}

start_time = time.time()
model = TREEFARMS(cfg)
model.fit(X, y)
end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

null
Finding Optimal Objective...
treefarms reported successful execution{
  "complexity": 0.10000000149011612,
  "loss": 0.3489583432674408,
  "model_objective": 0.4489583373069763,
  "name": "Outcome",
  "prediction": 0
}

training completed. Number of trees in the Rashomon set: 285
Training time: 402.42 seconds


In [4]:
first_tree = model[0]

print("evaluating the first model in the Rashomon set", flush=True)

# get the results
train_acc = first_tree.score(X, y)
n_leaves = first_tree.leaves()
n_nodes = first_tree.nodes()

print("Training accuracy: {}".format(train_acc))
print("# of leaves: {}".format(n_leaves))
print(first_tree)

evaluating the first model in the Rashomon set


Training accuracy: 0.71484375
# of leaves: 2
if feature_1842 = true then:
    predicted Prediction: 0

else if feature_1842 != true then:
    predicted Prediction: 1


In [5]:
######################################################################
# 2 .  Helper: extract cut-points from a TreeFARMS tree --------------
######################################################################
def bins_from_tree(tree, feat_names):
    """
    Parameters
    ----------
    tree : one element returned by `model[...]`
    feat_names : list[str]

    Returns
    -------
    dict   {feature_name : sorted list of thresholds}
    """
    # Obtain the underlying scikit-learn DecisionTreeClassifier
    if hasattr(tree, "to_sklearn"):
        sk_tree = tree.to_sklearn()
    elif hasattr(tree, "sklearn_tree_"):   # older API
        sk_tree = tree
    else:
        raise RuntimeError("Cannot access sklearn tree in this TreeFARMS object")

    cuts = {}
    tree_ = sk_tree.tree_
    for node_id in range(tree_.node_count):
        feat_idx = tree_.feature[node_id]
        thr      = tree_.threshold[node_id]
        if feat_idx >= 0:                    # −2 means leaf
            fname = feat_names[feat_idx]
            cuts.setdefault(fname, set()).add(float(thr))

    return {f: sorted(thrs) for f, thrs in cuts.items()}

In [6]:
######################################################################
# 3 .  Print best tree + discretisation bins -------------------------
######################################################################
best_tree = model[0]  # lowest objective value
print(f"\n=== Best tree ===\n{best_tree}")
print("Training accuracy :", best_tree.score(X, y))
print("# leaves          :", best_tree.leaves())

print("\nCut-points (bins) extracted from this tree:")
for feat, cut_list in bins_from_tree(best_tree, feature_names).items():
    print(f"  {feat:8s} : {cut_list}")

######################################################################
# 4 .  Repeat for entire Rashomon set (optional) ---------------------
######################################################################
for idx, t in enumerate(model):
    print(f"\n--- Tree #{idx}  objective={t.objective:.4f} ---")
    for feat, cuts in bins_from_tree(t, feature_names).items():
        print(f"  {feat:8s} : {cuts}")



=== Best tree ===
if feature_1842 = true then:
    predicted Prediction: 0

else if feature_1842 != true then:
    predicted Prediction: 1
Training accuracy : 0.71484375
# leaves          : 2

Cut-points (bins) extracted from this tree:


RuntimeError: Cannot access sklearn tree in this TreeFARMS object

In [7]:
best = model[0]  # lowest objective value
best.__all_leaves__()

[{'prediction': 1, 'name': 'Prediction'},
 {'prediction': 0, 'name': 'Prediction'}]