In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer


# --- Feature groups -------------------------------------------------------
# Each list is routed to its own branch of the ColumnTransformer defined
# further down in this cell.
numerical_features = [
    "n_items",
    "total_price",
    "total_freight",
    "avg_price",
    "payment_value",
    "seller_avg_score_past",
    "purchase_to_estimated_days",
]
ordinal_linear = ["payment_installments"]
ordinal_cyclic = ["purchase_month", "purchase_dow"]
categorical_features = [
    "main_payment_type",
    "same_state",
    "customer_state",
    "seller_state",
]

# --- Data -----------------------------------------------------------------
# Path is relative to the notebook's working directory.
df = pd.read_csv("../data/df.csv", index_col=False)

# "is_late" is the label; every other column stays in X.
y = df["is_late"]
X = df.drop(columns=["is_late"])

# One customer id per row.
# NOTE(review): `groups` is not referenced by any of the visible cells --
# confirm whether the train/val/test splits were meant to be group-aware.
groups = df["customer_unique_id"]
def encode_cyclic_features(df):
    """Project purchase month and day-of-week onto sine/cosine pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``purchase_month`` and ``purchase_dow``.
        The input is not modified.

    Returns
    -------
    pandas.DataFrame
        Four columns, in this order: ``month_sin``, ``month_cos``,
        ``dow_sin``, ``dow_cos``. The month uses a period of 12 and the
        day-of-week a period of 7. The index is inherited from ``df``.
    """
    source = df.copy()

    # Angle on the unit circle for each value (same arithmetic order as a
    # direct ``2 * pi * value / period`` expression).
    month_angle = 2 * np.pi * source["purchase_month"] / 12
    dow_angle = 2 * np.pi * source["purchase_dow"] / 7

    encoded = pd.DataFrame({
        "month_sin": np.sin(month_angle),
        "month_cos": np.cos(month_angle),
        "dow_sin": np.sin(dow_angle),
        "dow_cos": np.cos(dow_angle),
    })
    return encoded

# Unfitted preprocessing template; callers are expected to clone() and fit it
# on their own training split. Columns of X that are not listed in any branch
# (e.g. customer_unique_id) are not forwarded, because ColumnTransformer's
# default `remainder` is "drop".
preprocessor = ColumnTransformer(
    transformers=[
        # Standardise the continuous inputs together with the linear ordinal one.
        (
            "num",
            StandardScaler(),
            [*numerical_features, *ordinal_linear],
        ),
        # Dense one-hot columns; categories unseen at fit time encode as all zeros.
        (
            "cat",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
            categorical_features,
        ),
        # Month / day-of-week become sine/cosine pairs.
        (
            "cyclic",
            FunctionTransformer(func=encode_cyclic_features),
            ordinal_cyclic,
        ),
    ],
)

Train+Val size: (78254, 15), Test size: (19564, 15)


In [7]:
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    confusion_matrix, classification_report, roc_auc_score
)
from sklearn.base import clone
from tqdm import tqdm
import joblib
import numpy as np

# ---------------------------------------------------------------------------
# Decision-tree experiment.
#
# For each of `n_runs` seeds: split the data, fit the preprocessor on the
# training part only, grid-search a DecisionTreeClassifier against the
# validation part, and score the winner on the test part. Afterwards the
# mean / std of the test F1 is reported and one run is persisted with joblib.
# ---------------------------------------------------------------------------
n_runs = 3
base_seed = 42
assert n_runs >= 1, "n_runs must be at least 1"

# The grid is identical for every run, so it is built once up front.
param_grid_dt = {
    "max_depth": [3, 5, 10],
    "min_samples_split": [2, 10, 50],
    "min_samples_leaf": [1, 5, 20],
    "class_weight": ["balanced"],
    "criterion": ["gini", "entropy"],
}
param_list_dt = list(ParameterGrid(param_grid_dt))


def _dt_test_metrics(model, X_eval, y_true):
    """Score a fitted classifier on one evaluation set.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` and ``predict_proba``.
    X_eval : already-preprocessed feature matrix.
    y_true : true labels aligned with ``X_eval``.

    Returns
    -------
    dict
        Keys ``y_pred``, ``proba`` (column 1 of ``predict_proba``), ``f1``,
        ``accuracy``, ``precision``, ``recall``, ``auc`` and ``cm``
        (confusion matrix).
    """
    y_pred = model.predict(X_eval)
    proba = model.predict_proba(X_eval)[:, 1]
    return {
        "y_pred": y_pred,
        "proba": proba,
        "f1": f1_score(y_true, y_pred),
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "auc": roc_auc_score(y_true, proba),
        "cm": confusion_matrix(y_true, y_pred),
    }


def _print_dt_test_metrics(metrics, y_true):
    """Print the metric block shared by the per-run and the final report.

    ``metrics`` is a dict as returned by ``_dt_test_metrics``; ``y_true`` are
    the labels those metrics were computed against.
    """
    print(f"Accuracy : {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall   : {metrics['recall']:.4f}")
    print(f"F1       : {metrics['f1']:.4f}")
    print(f"ROC-AUC  : {metrics['auc']:.4f}")
    print("Confusion matrix [[TN, FP], [FN, TP]]:")
    print(metrics["cm"])
    print("\nClassification report:")
    print(classification_report(y_true, metrics["y_pred"]))


# State of the run that is kept for the final report and the saved artifact.
test_f1_list = []
best_global_test_f1 = -1.0
best_run_idx = None

best_preprocessor = None
best_dt_model = None
best_dt_params = None
best_dt_val_f1 = None
best_X_test_pre = None
best_y_test = None
best_test_metrics = None

for i in range(1, n_runs + 1):
    print(f"\n================== Decision Tree Run {i} / {n_runs} ==================")
    seed = base_seed * i

    # 20% test, then 25% of the remaining 80% as validation -> 60/20/20.
    # NOTE(review): these splits are stratified on y only. `groups`
    # (customer_unique_id) from the setup cell is not used; if one customer
    # can own several rows, those rows may end up on both sides of a split.
    # Consider a group-aware splitter if that matters here.
    X_other, X_test, y_other, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )
    X_train_raw, X_val_raw, y_train, y_val = train_test_split(
        X_other, y_other, test_size=0.25, random_state=seed, stratify=y_other
    )

    # Fit a fresh copy of the preprocessor on the training part only, so that
    # no statistics from validation / test rows enter the transform.
    preproc_this_run = clone(preprocessor)
    preproc_this_run.fit(X_train_raw)

    X_train    = preproc_this_run.transform(X_train_raw)
    X_val      = preproc_this_run.transform(X_val_raw)
    X_test_pre = preproc_this_run.transform(X_test)
    print(f"[Run {i}] Train shape: {X_train.shape}")
    print(f"[Run {i}] Val shape  : {X_val.shape}")
    print(f"[Run {i}] Test shape : {X_test_pre.shape}")

    print(f"[Run {i}] Total param combinations: {len(param_list_dt)}")

    # Exhaustive search; the winner is the combination with the highest
    # validation F1 (first one wins on ties because of the strict ">").
    best_dt_model_run  = None
    best_dt_params_run = None
    best_dt_f1_run     = -1.0

    for params in tqdm(param_list_dt, desc=f"Searching Decision Tree - run {i}"):
        model = DecisionTreeClassifier(random_state=seed, **params)
        model.fit(X_train, y_train)

        y_val_pred = model.predict(X_val)
        val_f1 = f1_score(y_val, y_val_pred)

        if val_f1 > best_dt_f1_run:
            best_dt_f1_run     = val_f1
            best_dt_params_run = params
            best_dt_model_run  = model

    print(f"\n[Run {i}] Best params on val:", best_dt_params_run)
    print(f"[Run {i}] Best Val F1 (label=1): {best_dt_f1_run:.4f}")

    # Score this run's winner once on its held-out test part.
    test_metrics = _dt_test_metrics(best_dt_model_run, X_test_pre, y_test)
    y_test_pred = test_metrics["y_pred"]
    test_f1 = test_metrics["f1"]

    print(f"\n[Run {i}] Test F1 (label=1): {test_f1:.4f}")
    test_f1_list.append(test_f1)

    # NOTE(review): the run that gets persisted is chosen by its *test* F1,
    # and every run has a different test split. The "best" test F1 is
    # therefore an optimistic figure; the mean / std printed below is the
    # number to quote as the performance estimate.
    if test_f1 > best_global_test_f1:
        best_global_test_f1 = test_f1
        best_run_idx        = i
        best_preprocessor   = preproc_this_run
        best_dt_model       = best_dt_model_run
        best_dt_params      = best_dt_params_run
        best_dt_val_f1      = best_dt_f1_run
        best_X_test_pre     = X_test_pre
        best_y_test         = y_test
        best_test_metrics   = test_metrics

        print("\n--> New global BEST DT run so far!")
        print("[Global best DT so far] Test metrics:")
        _print_dt_test_metrics(test_metrics, y_test)

test_f1_array = np.array(test_f1_list)
print("\n================== Decision Tree Summary over runs ==================")
print("DT Test F1 for each run:", test_f1_list)
print(f"Mean DT Test F1 over {len(test_f1_array)} runs: {test_f1_array.mean():.4f}")
print(f"Std  DT Test F1 over {len(test_f1_array)} runs: {test_f1_array.std():.4f}")
print(f"Best DT run index: {best_run_idx}, Best Test F1: {best_global_test_f1:.4f}")

# Reuse the metrics captured when the best run was scored instead of
# predicting a second time; the notebook-level names are kept as before.
y_best_test_pred = best_test_metrics["y_pred"]
best_test_f1   = best_test_metrics["f1"]
best_test_acc  = best_test_metrics["accuracy"]
best_test_prec = best_test_metrics["precision"]
best_test_rec  = best_test_metrics["recall"]
best_cm        = best_test_metrics["cm"]
best_proba     = best_test_metrics["proba"]
best_test_auc  = best_test_metrics["auc"]

print("\n================== Final best DT run metrics ==================")
print(f"[Best DT run #{best_run_idx}] Test metrics:")
_print_dt_test_metrics(best_test_metrics, best_y_test)

# NOTE(review): the pickled preprocessor holds a FunctionTransformer that
# points at encode_cyclic_features; that function has to be defined /
# importable wherever this file is loaded.
dt_artifacts = {
    "preprocessor": best_preprocessor,
    "model": best_dt_model,
    "best_params": best_dt_params,
    "best_val_f1": best_dt_val_f1,
    "best_test_f1": best_global_test_f1,
    "test_f1_all_runs": test_f1_list,
}

joblib.dump(dt_artifacts, "best_decision_tree_model_3_run.pkl")
print("\nSaved Decision Tree model (best of multi-run) → best_decision_tree_model_3_run.pkl")


[Run 1] Train shape: (58690, 67)
[Run 1] Val shape  : (19564, 67)
[Run 1] Test shape : (19564, 67)
[Run 1] Total param combinations: 54


Searching Decision Tree - run 1: 100%|██████████| 54/54 [00:15<00:00,  3.42it/s]



[Run 1] Best params on val: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 20, 'min_samples_split': 2}
[Run 1] Best Val F1 (label=1): 0.2837

[Run 1] Test F1 (label=1): 0.2592

--> New global BEST DT run so far!
[Global best DT so far] Test metrics:
Accuracy : 0.7259
Precision: 0.1655
Recall   : 0.5978
F1       : 0.2592
ROC-AUC  : 0.7109
Confusion matrix [[TN, FP], [FN, TP]]:
[[13264  4731]
 [  631   938]]

Classification report:
              precision    recall  f1-score   support

           0       0.95      0.74      0.83     17995
           1       0.17      0.60      0.26      1569

    accuracy                           0.73     19564
   macro avg       0.56      0.67      0.55     19564
weighted avg       0.89      0.73      0.79     19564


[Run 2] Train shape: (58690, 67)
[Run 2] Val shape  : (19564, 67)
[Run 2] Test shape : (19564, 67)
[Run 2] Total param combinations: 54


Searching Decision Tree - run 2: 100%|██████████| 54/54 [00:16<00:00,  3.35it/s]



[Run 2] Best params on val: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 20, 'min_samples_split': 2}
[Run 2] Best Val F1 (label=1): 0.2824

[Run 2] Test F1 (label=1): 0.2778

--> New global BEST DT run so far!
[Global best DT so far] Test metrics:
Accuracy : 0.7512
Precision: 0.1810
Recall   : 0.5966
F1       : 0.2778
ROC-AUC  : 0.7270
Confusion matrix [[TN, FP], [FN, TP]]:
[[13761  4234]
 [  633   936]]

Classification report:
              precision    recall  f1-score   support

           0       0.96      0.76      0.85     17995
           1       0.18      0.60      0.28      1569

    accuracy                           0.75     19564
   macro avg       0.57      0.68      0.56     19564
weighted avg       0.89      0.75      0.80     19564


[Run 3] Train shape: (58690, 66)
[Run 3] Val shape  : (19564, 66)
[Run 3] Test shape : (19564, 66)
[Run 3] Total param combinations: 54


Searching Decision Tree - run 3: 100%|██████████| 54/54 [00:15<00:00,  3.44it/s]


[Run 3] Best params on val: {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 20, 'min_samples_split': 2}
[Run 3] Best Val F1 (label=1): 0.2716

[Run 3] Test F1 (label=1): 0.2738

DT Test F1 for each run: [0.25918762088974856, 0.27778602166493543, 0.2738440303657695]
Mean DT Test F1 over 3 runs: 0.2703
Std  DT Test F1 over 3 runs: 0.0080
Best DT run index: 2, Best Test F1: 0.2778

[Best DT run #2] Test metrics:
Accuracy : 0.7512
Precision: 0.1810
Recall   : 0.5966
F1       : 0.2778
ROC-AUC  : 0.7270
Confusion matrix [[TN, FP], [FN, TP]]:
[[13761  4234]
 [  633   936]]

Classification report:
              precision    recall  f1-score   support

           0       0.96      0.76      0.85     17995
           1       0.18      0.60      0.28      1569

    accuracy                           0.75     19564
   macro avg       0.57      0.68      0.56     19564
weighted avg       0.89      0.75      0.80     19564


Saved Decision Tree model (best o


