In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer

df = pd.read_csv("../data/df.csv", index_col=False)


numerical_features_full = [
    "n_items", "total_price", "total_freight", "avg_price",
    "payment_value", "seller_avg_score_past",
    "purchase_to_estimated_days",
]

numerical_features_reduced = [
    "n_items", "total_price", "total_freight", "avg_price",
    "payment_value",
    "purchase_to_estimated_days",
]

ordinal_linear = ["payment_installments"]
ordinal_cyclic = ["purchase_month", "purchase_dow"]
categorical_features = ["main_payment_type", "same_state", "customer_state", "seller_state"]

X = df.drop(columns=["is_late"])
y = df["is_late"]
groups = df["customer_unique_id"]

X_other, X_test, y_other, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Train+Val size: {X_other.shape}, Test size: {X_test.shape}")

def encode_cyclic_features(df_sub):
    df_sub = df_sub.copy()
    return pd.DataFrame({
        "month_sin": np.sin(2 * np.pi * df_sub["purchase_month"] / 12),
        "month_cos": np.cos(2 * np.pi * df_sub["purchase_month"] / 12),
        "dow_sin": np.sin(2 * np.pi * df_sub["purchase_dow"] / 7),
        "dow_cos": np.cos(2 * np.pi * df_sub["purchase_dow"] / 7),
    })

preprocessor_full = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features_full + ordinal_linear),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical_features),
        ("cyclic", FunctionTransformer(encode_cyclic_features), ordinal_cyclic),
    ]
)

preprocessor_reduced = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features_reduced + ordinal_linear),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical_features),
        ("cyclic", FunctionTransformer(encode_cyclic_features), ordinal_cyclic),
    ]
)

print("Two preprocessors defined: preprocessor_full & preprocessor_reduced")

Train+Val size: (78254, 15), Test size: (19564, 15)
Two preprocessors defined: preprocessor_full & preprocessor_reduced


In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    confusion_matrix, classification_report, roc_auc_score
)
from sklearn.base import clone
from tqdm import tqdm
import joblib
import numpy as np

n_runs = 3
base_seed = 42

test_f1_list = []         
best_global_test_f1 = -1.0 
best_run_idx = None

best_preprocessor_full = None
best_model_full = None
best_params_full = None
best_val_f1_full = None
best_X_test_full_pre = None
best_y_test_full = None

for i in range(1, 4):
    print(f"\n================== Run {i} / {n_runs} ==================")
    seed = base_seed * i
    X_other, X_test, y_other, y_test = train_test_split(X, y,test_size=0.2,random_state=seed,stratify=y)
    mask_full_other = X_other["seller_avg_score_past"].notna()
    mask_full_test  = X_test["seller_avg_score_past"].notna()
    X_other_full = X_other[mask_full_other].copy()
    y_other_full = y_other[mask_full_other].copy()
    X_test_full  = X_test[mask_full_test].copy()
    y_test_full  = y_test[mask_full_test].copy()
    print(f"[Run {i}] Train+Val rows (no missing): {X_other_full.shape[0]}")
    print(f"[Run {i}] Test rows (no missing):      {X_test_full.shape[0]}")

    X_train_full_raw, X_val_full_raw, y_train_full, y_val_full = train_test_split(X_other_full, y_other_full, test_size=0.25,random_state=seed,stratify=y_other_full)

    preproc_this_run = clone(preprocessor_full)
    preproc_this_run.fit(X_train_full_raw)

    X_train_full = preproc_this_run.transform(X_train_full_raw)
    X_val_full   = preproc_this_run.transform(X_val_full_raw)
    X_test_full_pre = preproc_this_run.transform(X_test_full)

    print(f"[Run {i}] Train shape: {X_train_full.shape}")
    print(f"[Run {i}] Val shape  : {X_val_full.shape}")
    print(f"[Run {i}] Test shape : {X_test_full_pre.shape}")

    param_grid_full = {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "rbf"],
        "gamma": ["scale", "auto"],
        "class_weight": ["balanced"],
    }
    param_list_full = list(ParameterGrid(param_grid_full))
    print(f"[Run {i}] Total param combinations: {len(param_list_full)}")

    best_full_model_run  = None
    best_full_params_run = None
    best_full_f1_run     = -1.0
    for params in tqdm(param_list_full, desc=f"Searching SVM (full-feature) - run {i}"):
        model = SVC(
            probability=False,
            random_state=seed,
            **params,
        )
        model.fit(X_train_full, y_train_full)

        y_val_pred_full = model.predict(X_val_full)
        f1_full = f1_score(y_val_full, y_val_pred_full)

        if f1_full > best_full_f1_run:
            best_full_f1_run     = f1_full
            best_full_params_run = params
            best_full_model_run  = model

    print(f"\n[Run {i}] Best params on val:", best_full_params_run)
    print(f"[Run {i}] Best Val F1 (label=1): {best_full_f1_run:.4f}")

    y_test_full_pred = best_full_model_run.predict(X_test_full_pre)

    test_f1   = f1_score(y_test_full, y_test_full_pred)
    test_acc  = accuracy_score(y_test_full, y_test_full_pred)
    test_prec = precision_score(y_test_full, y_test_full_pred, zero_division=0)
    test_rec  = recall_score(y_test_full, y_test_full_pred, zero_division=0)
    cm        = confusion_matrix(y_test_full, y_test_full_pred)
    test_scores = best_full_model_run.decision_function(X_test_full_pre)
    test_auc  = roc_auc_score(y_test_full, test_scores)

    print(f"\n[Run {i}] Test F1 (label=1): {test_f1:.4f}")
    test_f1_list.append(test_f1)

    if test_f1 > best_global_test_f1:
        best_global_test_f1   = test_f1
        best_run_idx          = i
        best_preprocessor_full = preproc_this_run
        best_model_full        = best_full_model_run
        best_params_full       = best_full_params_run
        best_val_f1_full       = best_full_f1_run
        best_X_test_full_pre   = X_test_full_pre
        best_y_test_full       = y_test_full
        print("\n--> New global BEST run so far!")
        print("[Global best so far] Test metrics (only non-missing rows):")
        print(f"Accuracy : {test_acc:.4f}")
        print(f"Precision: {test_prec:.4f}")
        print(f"Recall   : {test_rec:.4f}")
        print(f"F1       : {test_f1:.4f}")
        print(f"ROC-AUC  : {test_auc:.4f}")
        print("Confusion matrix [[TN, FP], [FN, TP]]:")
        print(cm)
        print("\nClassification report:")
        print(classification_report(y_test_full, y_test_full_pred))

test_f1_array = np.array(test_f1_list)
print("\n================== Summary over runs ==================")
print("Test F1 for each run:", test_f1_list)
print(f"Mean Test F1 over {n_runs} runs: {test_f1_array.mean():.4f}")
print(f"Std  Test F1 over {n_runs} runs: {test_f1_array.std():.4f}")
print(f"Best run index: {best_run_idx}, Best Test F1: {best_global_test_f1:.4f}")

y_best_test_pred = best_model_full.predict(best_X_test_full_pre)
best_test_f1   = f1_score(best_y_test_full, y_best_test_pred)
best_test_acc  = accuracy_score(best_y_test_full, y_best_test_pred)
best_test_prec = precision_score(best_y_test_full, y_best_test_pred, zero_division=0)
best_test_rec  = recall_score(best_y_test_full, y_best_test_pred, zero_division=0)
best_cm        = confusion_matrix(best_y_test_full, y_best_test_pred)
best_scores    = best_model_full.decision_function(best_X_test_full_pre)
best_test_auc  = roc_auc_score(best_y_test_full, best_scores)

print("\n================== Final best-run metrics ==================")
print(f"[Best run #{best_run_idx}] Test metrics (only non-missing rows):")
print(f"Accuracy : {best_test_acc:.4f}")
print(f"Precision: {best_test_prec:.4f}")
print(f"Recall   : {best_test_rec:.4f}")
print(f"F1       : {best_test_f1:.4f}")
print(f"ROC-AUC  : {best_test_auc:.4f}")
print("Confusion matrix [[TN, FP], [FN, TP]]:")
print(best_cm)
print("\nClassification report:")
print(classification_report(best_y_test_full, y_best_test_pred))

svm_full_artifacts = {
    "preprocessor_full": best_preprocessor_full,
    "model_full": best_model_full,
    "best_params_full": best_params_full,
    "best_val_f1_full": best_val_f1_full,
    "best_test_f1_full": best_global_test_f1,
    "test_f1_all_runs": test_f1_list,
}

joblib.dump(svm_full_artifacts, "best_svm_full_model_3_run.pkl")
print("\nSaved full-feature SVM model (best of multi-run) → best_svm_full_model_3_run.pkl")


[Run 1] Train+Val rows (no missing): 73486
[Run 1] Test rows (no missing):      18394
[Run 1] Train shape: (55114, 66)
[Run 1] Val shape  : (18372, 66)
[Run 1] Test shape : (18394, 66)
[Run 1] Total param combinations: 12


Searching SVM (full-feature) - run 1: 100%|██████████| 12/12 [1:17:13<00:00, 386.16s/it]



[Run 1] Best params on val: {'C': 1, 'class_weight': 'balanced', 'gamma': 'scale', 'kernel': 'rbf'}
[Run 1] Best Val F1 (label=1): 0.2807

[Run 1] Test F1 (label=1): 0.2807

--> New global BEST run so far!
[Global best so far] Test metrics (only non-missing rows):
Accuracy : 0.7376
Precision: 0.1798
Recall   : 0.6399
F1       : 0.2807
ROC-AUC  : 0.7606
Confusion matrix [[TN, FP], [FN, TP]]:
[[12625  4297]
 [  530   942]]

Classification report:
              precision    recall  f1-score   support

           0       0.96      0.75      0.84     16922
           1       0.18      0.64      0.28      1472

    accuracy                           0.74     18394
   macro avg       0.57      0.69      0.56     18394
weighted avg       0.90      0.74      0.79     18394


[Run 2] Train+Val rows (no missing): 73587
[Run 2] Test rows (no missing):      18293
[Run 2] Train shape: (55190, 66)
[Run 2] Val shape  : (18397, 66)
[Run 2] Test shape : (18293, 66)
[Run 2] Total param combinations: 12


Searching SVM (full-feature) - run 2: 100%|██████████| 12/12 [1:16:00<00:00, 380.06s/it]



[Run 2] Best params on val: {'C': 10, 'class_weight': 'balanced', 'gamma': 'scale', 'kernel': 'rbf'}
[Run 2] Best Val F1 (label=1): 0.2801

[Run 2] Test F1 (label=1): 0.2846

--> New global BEST run so far!
[Global best so far] Test metrics (only non-missing rows):
Accuracy : 0.7686
Precision: 0.1885
Recall   : 0.5803
F1       : 0.2846
ROC-AUC  : 0.7408
Confusion matrix [[TN, FP], [FN, TP]]:
[[13218  3624]
 [  609   842]]

Classification report:
              precision    recall  f1-score   support

           0       0.96      0.78      0.86     16842
           1       0.19      0.58      0.28      1451

    accuracy                           0.77     18293
   macro avg       0.57      0.68      0.57     18293
weighted avg       0.90      0.77      0.82     18293


[Run 3] Train+Val rows (no missing): 73482
[Run 3] Test rows (no missing):      18398
[Run 3] Train shape: (55111, 66)
[Run 3] Val shape  : (18371, 66)
[Run 3] Test shape : (18398, 66)
[Run 3] Total param combinations: 12

Searching SVM (full-feature) - run 3: 100%|██████████| 12/12 [1:15:46<00:00, 378.88s/it]



[Run 3] Best params on val: {'C': 1, 'class_weight': 'balanced', 'gamma': 'scale', 'kernel': 'rbf'}
[Run 3] Best Val F1 (label=1): 0.2814

[Run 3] Test F1 (label=1): 0.2757

Test F1 for each run: [0.28073312472060796, 0.2846036842994761, 0.2756589603887498]
Mean Test F1 over 3 runs: 0.2803
Std  Test F1 over 3 runs: 0.0037
Best run index: 2, Best Test F1: 0.2846

[Best run #2] Test metrics (only non-missing rows):
Accuracy : 0.7686
Precision: 0.1885
Recall   : 0.5803
F1       : 0.2846
ROC-AUC  : 0.7408
Confusion matrix [[TN, FP], [FN, TP]]:
[[13218  3624]
 [  609   842]]

Classification report:
              precision    recall  f1-score   support

           0       0.96      0.78      0.86     16842
           1       0.19      0.58      0.28      1451

    accuracy                           0.77     18293
   macro avg       0.57      0.68      0.57     18293
weighted avg       0.90      0.77      0.82     18293


Saved full-feature SVM model (best of multi-run) → best_svm_full_model

In [4]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    confusion_matrix, classification_report, roc_auc_score
)
from sklearn.base import clone
from tqdm import tqdm
import joblib
import numpy as np

n_runs_red = 3
base_seed_red = 42

test_f1_list_red = []              
best_global_test_f1_red = -1.0    
best_run_idx_red = None

best_preprocessor_red = None
best_model_red = None
best_params_red = None
best_val_f1_red = None
best_X_test_red_pre = None
best_y_test_red = None

for i in range(1, n_runs_red + 1):
    print(f"\n================== Reduced Run {i} / {n_runs_red} ==================")
    seed = base_seed_red * i

    X_other, X_test, y_other, y_test = train_test_split(X, y,test_size=0.2,random_state=seed,stratify=y)
    mask_red_other = X_other["seller_avg_score_past"].isna()
    mask_red_test  = X_test["seller_avg_score_past"].isna()
    X_other_red = X_other[mask_red_other].copy()
    y_other_red = y_other[mask_red_other].copy()
    X_test_red  = X_test[mask_red_test].copy()
    y_test_red  = y_test[mask_red_test].copy()
    print(f"[Reduced Run {i}] Train+Val rows (missing): {X_other_red.shape[0]}")
    print(f"[Reduced Run {i}] Test rows (missing)     : {X_test_red.shape[0]}")


    if X_other_red.shape[0] < 50 or y_other_red.nunique() < 2:
        print(f"[Reduced Run {i}] Too few samples or only one class in missing subset, skipping this run.")
        continue

    X_train_red_raw, X_val_red_raw, y_train_red, y_val_red = train_test_split(X_other_red, y_other_red,test_size=0.25,random_state=seed,stratify=y_other_red)


    preproc_red_this_run = clone(preprocessor_reduced)
    preproc_red_this_run.fit(X_train_red_raw)
    X_train_red = preproc_red_this_run.transform(X_train_red_raw)
    X_val_red   = preproc_red_this_run.transform(X_val_red_raw)
    X_test_red_pre = preproc_red_this_run.transform(X_test_red)
    print(f"[Reduced Run {i}] Train shape: {X_train_red.shape}")
    print(f"[Reduced Run {i}] Val shape  : {X_val_red.shape}")
    print(f"[Reduced Run {i}] Test shape : {X_test_red_pre.shape}")

    param_grid_red = {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "rbf"],
        "gamma": ["scale", "auto"],
        "class_weight": ["balanced"],
    }
    param_list_red = list(ParameterGrid(param_grid_red))
    print(f"[Reduced Run {i}] Total param combinations: {len(param_list_red)}")

    best_red_model_run  = None
    best_red_params_run = None
    best_red_f1_run     = -1.0

    for params in tqdm(param_list_red, desc=f"Searching SVM (reduced-feature) - run {i}"):
        model = SVC(
            probability=False,
            random_state=seed,
            **params,
        )
        model.fit(X_train_red, y_train_red)

        y_val_pred_red = model.predict(X_val_red)
        f1_red = f1_score(y_val_red, y_val_pred_red)

        if f1_red > best_red_f1_run:
            best_red_f1_run     = f1_red
            best_red_params_run = params
            best_red_model_run  = model

    print(f"\n[Reduced Run {i}] Best params on val:", best_red_params_run)
    print(f"[Reduced Run {i}] Best Val F1 (label=1): {best_red_f1_run:.4f}")


    y_test_red_pred = best_red_model_run.predict(X_test_red_pre)
    test_f1_red   = f1_score(y_test_red, y_test_red_pred)
    test_acc_red  = accuracy_score(y_test_red, y_test_red_pred)
    test_prec_red = precision_score(y_test_red, y_test_red_pred, zero_division=0)
    test_rec_red  = recall_score(y_test_red, y_test_red_pred, zero_division=0)
    cm_red        = confusion_matrix(y_test_red, y_test_red_pred)
    test_scores_red = best_red_model_run.decision_function(X_test_red_pre)
    test_auc_red  = roc_auc_score(y_test_red, test_scores_red)
    print(f"\n[Reduced Run {i}] Test F1 (missing rows, label=1): {test_f1_red:.4f}")
    test_f1_list_red.append(test_f1_red)

    if test_f1_red > best_global_test_f1_red:
        best_global_test_f1_red   = test_f1_red
        best_run_idx_red          = i
        best_preprocessor_red     = preproc_red_this_run
        best_model_red            = best_red_model_run
        best_params_red           = best_red_params_run
        best_val_f1_red           = best_red_f1_run
        best_X_test_red_pre       = X_test_red_pre
        best_y_test_red           = y_test_red

        print("\n--> New global BEST reduced run so far!")
        print("[Global best reduced so far] Test metrics (missing rows):")
        print(f"Accuracy : {test_acc_red:.4f}")
        print(f"Precision: {test_prec_red:.4f}")
        print(f"Recall   : {test_rec_red:.4f}")
        print(f"F1       : {test_f1_red:.4f}")
        print(f"ROC-AUC  : {test_auc_red:.4f}")
        print("Confusion matrix [[TN, FP], [FN, TP]]:")
        print(cm_red)
        print("\nClassification report:")
        print(classification_report(y_test_red, y_test_red_pred))

test_f1_array_red = np.array(test_f1_list_red)
print("\n================== Reduced Summary over runs ==================")
print("Reduced Test F1 for each run:", test_f1_list_red)
if len(test_f1_array_red) > 0:
    print(f"Mean Reduced Test F1 over {len(test_f1_array_red)} runs: {test_f1_array_red.mean():.4f}")
    print(f"Std  Reduced Test F1 over {len(test_f1_array_red)} runs: {test_f1_array_red.std():.4f}")
else:
    print(" No valid reduced runs (maybe missing subset too small / single class in all runs).")

print(f"Best reduced run index: {best_run_idx_red}, Best Test F1: {best_global_test_f1_red:.4f}")

y_best_test_red_pred = best_model_red.predict(best_X_test_red_pre)
best_test_f1_red   = f1_score(best_y_test_red, y_best_test_red_pred)
best_test_acc_red  = accuracy_score(best_y_test_red, y_best_test_red_pred)
best_test_prec_red = precision_score(best_y_test_red, y_best_test_red_pred, zero_division=0)
best_test_rec_red  = recall_score(best_y_test_red, y_best_test_red_pred, zero_division=0)
best_cm_red        = confusion_matrix(best_y_test_red, y_best_test_red_pred)
best_scores_red    = best_model_red.decision_function(best_X_test_red_pre)
best_test_auc_red  = roc_auc_score(best_y_test_red, best_scores_red)

print("\n================== Final best reduced-run metrics ==================")
print(f"[Best reduced run #{best_run_idx_red}] Test metrics (missing rows):")
print(f"Accuracy : {best_test_acc_red:.4f}")
print(f"Precision: {best_test_prec_red:.4f}")
print(f"Recall   : {best_test_rec_red:.4f}")
print(f"F1       : {best_test_f1_red:.4f}")
print(f"ROC-AUC  : {best_test_auc_red:.4f}")
print("Confusion matrix [[TN, FP], [FN, TP]]:")
print(best_cm_red)
print("\nClassification report:")
print(classification_report(best_y_test_red, y_best_test_red_pred))

svm_reduced_artifacts = {
    "preprocessor_reduced": best_preprocessor_red,
    "model_reduced": best_model_red,
    "best_params_reduced": best_params_red,
    "best_val_f1_reduced": best_val_f1_red,
    "best_test_f1_reduced": best_global_test_f1_red,
    "test_f1_all_runs_reduced": test_f1_list_red,
}

joblib.dump(svm_reduced_artifacts, "best_svm_reduced_model_3_run.pkl")
print("\nSaved reduced-feature SVM model (best of multi-run) → best_svm_reduced_model_3_run.pkl")


[Reduced Run 1] Train+Val rows (missing): 4768
[Reduced Run 1] Test rows (missing)     : 1170
[Reduced Run 1] Train shape: (3576, 64)
[Reduced Run 1] Val shape  : (1192, 64)
[Reduced Run 1] Test shape : (1170, 64)
[Reduced Run 1] Total param combinations: 12


Searching SVM (reduced-feature) - run 1: 100%|██████████| 12/12 [00:09<00:00,  1.22it/s]



[Reduced Run 1] Best params on val: {'C': 1, 'class_weight': 'balanced', 'gamma': 'auto', 'kernel': 'rbf'}
[Reduced Run 1] Best Val F1 (label=1): 0.2542

[Reduced Run 1] Test F1 (missing rows, label=1): 0.2173

--> New global BEST reduced run so far!
[Global best reduced so far] Test metrics (missing rows):
Accuracy : 0.6368
Precision: 0.1323
Recall   : 0.6082
F1       : 0.2173
ROC-AUC  : 0.7115
Confusion matrix [[TN, FP], [FN, TP]]:
[[686 387]
 [ 38  59]]

Classification report:
              precision    recall  f1-score   support

           0       0.95      0.64      0.76      1073
           1       0.13      0.61      0.22        97

    accuracy                           0.64      1170
   macro avg       0.54      0.62      0.49      1170
weighted avg       0.88      0.64      0.72      1170


[Reduced Run 2] Train+Val rows (missing): 4667
[Reduced Run 2] Test rows (missing)     : 1271
[Reduced Run 2] Train shape: (3500, 64)
[Reduced Run 2] Val shape  : (1167, 64)
[Reduced Run

Searching SVM (reduced-feature) - run 2: 100%|██████████| 12/12 [00:08<00:00,  1.36it/s]



[Reduced Run 2] Best params on val: {'C': 10, 'class_weight': 'balanced', 'gamma': 'auto', 'kernel': 'rbf'}
[Reduced Run 2] Best Val F1 (label=1): 0.2661

[Reduced Run 2] Test F1 (missing rows, label=1): 0.2756

--> New global BEST reduced run so far!
[Global best reduced so far] Test metrics (missing rows):
Accuracy : 0.6939
Precision: 0.1766
Recall   : 0.6271
F1       : 0.2756
ROC-AUC  : 0.7419
Confusion matrix [[TN, FP], [FN, TP]]:
[[808 345]
 [ 44  74]]

Classification report:
              precision    recall  f1-score   support

           0       0.95      0.70      0.81      1153
           1       0.18      0.63      0.28       118

    accuracy                           0.69      1271
   macro avg       0.56      0.66      0.54      1271
weighted avg       0.88      0.69      0.76      1271


[Reduced Run 3] Train+Val rows (missing): 4772
[Reduced Run 3] Test rows (missing)     : 1166
[Reduced Run 3] Train shape: (3579, 64)
[Reduced Run 3] Val shape  : (1193, 64)
[Reduced Ru

Searching SVM (reduced-feature) - run 3: 100%|██████████| 12/12 [00:09<00:00,  1.27it/s]



[Reduced Run 3] Best params on val: {'C': 0.1, 'class_weight': 'balanced', 'gamma': 'scale', 'kernel': 'linear'}
[Reduced Run 3] Best Val F1 (label=1): 0.2667

[Reduced Run 3] Test F1 (missing rows, label=1): 0.2980

--> New global BEST reduced run so far!
[Global best reduced so far] Test metrics (missing rows):
Accuracy : 0.6647
Precision: 0.1935
Recall   : 0.6484
F1       : 0.2980
ROC-AUC  : 0.7301
Confusion matrix [[TN, FP], [FN, TP]]:
[[692 346]
 [ 45  83]]

Classification report:
              precision    recall  f1-score   support

           0       0.94      0.67      0.78      1038
           1       0.19      0.65      0.30       128

    accuracy                           0.66      1166
   macro avg       0.57      0.66      0.54      1166
weighted avg       0.86      0.66      0.73      1166


Reduced Test F1 for each run: [0.21731123388581952, 0.2756052141527002, 0.2980251346499102]
Mean Reduced Test F1 over 3 runs: 0.2636
Std  Reduced Test F1 over 3 runs: 0.0340
Best r