In [46]:
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
from hyperparameters import hyperparameters
import matplotlib as mpl

from IPython.display import display
# Start from the 'bmh' stylesheet, then layer notebook-wide overrides on top.
plt.style.use('bmh')
plot_overrides = {
    "grid.linestyle": "dashed",
    "axes.facecolor": "white",
    "axes.spines.top": False,
    "axes.spines.right": False,
    "legend.frameon": False,
    "figure.figsize": (8, 5),
    "figure.dpi": 300,
}
mpl.rcParams.update(plot_overrides)
%matplotlib inline

# Silence warnings (the original intent: sklearn deprecation notices) by
# replacing `warnings.warn` with a function that does nothing.
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing; installed as `warnings.warn` below."""
    return None


warnings.warn = warn

# Seed NumPy's legacy global random generator.
np.random.seed(42)

# Raise the summarization threshold to sys.maxsize so arrays are printed in full.
np.set_printoptions(threshold=sys.maxsize)

In [47]:
# TODO: add a test for the dataset that includes the creatina column.
#
# Values of n_features noted by the author (the count includes survive7y):
#   18      -> dataset without thyroid (default)
#   27      -> dataset with thyroid
#   23 / 32 -> the same datasets with the columns that have missing values
n_features = 18

# Any feature count other than 18 or 27 lives in an additional sub-folder.
extra_path = n_features not in (18, 27)
dropped_na_key = "dropped_na/"
mean_key = "mean/"
key = mean_key

# Sub-folder suffix shared by the three locations (empty when not needed).
subfolder = key if extra_path else ''
path = f"data/{n_features}features/{subfolder}"
path_models = f"models/{n_features}features/{subfolder}"
output_models = f"models_output/{n_features}features/{subfolder}"

for location in (path_models, path, output_models):
    print(location)

models/18features/
data/18features/
models_output/18features/


In [48]:
# Load the three pre-split partitions; the first CSV column becomes the index.
df_train = pd.read_csv(f"{path}train.csv", index_col=0)
df_valid = pd.read_csv(f"{path}valid.csv", index_col=0)
df_test = pd.read_csv(f"{path}test.csv", index_col=0)

# Total number of rows across the partitions, then the number of columns.
print(sum(len(frame) for frame in (df_train, df_valid, df_test)))
print(df_train.shape[1])

train = df_train.to_numpy()
valid = df_valid.to_numpy()
test = df_test.to_numpy()

# The last column is the target (Survive7y, per the original note):
#   y_* holds that column, X_* holds every other column.
X_train, y_train = train[:, :-1], train[:, -1]
X_valid, y_valid = valid[:, :-1], valid[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]
feat_names = list(df_train.columns)

# Show how the target values are distributed within each partition.
from collections import Counter
for targets in (y_train, y_valid, y_test):
    print(Counter(targets))

# Preprocessor built from the column names; per the original note it covers
# the numerical features that can be standardized.
from utils import get_preprocess_std_num
preprocess_std = get_preprocess_std_num(feat_names)

# Fit on the training partition only, then transform train and validation.
process_tmp = preprocess_std.fit(X_train)
X_train_std = process_tmp.transform(X_train)
X_valid_std = process_tmp.transform(X_valid)

# Note: the *_std arrays do not need to be passed to the train function; per
# the original note, the pipeline transforms the dataset itself when predicting.
# To inspect the transformed validation set:
#df_scaled = pd.DataFrame(X_valid_std,columns = preprocess_std.get_feature_names_out())
#display(df_scaled)

6667
18
Counter({np.float64(1.0): 3494, np.float64(0.0): 505})
Counter({np.float64(1.0): 1165, np.float64(0.0): 169})
Counter({np.float64(1.0): 1165, np.float64(0.0): 169})


### Training


In [49]:
from functools import partial
from train import report, evaluate, train_and_evaluate

# Keyword arguments shared by every model-training cell.
shared_train_kwargs = dict(
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    scoring="f1_macro",
    iter=5000,
    save=True,
    path_models=path_models,
    output_models=output_models,
)

# `preprocess_std` is bound as the first positional argument; each cell then
# only supplies `model`, `hyperparams` and `savename`.
train_partial = partial(train_and_evaluate, preprocess_std, **shared_train_kwargs)

In [50]:
from sklearn.linear_model import LogisticRegression

# class_weight defaults to None (every class weighted 1); "balanced" instead
# uses n_samples / (n_classes * np.bincount(y)).
model = LogisticRegression(class_weight="balanced")
hyperparams = hyperparameters["lr"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="lr", hyperparams=hyperparams, model=model)



Testing on training set:
              precision    recall  f1-score   support

         0.0      0.318     0.709     0.439       505
         1.0      0.949     0.780     0.856      3494

    accuracy                          0.771      3999
   macro avg      0.633     0.745     0.648      3999
weighted avg      0.869     0.771     0.804      3999

auc macro 0.824
confusion matrix
[[ 358  147]
 [ 768 2726]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.297     0.698     0.417       169
         1.0      0.946     0.761     0.843      1165

    accuracy                          0.753      1334
   macro avg      0.621     0.729     0.630      1334
weighted avg      0.863     0.753     0.789      1334

auc macro 0.827
confusion matrix
[[118  51]
 [279 886]]
Model rank: 1
Mean validation score: 0.648 (std: 0.023)
Parameters: {'model__C': 9, 'model__dual': True, 'model__max_iter': 66, 'model__penalty': 'l2', 'model__solver': 'liblinea

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,True
,tol,0.0001
,C,9
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,66


In [51]:
from sklearn.svm import SVC

# Class-balanced SVC; probability=True enables probability estimates.
model = SVC(class_weight="balanced", probability=True)
hyperparams = hyperparameters["svc"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="svc", hyperparams=hyperparams, model=model)



Testing on training set:
              precision    recall  f1-score   support

         0.0      0.603     0.560     0.581       505
         1.0      0.937     0.947     0.942      3494

    accuracy                          0.898      3999
   macro avg      0.770     0.754     0.762      3999
weighted avg      0.895     0.898     0.896      3999

auc macro 0.934
confusion matrix
[[ 283  222]
 [ 186 3308]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.388     0.367     0.377       169
         1.0      0.909     0.916     0.912      1165

    accuracy                          0.846      1334
   macro avg      0.648     0.641     0.645      1334
weighted avg      0.843     0.846     0.845      1334

auc macro 0.744
confusion matrix
[[  62  107]
 [  98 1067]]
Model rank: 1
Mean validation score: 0.641 (std: 0.027)
Parameters: {'model__C': 443, 'model__coef0': np.float64(0.30539117137160443), 'model__degree': 9, 'model__gamma': 'au

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,443
,kernel,'poly'
,degree,9
,gamma,'auto'
,coef0,np.float64(0....9117137160443)
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,'balanced'


In [52]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default constructor arguments; the
# search space comes from the "knn" entry of `hyperparameters`.
model = KNeighborsClassifier()
hyperparams = hyperparameters["knn"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="knn", hyperparams=hyperparams, model=model)

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.668     0.347     0.456       505
         1.0      0.912     0.975     0.942      3494

    accuracy                          0.896      3999
   macro avg      0.790     0.661     0.699      3999
weighted avg      0.881     0.896     0.881      3999

auc macro 0.906
confusion matrix
[[ 175  330]
 [  87 3407]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.531     0.308     0.390       169
         1.0      0.905     0.961     0.932      1165

    accuracy                          0.878      1334
   macro avg      0.718     0.634     0.661      1334
weighted avg      0.858     0.878     0.863      1334

auc macro 0.746
confusion matrix
[[  52  117]
 [  46 1119]]
Model rank: 1
Mean validation score: 0.624 (std: 0.031)
Parameters: {'model__algorithm': 'kd_tree', 'model__leaf_size': 21, 'model__n_neighbors': 6, 'model__weights': 'unifor

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_neighbors,6
,weights,'uniform'
,algorithm,'kd_tree'
,leaf_size,21
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [53]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default constructor arguments; the search space comes
# from the "rf" entry of `hyperparameters`.
model = RandomForestClassifier()
hyperparams = hyperparameters["rf"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="rf", hyperparams=hyperparams, model=model)

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.619     0.869     0.723       505
         1.0      0.980     0.923     0.950      3494

    accuracy                          0.916      3999
   macro avg      0.800     0.896     0.837      3999
weighted avg      0.934     0.916     0.922      3999

auc macro 0.969
confusion matrix
[[ 439   66]
 [ 270 3224]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.404     0.538     0.462       169
         1.0      0.930     0.885     0.907      1165

    accuracy                          0.841      1334
   macro avg      0.667     0.712     0.684      1334
weighted avg      0.863     0.841     0.850      1334

auc macro 0.825
confusion matrix
[[  91   78]
 [ 134 1031]]
Model rank: 1
Mean validation score: 0.687 (std: 0.036)
Parameters: {'model__class_weight': 'balanced_subsample', 'model__criterion': 'entropy', 'model__max_features': 'sqrt'

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_estimators,188
,criterion,'entropy'
,max_depth,
,min_samples_split,3
,min_samples_leaf,4
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [54]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier  # imported but not used in this cell

# AdaBoost with default constructor arguments; the search space comes from
# the "adaboost" entry of `hyperparameters`.
model = AdaBoostClassifier()
hyperparams = hyperparameters["adaboost"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="adaboost", hyperparams=hyperparams, model=model)

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.628     0.248     0.355       505
         1.0      0.900     0.979     0.938      3494

    accuracy                          0.886      3999
   macro avg      0.764     0.613     0.646      3999
weighted avg      0.866     0.886     0.864      3999

auc macro 0.827
confusion matrix
[[ 125  380]
 [  74 3420]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.547     0.207     0.300       169
         1.0      0.894     0.975     0.933      1165

    accuracy                          0.878      1334
   macro avg      0.721     0.591     0.617      1334
weighted avg      0.850     0.878     0.853      1334

auc macro 0.834
confusion matrix
[[  35  134]
 [  29 1136]]
Model rank: 1
Mean validation score: 0.632 (std: 0.028)
Parameters: {'model__learning_rate': np.float64(1.101803700250848), 'model__n_estimators': 77}

Model rank: 2
Mean val

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,estimator,
,n_estimators,77
,learning_rate,np.float64(1.101803700250848)
,algorithm,'deprecated'
,random_state,


In [55]:
from sklearn.neural_network import MLPClassifier
import random  # imported but not used in this cell

# Multi-layer perceptron with default constructor arguments; the search space
# comes from the "nn" entry of `hyperparameters`.
model = MLPClassifier()
hyperparams = hyperparameters["nn"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="nn", hyperparams=hyperparams, model=model)

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.653     0.257     0.369       505
         1.0      0.901     0.980     0.939      3494

    accuracy                          0.889      3999
   macro avg      0.777     0.619     0.654      3999
weighted avg      0.870     0.889     0.867      3999

auc macro 0.839
confusion matrix
[[ 130  375]
 [  69 3425]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.657     0.272     0.385       169
         1.0      0.903     0.979     0.939      1165

    accuracy                          0.890      1334
   macro avg      0.780     0.626     0.662      1334
weighted avg      0.872     0.890     0.869      1334

auc macro 0.832
confusion matrix
[[  46  123]
 [  24 1141]]
Model rank: 1
Mean validation score: 0.659 (std: 0.016)
Parameters: {'model__alpha': np.float64(0.08373362495306058), 'model__early_stopping': True, 'model__hidden_layer_size

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,hidden_layer_sizes,"[275, 137]"
,activation,'relu'
,solver,'adam'
,alpha,np.float64(0....3362495306058)
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,np.float64(0....1147754185757)
,power_t,0.5
,max_iter,465
,shuffle,True


In [56]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with default constructor arguments; the search space
# comes from the "gb" entry of `hyperparameters`.
model = GradientBoostingClassifier()
hyperparams = hyperparameters["gb"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="gb", hyperparams=hyperparams, model=model)

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.772     0.422     0.545       505
         1.0      0.922     0.982     0.951      3494

    accuracy                          0.911      3999
   macro avg      0.847     0.702     0.748      3999
weighted avg      0.903     0.911     0.900      3999

auc macro 0.881
confusion matrix
[[ 213  292]
 [  63 3431]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.531     0.302     0.385       169
         1.0      0.905     0.961     0.932      1165

    accuracy                          0.878      1334
   macro avg      0.718     0.632     0.659      1334
weighted avg      0.857     0.878     0.863      1334

auc macro 0.811
confusion matrix
[[  51  118]
 [  45 1120]]
Model rank: 1
Mean validation score: 0.652 (std: 0.031)
Parameters: {'model__learning_rate': np.float64(0.14180003010305958), 'model__max_depth': 4, 'model__max_features': No

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,loss,'log_loss'
,learning_rate,np.float64(0....0003010305958)
,n_estimators,84
,subsample,0.25
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,4
,min_impurity_decrease,0.0


In [57]:
# Author's note: run this cell from a browser-based notebook, not from Jupyter
# inside VS Code.
import os
# Disabled workaround left in place by the author:
#os.environ['KMP_DUPLICATE_LIB_OK']='True'

import xgboost as xgb

# XGBoost classifier with n_jobs=1; the search space comes from the "xgb"
# entry of `hyperparameters`.
model = xgb.XGBClassifier(n_jobs=1)
hyperparams = hyperparameters["xgb"]

# Last expression of the cell, so the returned object is displayed.
train_partial(savename="xgb", hyperparams=hyperparams, model=model)

Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamma", "max_depth", "subsample" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "gamm

Testing on training set:
              precision    recall  f1-score   support

         0.0      0.502     0.547     0.523       505
         1.0      0.934     0.922     0.928      3494

    accuracy                          0.874      3999
   macro avg      0.718     0.734     0.725      3999
weighted avg      0.879     0.874     0.876      3999

auc macro 0.859
confusion matrix
[[ 276  229]
 [ 274 3220]]
Testing on validation set:
              precision    recall  f1-score   support

         0.0      0.427     0.503     0.462       169
         1.0      0.926     0.902     0.914      1165

    accuracy                          0.852      1334
   macro avg      0.677     0.703     0.688      1334
weighted avg      0.863     0.852     0.857      1334

auc macro 0.831
confusion matrix
[[  85   84]
 [ 114 1051]]
Model rank: 1
Mean validation score: 0.700 (std: 0.039)
Parameters: {'model__alpha': np.float64(0.3721392552382474), 'model__booster': 'dart', 'model__eta': np.float64(0.3067

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('stand', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,'dart'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [58]:

# Overlap check: count how many first-column values of the training file
# (presumably row identifiers — confirm) also appear in the first column of
# the validation and test files of the 27-feature dataset.
# NOTE(review): this rebinds df_train / df_valid / df_test from the
# data-loading cell to the 27-feature files (read here without index_col=0),
# regardless of n_features — confirm that is intended.
mean_path = "data/27features/"
df_train = pd.read_csv(f"{mean_path}train.csv")
df_valid = pd.read_csv(f"{mean_path}valid.csv")
df_test = pd.read_csv(f"{mean_path}test.csv")

# Extract each first column once instead of on every loop iteration.
train_ids = df_train.iloc[:, 0].to_numpy()
valid_ids = df_valid.iloc[:, 0].to_numpy()
test_ids = df_test.iloc[:, 0].to_numpy()

# Spot check kept from the original cell: is the value 1 in the validation column?
print(1 in valid_ids)

# Vectorized membership tests: one boolean per training value.
in_valid = np.isin(train_ids, valid_ids)
in_test = np.isin(train_ids, test_ids)

# Print every training value that also occurs in the validation file,
# in training order (duplicates in the training column are each reported).
for val in train_ids[in_valid]:
    print(val)

sum_valid = int(in_valid.sum())
sum_test = int(in_test.sum())
print("#######################")
print(sum_valid)
print(sum_test)

False
#######################
0
0


In [59]:
import pandas as pd
from pathlib import Path
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Saved-model file stem -> display name. The dict's insertion order also
# determines the order of the rows in the summary table.
model_names_map = {
    'lr': 'Logistic Regression',
    'svc': 'Support Vector Machine',
    'knn': 'K-Nearest Neighbors',
    'rf': 'Random Forest',
    'adaboost': 'AdaBoost',
    'nn': 'Neural Network (MLP)',
    'gb': 'Gradient Boosting',
    'xgb': 'XGBoost'
}

results_summary = []

# Iterate the mapping directly so the list of keys is not maintained twice.
for model_key, model_name in model_names_map.items():
    model_path = f"{path_models}{model_key}.joblib"
    model = joblib.load(model_path)

    # VALIDATION set
    y_pred = model.predict(X_valid)
    y_proba = model.predict_proba(X_valid)

    # Stored under its own name so the `report` helper imported from `train`
    # earlier in the notebook is not overwritten by a dict.
    report_dict = classification_report(y_valid, y_pred, output_dict=True)

    # Take the "macro avg" row of the classification report.
    macro_avg = report_dict['macro avg']
    precision_macro = macro_avg['precision']
    recall_macro = macro_avg['recall']
    f1_macro = macro_avg['f1-score']

    # "Macro" AUC: in the binary case it equals the standard AUC, computed
    # here from the scores in column 1 of predict_proba.
    auc_macro = roc_auc_score(y_valid, y_proba[:, 1])

    results_summary.append({
        'Model': model_name,
        'Precision (macro)': f"{precision_macro:.3f}",
        'Recall (macro)': f"{recall_macro:.3f}",
        'F1-Score (macro)': f"{f1_macro:.3f}",
        'AUC (macro)': f"{auc_macro:.3f}",
    })

# Metric columns hold 3-decimal strings, so the table and the CSV show the
# same rounded values.
df_summary = pd.DataFrame(results_summary)

print(f"\n{'='*90}")
print(f"SUMMARY TABLE - VALIDATION SET ({n_features} features)")
print(f"{'='*90}\n")
print(df_summary.to_string(index=False))
print(f"\n{'='*90}\n")

# NOTE(review): unlike path_models, this location has no mean/ or dropped_na/
# sub-folder, so both variants of a feature count would write the same file —
# confirm whether that is intended.
output_path = f"figures/{n_features}features/models_summary_validation.csv"
# Create the target directory when missing so to_csv does not fail on a fresh checkout.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_summary.to_csv(output_path, index=False)
print(f"Table saved to: {output_path}")

# Best model per metric, selected on the rounded values; idxmax returns the
# first row in case of ties.
best_f1_macro_idx = df_summary['F1-Score (macro)'].astype(float).idxmax()
best_auc_idx = df_summary['AUC (macro)'].astype(float).idxmax()
best_recall_idx = df_summary['Recall (macro)'].astype(float).idxmax()

print("\nBEST MODELS BY METRIC:")
print(f"  - Best F1-Score (macro): {df_summary.loc[best_f1_macro_idx, 'Model']} "
      f"({df_summary.loc[best_f1_macro_idx, 'F1-Score (macro)']})")
print(f"  - Best AUC (macro): {df_summary.loc[best_auc_idx, 'Model']} "
      f"({df_summary.loc[best_auc_idx, 'AUC (macro)']})")
print(f"  - Best Recall (macro): {df_summary.loc[best_recall_idx, 'Model']} "
      f"({df_summary.loc[best_recall_idx, 'Recall (macro)']})")



SUMMARY TABLE - VALIDATION SET (18 features)

                 Model Precision (macro) Recall (macro) F1-Score (macro) AUC (macro)
   Logistic Regression             0.621          0.729            0.630       0.827
Support Vector Machine             0.648          0.641            0.645       0.744
   K-Nearest Neighbors             0.718          0.634            0.661       0.746
         Random Forest             0.667          0.712            0.684       0.825
              AdaBoost             0.721          0.591            0.617       0.834
  Neural Network (MLP)             0.780          0.626            0.662       0.832
     Gradient Boosting             0.718          0.632            0.659       0.811
               XGBoost             0.677          0.703            0.688       0.831


Table saved to: figures/18features/models_summary_validation.csv

BEST MODELS BY METRIC:
  - Best F1-Score (macro): XGBoost (0.688)
  - Best AUC (macro): AdaBoost (0.834)
  - Best Recall

In [60]:
from auto_export_notebook import export_current_notebook

# Export the current notebook; the helper is asked to wait for the notebook
# to be saved to disk (Auto Save) first, with a timeout of 8 seconds.
export_options = dict(wait_for_disk_save=True, wait_timeout_sec=8.0)
html_path = export_current_notebook(globals(), **export_options)
print("Exported to:", html_path)


<IPython.core.display.Javascript object>

Exported to: /home/ileniag/buzi_ml4cad_0/exported_notebooks/2_classifiers_18features_20251031_234821.html
