In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import os
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from scipy.stats import randint, uniform
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from scipy.stats import randint, uniform

In [2]:
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

In [3]:
data_path = "../data/processed/appendicitis_cleaned_data_v1s.csv"
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,Age,BMI,Sex,Height,Weight,Diagnosis,Appendix_on_US,Migratory_Pain,Lower_Right_Abd_Pain,Contralateral_Rebound_Tenderness,...,CRP,Dysuria,Stool,Peritonitis,Psoas_Sign,US_Performed,US_Number,Free_Fluids,Age_Group,BMI_Category
0,12.68,16.9,female,148.0,37.0,appendicitis,yes,no,yes,yes,...,0.0,no,normal,no,yes,yes,882.0,no,Preteen,Underweight
1,14.1,31.9,male,147.0,69.5,no appendicitis,no,yes,yes,yes,...,3.0,yes,normal,no,yes,yes,883.0,no,Teenager,Obese
2,14.14,23.3,female,163.0,62.0,no appendicitis,no,no,yes,yes,...,3.0,no,constipation,no,yes,yes,884.0,no,Teenager,Normal
3,16.37,20.6,female,165.0,56.0,no appendicitis,no,yes,yes,no,...,0.0,yes,normal,no,yes,yes,886.0,no,Teenager,Normal
4,11.08,16.9,female,163.0,45.0,appendicitis,yes,no,yes,yes,...,0.0,no,constipation,no,yes,yes,887.0,no,Preteen,Underweight


In [4]:
df.duplicated().sum()

np.int64(0)

In [5]:
df.shape

(589, 30)

In [6]:
df.isnull().sum().sum()

np.int64(0)

## Before any preprocessing, lets divide the data

In [7]:
X = df.drop(columns=["Diagnosis", "BMI", "Age"], axis=1)
y_raw = df["Diagnosis"]

In [8]:
le = LabelEncoder()
y = le.fit_transform(y_raw)
joblib.dump(le, "../artifacts/label_encoder.joblib")

['../artifacts/label_encoder.joblib']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

In [10]:
print("Classes (original):", list(le.classes_))
print("Train size:", X_train.shape, "Test size:", X_test.shape)

Classes (original): ['appendicitis', 'no appendicitis']
Train size: (471, 27) Test size: (118, 27)


## Preprocessing pipeline

In [11]:
num_features = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_features = X.select_dtypes(include=["object"]).columns.tolist()

print("Numerical features:", num_features)
print("Categorical features:", cat_features)

Numerical features: ['Height', 'Weight', 'Body_Temperature', 'WBC_Count', 'RBC_Count', 'Hemoglobin', 'RDW', 'Thrombocyte_Count', 'CRP', 'US_Number']
Categorical features: ['Sex', 'Appendix_on_US', 'Migratory_Pain', 'Lower_Right_Abd_Pain', 'Contralateral_Rebound_Tenderness', 'Coughing_Pain', 'Nausea', 'Loss_of_Appetite', 'Neutrophilia', 'Dysuria', 'Stool', 'Peritonitis', 'Psoas_Sign', 'US_Performed', 'Free_Fluids', 'Age_Group', 'BMI_Category']


In [12]:
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_features),
        ('cat', categorical_transformer, cat_features)
    ]
)



In [13]:
joblib.dump(preprocessor, "../mlflow/artifacts/prepocessor.joblib")

['../mlflow/artifacts/prepocessor.joblib']

In [14]:
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

print("Preprocessed train shape:", X_train_preprocessed.shape)

Preprocessed train shape: (471, 49)


## Model pipelines

#### Logistic Regression 

In [15]:
lr_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("model", LogisticRegression(
        penalty="l1", solver="saga", max_iter=5000, random_state=RANDOM_STATE
    ))
])

In [16]:
rf_pipeline = Pipeline(steps=[
("preprocessor", preprocessor),
("model", RandomForestClassifier(random_state=RANDOM_STATE))
])

In [17]:
xgb_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("model", XGBClassifier(
        eval_metric="logloss", random_state=RANDOM_STATE
    ))
])

In [18]:
# quick sanity check
for name, pipe in [
    ("Random Forest", rf_pipeline),
    ("Logistic Regression", lr_pipeline)
]:
    pipe.fit(X_train, y_train)
    acc = pipe.score(X_test, y_test)
    print(f"{name} test accuracy: {acc:.3f}")

Random Forest test accuracy: 0.805
Logistic Regression test accuracy: 0.822


In [19]:
joblib.dump(rf_pipeline, "../mlflow/artifacts/rf_pipeline_base.joblib")
joblib.dump(lr_pipeline, "../mlflow/artifacts/lr_pipeline_base.joblib")
joblib.dump(xgb_pipeline, "../mlflow/artifacts/xgb_pipeline_base.joblib")

['../mlflow/artifacts/xgb_pipeline_base.joblib']

### Cross-validation and hyperparameter search

In [20]:
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

In [21]:
rf_param_dist = {
    "model__n_estimators": randint(100, 500),
    "model__max_depth": randint(3, 20),
    "model__min_samples_split": randint(2, 10),
    "model__min_samples_leaf": randint(1, 5),
    "model__bootstrap": [True, False],
    "model__max_features": [None, 'sqrt', 'log2']
}

In [22]:
lr_param_dist = {
    "model__C": uniform(0.01, 10),
    "model__fit_intercept": [True, False]
}

In [23]:
xgb_param_dist = {
    "model__n_estimators": randint(100, 500),
    "model__max_depth": randint(3, 20),
    "model__learning_rate": uniform(0.01, 0.3),
    "model__subsample": uniform(0.6, 0.4),
    "model__colsample_bytree": uniform(0.6, 0.4),
    "model__gamma": uniform(0, 0.4)
}

In [24]:
search_config = {
    "cv": cv_strategy,
    "n_iter": 20,
    "scoring": "recall",
    "n_jobs": -1,
    "verbose": 2,
    "random_state": 42
}

In [25]:
rf_search = RandomizedSearchCV(rf_pipeline, rf_param_dist, **search_config)
lr_search = RandomizedSearchCV(lr_pipeline, lr_param_dist, **search_config)
xgb_search = RandomizedSearchCV(xgb_pipeline, xgb_param_dist, **search_config)

In [26]:
rf_search.fit(X_train, y_train)
print("Best RF params:", rf_search.best_params_)
print("Best RF CV accuracy:", rf_search.best_score_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[CV] END model__bootstrap=True, model__max_depth=17, model__max_features=log2, model__min_samples_leaf=4, model__min_samples_split=6, model__n_estimators=120; total time=   0.4s
[CV] END model__bootstrap=True, model__max_depth=17, model__max_features=log2, model__min_samples_leaf=4, model__min_samples_split=6, model__n_estimators=120; total time=   0.3s
[CV] END model__bootstrap=True, model__max_depth=17, model__max_features=log2, model__min_samples_leaf=4, model__min_samples_split=6, model__n_estimators=120; total time=   0.3s
[CV] END model__bootstrap=True, model__max_depth=13, model__max_features=log2, model__min_samples_leaf=4, model__min_samples_split=6, model__n_estimators=199; total time=   0.5s
[CV] END model__bootstrap=True, model__max_depth=13, model__max_features=log2, model__min_samples_leaf=4, model__min_samples_split=6, model__n_estimators=199; total time=   0.5s
[CV] END model__bootstrap=True, model__max_depth=17, model__max_features=log2, model__min_samples_leaf=4, mode

In [27]:
lr_search.fit(X_train, y_train)
print("Best LR params:", lr_search.best_params_)
print("Best LR CV accuracy:", lr_search.best_score_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] END model__C=1.844347898661638, model__fit_intercept=False; total time=   0.3s
[CV] END model__C=1.844347898661638, model__fit_intercept=False; total time=   0.3s
[CV] END model__C=1.844347898661638, model__fit_intercept=False; total time=   0.4s
[CV] END model__C=3.7554011884736247, model__fit_intercept=True; total time=   0.4s
[CV] END model__C=1.844347898661638, model__fit_intercept=False; total time=   0.4s
[CV] END model__C=1.844347898661638, model__fit_intercept=False; total time=   0.4s
[CV] END model__C=4.468327528535911, model__fit_intercept=True; total time=   0.4s
[CV] END model__C=3.7554011884736247, model__fit_intercept=True; total time=   0.4s
[CV] END model__C=3.7554011884736247, model__fit_intercept=True; total time=   0.5s
[CV] END model__C=4.468327528535911, model__fit_intercept=True; total time=   0.5s
[CV] END model__C=3.7554011884736247, model__fit_intercept=True; total time=   0.5s
[CV] END model__

In [None]:
xgb_search.fit(X_train, y_train)
print("Best XGB params:", xgb_search.best_params_)
print("Best XGB CV accuracy:", xgb_search.best_score_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] END model__colsample_bytree=0.749816047538945, model__gamma=0.3802857225639665, model__learning_rate=0.22959818254342154, model__max_depth=9, model__n_estimators=221, model__subsample=0.662397808134481; total time=   0.1s
[CV] END model__colsample_bytree=0.749816047538945, model__gamma=0.3802857225639665, model__learning_rate=0.22959818254342154, model__max_depth=9, model__n_estimators=221, model__subsample=0.662397808134481; total time=   0.1s
[CV] END model__colsample_bytree=0.749816047538945, model__gamma=0.3802857225639665, model__learning_rate=0.22959818254342154, model__max_depth=9, model__n_estimators=221, model__subsample=0.662397808134481; total time=   0.1s
[CV] END model__colsample_bytree=0.749816047538945, model__gamma=0.3802857225639665, model__learning_rate=0.22959818254342154, model__max_depth=9, model__n_estimators=221, model__subsample=0.662397808134481; total time=   0.1s
[CV] END model__colsample_bytr

In [None]:
for name, search in [
    ("Random Forest", rf_search),
    ("Logistic Regression", lr_search), 
    ("XGBoost", xgb_search)
]:
    y_pred = search.best_estimator_.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n{name} - Test Accuracy: {acc:.3f}")
    print(classification_report(y_test, y_pred))


Random Forest - Test Accuracy: 0.746
              precision    recall  f1-score   support

           0       0.78      0.78      0.78        67
           1       0.71      0.71      0.71        51

    accuracy                           0.75       118
   macro avg       0.74      0.74      0.74       118
weighted avg       0.75      0.75      0.75       118


Logistic Regression - Test Accuracy: 0.822
              precision    recall  f1-score   support

           0       0.82      0.88      0.85        67
           1       0.83      0.75      0.78        51

    accuracy                           0.82       118
   macro avg       0.82      0.81      0.82       118
weighted avg       0.82      0.82      0.82       118


XGBoost - Test Accuracy: 0.839
              precision    recall  f1-score   support

           0       0.84      0.88      0.86        67
           1       0.83      0.78      0.81        51

    accuracy                           0.84       118
   macro avg  

In [None]:
joblib.dump(rf_search.best_estimator_, "../mlflow/artifacts/rf_best_model.joblib")
joblib.dump(lr_search.best_estimator_, "../mlflow/artifacts/lr_best_model.joblib")
joblib.dump(xgb_search.best_estimator_, "../mlflow/artifacts/xgb_best_model.joblib")

['../mlflow/artifacts/xgb_best_model.joblib']

 ## MLFlow model saving

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [None]:
mlflow.set_experiment("Appendicitis_Diagnosis_Models")

2025/11/11 21:43:29 INFO mlflow.tracking.fluent: Experiment with name 'Appendicitis_Diagnosis_Models' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/tux/ml_proyecto_final/notebooks/mlruns/259622151298174711', creation_time=1762915409955, experiment_id='259622151298174711', last_update_time=1762915409955, lifecycle_stage='active', name='Appendicitis_Diagnosis_Models', tags={}>

In [None]:
def evaluate_and_log_model(model_name, model_pipeline, X_train, X_test, y_train, y_test, label_encoder):
    with mlflow.start_run(run_name=model_name):
        # Fit model
        model_pipeline.fit(X_train, y_train)
        
        # Predictions
        y_pred = model_pipeline.predict(X_test)
        y_prob = model_pipeline.predict_proba(X_test)[:, 1]
        
        # Metrics
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_prob)
        
        # Log metrics
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
        mlflow.log_metric("recall", rec)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_metric("roc_auc", roc_auc)
        
        # Log parameters
        mlflow.log_params(model_pipeline.get_params())
        
        # Confusion matrix visualization
        cm = confusion_matrix(y_test, y_pred)
        cm_labels = label_encoder.classes_
        fig, ax = plt.subplots(figsize=(5, 4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", 
                    xticklabels=cm_labels, yticklabels=cm_labels, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title(f"{model_name} - Confusion Matrix")
        fig.tight_layout()
        
        # Save figure as artifact
        fig_path = f"../mlflow/artifacts/{model_name}_confusion_matrix.png"
        plt.savefig(fig_path)
        mlflow.log_artifact(fig_path)
        plt.close(fig)
        
        # Log model
        input_example = X_train.iloc[:3]
        mlflow.sklearn.log_model(
            sk_model=model_pipeline, 
            name=model_name, 
            input_example=input_example)
        
        print(f"{model_name} logged successfully → Recall: {rec:.3f}, ROC-AUC: {roc_auc:.3f}")


In [None]:
evaluate_and_log_model("RandomForest", rf_search.best_estimator_, X_train, X_test, y_train, y_test, le)
evaluate_and_log_model("Logistic_Lasso", lr_search.best_estimator_, X_train, X_test, y_train, y_test, le)
evaluate_and_log_model("XGBoost", xgb_search.best_estimator_, X_train, X_test, y_train, y_test, le)




RandomForest logged successfully → Recall: 0.706, ROC-AUC: 0.838




Logistic_Lasso logged successfully → Recall: 0.745, ROC-AUC: 0.908


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost logged successfully → Recall: 0.784, ROC-AUC: 0.936
