
# Neural Network Pipeline (PyTorch + scikit-learn + MLflow)

This notebook organizes  end-to-end pipeline:
1. Setup & Imports  
2. Config  
3. Model Definitions (NN, EarlyStopping)  
4. MLflow Setup  
5. Load Data  
6. Preprocess / Split / Scale (+ optional SMOTE)  
7. DataLoaders  
8. Build Model  
9. Train  
10. Evaluate + Plots  
11. Baseline Models & Comparison  
12. Utilities (plotting helpers)  



## 1) Setup & Imports

In [110]:

import os, warnings, json, pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings("ignore")

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, PowerTransformer
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             confusion_matrix, roc_auc_score, roc_curve,
                             precision_recall_curve, average_precision_score,
                             mean_squared_error, mean_absolute_error, r2_score)

from sklearn.ensemble import (RandomForestClassifier, GradientBoostingClassifier)

from sklearn.linear_model import LogisticRegression

# Optional libraries

try:
    import catboost as cb; CATBOOST_AVAILABLE = True
except Exception:
    CATBOOST_AVAILABLE = False

#logistic regression
from sklearn.linear_model import LogisticRegression as lr

#lightgbm
try:
    import lightgbm as lgb; LIGHTGBM_AVAILABLE = True
except Exception:
    LIGHTGBM_AVAILABLE = False



# MLflow
import mlflow, mlflow.pytorch, mlflow.sklearn
from mlflow.models.signature import infer_signature

# Other
from scipy import stats
from imblearn.over_sampling import SMOTE

# Reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Paths / Device
OUTPUTS_DIR = "./outputs_nn"
os.makedirs(OUTPUTS_DIR, exist_ok=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", DEVICE)
print("CatBoost available:", CATBOOST_AVAILABLE)
print("Logistic Regression available:", lr)

Device: cpu
CatBoost available: True
Logistic Regression available: <class 'sklearn.linear_model._logistic.LogisticRegression'>


## 2) Config (hyperparameters & switches)

In [111]:

class Config:
    EXPERIMENT_NAME = "Neural_Network_Orthopedic_Classification"
    TRACKING_URI = "sqlite:///mlflow_nn.db"
    ARTIFACT_ROOT = "./mlruns_nn"
    OUTPUTS_DIR = OUTPUTS_DIR

    RANDOM_STATE = 42
    TEST_SIZE = 0.2
    VALIDATION_SIZE = 0.2

    HIDDEN_LAYERS = [64, 32, 16]
    DROPOUT_RATE = 0.2
    BATCH_NORM = True
    ACTIVATION = "relu"

    BATCH_SIZE = 128
    MAX_EPOCHS = 200
    LEARNING_RATE = 1e-3
    WEIGHT_DECAY = 1e-5
    PATIENCE = 20

    DEVICE = DEVICE
    TASK_TYPE = "classification"  
    USE_SMOTE = True
    CLASS_WEIGHTS = True

cfg = Config()
cfg.__dict__


{}

## 3) Model Definitions (NN, EarlyStopping)

In [112]:

class FeedforwardNeuralNetwork(nn.Module):
    """Feedforward Neural Network with customizable architecture"""
    def __init__(self, input_size, hidden_layers, output_size,
                 dropout_rate=0.2, batch_norm=True, activation="relu", task_type="classification"):
        super().__init__()
        self.task_type = task_type
        self.activation = activation
        self.batch_norm = batch_norm

        if activation == "relu":
            act = nn.ReLU()
        elif activation == "tanh":
            act = nn.Tanh()
        elif activation == "sigmoid":
            act = nn.Sigmoid()
        elif activation == "leaky_relu":
            act = nn.LeakyReLU(0.01)
        else:
            act = nn.ReLU()

        layers = []
        sizes = [input_size] + hidden_layers + [output_size]
        for i in range(len(sizes) - 1):
            layers.append(nn.Linear(sizes[i], sizes[i+1]))
            if i < len(sizes) - 2:
                if batch_norm:
                    layers.append(nn.BatchNorm1d(sizes[i+1]))
                layers.append(act)
                if dropout_rate > 0:
                    layers.append(nn.Dropout(dropout_rate))
        self.network = nn.Sequential(*layers)
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                if self.activation == "relu":
                    nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                else:
                    nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        return self.network(x)

class EarlyStopping:
    def __init__(self, patience=7, min_delta=0, restore_best_weights=True):
        self.patience = patience; self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_loss = None; self.counter = 0
        self.best_weights = None; self.early_stop = False

    def __call__(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss; self.counter = 0
            self.best_weights = model.state_dict().copy()
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
                if self.restore_best_weights:
                    model.load_state_dict(self.best_weights)


## 4) MLflow Setup

In [113]:

mlflow.set_tracking_uri(cfg.TRACKING_URI)
try:
    exp_id = mlflow.create_experiment(cfg.EXPERIMENT_NAME, artifact_location=cfg.ARTIFACT_ROOT)
except mlflow.exceptions.MlflowException:
    exp = mlflow.get_experiment_by_name(cfg.EXPERIMENT_NAME)
    exp_id = exp.experiment_id if exp else None
mlflow.set_experiment(cfg.EXPERIMENT_NAME)
exp_id

'1'

## 5) Load Data

In [114]:

# Ensure the CSV is present in the working directory
DATA_PATH = "column_3C_processed.csv"
df = pd.read_csv(DATA_PATH)
print(df.shape)
df.head()


(310, 8)


Unnamed: 0,pelvic_tilt,sacral_slope,lumbar_lordosis_angle,pelvic_radius,pi_ss_ratio,class,binary_class,degree_spondylolisthesis_PowerTransformer
0,22.552586,40.475232,39.609117,98.672917,1.557195,Hernia,Abnormal,-0.267585
1,10.060991,28.99596,25.015378,114.405425,1.346979,Hernia,Abnormal,2.922868
2,22.218482,46.613539,50.092194,105.985135,1.476653,Hernia,Abnormal,-5.347396
3,24.652878,44.64413,44.311238,101.868495,1.552209,Hernia,Abnormal,5.581202
4,9.652075,40.060784,28.317406,108.168725,1.240936,Hernia,Abnormal,4.373008


## 6) Preprocess / Split / Scale (+ optional SMOTE)

In [115]:


num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
X = df[num_cols]
y = df['binary_class']  # adjust if your target differs

le = None
if cfg.TASK_TYPE == "classification":
    le = LabelEncoder()
    y = le.fit_transform(y)

# Split
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=cfg.TEST_SIZE, random_state=cfg.RANDOM_STATE,
    stratify=y if cfg.TASK_TYPE == "classification" else None
)

val_size_adj = cfg.VALIDATION_SIZE / (1 - cfg.TEST_SIZE)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=val_size_adj, random_state=cfg.RANDOM_STATE,
    stratify=y_temp if cfg.TASK_TYPE == "classification" else None
)

# Optional SMOTE
if cfg.TASK_TYPE == "classification" and cfg.USE_SMOTE:
    sm = SMOTE(random_state=cfg.RANDOM_STATE)
    X_train, y_train = sm.fit_resample(X_train, y_train)

# Scale
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)
X_test_s  = scaler.transform(X_test)

X_train_s.shape, X_val_s.shape, X_test_s.shape


((252, 6), (62, 6), (62, 6))

## 7) DataLoaders

In [116]:

to_tensor = torch.FloatTensor
Xtr_t = to_tensor(X_train_s)
Xva_t = to_tensor(X_val_s)
Xte_t = to_tensor(X_test_s)

if cfg.TASK_TYPE == "classification":
    ytr_t = torch.LongTensor(y_train)
    yva_t = torch.LongTensor(y_val)
    yte_t = torch.LongTensor(y_test)
else:
    ytr_t = to_tensor(y_train)
    yva_t = to_tensor(y_val)
    yte_t = to_tensor(y_test)

train_loader = DataLoader(TensorDataset(Xtr_t, ytr_t), batch_size=cfg.BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(TensorDataset(Xva_t, yva_t), batch_size=cfg.BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(TensorDataset(Xte_t, yte_t), batch_size=cfg.BATCH_SIZE, shuffle=False)

len(train_loader), len(val_loader), len(test_loader)

(2, 1, 1)

## 8) Build Model

In [117]:

input_size  = X_train_s.shape[1]
output_size = (len(np.unique(y_train)) if cfg.TASK_TYPE == "classification" else 1)

model = FeedforwardNeuralNetwork(
    input_size=input_size,
    hidden_layers=cfg.HIDDEN_LAYERS,
    output_size=output_size,
    dropout_rate=cfg.DROPOUT_RATE,
    batch_norm=cfg.BATCH_NORM,
    activation=cfg.ACTIVATION,
    task_type=cfg.TASK_TYPE
).to(cfg.DEVICE)

model

FeedforwardNeuralNetwork(
  (network): Sequential(
    (0): Linear(in_features=6, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=32, out_features=16, bias=True)
    (9): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=16, out_features=2, bias=True)
  )
)

## 9) Train

In [118]:

# Loss
if cfg.TASK_TYPE == "classification":
    if cfg.CLASS_WEIGHTS:
        # Compute balanced class weights from training labels
        from sklearn.utils.class_weight import compute_class_weight
        classes = np.unique(y_train)
        weights = compute_class_weight('balanced', classes=classes, y=y_train)
        class_weights = torch.tensor(weights, dtype=torch.float, device=cfg.DEVICE)
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        print("Class weights:", weights)
    else:
        criterion = nn.CrossEntropyLoss()
else:
    criterion = nn.MSELoss()

# Optimizer / Scheduler / Early stopping
optimizer = optim.Adam(model.parameters(), lr=cfg.LEARNING_RATE, weight_decay=cfg.WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
early = EarlyStopping(patience=cfg.PATIENCE, min_delta=1e-3)

train_losses, val_losses = [], []

with mlflow.start_run(run_name="neural_network_training"):
    # Log config
    for k, v in cfg.__dict__.items():
        if not k.startswith("_"):
            mlflow.log_param(k, v)

    for epoch in range(cfg.MAX_EPOCHS):
        model.train()
        tr_loss, tr_correct, tr_total = 0.0, 0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(cfg.DEVICE), yb.to(cfg.DEVICE)
            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb if cfg.TASK_TYPE == "classification" else yb.squeeze())
            loss.backward()
            optimizer.step()

            tr_loss += loss.item()
            tr_total += yb.size(0)
            if cfg.TASK_TYPE == "classification":
                tr_correct += out.argmax(1).eq(yb).sum().item()

        model.eval()
        va_loss, va_correct, va_total = 0.0, 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(cfg.DEVICE), yb.to(cfg.DEVICE)
                out = model(xb)
                loss = criterion(out, yb if cfg.TASK_TYPE == "classification" else yb.squeeze())
                va_loss += loss.item()
                va_total += yb.size(0)
                if cfg.TASK_TYPE == "classification":
                    va_correct += out.argmax(1).eq(yb).sum().item()

        tr_loss /= len(train_loader); va_loss /= len(val_loader)
        train_losses.append(tr_loss); val_losses.append(va_loss)
        scheduler.step(va_loss); early(va_loss, model)

        if cfg.TASK_TYPE == "classification":
            tr_acc = 100. * tr_correct / tr_total
            va_acc = 100. * va_correct / va_total
            if epoch % 10 == 0:
                print(f"Epoch {epoch:03d}  loss {tr_loss:.4f}/{va_loss:.4f}  acc {tr_acc:.2f}/{va_acc:.2f}")
            mlflow.log_metric("train_accuracy", tr_acc, step=epoch)
            mlflow.log_metric("val_accuracy", va_acc, step=epoch)
        else:
            if epoch % 10 == 0:
                print(f"Epoch {epoch:03d}  loss {tr_loss:.4f}/{va_loss:.4f}")

        mlflow.log_metric("train_loss", tr_loss, step=epoch)
        mlflow.log_metric("val_loss", va_loss, step=epoch)

        if early.early_stop:
            print(f"Early stopping at epoch {epoch}")
            mlflow.log_param("early_stopped_epoch", epoch)
            break

    # Save training curves
    plt.figure(figsize=(6,4))
    plt.plot(train_losses, label='Train')
    plt.plot(val_losses, label='Val')
    plt.xlabel('Epoch'); plt.ylabel('Loss'); plt.title('Training History'); plt.legend(); plt.grid(True)
    os.makedirs(cfg.OUTPUTS_DIR, exist_ok=True)
    hist_path = os.path.join(cfg.OUTPUTS_DIR, 'training_history.png')
    plt.savefig(hist_path, dpi=300, bbox_inches='tight'); plt.close()
    mlflow.log_artifact(hist_path)

    # Log model
    sig = infer_signature(np.asarray(X_train_s), np.asarray(y_train))
    mlflow.pytorch.log_model(model, "neural_network_model", signature=sig)


Class weights: [1. 1.]
Epoch 000  loss 1.0546/0.9813  acc 52.78/37.10
Epoch 010  loss 0.6205/0.5688  acc 69.84/74.19
Epoch 020  loss 0.5016/0.4616  acc 74.21/82.26
Epoch 030  loss 0.4227/0.4167  acc 80.16/79.03
Epoch 040  loss 0.3833/0.3938  acc 82.14/79.03
Epoch 050  loss 0.3741/0.3780  acc 83.73/80.65
Epoch 060  loss 0.3518/0.3629  acc 84.92/82.26
Epoch 070  loss 0.3591/0.3476  acc 84.92/82.26
Epoch 080  loss 0.3168/0.3397  acc 86.90/83.87
Epoch 090  loss 0.2933/0.3333  acc 86.51/85.48
Epoch 100  loss 0.3049/0.3337  acc 86.11/87.10
Early stopping at epoch 106




## 10) Evaluate + Plots

In [119]:

model.eval()
preds, targets, probas = [], [], []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(cfg.DEVICE)
        out = model(xb)
        if cfg.TASK_TYPE == "classification":
            preds.extend(out.argmax(1).cpu().numpy())
            probas.extend(F.softmax(out, dim=1).cpu().numpy())
            targets.extend(yb.numpy())
        else:
            preds.extend(out.squeeze().cpu().numpy())
            targets.extend(yb.numpy())

if cfg.TASK_TYPE == "classification":
    accuracy = accuracy_score(targets, preds)
    precision = precision_score(targets, preds, average='weighted')
    recall = recall_score(targets, preds, average='weighted')
    f1 = f1_score(targets, preds, average='weighted')

    if len(np.unique(targets)) == 2:
        roc_auc = roc_auc_score(targets, np.array(probas)[:, 1])
        avg_precision = average_precision_score(targets, np.array(probas)[:, 1])
    else:
        roc_auc = roc_auc_score(targets, np.array(probas), multi_class='ovr')
        avg_precision = None

    print("NN Test Metrics:")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1: {f1:.4f}")
    print(f"  ROC-AUC: {roc_auc:.4f}")
    if avg_precision is not None:
        print(f"  Avg Precision: {avg_precision:.4f}")

    # Confusion matrix
    cm = confusion_matrix(targets, preds)
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix - NN'); plt.xlabel('Pred'); plt.ylabel('True')
    cm_path = os.path.join(cfg.OUTPUTS_DIR, 'nn_confusion_matrix.png')
    plt.savefig(cm_path, dpi=300, bbox_inches='tight'); plt.close()

    # ROC/PR (binary)
    if len(np.unique(targets)) == 2:
        from sklearn.metrics import roc_curve, precision_recall_curve
        fpr, tpr, _ = roc_curve(targets, np.array(probas)[:, 1])
        precision_curve, recall_curve, _ = precision_recall_curve(targets, np.array(probas)[:, 1])

        plt.figure(figsize=(5,4))
        plt.plot(fpr, tpr); plt.plot([0,1],[0,1],'k--')
        plt.xlabel('FPR'); plt.ylabel('TPR'); plt.title(f'ROC (AUC={roc_auc:.3f})')
        roc_path = os.path.join(cfg.OUTPUTS_DIR, 'nn_roc.png')
        plt.savefig(roc_path, dpi=300, bbox_inches='tight'); plt.close()

        plt.figure(figsize=(5,4))
        plt.plot(recall_curve, precision_curve)
        plt.xlabel('Recall'); plt.ylabel('Precision'); plt.title(f'PR (AP={avg_precision:.3f})')
        pr_path = os.path.join(cfg.OUTPUTS_DIR, 'nn_pr.png')
        plt.savefig(pr_path, dpi=300, bbox_inches='tight'); plt.close()

    with mlflow.start_run(run_name="neural_network_evaluation"):
        for k,v in dict(accuracy=accuracy, precision=precision, recall=recall, f1=f1, roc_auc=roc_auc).items():
            mlflow.log_metric(f"test_{k}", float(v))
        if avg_precision is not None:
            mlflow.log_metric("test_avg_precision", float(avg_precision))
        # Log artifacts
        mlflow.log_artifact(cm_path)
        if len(np.unique(targets)) == 2:
            mlflow.log_artifact(roc_path); mlflow.log_artifact(pr_path)

else:
    mse = mean_squared_error(targets, preds)
    mae = mean_absolute_error(targets, preds)
    rmse = np.sqrt(mse)
    r2 = r2_score(targets, preds)
    print("NN Test Metrics:")
    print(f"  MSE: {mse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  R2: {r2:.4f}")

    # Plots
    plt.figure(figsize=(5,4))
    plt.scatter(targets, preds, alpha=0.6)
    lims = [min(targets+preds), max(targets+preds)]
    plt.plot(lims, lims, 'k--')
    plt.xlabel('Actual'); plt.ylabel('Predicted'); plt.title('Pred vs Actual')
    pva_path = os.path.join(cfg.OUTPUTS_DIR, 'nn_pred_vs_actual.png')
    plt.savefig(pva_path, dpi=300, bbox_inches='tight'); plt.close()

    residuals = np.array(targets) - np.array(preds)
    plt.figure(figsize=(5,4))
    plt.scatter(preds, residuals, alpha=0.6); plt.axhline(0, ls='--', c='k')
    plt.xlabel('Predicted'); plt.ylabel('Residuals'); plt.title('Residuals')
    res_path = os.path.join(cfg.OUTPUTS_DIR, 'nn_residuals.png')
    plt.savefig(res_path, dpi=300, bbox_inches='tight'); plt.close()

    with mlflow.start_run(run_name="neural_network_evaluation"):
        for k,v in dict(mse=mse, mae=mae, rmse=rmse, r2=r2).items():
            mlflow.log_metric(f"test_{k}", float(v))
        mlflow.log_artifact(pva_path); mlflow.log_artifact(res_path)

NN Test Metrics:
  Accuracy: 0.8548
  Precision: 0.8731
  Recall: 0.8548
  F1: 0.8583
  ROC-AUC: 0.9548
  Avg Precision: 0.9285


## 11) Baseline Models & Comparison

In [120]:

comparison = {}

def fit_and_eval_baseline(model, name):
    model.fit(X_train_s, y_train)
    y_pred = model.predict(X_test_s)

    if cfg.TASK_TYPE == "classification":
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted')
        rec = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        roc = None
        if hasattr(model, "predict_proba"):
            proba = model.predict_proba(X_test_s)
            if len(np.unique(y_test)) == 2:
                roc = roc_auc_score(y_test, proba[:,1])
            else:
                roc = roc_auc_score(y_test, proba, multi_class='ovr')
        comparison[name] = dict(accuracy=acc, precision=prec, recall=rec, f1_score=f1, roc_auc=roc)
    else:
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        rmse = np.sqrt(mse)
        comparison[name] = dict(mse=mse, mae=mae, r2_score=r2, rmse=rmse)

# Random Forest & Gradient Boosting
if cfg.TASK_TYPE == "classification":
    fit_and_eval_baseline(RandomForestClassifier(n_estimators=100, random_state=cfg.RANDOM_STATE), "Random Forest")
    fit_and_eval_baseline(GradientBoostingClassifier(n_estimators=100, random_state=cfg.RANDOM_STATE), "Gradient Boosting")
else:
    fit_and_eval_baseline(RandomForestRegressor(n_estimators=100, random_state=cfg.RANDOM_STATE), "Random Forest")
    fit_and_eval_baseline(GradientBoostingRegressor(n_estimators=100, random_state=cfg.RANDOM_STATE), "Gradient Boosting")

# LightGBM
if LIGHTGBM_AVAILABLE:
    if cfg.TASK_TYPE == "classification":
        fit_and_eval_baseline(lgb.LGBMClassifier(n_estimators=100, random_state=cfg.RANDOM_STATE, verbose=-1), "LightGBM")
    else:
        fit_and_eval_baseline(lgb.LGBMRegressor(n_estimators=100, random_state=cfg.RANDOM_STATE, verbose=-1), "LightGBM")

# CatBoost
if CATBOOST_AVAILABLE:
    if cfg.TASK_TYPE == "classification":
        fit_and_eval_baseline(cb.CatBoostClassifier(n_estimators=100, random_state=cfg.RANDOM_STATE, verbose=False), "CatBoost")
    else:
        fit_and_eval_baseline(cb.CatBoostRegressor(n_estimators=100, random_state=cfg.RANDOM_STATE, verbose=False), "CatBoost")

#LogisticRegression


if cfg.TASK_TYPE == "classification":
    fit_and_eval_baseline(LogisticRegression(max_iter=1000, random_state=cfg.RANDOM_STATE), "Logistic Regression")
else:
    warnings.warn("Logistic Regression is not suitable for regression tasks.")

comparison_df = pd.DataFrame(comparison).T.round(2)

comparison_df


Unnamed: 0,accuracy,precision,recall,f1_score,roc_auc
Random Forest,0.77,0.78,0.77,0.78,0.91
Gradient Boosting,0.82,0.84,0.82,0.83,0.9
LightGBM,0.82,0.83,0.82,0.82,0.9
CatBoost,0.79,0.8,0.79,0.79,0.91
Logistic Regression,0.84,0.85,0.84,0.84,0.92


### Comparison Plot

In [121]:

if comparison:
    models = list(comparison.keys())
    if cfg.TASK_TYPE == "classification":
        accs = [comparison[m]['accuracy'] for m in models]
        f1s  = [comparison[m]['f1_score'] for m in models]

        plt.figure(figsize=(6,4))
        plt.bar(models, accs)
        plt.title('Accuracy Comparison'); plt.ylabel('Accuracy'); plt.xticks(rotation=30, ha='right')
        comp_acc_path = os.path.join(cfg.OUTPUTS_DIR, 'comparison_accuracy.png')
        plt.savefig(comp_acc_path, dpi=300, bbox_inches='tight'); plt.close()

        plt.figure(figsize=(6,4))
        plt.bar(models, f1s)
        plt.title('F1-Score Comparison'); plt.ylabel('F1'); plt.xticks(rotation=30, ha='right')
        comp_f1_path = os.path.join(cfg.OUTPUTS_DIR, 'comparison_f1.png')
        plt.savefig(comp_f1_path, dpi=300, bbox_inches='tight'); plt.close()

    else:
        r2s  = [comparison[m]['r2_score'] for m in models]
        rmses = [comparison[m]['rmse'] for m in models]

        plt.figure(figsize=(6,4))
        plt.bar(models, r2s)
        plt.title('R2 Comparison'); plt.ylabel('R2'); plt.xticks(rotation=30, ha='right')
        comp_r2_path = os.path.join(cfg.OUTPUTS_DIR, 'comparison_r2.png')
        plt.savefig(comp_r2_path, dpi=300, bbox_inches='tight'); plt.close()

        plt.figure(figsize=(6,4))
        plt.bar(models, rmses)
        plt.title('RMSE Comparison'); plt.ylabel('RMSE'); plt.xticks(rotation=30, ha='right')
        comp_rmse_path = os.path.join(cfg.OUTPUTS_DIR, 'comparison_rmse.png')
        plt.savefig(comp_rmse_path, dpi=300, bbox_inches='tight'); plt.close()




## 12) Utilities
You can add reusable plotting functions or helpers here (kept simple in this notebook for clarity).
