# 04 · Model Training with MLflow
Train a baseline logistic-regression classifier, then log its AUC and the fitted model to MLflow.

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report
import mlflow, mlflow.sklearn

# Location of the feature table consumed by this notebook.
FEAT_CSV = "/mnt/data/IPO_Databricks_Notebooks/features.csv"
df = pd.read_csv(FEAT_CSV)

# Feature matrix: every column except the label and the two date columns.
# errors="ignore" means a listed column that is absent is simply skipped.
X = df.drop(
    columns=["Delisted", "ipoDate", "delistingDate"],
    errors="ignore",
)

# Target vector: the raw values of the "Delisted" column.
y = df["Delisted"].values

# Split the feature columns by dtype: numeric (including boolean) columns are
# standardised, every remaining column is one-hot encoded.
# Testing for "numeric" instead of `dtype == object` keeps text columns out of
# StandardScaler when pandas stores them as `category` or with a dedicated
# string dtype rather than `object`; for object/int/float/bool columns the
# resulting split is the same as before.
num_cols = [
    col for col, dtype in X.dtypes.items()
    if pd.api.types.is_numeric_dtype(dtype)
]
cat_cols = [col for col in X.columns if col not in num_cols]

# Preprocessing: one-hot encode the categorical columns (categories not seen
# during fit are ignored at transform time) and standardise the numeric ones.
pre = ColumnTransformer(
    transformers=[
        ("cats", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("nums", StandardScaler(), num_cols),
    ],
)

# Baseline estimator; the solver is allowed at most 200 iterations.
clf = LogisticRegression(max_iter=200)

# Chain preprocessing and classifier so that fitting and prediction always
# apply both steps together.
pipe = Pipeline(steps=[("pre", pre), ("clf", clf)])

# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit, evaluate and log inside a single MLflow run.
with mlflow.start_run():
    pipe.fit(X_train, y_train)

    # Column 1 of predict_proba is used as the score; a row is labelled 1
    # when that score is at least 0.5.
    proba = pipe.predict_proba(X_test)
    y_prob = proba[:, 1]
    above_threshold = y_prob >= 0.5
    y_pred = above_threshold.astype(int)

    auc = roc_auc_score(y_test, y_prob)
    print("AUC:", auc)
    print(classification_report(y_test, y_pred))

    # Record the headline metric and the fitted pipeline on the run.
    mlflow.log_metric(key="auc", value=auc)
    mlflow.sklearn.log_model(pipe, artifact_path="model")


💡 You can log additional runs with other models (e.g., RandomForest or XGBoost) and compare their **AUC** in MLflow.