In [4]:
# ----------------------------
# Step 0: Libraries
# ----------------------------
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC



# ----------------------------
# Step 1: Load Dataset
# ----------------------------
# Project root; the dataset, MLflow database and output folders are all
# resolved relative to this directory.
project_path = Path(r"C:\Users\Mafia\Downloads\mlops-assignment-1")
dataset_path = project_path / "data" / "Iris.csv"
df = pd.read_csv(dataset_path)

print("Dataset shape:", df.shape)
print(df.head())

# Features are every column except the target and the "Id" column.
# "Id" is a row identifier (1, 2, 3, ... in the preview), not a flower
# measurement, so it must not be fed to the models.
# NOTE(review): if the CSV is sorted by species, as the commonly distributed
# Iris.csv is, "Id" leaks the label outright -- confirm against the file.
# errors="ignore" keeps this working for a copy of the data without "Id".
X = df.drop(columns=["Species"]).drop(columns=["Id"], errors="ignore")
y = df["Species"]

# ----------------------------
# Step 2: Train-Test Split
# ----------------------------
# Hold out 40% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
split_options = {"test_size": 0.4, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------------
# Step 3: Define Models
# ----------------------------
# Registry of the estimators to compare, keyed by the name used for the MLflow
# run, the saved artifacts and the registered model.
# NOTE(review): KNeighborsClassifier and SVC are imported at the top of the
# cell but are not registered here -- confirm whether that is intentional.
models = {}
models["LogisticRegression"] = LogisticRegression(C=0.5, max_iter=50)
models["DecisionTree"] = DecisionTreeClassifier(random_state=42, max_depth=2)
models["RandomForest"] = RandomForestClassifier(
    random_state=42,
    n_estimators=10,
    max_depth=3,
)

# ----------------------------
# Step 4: MLflow Setup (SQLite Backend)
# ----------------------------
# Track runs in a SQLite database stored in the project root.
mlflow_db_path = project_path / "mlflow.db"
# as_posix() yields forward slashes, which is what the URI needs on Windows.
tracking_uri = "sqlite:///" + mlflow_db_path.as_posix()
mlflow.set_tracking_uri(tracking_uri)

# All runs of this comparison are grouped under a single experiment.
mlflow.set_experiment("Iris_Model_Comparison")
# ----------------------------
# Step 5: Train, Evaluate & Log Models
# ----------------------------
# One row per model: [name, accuracy, precision, recall, f1].
results = []

# plt.savefig and joblib.dump do not create missing parent folders, so make
# sure both output directories exist before the first model is processed.
results_dir = project_path / "results"
models_dir = project_path / "models"
for out_dir in (results_dir, models_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

for name, model in models.items():
    # If a run is still active (e.g. left over from an earlier cell), close it
    # before starting a new one.
    if mlflow.active_run():
        mlflow.end_run()

    # The context manager ends the run when the block exits, so no explicit
    # end_run() is needed afterwards.
    with mlflow.start_run(run_name=name):
        # Train on the scaled training rows and predict the held-out rows.
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        # Macro averaging weights every class equally.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="macro")
        rec = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")

        results.append([name, acc, prec, rec, f1])

        print(f"\n{name} Classification Report:\n")
        print(classification_report(y_test, y_pred))

        # ----------------------------
        # Log Parameters
        # ----------------------------
        # Every entry in `models` is a scikit-learn estimator, so
        # get_params() is available; log the hyper-parameters and the model
        # name in a single batch call.
        mlflow.log_params({**model.get_params(), "model_name": name})

        # ----------------------------
        # Log Metrics
        # ----------------------------
        mlflow.log_metrics(
            {
                "accuracy": acc,
                "precision": prec,
                "recall": rec,
                "f1_score": f1,
            }
        )

        # ----------------------------
        # Plot Confusion Matrix & Log as Artifact
        # ----------------------------
        # Passing model.classes_ as labels keeps the matrix rows/columns in
        # the same order as the tick labels.
        cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=model.classes_,
            yticklabels=model.classes_,
            ax=ax,
        )
        ax.set_title(f"Confusion Matrix - {name}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")

        cm_path = results_dir / f"{name}_confusion_matrix.png"
        fig.savefig(cm_path)
        # Close this specific figure so figures do not pile up across models.
        plt.close(fig)

        mlflow.log_artifact(str(cm_path))

        # ----------------------------
        # Save & Log Model
        # ----------------------------
        # Keep a local pickle next to the project and also log/register the
        # model with MLflow.
        model_file = models_dir / f"{name}.pkl"
        joblib.dump(model, model_file)
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            registered_model_name=f"Iris_{name}"
        )


Dataset shape: (150, 6)
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

LogisticRegression Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        23
Iris-versicolor       1.00      1.00      1.00        19
 Iris-virginica       1.00      1.00      1.00        18

       accuracy                           1.00        60
      macro avg       1.00      1.00      1.00        60
   weighted avg       1.00      1.00      1.00        60



Successfully registered model 'Iris_LogisticRegression'.
Created version '1' of model 'Iris_LogisticRegression'.



DecisionTree Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        23
Iris-versicolor       1.00      1.00      1.00        19
 Iris-virginica       1.00      1.00      1.00        18

       accuracy                           1.00        60
      macro avg       1.00      1.00      1.00        60
   weighted avg       1.00      1.00      1.00        60



Successfully registered model 'Iris_DecisionTree'.
Created version '1' of model 'Iris_DecisionTree'.



RandomForest Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        23
Iris-versicolor       1.00      1.00      1.00        19
 Iris-virginica       1.00      1.00      1.00        18

       accuracy                           1.00        60
      macro avg       1.00      1.00      1.00        60
   weighted avg       1.00      1.00      1.00        60



Successfully registered model 'Iris_RandomForest'.
Created version '1' of model 'Iris_RandomForest'.



KNN Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        23
Iris-versicolor       1.00      1.00      1.00        19
 Iris-virginica       1.00      1.00      1.00        18

       accuracy                           1.00        60
      macro avg       1.00      1.00      1.00        60
   weighted avg       1.00      1.00      1.00        60



Successfully registered model 'Iris_KNN'.
Created version '1' of model 'Iris_KNN'.



SVC Classification Report:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        23
Iris-versicolor       1.00      1.00      1.00        19
 Iris-virginica       1.00      1.00      1.00        18

       accuracy                           1.00        60
      macro avg       1.00      1.00      1.00        60
   weighted avg       1.00      1.00      1.00        60



Successfully registered model 'Iris_SVC'.
Created version '1' of model 'Iris_SVC'.
