## MLflow Experimentation

### Load Dataset

In [2]:
import pandas as pd

# Read glass.csv from the working directory into a DataFrame.
df = pd.read_csv("glass.csv")

In [3]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [4]:
# Feature matrix: every column except the "Type" target.
X = df.drop("Type", axis=1)

In [5]:
# Show the first five feature rows.
X.head(5)

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0


In [6]:
# Target vector: the "Type" column.
Y = df.loc[:, "Type"]

In [7]:
# Show the first five target values.
Y.head(5)

0    1
1    1
2    1
3    1
4    1
Name: Type, dtype: int64

In [8]:
# Print the feature and target shapes side by side to confirm the row counts match.
print(f"{X.shape} {Y.shape}")

(214, 9) (214,)


### Train-Test Split

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, stratified on the target,
# with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=42,
)

In [10]:
# Print the train and test feature shapes.
print(f"{x_train.shape} {x_test.shape}")

(171, 9) (43, 9)


### Standardise Features

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

### Logistic Regression

In [18]:
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train a logistic-regression classifier on the scaled training split, evaluate
# it on the test split, then log params/metrics and register the model in MLflow.

# ===== Set MLflow tracking URI =====
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ===== Single experiment for all models =====
experiment_name = "ML_Model_Experiments"
mlflow.set_experiment(experiment_name)
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# ===== Determine next run number for Logistic Regression =====
# Count the existing runs tagged 'logistic_reg'; the new run takes the next ordinal.
runs = client.search_runs([experiment_id], filter_string="tags.model_type='logistic_reg'")
next_id = len(runs) + 1

# ===== Model training =====
# The saga solver shuffles the data, so random_state is pinned to make the fit
# (and therefore the logged metrics) reproducible, like the other seeded models
# in this notebook.
params = {"max_iter": 100, "C": 0.5, "solver": "saga", "random_state": 42}
model = LogisticRegression(**params)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# ===== Evaluation =====
# Precision/recall/F1 are averaged across classes weighted by class support.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted')
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

metrics = {
    "accuracy": acc,
    "precision": prec,
    "recall": rec,
    "f1_score": f1
}

print(metrics)

# ===== MLflow Logging & Register Model =====
run_name = f"run{next_id}_logistic_reg"
artifact_path = f"logistic_reg_{next_id}"
model_registry_name = "logistic_reg_model"  # name in MLflow Model Registry

with mlflow.start_run(run_name=run_name) as run:
    # The tag is what the run-numbering query above filters on.
    mlflow.set_tag("model_type", "logistic_reg")
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    # Log and register the model
    mlflow.sklearn.log_model(
        sk_model=model,
        name=artifact_path,
        registered_model_name=model_registry_name,
        input_example=x_test[:5]
    )

print(f"✅ {run_name} completed. Run ID: {run.info.run_id}, Artifact path: {artifact_path}")
print(f"✅ Model registered as '{model_registry_name}' in MLflow Model Registry")




{'accuracy': 0.7441860465116279, 'precision': 0.7126937984496123, 'recall': 0.7441860465116279, 'f1_score': 0.7266316579144787}


Registered model 'logistic_reg_model' already exists. Creating a new version of this model...
2025/10/04 22:49:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_reg_model, version 3


🏃 View run run3_logistic_reg at: http://127.0.0.1:5000/#/experiments/222058835319637488/runs/9693dcb2fe89498f8e519c3beca29b6e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/222058835319637488
✅ run3_logistic_reg completed. Run ID: 9693dcb2fe89498f8e519c3beca29b6e, Artifact path: logistic_reg_3
✅ Model registered as 'logistic_reg_model' in MLflow Model Registry


Created version '3' of model 'logistic_reg_model'.


### Random Forest

In [17]:
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fit a random forest on the scaled training split, score it on the test split,
# and record the run (params, metrics, registered model) in MLflow.

# ----- Tracking server -----
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ----- Shared experiment used by every model in this notebook -----
experiment_name = "ML_Model_Experiments"
mlflow.set_experiment(experiment_name)
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# ----- Next ordinal among runs tagged 'random_forest' -----
runs = client.search_runs(
    [experiment_id],
    filter_string="tags.model_type='random_forest'",
)
next_id = 1 + len(runs)

# ----- Fit and predict -----
params = dict(n_estimators=120, max_depth=None, random_state=42)
model = RandomForestClassifier(**params).fit(x_train, y_train)
y_pred = model.predict(x_test)

# ----- Score on the held-out split (support-weighted averages) -----
weighted = {"average": "weighted"}
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, **weighted)
rec = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)
metrics = dict(accuracy=acc, precision=prec, recall=rec, f1_score=f1)

print(metrics)

# ----- Names for the run, the logged model, and the registry entry -----
run_name = "run{}_random_forest".format(next_id)
artifact_path = "random_forest_{}".format(next_id)
model_registry_name = "random_forest_model"

with mlflow.start_run(run_name=run_name) as run:
    # This tag is what the run-counting query above filters on.
    mlflow.set_tag("model_type", "random_forest")
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    # Store the fitted estimator and register it as a new model version.
    mlflow.sklearn.log_model(
        sk_model=model,
        name=artifact_path,
        registered_model_name=model_registry_name,
        input_example=x_test[:5],
    )

print(f"✅ {run_name} completed. Run ID: {run.info.run_id}, Artifact path: {artifact_path}")
print(f"✅ Model registered as '{model_registry_name}' in MLflow Model Registry")


{'accuracy': 0.8372093023255814, 'precision': 0.8542214739205162, 'recall': 0.8372093023255814, 'f1_score': 0.835526738827564}


Registered model 'random_forest_model' already exists. Creating a new version of this model...
2025/10/04 22:47:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_model, version 2


🏃 View run run2_random_forest at: http://127.0.0.1:5000/#/experiments/222058835319637488/runs/b8b07cb2f207450e83158efd00ce42e3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/222058835319637488
✅ run2_random_forest completed. Run ID: b8b07cb2f207450e83158efd00ce42e3, Artifact path: random_forest_2
✅ Model registered as 'random_forest_model' in MLflow Model Registry


Created version '2' of model 'random_forest_model'.


### SVM

In [20]:
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Train an RBF-kernel SVM on the scaled training split, evaluate it on the test
# split, then log params/metrics and register the model in MLflow.

# ===== Set MLflow tracking URI =====
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ===== Single experiment =====
experiment_name = "ML_Model_Experiments"
mlflow.set_experiment(experiment_name)
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# ===== Determine next run number for SVM =====
# Count the existing runs tagged 'svm'; the new run takes the next ordinal.
runs = client.search_runs([experiment_id], filter_string="tags.model_type='svm'")
next_id = len(runs) + 1

# ===== Model training =====
# Fit directly on the original class labels. SVC accepts arbitrary label values,
# so no LabelEncoder round-trip is needed, and the registered model then predicts
# the real `Type` values just like the other scikit-learn models in this notebook
# (previously it was trained on encoded labels and emitted encoded indices).
params = {"C": 0.5, "kernel": "rbf", "probability": True, "random_state": 42}
model = SVC(**params)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# ===== Evaluation =====
# zero_division=0 scores a class that is never predicted as 0 instead of warning.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
metrics = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1}

print(metrics)

# ===== MLflow Logging & Register Model =====
run_name = f"run{next_id}_svm"
artifact_path = f"svm_{next_id}"
model_registry_name = "svm_model"

with mlflow.start_run(run_name=run_name) as run:
    # The tag is what the run-numbering query above filters on.
    mlflow.set_tag("model_type", "svm")
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model=model,
        name=artifact_path,
        registered_model_name=model_registry_name,
        input_example=x_test[:5]
    )

print(f"✅ {run_name} completed. Run ID: {run.info.run_id}, Artifact path: {artifact_path}")


{'accuracy': 0.6744186046511628, 'precision': 0.6324750830564784, 'recall': 0.6744186046511628, 'f1_score': 0.6330514446793516}


Registered model 'svm_model' already exists. Creating a new version of this model...
2025/10/04 22:52:33 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: svm_model, version 2


🏃 View run run2_svm at: http://127.0.0.1:5000/#/experiments/222058835319637488/runs/1017d22d9d8e4d11856d9992d4de49f5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/222058835319637488
✅ run2_svm completed. Run ID: 1017d22d9d8e4d11856d9992d4de49f5, Artifact path: svm_2


Created version '2' of model 'svm_model'.


### Naive Bayes

In [22]:
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train a Gaussian naive Bayes classifier on the scaled training split, evaluate
# it on the test split, then log params/metrics and register the model in MLflow.

# ===== Set MLflow tracking URI =====
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ===== Single experiment =====
experiment_name = "ML_Model_Experiments"
mlflow.set_experiment(experiment_name)
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# ===== Determine next run number for Naive Bayes =====
# Count the existing runs tagged 'naive_bayes'; the new run takes the next ordinal.
runs = client.search_runs([experiment_id], filter_string="tags.model_type='naive_bayes'")
next_id = len(runs) + 1

# ===== Model training =====
model = GaussianNB()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# The estimator is built with its defaults; capture them so this run records its
# hyperparameters like every other model run in the experiment.
params = model.get_params()

# ===== Evaluation =====
# Precision/recall/F1 are averaged across classes weighted by class support.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted')
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
metrics = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1}

print(metrics)

# ===== MLflow Logging & Register Model =====
run_name = f"run{next_id}_naive_bayes"
artifact_path = f"naive_bayes_{next_id}"
model_registry_name = "naive_bayes_model"

with mlflow.start_run(run_name=run_name) as run:
    # The tag is what the run-numbering query above filters on.
    mlflow.set_tag("model_type", "naive_bayes")
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model=model,
        name=artifact_path,
        registered_model_name=model_registry_name,
        input_example=x_test[:5]
    )

print(f"✅ {run_name} completed. Run ID: {run.info.run_id}, Artifact path: {artifact_path}")


{'accuracy': 0.5116279069767442, 'precision': 0.47674418604651164, 'recall': 0.5116279069767442, 'f1_score': 0.45059159526723785}


Registered model 'naive_bayes_model' already exists. Creating a new version of this model...
2025/10/04 22:53:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: naive_bayes_model, version 2


🏃 View run run2_naive_bayes at: http://127.0.0.1:5000/#/experiments/222058835319637488/runs/1c96e30d578d49eb8f8f2f5520d67892
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/222058835319637488
✅ run2_naive_bayes completed. Run ID: 1c96e30d578d49eb8f8f2f5520d67892, Artifact path: naive_bayes_2


Created version '2' of model 'naive_bayes_model'.


### XGBoost

In [25]:
import mlflow
from mlflow.tracking import MlflowClient
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train an XGBoost classifier on the scaled training split, evaluate it on the
# test split, then log params/metrics and register the model in MLflow.

# ===== Set MLflow tracking URI =====
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ===== Single experiment =====
experiment_name = "ML_Model_Experiments"
mlflow.set_experiment(experiment_name)
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# ===== Determine next run number for XGBoost =====
# Count the existing runs tagged 'xgboost'; the new run takes the next ordinal.
runs = client.search_runs([experiment_id], filter_string="tags.model_type='xgboost'")
next_id = len(runs) + 1

# ===== Label encoding for multi-class =====
# The classifier is trained on encoded class ids (0..n_classes-1), so the fitted
# model also predicts encoded ids; `le` is needed to map them back.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)

# Encoded id -> original `Type` label. Logged with the run below so consumers of
# the registered model can decode its predictions without this notebook.
label_mapping = {str(idx): int(label) for idx, label in enumerate(le.classes_)}

# ===== Model training =====
params = {
    "n_estimators": 100,
    "max_depth": 12,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "gamma": 0,
    "reg_alpha": 0,
    "reg_lambda": 1,
    "random_state": 42,
    "objective": "multi:softprob",
    "eval_metric": "mlogloss"
}

model = xgb.XGBClassifier(**params)
model.fit(x_train, y_train_encoded)
y_pred_encoded = model.predict(x_test)
# Decode predictions so the metrics are computed against the original labels.
y_pred = le.inverse_transform(y_pred_encoded)

# ===== Evaluation =====
# zero_division=0 scores a class that is never predicted as 0 instead of warning.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
metrics = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1}

print(metrics)

# ===== MLflow Logging & Register Model =====
run_name = f"run{next_id}_xgboost"
artifact_path = f"xgboost_{next_id}"
model_registry_name = "xgboost_model"

with mlflow.start_run(run_name=run_name) as run:
    # The tag is what the run-numbering query above filters on.
    mlflow.set_tag("model_type", "xgboost")
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    # Persist the class-id mapping next to the model.
    mlflow.log_dict(label_mapping, "label_mapping.json")
    mlflow.sklearn.log_model(
        sk_model=model,
        name=artifact_path,
        registered_model_name=model_registry_name,
        input_example=x_test[:5]
    )

print(f"✅ {run_name} completed. Run ID: {run.info.run_id}, Artifact path: {artifact_path}")
print(f"✅ Model registered as '{model_registry_name}' in MLflow Model Registry")


{'accuracy': 0.7906976744186046, 'precision': 0.7972868217054263, 'recall': 0.7906976744186046, 'f1_score': 0.7876452692811955}


Registered model 'xgboost_model' already exists. Creating a new version of this model...
2025/10/04 22:56:47 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgboost_model, version 3


🏃 View run run3_xgboost at: http://127.0.0.1:5000/#/experiments/222058835319637488/runs/c0c4c5314d5d4e7482ee9b0417230304
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/222058835319637488
✅ run3_xgboost completed. Run ID: c0c4c5314d5d4e7482ee9b0417230304, Artifact path: xgboost_3
✅ Model registered as 'xgboost_model' in MLflow Model Registry


Created version '3' of model 'xgboost_model'.


## Find out the best model

In [28]:
from mlflow.tracking import MlflowClient

# Rank every run in the shared experiment by accuracy and report the best one.

# Point the client at the tracking server explicitly so this cell does not depend
# on an earlier cell having called mlflow.set_tracking_uri().
client = MlflowClient(tracking_uri="http://127.0.0.1:5000")
experiment_name = "ML_Model_Experiments"
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    # Fail with a clear message instead of an AttributeError on `None`.
    raise RuntimeError(f"MLflow experiment '{experiment_name}' was not found on the tracking server")
experiment_id = experiment.experiment_id

# Get all runs
runs = client.search_runs([experiment_id], order_by=["metrics.accuracy DESC"])  # highest accuracy first
if not runs:
    # Fail with a clear message instead of an IndexError on `runs[0]` below.
    raise RuntimeError(f"MLflow experiment '{experiment_name}' has no runs to compare")

# Print summary
for r in runs:
    print(f"Run ID: {r.info.run_id}")
    print(f"Run Name: {r.info.run_name}")
    print(f"Model Type: {r.data.tags.get('model_type')}")
    print(f"Accuracy: {r.data.metrics.get('accuracy')}")
    print(f"F1-score: {r.data.metrics.get('f1_score')}")
    # NOTE(review): falls back to 'N/A' when the run carries no
    # 'mlflow.log-model.history' tag — confirm whether models logged with
    # log_model(name=...) populate this tag at all.
    print(f"Artifact Path: {r.data.tags.get('mlflow.log-model.history', 'N/A')}")
    print("------")

# Extracting the best run
# The runs are sorted by accuracy descending, so the first entry is the best.
best_run = runs[0]
best_model_type = best_run.data.tags.get("model_type")
best_run_id = best_run.info.run_id
best_run_name = best_run.info.run_name
best_accuracy = best_run.data.metrics.get("accuracy")

print(f"Best Model: {best_model_type}")
print(f"Run ID: {best_run_id}")
print(f"Run Name: {best_run_name}")
print(f"Accuracy: {best_accuracy}")


Run ID: b8b07cb2f207450e83158efd00ce42e3
Run Name: run2_random_forest
Model Type: random_forest
Accuracy: 0.8372093023255814
F1-score: 0.835526738827564
Artifact Path: N/A
------
Run ID: e0706a2d9a954568a034455a1492e9fd
Run Name: run1_random_forest
Model Type: random_forest
Accuracy: 0.8372093023255814
F1-score: 0.835526738827564
Artifact Path: N/A
------
Run ID: c0c4c5314d5d4e7482ee9b0417230304
Run Name: run3_xgboost
Model Type: xgboost
Accuracy: 0.7906976744186046
F1-score: 0.7876452692811955
Artifact Path: N/A
------
Run ID: 3cd11d0c866447308707a5b426cb805d
Run Name: run2_xgboost
Model Type: xgboost
Accuracy: 0.7906976744186046
F1-score: 0.7876452692811955
Artifact Path: N/A
------
Run ID: b4da6b67db6f4082a54b6ff0b834696c
Run Name: run1_xgboost
Model Type: xgboost
Accuracy: 0.7906976744186046
F1-score: 0.7876452692811955
Artifact Path: N/A
------
Run ID: 9693dcb2fe89498f8e519c3beca29b6e
Run Name: run3_logistic_reg
Model Type: logistic_reg
Accuracy: 0.7441860465116279
F1-score: 0.726