In [3]:
import mlflow
import mlflow.sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import itertools
import joblib

In [4]:
# Load the digits dataset. `digits` itself stays in scope because the
# confusion-matrix plots read `digits.target_names` for their tick labels.
digits = datasets.load_digits()
X = digits.data
y = digits.target

# Hold out 20% of the samples for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features: scaling statistics are learned from the training
# split only and then applied unchanged to the held-out split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Search grid for the SVM sweep: every C value is combined with every gamma value.
C_values, gamma_values = [0.1, 1, 10], [0.01, 0.1, 1]

In [6]:
# Running record of the best configuration seen so far, ranked by F1 score.
best_f1, best_model, best_params = 0, None, {}

# Per-configuration results, filled in sweep order and used by the comparison plots.
accuracies, f1_scores, configs = [], [], []

# Select the MLflow experiment that all subsequent runs are recorded under.
mlflow.set_experiment("Digits_SVM_Tracking")

2025/03/03 10:52:04 INFO mlflow.tracking.fluent: Experiment with name 'Digits_SVM_Tracking' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/janak/Documents/Mlflow_Blog/MlFlow_Tracking/mlruns/486826793273855203', creation_time=1740978424533, experiment_id='486826793273855203', last_update_time=1740978424533, lifecycle_stage='active', name='Digits_SVM_Tracking', tags={}>

In [7]:
# Grid sweep: one MLflow run per (C, gamma) combination.
for C, gamma in itertools.product(C_values, gamma_values):
    with mlflow.start_run():
        # Train an RBF-kernel SVM for this configuration and score it on the held-out split.
        model = SVC(C=C, gamma=gamma, kernel='rbf', random_state=42).fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        cm = confusion_matrix(y_test, y_pred)

        # Record the configuration and its scores on the active run.
        mlflow.log_param("C", C)
        mlflow.log_param("gamma", gamma)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("f1_score", f1)

        # Render the confusion matrix as an annotated heatmap, write it to disk,
        # and attach the image to the run. The figure is closed right away so
        # figures do not accumulate across iterations.
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=digits.target_names,
            yticklabels=digits.target_names,
            ax=ax,
        )
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        ax.set_title(f'Confusion Matrix (C={C}, gamma={gamma})')
        cm_filename = f"confusion_matrix_C{C}_gamma{gamma}.png"
        fig.savefig(cm_filename)
        plt.close(fig)
        mlflow.log_artifact(cm_filename)

        # Keep the per-configuration results for the comparison plots.
        accuracies.append(acc)
        f1_scores.append(f1)
        configs.append(f"C={C}, gamma={gamma}")

        # Strict comparison: on a tie the earliest configuration is kept.
        if f1 > best_f1:
            best_f1, best_model, best_params = f1, model, {"C": C, "gamma": gamma}


In [8]:
# Persist the best model from the sweep and record it in MLflow.
# Compare against None explicitly instead of relying on the estimator's truthiness.
if best_model is not None:
    model_filename = "best_svm_model.pkl"
    joblib.dump(best_model, model_filename)

    # Log inside an explicit, named run. Calling the fluent logging functions
    # with no active run makes MLflow start one implicitly and leave it open,
    # which keeps that run active and makes a later non-nested
    # `mlflow.start_run()` (e.g. re-running the sweep cell) raise.
    with mlflow.start_run(run_name="best_model"):
        mlflow.sklearn.log_model(best_model, "best_model")
        mlflow.log_artifact(model_filename)
        mlflow.log_params(best_params)



In [9]:
# Bar chart of test accuracy for every hyperparameter configuration,
# saved to disk and attached to the active MLflow run.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(configs, accuracies, color='blue', alpha=0.7)
ax.set_xlabel("Hyperparameter Configs")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy Comparison")
# Anchor the rotated labels at their right end so each one lines up with its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
# Long rotated tick labels can overflow the default bottom margin; refit the
# layout so they and the x-axis label are not clipped in the saved image.
fig.tight_layout()
fig.savefig("accuracy_comparison.png")
plt.close(fig)
mlflow.log_artifact("accuracy_comparison.png")

In [10]:
# Bar chart of F1 score for every hyperparameter configuration,
# saved to disk and attached to the active MLflow run.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(configs, f1_scores, color='green', alpha=0.7)
ax.set_xlabel("Hyperparameter Configs")
ax.set_ylabel("F1 Score")
ax.set_title("F1 Score Comparison")
# Anchor the rotated labels at their right end so each one lines up with its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
# Long rotated tick labels can overflow the default bottom margin; refit the
# layout so they and the x-axis label are not clipped in the saved image.
fig.tight_layout()
fig.savefig("f1_score_comparison.png")
plt.close(fig)
mlflow.log_artifact("f1_score_comparison.png")

# Logging outside a `with mlflow.start_run()` block attaches to the active run,
# or makes MLflow start one implicitly. This is the last tracking call visible
# in the notebook, so end that run here rather than leaving it open
# (`end_run` does nothing when no run is active).
mlflow.end_run()

print(f"Best model parameters: {best_params}")

Best model parameters: {'C': 1, 'gamma': 0.01}
