In [40]:
import mlflow
from mlflow.models import infer_signature
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn.datasets import make_classification
import numpy as np

In [41]:
# Build a synthetic 3-class dataset: 1000 samples, 20 features, imbalanced class weights
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=5,
    n_redundant=10,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.5, 0.40, 0.1],
    random_state=1,
)

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Hyperparameters passed to LogisticRegression
lr_params = dict(
    solver="lbfgs",
    max_iter=1000,
    random_state=8888,
)

# Hyperparameters passed to GradientBoostingClassifier
gb_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    random_state=8888,
    max_depth=3,
)

In [42]:
# Same inputs and arguments as the split in the previous cell (test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [43]:
def train_and_evaluate_model(model, model_name, params, X_train, X_test, y_train, y_test):
    """Fit ``model``, evaluate it on the test split and log everything to one MLflow run.

    Inside a run named ``model_name`` this function:

    * fits ``model`` on ``X_train``/``y_train`` and predicts ``X_test``;
    * logs ``params`` plus accuracy, weighted/macro averages and per-class
      precision/recall/f1/support taken from ``classification_report``;
    * logs the plain-text classification report as ``classification_report.txt``;
    * logs and registers the fitted model with an inferred signature and a
      small input example;
    * prints the classification report.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        model_name: Used as the run name and, lower-cased, in the artifact
            path (``<name>_model``) and registered model name (``<name>-model``).
        params: Mapping of hyperparameters, logged via ``mlflow.log_params``.
        X_train, y_train: Training features and labels.
        X_test, y_test: Evaluation features and labels.

    Returns:
        The value returned by ``mlflow.sklearn.log_model``.
    """
    with mlflow.start_run(run_name=f"{model_name}"):
        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Calculate metrics; the text report is built once and reused for
        # both the logged artifact and the printed summary.
        accuracy = accuracy_score(y_test, y_pred)
        report_dict = classification_report(y_test, y_pred, output_dict=True)
        report_text = classification_report(y_test, y_pred)

        # Log parameters
        mlflow.log_params(params)

        # Log overall metrics
        mlflow.log_metric("accuracy", accuracy)
        for avg_key, prefix in (("weighted avg", "weighted"), ("macro avg", "macro_avg")):
            avg_stats = report_dict[avg_key]
            mlflow.log_metric(f"{prefix}_precision", avg_stats['precision'])
            mlflow.log_metric(f"{prefix}_recall", avg_stats['recall'])
            mlflow.log_metric(f"{prefix}_f1", avg_stats['f1-score'])

        # Log per-class metrics. Iterate over the report's own entries rather
        # than assuming labels are "0".."n-1": the scalar "accuracy" entry and
        # the "... avg" summary rows are skipped, everything else is a class.
        for class_name, class_stats in report_dict.items():
            if not isinstance(class_stats, dict) or class_name.endswith(" avg"):
                continue
            mlflow.log_metric(f"class_{class_name}_precision", class_stats['precision'])
            mlflow.log_metric(f"class_{class_name}_recall", class_stats['recall'])
            mlflow.log_metric(f"class_{class_name}_f1", class_stats['f1-score'])
            mlflow.log_metric(f"class_{class_name}_support", class_stats['support'])

        # Log the classification report as a text artifact. log_text uploads
        # the string directly, so no local file has to exist beforehand.
        mlflow.log_text(report_text, "classification_report.txt")

        # Log the model; only a few rows are stored as the input example
        # instead of the whole training set.
        signature = infer_signature(X_train, model.predict(X_train))
        model_info = mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path=f"{model_name.lower()}_model",
            signature=signature,
            input_example=X_train[:5],
            registered_model_name=f"{model_name.lower()}-model",
        )

        print(f"\n{model_name} Results:")
        print(report_text)

        return model_info

In [None]:
# Point MLflow at the local tracking server
host = "127.0.0.1"
port = 8080
mlflow.set_tracking_uri(uri=f"http://{host}:{port}")
print(f"MLflow tracking URI set to: {mlflow.get_tracking_uri()}")

# Select the experiment to log under, creating it when it is not found
experiment_name = "Model Comparison New"
try:
    experiment = mlflow.get_experiment_by_name(experiment_name)

    if experiment is not None:
        print(f"Using existing experiment: {experiment_name}")
    else:
        experiment_id = mlflow.create_experiment(experiment_name)
        print(f"Created new experiment: {experiment_name} with ID: {experiment_id}")

    # Make it the active experiment for the runs started below
    mlflow.set_experiment(experiment_name)
except Exception as e:
    # Any failure above is reported and the "Default" experiment is used instead
    print(f"Error setting up experiment: {e}")
    mlflow.set_experiment("Default")

# Logistic regression: fit, evaluate and log
lr = LogisticRegression(**lr_params)
lr_model_info = train_and_evaluate_model(
    lr, "LogisticRegression", lr_params, X_train, X_test, y_train, y_test
)

# Gradient boosting: fit, evaluate and log
gb = GradientBoostingClassifier(**gb_params)
gb_model_info = train_and_evaluate_model(
    gb, "GradientBoosting", gb_params, X_train, X_test, y_train, y_test
)

In [None]:
# Predict the held-out set with both models (logistic regression first, then gradient boosting)
lr_predictions, gb_predictions = (clf.predict(X_test) for clf in (lr, gb))

# Put the true labels next to each model's predictions, one row per test sample
comparison_df = pd.DataFrame(
    dict(
        actual_class=y_test,
        lr_predicted=lr_predictions,
        gb_predicted=gb_predictions,
    )
)

print("\nPrediction Comparison (first 5 samples):")
print(comparison_df.head())