# Iris Classification with MLflow and KServe

This notebook demonstrates:
1. Training a simple scikit-learn model on the Iris dataset
2. Logging the model to MLflow
3. Preparing the model for deployment with KServe

## 1. Setup and Install Dependencies

In [None]:
# Install required packages
!pip install mlflow scikit-learn boto3 -q

In [None]:
import os
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

## 2. Configure MLflow

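Set the MLflow tracking URI and authentication credentials. All three values are read from environment variables (`MLFLOW_TRACKING_URI`, `MLFLOW_TRACKING_USERNAME`, `MLFLOW_TRACKING_PASSWORD`); if `MLFLOW_TRACKING_URI` is unset, the in-cluster MLflow service address is used.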

In [None]:
# Tracking server: an environment override wins, otherwise fall back to the
# in-cluster MLflow service address.
MLFLOW_TRACKING_URI = os.environ.get(
    'MLFLOW_TRACKING_URI',
    'http://mlflow.mlflow.svc.cluster.local',
)
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Credentials: keep whatever the environment already provides and make sure
# both variables exist (as empty strings) when they were not set.
os.environ.setdefault('MLFLOW_TRACKING_USERNAME', '')
os.environ.setdefault('MLFLOW_TRACKING_PASSWORD', '')

# Echo the effective settings (the password is deliberately not printed).
print(f"MLflow Tracking URI: {MLFLOW_TRACKING_URI}")
print(f"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}")

## 3. Load and Prepare Data

In [None]:
# Fetch the Iris dataset and pull out the feature matrix and the label vector.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Summarize the split and the class labels.
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Classes: {iris.target_names}")

## 4. Train Model with MLflow Tracking

In [None]:
# Select (or create) the experiment that groups the runs of this notebook.
experiment_name = "iris-classification"
mlflow.set_experiment(experiment_name)

# Single source of truth for the estimator's hyperparameters: the same dict
# builds the model and is logged to MLflow, so the recorded values cannot
# drift from the ones actually used for training.
hyperparams = {"max_iter": 200, "random_state": 42}

# Start MLflow run
with mlflow.start_run(run_name="logistic-regression") as run:
    # Train model
    model = LogisticRegression(**hyperparams)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Log parameters (model type plus every hyperparameter passed above)
    mlflow.log_param("model_type", type(model).__name__)
    mlflow.log_params(hyperparams)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the fitted model as a run artifact and register it under the name
    # that the KServe section looks up.
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="iris-classifier"
    )

    print(f"\nRun ID: {run.info.run_id}")
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=iris.target_names))

## 5. Get Model Information for KServe Deployment

In [None]:
# Look up the newest registered version of the model and print the details
# needed to configure the KServe InferenceService.
client = mlflow.tracking.MlflowClient()
model_name = "iris-classifier"

# Use search_model_versions instead of deprecated get_latest_versions
model_versions = client.search_model_versions(f"name='{model_name}'")
if not model_versions:
    # Without this guard, max() below fails with an unhelpful
    # "empty sequence" ValueError that does not mention the registry.
    raise ValueError(
        f"No versions of registered model '{model_name}' were found at "
        f"{mlflow.get_tracking_uri()}. Run the training cell first."
    )

# int() makes the comparison numeric rather than lexicographic.
latest_version = max(model_versions, key=lambda x: int(x.version))

print("\n=== Model Information for KServe ===")
print(f"Model Name: {model_name}")
print(f"Version: {latest_version.version}")
print(f"Run ID: {latest_version.run_id}")
print(f"\nArtifact URI: {latest_version.source}")
print("\nUse this information to configure the KServe InferenceService.")

## 6. Test Local Prediction

In [None]:
# Classify one hand-picked measurement with the in-memory model trained above.
sample_input = [[5.1, 3.5, 1.4, 0.2]]  # Should predict 'setosa'
prediction = model.predict(sample_input)

# Translate the predicted class index into its species name.
predicted_class = iris.target_names[prediction[0]]

# Report the input/output pair, one message per line.
print(
    f"\nTest Input: {sample_input[0]}",
    f"Predicted Class: {predicted_class}",
    f"\nThis sample will be used to test the KServe deployment.",
    sep="\n",
)

## Next Steps

1. Note the Model Name, Version, and Artifact URI printed in section 5
2. Deploy the model using KServe with the InferenceService YAML
3. Test the deployed model endpoint