In [1]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [3]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.6.0 (from mlflow)
  Downloading mlflow_skinny-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.6.0 (from mlflow)
  Downloading mlflow_tracing-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.0 (from mlflow)
  Downloading huey-2.5.4-py3-none-any.whl.metadata (4.6 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.6.0->mlflow)
  Downloading databricks_sdk-0.73.0-py3-none-any.whl.metadata (40 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î

In [5]:
"""
================================================================================
MLflow Lab Exercise: Steel Plates Faults Classification
================================================================================

OBJECTIVE:
Learn how to use MLflow to track machine learning experiments for classifying
faults in steel plates. You will train multiple classification models and
compare their performance using MLflow's tracking capabilities.

DATASET:
Steel Plates Faults Dataset
- Source: UCI Machine Learning Repository (ID: 198)
- Task: Classification (predict fault types in steel plates)
- Features: 27 numerical features describing steel plate characteristics
- Target: 7 different fault types

YOUR TASKS:
1. Complete the model hyperparameters (FIX ME sections)
2. Implement MLflow tracking for each model
3. Log parameters, metrics, and models
4. Compare model performance using MLflow

INSTRUCTIONS:
- Replace all "FIX ME" placeholders with appropriate values
- Run each cell in order
- Check MLflow UI at http://localhost:5000 after training

Good luck!
================================================================================
"""

# ============================================================================
# CELL 1: IMPORTS AND SETUP
# ============================================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from datetime import datetime
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import warnings
warnings.filterwarnings('ignore')

# Confirm the environment is usable and report the versions of the key libraries.
print("‚úÖ All imports successful!")
print("üì¶ Packages loaded:")
for _pkg in (pd, np, mlflow):
    # Each module's __name__ matches the label printed for it (pandas, numpy, mlflow).
    print(f"   - {_pkg.__name__}: {_pkg.__version__}")

‚úÖ All imports successful!
üì¶ Packages loaded:
   - pandas: 2.2.2
   - numpy: 2.0.2
   - mlflow: 3.6.0


In [6]:
# ============================================================================
# CELL 2: LOAD AND PREPARE DATA USING UCIMLREPO
# ============================================================================

print("=" * 80)
print("üìä LOADING STEEL PLATES FAULTS DATASET")
print("=" * 80)

# Install required package first: pip install ucimlrepo
from ucimlrepo import fetch_ucirepo

# Fetch dataset (UCI repository ID 198)
steel_plates_faults = fetch_ucirepo(id=198)

# Data (as pandas dataframes)
X = steel_plates_faults.data.features
y = steel_plates_faults.data.targets

# Metadata
print("üìã Dataset Metadata:")
print(f"   - Name: {steel_plates_faults.metadata['name']}")
print(f"   - Number of Instances: {steel_plates_faults.metadata['num_instances']}")
print(f"   - Number of Features: {steel_plates_faults.metadata['num_features']}")

# Variable information
print(f"\nüìä Dataset Shape: {X.shape[0]} rows √ó {X.shape[1]} columns")

# Display basic info
print(f"\nüìã Features Info:")
print(f"   Feature names: {list(X.columns)}")

# Check target variable
print(f"\nüéØ Target Variable Information:")
print(f"   Target columns: {list(y.columns)}")
print(f"   Target shape: {y.shape}")

# Since the target has multiple columns (one for each fault type), we need to convert to single column
# The dataset has 7 binary columns for each fault type
print(f"\nüîç Target value counts for each fault type:")
for col in y.columns:
    print(f"   {col}: {y[col].sum()} samples")

# Guard the one-hot assumption before collapsing the target: idxmax(axis=1) returns the
# FIRST column holding the row maximum, so a row with no fault flagged (all zeros) or
# with several faults flagged would silently receive a wrong label.
flags_per_row = y.sum(axis=1)
n_ambiguous = int((flags_per_row != 1).sum())
if n_ambiguous:
    raise ValueError(
        f"Expected exactly one fault flag per row, but {n_ambiguous} row(s) have "
        "zero or multiple flags; converting them with idxmax would mislabel them."
    )

# Convert multi-column target to single column
y_single = y.idxmax(axis=1)  # Get the column name with the highest value (1)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_single)

print(f"\nüîç Encoded Target Classes:")
for i, class_name in enumerate(label_encoder.classes_):
    class_count = (y_encoded == i).sum()
    percentage = (class_count / len(y_encoded)) * 100
    print(f"   {i}: {class_name} ({class_count} samples, {percentage:.2f}%)")

# Check for missing values
print(f"\nüîç Missing values in features: {X.isnull().sum().sum()}")
print(f"üîç Missing values in target: {y.isnull().sum().sum()}")

üìä LOADING STEEL PLATES FAULTS DATASET
üìã Dataset Metadata:
   - Name: Steel Plates Faults
   - Number of Instances: 1941
   - Number of Features: 27

üìä Dataset Shape: 1941 rows √ó 27 columns

üìã Features Info:
   Feature names: ['X_Minimum', 'X_Maximum', 'Y_Minimum', 'Y_Maximum', 'Pixels_Areas', 'X_Perimeter', 'Y_Perimeter', 'Sum_of_Luminosity', 'Maximum_of_Luminosity', 'Length_of_Conveyer', 'TypeOfSteel_A300', 'TypeOfSteel_A400', 'Steel_Plate_Thickness', 'Edges_Index', 'Empty_Index', 'Square_Index', 'Outside_X_Index', 'Edges_X_Index', 'Edges_Y_Index', 'Outside_Global_Index', 'LogOfAreas', 'Log_X_Index', 'Log_Y_Index', 'Orientation_Index', 'Luminosity_Index', 'SigmoidOfAreas', 'Minimum_of_Luminosity']

üéØ Target Variable Information:
   Target columns: ['Pastry', 'Z_Scratch', 'K_Scratch', 'Stains', 'Dirtiness', 'Bumps', 'Other_Faults']
   Target shape: (1941, 7)

üîç Target value counts for each fault type:
   Pastry: 158 samples
   Z_Scratch: 190 samples
   K_Scratch: 391

In [7]:
# ============================================================================
# CELL 3: DATA EXPLORATION AND PREPARATION
# ============================================================================

print("\n" + "=" * 80)
print("üîß DATA EXPLORATION AND PREPARATION")
print("=" * 80)

# Summary statistics for every feature column
print(f"\nüìä Features Statistics:")
print(X.describe())

# Per-class sample counts, ordered by encoded class index
print(f"\nüìà Class Distribution:")
class_counts = pd.Series(y_encoded).value_counts().sort_index()
for class_idx, count in class_counts.items():
    # Map the encoded index back to the original fault name for display
    share = (count / len(y_encoded)) * 100
    print(f"   {label_encoder.classes_[class_idx]}: {count} samples ({share:.2f}%)")

# Shapes of the feature matrix and encoded target that the next cells consume
print(
    f"\n‚úÖ Final Dataset Shape:",
    f"   Features (X): {X.shape}",
    f"   Target (y): {y_encoded.shape}",
    sep="\n",
)


üîß DATA EXPLORATION AND PREPARATION

üìä Features Statistics:
         X_Minimum    X_Maximum     Y_Minimum     Y_Maximum   Pixels_Areas  \
count  1941.000000  1941.000000  1.941000e+03  1.941000e+03    1941.000000   
mean    571.136012   617.964451  1.650685e+06  1.650739e+06    1893.878413   
std     520.690671   497.627410  1.774578e+06  1.774590e+06    5168.459560   
min       0.000000     4.000000  6.712000e+03  6.724000e+03       2.000000   
25%      51.000000   192.000000  4.712530e+05  4.712810e+05      84.000000   
50%     435.000000   467.000000  1.204128e+06  1.204136e+06     174.000000   
75%    1053.000000  1072.000000  2.183073e+06  2.183084e+06     822.000000   
max    1705.000000  1713.000000  1.298766e+07  1.298769e+07  152655.000000   

        X_Perimeter   Y_Perimeter  Sum_of_Luminosity  Maximum_of_Luminosity  \
count   1941.000000   1941.000000       1.941000e+03            1941.000000   
mean     111.855229     82.965997       2.063121e+05             130.1937

In [8]:
# ============================================================================
# CELL 4: DATA SPLITTING AND SCALING
# ============================================================================

print("\n" + "=" * 80)
print("‚úÇÔ∏è DATA SPLITTING AND SCALING")
print("=" * 80)

# Hold out 20% of the rows for testing, stratified on the encoded labels
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

print(f"‚úÖ Data split:")
print(f"   Training set: {X_train.shape[0]} samples")
print(f"   Test set: {X_test.shape[0]} samples")

# Per-class counts for each split, ordered by encoded class index
train_counts = pd.Series(y_train).value_counts().sort_index()
test_counts = pd.Series(y_test).value_counts().sort_index()

# Same report for both splits: fault name, sample count, and share of the split
for split_name, split_labels, split_counts in (
    ("Training", y_train, train_counts),
    ("Test", y_test, test_counts),
):
    print(f"\nüìä {split_name} set class distribution:")
    for class_idx, count in split_counts.items():
        class_name = label_encoder.classes_[class_idx]
        percentage = (count / len(split_labels)) * 100
        print(f"   {class_name}: {count} samples ({percentage:.2f}%)")

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"\n‚úÖ Features scaled using StandardScaler")


‚úÇÔ∏è DATA SPLITTING AND SCALING
‚úÖ Data split:
   Training set: 1552 samples
   Test set: 389 samples

üìä Training set class distribution:
   Bumps: 321 samples (20.68%)
   Dirtiness: 44 samples (2.84%)
   K_Scratch: 313 samples (20.17%)
   Other_Faults: 538 samples (34.66%)
   Pastry: 126 samples (8.12%)
   Stains: 58 samples (3.74%)
   Z_Scratch: 152 samples (9.79%)

üìä Test set class distribution:
   Bumps: 81 samples (20.82%)
   Dirtiness: 11 samples (2.83%)
   K_Scratch: 78 samples (20.05%)
   Other_Faults: 135 samples (34.70%)
   Pastry: 32 samples (8.23%)
   Stains: 14 samples (3.60%)
   Z_Scratch: 38 samples (9.77%)

‚úÖ Features scaled using StandardScaler


In [None]:
# ============================================================================
# CELL 5: MODEL HYPERPARAMETERS - LOGISTIC REGRESSION
# ============================================================================

"""
Hyperparameters for Logistic Regression

TUNING HINTS:
- C: Inverse of regularization strength (try values between 0.1 and 10)
- penalty: Regularization type ('l1', 'l2', 'elasticnet', or None for no regularization)
- solver: Algorithm to use ('newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga')
  Not every solver supports every penalty; 'lbfgs' supports 'l2' and None.
- max_iter: Maximum number of iterations (try 100, 200, 500)
"""

# 'multi_class' is intentionally not set: it is deprecated in scikit-learn 1.5+,
# and the value previously passed here ('auto') is already the default behaviour.
lr_params = {
    'C': 1.0,  # Regularization strength (inverse; smaller = stronger)
    'penalty': 'l2',  # Regularization type
    'solver': 'lbfgs',  # Solver algorithm
    'max_iter': 500,  # Maximum iterations
    'random_state': 42,
}

print("üìù Logistic Regression Parameters:")
print(lr_params)

In [None]:
# ============================================================================
# CELL 6: MODEL HYPERPARAMETERS - RANDOM FOREST
# ============================================================================

"""
Hyperparameters for Random Forest Classifier

TUNING HINTS:
- n_estimators: Number of trees (try 50, 100, 200)
- max_depth: Maximum tree depth (try 10, 20, 30, None)
- min_samples_split: Minimum samples to split (try 2, 5, 10)
- min_samples_leaf: Minimum samples at leaf node (try 1, 2, 4)
"""

rf_params = {
    'n_estimators': 200,  # Number of trees
    'max_depth': 20,  # Maximum depth
    'min_samples_split': 2,  # Minimum samples to split
    'min_samples_leaf': 1,  # Minimum samples at leaf
    'random_state': 42
}

print("üå≤ Random Forest Classifier Parameters:")
print(rf_params)

In [None]:
# ============================================================================
# CELL 7: MODEL HYPERPARAMETERS - GRADIENT BOOSTING
# ============================================================================

"""
Hyperparameters for Gradient Boosting Classifier

TUNING HINTS:
- n_estimators: Number of boosting stages (try 100, 200, 300)
- learning_rate: Shrinks contribution of each tree (try 0.05, 0.1, 0.2)
- max_depth: Maximum tree depth (try 3, 5, 7)
- subsample: Fraction of samples for training (try 0.8, 0.9, 1.0)
"""

gb_params = {
    'n_estimators': 200,  # Number of boosting stages
    'learning_rate': 0.1,  # Learning rate
    'max_depth': 5,  # Maximum depth
    'subsample': 0.8,  # Subsample ratio
    'random_state': 42
}

print("‚ö° Gradient Boosting Classifier Parameters:")
print(gb_params)

In [None]:
# ============================================================================
# CELL 8: SETUP MLFLOW EXPERIMENT
# ============================================================================

"""
Set up the MLflow experiment and enable autologging

STEPS:
1. Set the experiment name using mlflow.set_experiment()
2. Enable sklearn autologging using mlflow.sklearn.autolog()

"""

print("\n" + "=" * 80)
print("üöÄ SETTING UP MLFLOW")
print("=" * 80)

# Name under which all runs of this lab are grouped; later cells look the
# experiment up again by this same variable.
experiment_name = "Steel_Plates_Faults_Classification"

# Make the experiment active; runs started after this call are recorded under it
mlflow.set_experiment(experiment_name)

# Enable scikit-learn autologging for estimators fitted after this call
mlflow.sklearn.autolog()

print(f"‚úÖ MLflow experiment created: '{experiment_name}'")
print(f"üìç Tracking URI: {mlflow.get_tracking_uri()}")
print(f"‚úÖ MLflow autolog enabled for scikit-learn")

In [None]:
# ============================================================================
# CELL 9: TRAIN AND LOG MODEL FUNCTION
# ============================================================================

"""
Shared training helper for all models in this lab

WHAT IT DOES:
1. Starts an MLflow run with mlflow.start_run()
2. Logs parameters using mlflow.log_params()
3. Logs metrics using mlflow.log_metrics()
4. Logs the trained model using mlflow.sklearn.log_model()
5. Sets tags using mlflow.set_tags()
"""

def train_and_log_model(model, model_name, params, X_train, X_test, y_train, y_test):
    """Fit ``model`` inside a new MLflow run and record params, metrics, model and tags.

    Args:
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        model_name: Used as the MLflow run name and as the ``model_type`` tag.
        params: Dict of hyperparameters to log for the run. Expected to be the
            same dict the estimator was built from.
        X_train: Training features passed to ``fit`` and used for train metrics.
        X_test: Held-out features used for test metrics.
        y_train: Training labels.
        y_test: Held-out labels.

    Returns:
        Tuple ``(run_id, metrics)``: the MLflow run id and a dict holding train/test
        accuracy, macro precision/recall/F1 and ``training_time_seconds``.
    """
    print(f"\n{'='*80}")
    print(f"üéØ Training Model: {model_name}")
    print(f"{'='*80}")

    # The context manager ends the run even if training or logging raises
    with mlflow.start_run(run_name=model_name) as run:

        print(f"üìä Run ID: {run.info.run_id}")

        # Log the hyperparameters before fitting
        mlflow.log_params(params)
        print(f"‚úÖ Parameters logged: {params}")

        # Train model (timed with wall-clock time around fit only)
        start_time = datetime.now()
        model.fit(X_train, y_train)
        training_time = (datetime.now() - start_time).total_seconds()

        # Make predictions
        y_pred_train = model.predict(X_train)
        y_pred_test = model.predict(X_test)

        # Calculate classification metrics. Macro averaging weights every class
        # equally; zero_division=0 scores a never-predicted class as 0.
        metrics = {
            # Training metrics
            'train_accuracy': accuracy_score(y_train, y_pred_train),
            'train_precision_macro': precision_score(y_train, y_pred_train, average='macro', zero_division=0),
            'train_recall_macro': recall_score(y_train, y_pred_train, average='macro', zero_division=0),
            'train_f1_macro': f1_score(y_train, y_pred_train, average='macro', zero_division=0),

            # Test metrics
            'test_accuracy': accuracy_score(y_test, y_pred_test),
            'test_precision_macro': precision_score(y_test, y_pred_test, average='macro', zero_division=0),
            'test_recall_macro': recall_score(y_test, y_pred_test, average='macro', zero_division=0),
            'test_f1_macro': f1_score(y_test, y_pred_test, average='macro', zero_division=0),

            # Training time
            'training_time_seconds': training_time
        }

        # Log all metrics in one call
        mlflow.log_metrics(metrics)

        print(f"‚úÖ Metrics logged:")
        print(f"\n   Training Metrics:")
        print(f"   - Accuracy: {metrics['train_accuracy']:.4f}")
        print(f"   - Precision (macro): {metrics['train_precision_macro']:.4f}")
        print(f"   - Recall (macro): {metrics['train_recall_macro']:.4f}")
        print(f"   - F1 (macro): {metrics['train_f1_macro']:.4f}")
        print(f"\n   Test Metrics:")
        print(f"   - Accuracy: {metrics['test_accuracy']:.4f}")
        print(f"   - Precision (macro): {metrics['test_precision_macro']:.4f}")
        print(f"   - Recall (macro): {metrics['test_recall_macro']:.4f}")
        print(f"   - F1 (macro): {metrics['test_f1_macro']:.4f}")
        print(f"\n   Training Time: {training_time:.2f}s")

        # Save the fitted estimator as a run artifact named "model"
        mlflow.sklearn.log_model(model, "model")
        print(f"‚úÖ Model artifact saved")

        # Tags make runs easy to filter in the MLflow UI and search API
        mlflow.set_tags({
            "model_type": model_name,
            "dataset": "steel_plates_faults",
            "task": "multiclass_classification",
        })
        print(f"‚úÖ Tags added for easy filtering")

        print(f"\nüéâ Run completed successfully!")

        return run.info.run_id, metrics

print("‚úÖ Training function defined!")

In [None]:
# ============================================================================
# CELL 10: TRAIN LOGISTIC REGRESSION
# ============================================================================

"""
Train Logistic Regression with MLflow tracking

STEPS:
1. Create LogisticRegression instance with lr_params
2. Call train_and_log_model() with the scaled train/test features
"""

print("\n" + "=" * 80)
print("üìù MODEL 1: LOGISTIC REGRESSION")
print("=" * 80)

# Create model instance from the hyperparameter dict
lr_model = LogisticRegression(**lr_params)

# Train and log model; the same dict is passed again so it is logged with the run
lr_run_id, lr_metrics = train_and_log_model(
    lr_model,
    "Logistic_Regression",
    lr_params,
    X_train_scaled,
    X_test_scaled,
    y_train,
    y_test,
)

print(f"\n‚úÖ Logistic Regression training complete!")
print(f"   Run ID: {lr_run_id}")
print(f"   Test Accuracy: {lr_metrics['test_accuracy']:.4f}")
print(f"   Test F1 Score: {lr_metrics['test_f1_macro']:.4f}")

In [None]:
# ============================================================================
# CELL 11: TRAIN RANDOM FOREST
# ============================================================================

"""
Train Random Forest with MLflow tracking

STEPS:
1. Create RandomForestClassifier instance with rf_params
2. Call train_and_log_model() with the scaled train/test features
"""

print("\n" + "=" * 80)
print("üå≤ MODEL 2: RANDOM FOREST CLASSIFIER")
print("=" * 80)

# Create model instance from the hyperparameter dict
rf_model = RandomForestClassifier(**rf_params)

# Train and log model; the same dict is passed again so it is logged with the run
rf_run_id, rf_metrics = train_and_log_model(
    rf_model,
    "Random_Forest",
    rf_params,
    X_train_scaled,
    X_test_scaled,
    y_train,
    y_test,
)

print(f"\n‚úÖ Random Forest training complete!")
print(f"   Run ID: {rf_run_id}")
print(f"   Test Accuracy: {rf_metrics['test_accuracy']:.4f}")
print(f"   Test F1 Score: {rf_metrics['test_f1_macro']:.4f}")

In [None]:
# ============================================================================
# CELL 12: TRAIN GRADIENT BOOSTING
# ============================================================================

"""
Train Gradient Boosting with MLflow tracking

STEPS:
1. Create GradientBoostingClassifier instance with gb_params
2. Call train_and_log_model() with the scaled train/test features
"""

print("\n" + "=" * 80)
print("‚ö° MODEL 3: GRADIENT BOOSTING CLASSIFIER")
print("=" * 80)

# Create model instance from the hyperparameter dict
gb_model = GradientBoostingClassifier(**gb_params)

# Train and log model; the same dict is passed again so it is logged with the run
gb_run_id, gb_metrics = train_and_log_model(
    gb_model,
    "Gradient_Boosting",
    gb_params,
    X_train_scaled,
    X_test_scaled,
    y_train,
    y_test,
)

print(f"\n‚úÖ Gradient Boosting training complete!")
print(f"   Run ID: {gb_run_id}")
print(f"   Test Accuracy: {gb_metrics['test_accuracy']:.4f}")
print(f"   Test F1 Score: {gb_metrics['test_f1_macro']:.4f}")

In [None]:
# ============================================================================
# CELL 13: COMPARE RESULTS FROM MLFLOW
# ============================================================================

print("\n" + "=" * 80)
print("üìä COMPARING RESULTS FROM MLFLOW")
print("=" * 80)

# Get experiment and runs
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)

# get_experiment_by_name returns None for an unknown name; fail with an actionable
# message instead of an AttributeError on `.experiment_id` below.
if experiment is None:
    raise RuntimeError(
        f"MLflow experiment '{experiment_name}' was not found. "
        "Run the MLflow setup cell and at least one training cell first."
    )

# Every run recorded under the experiment is returned (including runs from
# earlier executions of the training cells), best test accuracy first.
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.test_accuracy DESC"]
)

print(f"\n‚úÖ Found {len(runs)} runs in experiment '{experiment_name}'")

# Create comparison DataFrame; a metric missing from a run is reported as 0
mlflow_results = []
for run in runs:
    mlflow_results.append({
        'Model': run.data.tags.get('mlflow.runName', 'Unknown'),
        'Test_Accuracy': run.data.metrics.get('test_accuracy', 0),
        'Test_Precision': run.data.metrics.get('test_precision_macro', 0),
        'Test_Recall': run.data.metrics.get('test_recall_macro', 0),
        'Test_F1_Score': run.data.metrics.get('test_f1_macro', 0),
        'Train_Accuracy': run.data.metrics.get('train_accuracy', 0),
        'Training_Time': run.data.metrics.get('training_time_seconds', 0),
        'Run_ID': run.info.run_id[:8] + '...'  # shortened for display only
    })

results_df = pd.DataFrame(mlflow_results)

print("\n" + "=" * 80)
print("üèÜ FINAL RESULTS (Sorted by Test Accuracy)")
print("=" * 80)
print(results_df.to_string(index=False))

# Find best model (skipped when the experiment has no runs yet)
if len(results_df) > 0:
    best_model = results_df.loc[results_df['Test_Accuracy'].idxmax()]
    print("\n" + "=" * 80)
    print("ü•á BEST MODEL (Highest Test Accuracy)")
    print("=" * 80)
    print(f"Model: {best_model['Model']}")
    print(f"Test Accuracy: {best_model['Test_Accuracy']:.4f}")
    print(f"Test Precision: {best_model['Test_Precision']:.4f}")
    print(f"Test Recall: {best_model['Test_Recall']:.4f}")
    print(f"Test F1 Score: {best_model['Test_F1_Score']:.4f}")
    print(f"Training Time: {best_model['Training_Time']:.2f}s")
    print(f"Run ID: {best_model['Run_ID']}")


In [None]:
# ============================================================================
# CELL 14: LAUNCH MLFLOW UI (OPTIONAL)
# ============================================================================

"""
To view the MLflow UI, run this command in your terminal:

    mlflow ui --host 0.0.0.0 --port 5000

Then open your browser and navigate to: http://localhost:5000

In the UI you can:
- Compare model performance visually
- View parameter combinations
- Download trained models
- Track experiment history
- Filter and search runs
- Analyze classification results
"""

# Closing summary: banner, key takeaways, next steps and a stretch challenge.
# The lines are collected first and written in order, one per output line.
_closing_rule = "=" * 80
_closing_lines = [
    "\n" + _closing_rule,
    "üéâ LAB COMPLETE!",
    _closing_rule,
    "\nüìà Key Takeaways:",
    "   - Trained 3 classification models for steel plates faults detection",
    "   - Used MLflow to track all experiments automatically",
    "   - Logged parameters, metrics, and trained models",
    "   - Compared models using multiple classification metrics",
    "   - Can now reproduce results and deploy best model",
    "\nNext steps:",
    "1. Run in terminal: mlflow ui --host 0.0.0.0 --port 5000",
    "2. Open: http://localhost:5000 in your browser",
    "3. Explore your experiments in the MLflow UI",
    "4. Try different hyperparameters to improve performance!",
    "\nüí° Challenge: Can you get Test Accuracy above 0.85?",
    "   Hint: Try adjusting n_estimators, learning_rate, or max_depth",
    "   Consider feature engineering or ensemble methods!",
    "\n" + _closing_rule,
]
for _closing_line in _closing_lines:
    print(_closing_line)