In [None]:
import pandas as pd
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import lightgbm as lgb
from sklearn.metrics import accuracy_score, classification_report
import os

# Choose the MLflow tracking server from the DOCKER_ENV environment variable:
# the "mlflow" host is used only when the variable is exactly "true"
# (presumably the container/service name -- confirm against the deployment),
# otherwise the server is expected on localhost.
if os.getenv("DOCKER_ENV") == "true":
    tracking_uri = "http://mlflow:5000"
else:
    tracking_uri = "http://localhost:5000"

# Send every run started in this notebook to that server and experiment.
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment("Classification Gauntlet (AI4I)")

# Echo the URI MLflow reports back as a quick sanity check.
print(f"MLflow tracking URI set to: {mlflow.get_tracking_uri()}")

In [None]:
# --- 1. Data Loading and Preprocessing ---
# Load the AI4I 2020 CSV, build the feature matrix X and the binary target y,
# make a stratified 80/20 train/test split, and standardize the features.
df = pd.read_csv('data/AI4I_2020_uci_dataset/ai4i2020.csv')

# Row/product identifiers are dropped from the feature set.
df = df.drop(['UDI', 'Product ID'], axis=1)

# The per-failure-mode flags are outcome labels recorded alongside the target:
# per the dataset documentation, 'Machine failure' is set to 1 when one of
# these modes occurs. Keeping them in X leaks the label into the features and
# makes every downstream score meaningless, so they are removed here.
# errors='ignore' keeps the cell working on a CSV that already lacks them.
FAILURE_MODE_COLS = ['TWF', 'HDF', 'PWF', 'OSF', 'RNF']
df = df.drop(columns=FAILURE_MODE_COLS, errors='ignore')

# One-hot encode the categorical 'Type' column (first level dropped).
df = pd.get_dummies(df, columns=['Type'], drop_first=True)

X = df.drop('Machine failure', axis=1)
y = df['Machine failure']

# Stratify on y so both splits keep the same failure rate; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data loaded and preprocessed.")

In [None]:
# --- 2. Baseline Model Training ---
# Fit each baseline classifier on the scaled standard features, evaluate it on
# the held-out test split, and record parameters, metrics and the fitted model
# in its own MLflow run. Per-model results are also kept in `baseline_results`
# so later cells can compare against them.
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Define our baseline models
models = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVC': SVC(probability=True, random_state=42),
    'LightGBM': lgb.LGBMClassifier(random_state=42, verbose=-1)
}

# model name -> dict of metrics, training time and MLflow run id
baseline_results = {}

print("🚀 Starting Baseline Model Training...")
print("=" * 50)

for model_name, model in models.items():
    print(f"\n📊 Training {model_name}...")

    # One MLflow run per model
    with mlflow.start_run(run_name=f"{model_name}_baseline") as run:
        # Log tags for easy filtering
        mlflow.set_tag("phase", "baseline")
        mlflow.set_tag("model_type", model_name)
        mlflow.set_tag("features", "standard")
        mlflow.set_tag("dataset", "AI4I_2020")

        # Time only the fit call
        start_time = time.time()
        model.fit(X_train_scaled, y_train)
        training_time = time.time() - start_time

        # Hard predictions, plus positive-class probabilities when the model exposes them
        y_pred = model.predict(X_test_scaled)
        y_pred_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, 'predict_proba') else None

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc_score = roc_auc_score(y_test, y_pred_proba) if y_pred_proba is not None else None

        # Log parameters
        if hasattr(model, 'get_params'):
            mlflow.log_params(model.get_params())

        # Log metrics
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        # Compare against None explicitly: an AUC of 0.0 is a valid (if terrible)
        # score and must not be silently dropped by a truthiness test.
        if auc_score is not None:
            mlflow.log_metric("auc_score", auc_score)
        mlflow.log_metric("training_time_seconds", training_time)

        # Log dataset info
        mlflow.log_metric("train_samples", len(X_train))
        mlflow.log_metric("test_samples", len(X_test))
        mlflow.log_metric("n_features", X_train_scaled.shape[1])

        # Store results for summary
        baseline_results[model_name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'auc_score': auc_score,
            'training_time': training_time,
            'run_id': run.info.run_id
        }

        # Log the fitted model as a run artifact and register it.
        # `model_uri` is not read in this cell; it is retained because it is a
        # notebook-level name that other cells may reference.
        model_uri = f"runs:/{run.info.run_id}/model"
        mlflow.sklearn.log_model(
            model,
            "model",
            registered_model_name=f"ai4i_classifier_{model_name.lower()}_baseline"
        )

        # Format AUC separately because it may be None (no predict_proba)
        auc_display = f"{auc_score:.4f}" if auc_score is not None else "N/A"
        print(f"✅ {model_name} - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, AUC: {auc_display}")
        print(f"   Training time: {training_time:.2f}s")
        print(f"   Run ID: {run.info.run_id}")

print("\n🎯 Baseline Training Complete!")
print("=" * 50)

# Display summary table.
# Build the frame row-wise (one row per model) so each metric column keeps a
# numeric dtype. Transposing a column-wise frame would leave every column as
# object dtype (floats mixed with the run_id strings), and DataFrame.round
# leaves object columns untouched, so the rounding below would do nothing.
results_df = pd.DataFrame.from_dict(baseline_results, orient='index')
print("\n📈 Baseline Results Summary:")
print(results_df[['accuracy', 'f1_score', 'auc_score', 'training_time']].round(4))

In [None]:
# --- 3. Advanced Feature Engineering ---
# Derive ratio, rolling, interaction, binned and polynomial features from X,
# then produce a train/test split of the engineered matrix that matches the
# baseline split and standardize it with its own scaler.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

print("🔧 Starting Advanced Feature Engineering...")
print("=" * 50)

# First, let's examine our original features
print(f"Original features: {list(X.columns)}")
print(f"Original dataset shape: {X.shape}")

# Work on a copy so the baseline feature matrix X stays untouched
X_engineered = X.copy()

# 1. DOMAIN-SPECIFIC FEATURES for Industrial Equipment
print("\n🏭 Creating Domain-Specific Features...")

# Torque per unit of rotational speed (small epsilon guards against division by zero)
X_engineered['power_efficiency'] = X_engineered['Torque [Nm]'] / (X_engineered['Rotational speed [rpm]'] + 1e-6)

# Product of both temperatures, scaled down by tool wear (+1 avoids division by zero)
X_engineered['temp_power_stress'] = X_engineered['Process temperature [K]'] * X_engineered['Air temperature [K]'] / (X_engineered['Tool wear [min]'] + 1)

# Tool wear per unit of rotational speed
X_engineered['wear_rate'] = X_engineered['Tool wear [min]'] / (X_engineered['Rotational speed [rpm]'] + 1e-6)

# Process temperature minus air temperature
X_engineered['temp_delta'] = X_engineered['Process temperature [K]'] - X_engineered['Air temperature [K]']

# 2. STATISTICAL ROLLING FEATURES (temporal patterns)
print("📊 Creating Statistical Features...")

# For demonstration, we'll create rolling statistics on key numerical columns
numerical_cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

# Sort by a pseudo-time index (using index as time proxy)
# NOTE(review): this assumes row order reflects time -- confirm. The windows are
# also computed over the full dataset before the train/test split, so a row's
# rolling statistics can include rows that end up in the other split.
X_sorted = X_engineered.sort_index()

# 5-row rolling mean and std per column; std of a single-row window is NaN -> 0
for col in numerical_cols:
    X_engineered[f'{col}_rolling_mean_5'] = X_sorted[col].rolling(window=5, min_periods=1).mean()
    X_engineered[f'{col}_rolling_std_5'] = X_sorted[col].rolling(window=5, min_periods=1).std().fillna(0)

# 3. INTERACTION FEATURES (non-linear relationships)
print("🔗 Creating Interaction Features...")

# Pairwise products of selected raw measurements
X_engineered['torque_speed_interaction'] = X_engineered['Torque [Nm]'] * X_engineered['Rotational speed [rpm]']
X_engineered['temp_wear_interaction'] = X_engineered['Process temperature [K]'] * X_engineered['Tool wear [min]']
X_engineered['temp_torque_interaction'] = X_engineered['Process temperature [K]'] * X_engineered['Torque [Nm]']

# 4. BINNED FEATURES (categorical insights from continuous)
print("📦 Creating Binned Features...")

# Temperature stress levels (bins span -inf..inf, so every row gets a label)
X_engineered['temp_stress_level'] = pd.cut(X_engineered['temp_delta'],
                                         bins=[-np.inf, -5, 0, 5, np.inf],
                                         labels=['low', 'normal', 'elevated', 'high']).astype(str)

# Tool wear categories.
# pd.cut intervals are right-closed by default, so without include_lowest the
# first bin is (0, 50] and a tool wear of exactly 0 falls outside every bin.
# It would become NaN, then the string 'nan' after astype(str), and finally a
# spurious 'wear_category_nan' dummy column. include_lowest=True puts 0 in 'new'.
X_engineered['wear_category'] = pd.cut(X_engineered['Tool wear [min]'],
                                     bins=[0, 50, 150, 250, np.inf],
                                     labels=['new', 'moderate', 'high', 'critical'],
                                     include_lowest=True).astype(str)

# Convert categorical binned features to dummy variables
X_engineered = pd.get_dummies(X_engineered, columns=['temp_stress_level', 'wear_category'], drop_first=True)

# 5. POLYNOMIAL FEATURES (capture non-linear patterns)
print("🧮 Creating Polynomial Features...")

# Select key features for polynomial expansion (to avoid explosion)
key_features = ['Torque [Nm]', 'Rotational speed [rpm]', 'Tool wear [min]', 'power_efficiency']
# interaction_only=True: pairwise products only, no squared terms, no bias column
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X_engineered[key_features])
poly_feature_names = poly.get_feature_names_out(key_features)

# Add only the generated product columns; the degree-1 terms are already present
for i, name in enumerate(poly_feature_names):
    if name not in key_features:  # Skip original features
        X_engineered[f'poly_{name}'] = X_poly[:, i]

print("\n✨ Feature Engineering Complete!")
print(f"Original features: {X.shape[1]}")
print(f"Engineered features: {X_engineered.shape[1]}")
print(f"New features added: {X_engineered.shape[1] - X.shape[1]}")

# Prepare engineered train/test splits (same seed, size and stratification as the baseline split)
X_train_eng, X_test_eng, _, _ = train_test_split(X_engineered, y, test_size=0.2, random_state=42, stratify=y)

# Later cells train on these matrices using y_train / y_test from the baseline
# split, so the row membership of both splits must be identical. Fail loudly
# here instead of silently training on misaligned labels.
assert X_train_eng.index.equals(X_train.index) and X_test_eng.index.equals(X_test.index), \
    "Engineered split does not match the baseline split; y_train/y_test would be misaligned"

# Scale the engineered features (scaler fitted on the training split only)
scaler_eng = StandardScaler()
X_train_eng_scaled = scaler_eng.fit_transform(X_train_eng)
X_test_eng_scaled = scaler_eng.transform(X_test_eng)

print("\n🎯 Engineered dataset ready for training!")
print(f"Training shape: {X_train_eng_scaled.shape}")
print(f"Test shape: {X_test_eng_scaled.shape}")

# Show sample of new features
new_features = [col for col in X_engineered.columns if col not in X.columns]
print("\n🔍 Sample of new features created:")
for feature in new_features[:10]:  # Show first 10 new features
    print(f"  • {feature}")
if len(new_features) > 10:
    print(f"  ... and {len(new_features) - 10} more features")

In [None]:
# --- 4. Feature-Engineered Model Training ---
# Retrain the same three model configurations on the engineered features, log
# each to its own MLflow run, and record the relative change in F1 and accuracy
# against the matching entry in `baseline_results`.
import math


def _pct_improvement(new_value, baseline_value):
    """Return the percent change of `new_value` relative to `baseline_value`.

    Returns NaN when the baseline is 0, because the relative change is
    undefined there (e.g. a baseline model that never predicts the positive
    class has an F1 of 0).
    """
    if baseline_value == 0:
        return float('nan')
    return ((new_value - baseline_value) / baseline_value) * 100


print("🚀 Starting Feature-Engineered Model Training...")
print("=" * 50)

# Define the same models for fair comparison
models_engineered = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVC': SVC(probability=True, random_state=42),
    'LightGBM': lgb.LGBMClassifier(random_state=42, verbose=-1)
}

# model name -> dict of metrics, improvements, training time and MLflow run id
engineered_results = {}

for model_name, model in models_engineered.items():
    print(f"\n📊 Training {model_name} (Feature-Engineered)...")

    # One MLflow run per model
    with mlflow.start_run(run_name=f"{model_name}_engineered") as run:
        # Log tags for easy filtering and comparison
        mlflow.set_tag("phase", "engineered")
        mlflow.set_tag("model_type", model_name)
        mlflow.set_tag("features", "engineered")
        mlflow.set_tag("dataset", "AI4I_2020")

        # Time only the fit call
        start_time = time.time()
        model.fit(X_train_eng_scaled, y_train)
        training_time = time.time() - start_time

        # Hard predictions, plus positive-class probabilities when the model exposes them
        y_pred_eng = model.predict(X_test_eng_scaled)
        y_pred_proba_eng = model.predict_proba(X_test_eng_scaled)[:, 1] if hasattr(model, 'predict_proba') else None

        # Calculate metrics
        accuracy_eng = accuracy_score(y_test, y_pred_eng)
        precision_eng = precision_score(y_test, y_pred_eng)
        recall_eng = recall_score(y_test, y_pred_eng)
        f1_eng = f1_score(y_test, y_pred_eng)
        auc_score_eng = roc_auc_score(y_test, y_pred_proba_eng) if y_pred_proba_eng is not None else None

        # Calculate improvement over baseline (NaN when the baseline metric is 0)
        baseline_f1 = baseline_results[model_name]['f1_score']
        f1_improvement = _pct_improvement(f1_eng, baseline_f1)

        baseline_accuracy = baseline_results[model_name]['accuracy']
        accuracy_improvement = _pct_improvement(accuracy_eng, baseline_accuracy)

        # Log parameters
        if hasattr(model, 'get_params'):
            mlflow.log_params(model.get_params())

        # Log metrics
        mlflow.log_metric("accuracy", accuracy_eng)
        mlflow.log_metric("precision", precision_eng)
        mlflow.log_metric("recall", recall_eng)
        mlflow.log_metric("f1_score", f1_eng)
        # Compare against None explicitly so a valid AUC of 0.0 is still logged
        if auc_score_eng is not None:
            mlflow.log_metric("auc_score", auc_score_eng)
        mlflow.log_metric("training_time_seconds", training_time)

        # Log improvement metrics; an undefined (NaN) improvement is not logged
        if not math.isnan(f1_improvement):
            mlflow.log_metric("f1_improvement_percent", f1_improvement)
        if not math.isnan(accuracy_improvement):
            mlflow.log_metric("accuracy_improvement_percent", accuracy_improvement)
        mlflow.log_metric("baseline_f1_score", baseline_f1)
        mlflow.log_metric("baseline_accuracy", baseline_accuracy)

        # Log dataset info
        mlflow.log_metric("train_samples", len(X_train_eng))
        mlflow.log_metric("test_samples", len(X_test_eng))
        mlflow.log_metric("n_features", X_train_eng_scaled.shape[1])
        mlflow.log_metric("n_engineered_features", X_train_eng_scaled.shape[1] - X_train_scaled.shape[1])

        # Store results for summary
        engineered_results[model_name] = {
            'accuracy': accuracy_eng,
            'precision': precision_eng,
            'recall': recall_eng,
            'f1_score': f1_eng,
            'auc_score': auc_score_eng,
            'training_time': training_time,
            'f1_improvement': f1_improvement,
            'accuracy_improvement': accuracy_improvement,
            'run_id': run.info.run_id
        }

        # Log the fitted model as a run artifact and register it.
        # `model_uri` is not read in this cell; it is retained because it is a
        # notebook-level name that other cells may reference.
        model_uri = f"runs:/{run.info.run_id}/model"
        mlflow.sklearn.log_model(
            model,
            "model",
            registered_model_name=f"ai4i_classifier_{model_name.lower()}_engineered"
        )

        # Format AUC separately because it may be None (no predict_proba)
        auc_display_eng = f"{auc_score_eng:.4f}" if auc_score_eng is not None else "N/A"
        print(f"✅ {model_name} (Engineered) - Accuracy: {accuracy_eng:.4f}, F1: {f1_eng:.4f}, AUC: {auc_display_eng}")
        print(f"   Training time: {training_time:.2f}s")
        print(f"   📈 F1 Improvement: {f1_improvement:+.2f}% | Accuracy Improvement: {accuracy_improvement:+.2f}%")
        print(f"   Run ID: {run.info.run_id}")

print("\n🎯 Feature-Engineered Training Complete!")
print("=" * 50)

In [None]:
# --- 5. Comprehensive Results Analysis ---
# Put baseline and engineered results side by side, print the per-model change
# for each metric, and name the best model by F1 score.

print("📊 CLASSIFICATION GAUNTLET RESULTS")
print("=" * 60)

# Create comprehensive comparison DataFrame: two rows per model
# (baseline first, then engineered)
comparison_data = []

for model_name in models.keys():
    baseline = baseline_results[model_name]
    engineered = engineered_results[model_name]

    comparison_data.append({
        'Model': f"{model_name} (Baseline)",
        'Features': 'Standard',
        'Accuracy': baseline['accuracy'],
        'F1_Score': baseline['f1_score'],
        'AUC_Score': baseline['auc_score'],
        'Training_Time': baseline['training_time'],
        'Feature_Count': X_train_scaled.shape[1]
    })

    # Improvement columns exist only on engineered rows (NaN on baseline rows)
    comparison_data.append({
        'Model': f"{model_name} (Engineered)",
        'Features': 'Engineered',
        'Accuracy': engineered['accuracy'],
        'F1_Score': engineered['f1_score'],
        'AUC_Score': engineered['auc_score'],
        'Training_Time': engineered['training_time'],
        'Feature_Count': X_train_eng_scaled.shape[1],
        'F1_Improvement': engineered['f1_improvement'],
        'Accuracy_Improvement': engineered['accuracy_improvement']
    })

comparison_df = pd.DataFrame(comparison_data)

print("\n🏆 COMPLETE PERFORMANCE COMPARISON")
print("-" * 60)
display_cols = ['Model', 'Features', 'Accuracy', 'F1_Score', 'AUC_Score', 'Training_Time', 'Feature_Count']
print(comparison_df[display_cols].round(4).to_string(index=False))

print("\n📈 FEATURE ENGINEERING IMPACT")
print("-" * 60)
for model_name in models.keys():
    eng_results = engineered_results[model_name]
    base_results = baseline_results[model_name]

    print(f"\n{model_name}:")
    print(f"  🎯 F1 Score: {base_results['f1_score']:.4f} → {eng_results['f1_score']:.4f} ({eng_results['f1_improvement']:+.2f}%)")
    print(f"  🎯 Accuracy: {base_results['accuracy']:.4f} → {eng_results['accuracy']:.4f} ({eng_results['accuracy_improvement']:+.2f}%)")

    base_auc = base_results['auc_score']
    eng_auc = eng_results['auc_score']
    # AUC is None only when a model has no predict_proba. Test for None
    # explicitly so a legitimate score of 0.0 is still reported.
    if base_auc is not None and eng_auc is not None:
        if base_auc != 0:
            auc_improvement = ((eng_auc - base_auc) / base_auc) * 100
            auc_change = f"{auc_improvement:+.2f}%"
        else:
            # Relative change against a zero baseline is undefined
            auc_change = "n/a"
        print(f"  🎯 AUC Score: {base_auc:.4f} → {eng_auc:.4f} ({auc_change})")

# Identify champion models
print("\n🏆 CHAMPION MODELS")
print("-" * 60)

# Best baseline model: (name, results) pair with the highest F1
best_baseline = max(baseline_results.items(), key=lambda x: x[1]['f1_score'])
print(f"🥇 Best Baseline: {best_baseline[0]} (F1: {best_baseline[1]['f1_score']:.4f})")

# Best engineered model: (name, results) pair with the highest F1
best_engineered = max(engineered_results.items(), key=lambda x: x[1]['f1_score'])
print(f"🥇 Best Engineered: {best_engineered[0]} (F1: {best_engineered[1]['f1_score']:.4f})")

# Overall champion: engineered wins only on a strictly higher F1 (ties go to
# the baseline). The label reuses this same decision instead of re-deriving it
# from a deep equality comparison of the result tuples.
engineered_wins = best_engineered[1]['f1_score'] > best_baseline[1]['f1_score']
overall_best = best_engineered if engineered_wins else best_baseline
print(f"👑 OVERALL CHAMPION: {overall_best[0]} ({'Engineered' if engineered_wins else 'Baseline'})")

# Feature engineering value
avg_f1_improvement = np.mean([eng['f1_improvement'] for eng in engineered_results.values()])
print("\n📊 FEATURE ENGINEERING VALUE")
print(f"   Average F1 improvement: {avg_f1_improvement:+.2f}%")
print(f"   Features added: {X_train_eng_scaled.shape[1] - X_train_scaled.shape[1]}")
print(f"   Feature expansion: {X_train_scaled.shape[1]} → {X_train_eng_scaled.shape[1]} features")

print(f"\n✅ Classification Gauntlet Complete! Check MLflow UI at: {mlflow.get_tracking_uri()}")
print("🎯 All models registered and ready for deployment!")
print("=" * 60)