# Week 3 Interactive Workshop: Supervised Learning with Metaflow Pipelines

Complete workshop covering supervised learning fundamentals, Metaflow ML pipelines, and LangChain model interpretation.

## 🎯 Workshop Objectives
- Implement multiple supervised learning algorithms (classification & regression)
- Build scalable ML pipelines using Metaflow with parallel execution
- Compare and evaluate models using comprehensive metrics
- Perform hyperparameter tuning with cross-validation
- Integrate LLM-powered model interpretation using LangChain

In [None]:
# Environment verification and imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_wine, make_classification, make_regression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB
from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    mean_squared_error, r2_score, mean_absolute_error, roc_auc_score
)
from metaflow import FlowSpec, step, Parameter, resources
import warnings
from datetime import datetime

# LangChain imports (with fallback)
# LangChain is optional: LANGCHAIN_AVAILABLE records whether the imports
# succeeded so later cells can branch on it instead of failing.
try:
    from langchain.prompts import PromptTemplate
    from langchain_community.llms import Ollama
    from langchain_core.output_parsers import StrOutputParser
    LANGCHAIN_AVAILABLE = True
    print("✅ LangChain available for model interpretation")
except ImportError:
    LANGCHAIN_AVAILABLE = False
    print("⚠️ LangChain not available - will use alternative explanations")

# Silence every warning for the rest of the session.
# NOTE(review): this also hides convergence and deprecation warnings.
warnings.filterwarnings('ignore')
# Plot appearance: default matplotlib style with seaborn's "husl" palette.
plt.style.use('default')
sns.set_palette("husl")

print("🎯 Week 3 Workshop Environment Ready!")

## Part 1: Classification with Wine Dataset

In [None]:
# Wine dataset: load into a DataFrame, split, standardize, inspect balance
wine_data = load_wine()
target_names = wine_data.target_names
y_wine = wine_data.target
X_wine = pd.DataFrame(data=wine_data.data, columns=wine_data.feature_names)

n_classes = len(target_names)
print(f"📊 Dataset Shape: {X_wine.shape}")
print(f"🎯 Classes: {n_classes} - {list(target_names)}")

# Stratified 80/20 hold-out split with a fixed seed
wine_split = train_test_split(
    X_wine,
    y_wine,
    test_size=0.2,
    random_state=42,
    stratify=y_wine,
)
X_train, X_test, y_train, y_test = wine_split

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"📊 Training set: {X_train_scaled.shape}")
print(f"📊 Test set: {X_test_scaled.shape}")

# Share of each class in the full dataset, in percent
class_share_pct = pd.Series(y_wine).value_counts(normalize=True).sort_index() * 100
print(class_share_pct)

In [None]:
# Train multiple classification algorithms
# Every model is fitted on the scaled training split and scored on the
# scaled test split; results are keyed by the display name.
classifiers = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(random_state=42, probability=True),
    'Naive Bayes': GaussianNB(),
    'LightGBM': LGBMClassifier(random_state=42, verbose=-1)
}

classification_results = {}

for name, classifier in classifiers.items():
    print(f"Training {name}...")

    # Time the fit only, so 'training_time' is not inflated by inference.
    start_time = datetime.now()
    classifier.fit(X_train_scaled, y_train)
    training_time = (datetime.now() - start_time).total_seconds()

    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)

    classification_results[name] = {
        'model': classifier,
        'predictions': y_pred,
        'accuracy': accuracy,
        'training_time': training_time
    }

    print(f"   ✅ Accuracy: {accuracy:.3f} | Time: {training_time:.2f}s")

# Sort by accuracy (best first)
sorted_results = sorted(classification_results.items(),
                        key=lambda x: x[1]['accuracy'], reverse=True)

print("\n📈 Performance Ranking:")
for i, (name, results) in enumerate(sorted_results, 1):
    print(f"   {i}. {name}: {results['accuracy']:.3f}")

In [None]:
# 5-fold cross-validation of every classifier on the scaled training split
from sklearn.model_selection import cross_val_score

print("🔄 Cross-validation for all classifiers (5 folds)")
cv_results = {}
for name, classifier in classifiers.items():
    fold_scores = cross_val_score(
        classifier,
        X_train_scaled,
        y_train,
        scoring='accuracy',
        cv=5,
    )
    cv_results[name] = fold_scores
    mean_accuracy = fold_scores.mean()
    accuracy_spread = fold_scores.std()
    print(f"{name}: Mean CV accuracy = {mean_accuracy:.3f} ± {accuracy_spread:.3f}")

# One column per classifier, one row per fold
cv_df = pd.DataFrame(cv_results)
print("\nCV scores (per fold):")
print(cv_df)

## Part 2: Regression with Synthetic Housing Data

In [None]:
# Synthetic housing data: five random features and a noisy linear price
np.random.seed(42)
n_samples = 500

# The order of these draws determines the generated values; keep it fixed.
house_size = np.random.normal(loc=2000, scale=500, size=n_samples)
bedrooms = np.random.poisson(lam=3, size=n_samples) + 1
age = np.random.exponential(scale=15, size=n_samples)
location_score = np.random.uniform(low=1, high=10, size=n_samples)
crime_rate = np.random.exponential(scale=3, size=n_samples)
price_noise = np.random.normal(loc=0, scale=20000, size=n_samples)

# Price rises with size, bedrooms and location; falls with age and crime.
price = (
    house_size * 150
    + bedrooms * 10000
    + location_score * 5000
    - age * 1000
    - crime_rate * 2000
    + price_noise
)

housing_columns = {
    'house_size': house_size,
    'bedrooms': bedrooms,
    'age': age,
    'location_score': location_score,
    'crime_rate': crime_rate,
    'price': price,
}
regression_data = pd.DataFrame(housing_columns)

print(f"📊 Regression Dataset Shape: {regression_data.shape}")

# Features are every column except the target
y_reg = regression_data['price']
X_reg = regression_data.drop(columns='price')

housing_split = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = housing_split

# Standardize with statistics learned from the training rows only
scaler_reg = StandardScaler()
scaler_reg.fit(X_train_reg)
X_train_reg_scaled = scaler_reg.transform(X_train_reg)
X_test_reg_scaled = scaler_reg.transform(X_test_reg)

In [None]:
# Train regression models
# Every model is fitted on the scaled training split and scored on the
# scaled test split; results are keyed by the display name.
regressors = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1.0, random_state=42),
    'Lasso Regression': Lasso(alpha=1.0, random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'SVR': SVR(kernel='rbf'),
    'LightGBM': LGBMRegressor(random_state=42, verbose=-1)
}

regression_results = {}

for name, regressor in regressors.items():
    print(f"Training {name}...")

    # Time the fit only, so 'training_time' is not inflated by inference.
    start_time = datetime.now()
    regressor.fit(X_train_reg_scaled, y_train_reg)
    training_time = (datetime.now() - start_time).total_seconds()

    y_pred_reg = regressor.predict(X_test_reg_scaled)
    mse = mean_squared_error(y_test_reg, y_pred_reg)
    r2 = r2_score(y_test_reg, y_pred_reg)

    regression_results[name] = {
        'model': regressor,
        'predictions': y_pred_reg,
        'mse': mse,
        'r2': r2,
        'training_time': training_time
    }

    print(f"   ✅ R²: {r2:.3f} | RMSE: ${np.sqrt(mse):,.0f}")

# Sort by R² score (best first)
sorted_reg_results = sorted(regression_results.items(),
                            key=lambda x: x[1]['r2'], reverse=True)

print("\n📈 Regression Performance Ranking:")
for i, (name, results) in enumerate(sorted_reg_results, 1):
    print(f"   {i}. {name}: R²={results['r2']:.3f}")

## Part 3: Complete Metaflow ML Pipeline

In [None]:
# Complete Metaflow ML Pipeline
class SupervisedLearningFlow(FlowSpec):
    """
    Complete supervised learning pipeline with parallel model training.

    Step order: start -> preprocess_data -> train_model (foreach, one task
    per algorithm) -> aggregate_results (join) -> model_selection ->
    hyperparameter_tuning -> final_evaluation -> generate_insights -> end.

    ``dataset_type='wine'`` runs a classification problem scored by
    accuracy; any other value runs the synthetic housing regression
    problem scored by R².
    """

    dataset_type = Parameter('dataset_type', help='Type of dataset: wine or housing', default='wine')
    test_size = Parameter('test_size', help='Test set size (0.0-1.0)', default=0.2)
    n_cv_folds = Parameter('n_cv_folds', help='Number of cross-validation folds', default=5)

    @step
    def start(self):
        """Load the dataset and define the candidate algorithms."""
        print(f"🚀 Starting Supervised Learning Pipeline")
        print(f"   Dataset: {self.dataset_type}")

        if self.dataset_type == 'wine':
            wine_data = load_wine()
            self.X = pd.DataFrame(wine_data.data, columns=wine_data.feature_names)
            self.y = wine_data.target
            self.target_names = wine_data.target_names
            self.problem_type = 'classification'

            self.algorithms = {
                'logistic_regression': LogisticRegression(random_state=42, max_iter=1000),
                'random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
                'gradient_boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
                'svm': SVC(random_state=42, probability=True),
                'naive_bayes': GaussianNB()
            }
        else:  # housing dataset
            # Build the synthetic housing data inside the flow instead of
            # reading notebook globals, so the flow also runs when saved as
            # a standalone script. A private RandomState seeded with 42 is
            # used so the global NumPy random state is left untouched.
            rng = np.random.RandomState(42)
            n_samples = 500
            house_size = rng.normal(2000, 500, n_samples)
            bedrooms = rng.poisson(3, n_samples) + 1
            age = rng.exponential(15, n_samples)
            location_score = rng.uniform(1, 10, n_samples)
            crime_rate = rng.exponential(3, n_samples)
            price = (
                house_size * 150 +
                bedrooms * 10000 +
                location_score * 5000 +
                -age * 1000 +
                -crime_rate * 2000 +
                rng.normal(0, 20000, n_samples)
            )

            self.X = pd.DataFrame({
                'house_size': house_size,
                'bedrooms': bedrooms,
                'age': age,
                'location_score': location_score,
                'crime_rate': crime_rate
            })
            self.y = pd.Series(price, name='price')
            self.target_names = ['price']
            self.problem_type = 'regression'

            self.algorithms = {
                'linear_regression': LinearRegression(),
                'ridge_regression': Ridge(alpha=1.0, random_state=42),
                'lasso_regression': Lasso(alpha=1.0, random_state=42),
                'random_forest': RandomForestRegressor(n_estimators=100, random_state=42),
                'svr': SVR(kernel='rbf')
            }

        print(f"📊 Dataset shape: {self.X.shape}")
        print(f"🎯 Problem type: {self.problem_type}")

        self.next(self.preprocess_data)

    @step
    def preprocess_data(self):
        """Split into train/test sets, standardize, and fan out per algorithm."""
        print("🔧 Preprocessing data...")

        if self.problem_type == 'classification':
            # Stratify so every class keeps its share in both splits.
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
                self.X, self.y, test_size=self.test_size,
                random_state=42, stratify=self.y
            )
        else:
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
                self.X, self.y, test_size=self.test_size, random_state=42
            )

        # The scaler is fitted on the training split only.
        self.scaler = StandardScaler()
        self.X_train_scaled = self.scaler.fit_transform(self.X_train)
        self.X_test_scaled = self.scaler.transform(self.X_test)

        print(f"   📊 Training set: {self.X_train_scaled.shape}")
        print(f"   📊 Test set: {self.X_test_scaled.shape}")

        self.algorithm_names = list(self.algorithms.keys())
        self.next(self.train_model, foreach='algorithm_names')

    @resources(memory=2000, cpu=2)
    @step
    def train_model(self):
        """Train and score one algorithm; runs once per foreach branch."""
        self.current_algorithm = self.input
        algorithm = self.algorithms[self.current_algorithm]

        print(f"🏋️ Training {self.current_algorithm}...")

        # Time the fit only, so 'training_time' is not inflated by inference.
        start_time = datetime.now()
        algorithm.fit(self.X_train_scaled, self.y_train)
        training_time = (datetime.now() - start_time).total_seconds()

        y_pred = algorithm.predict(self.X_test_scaled)

        if self.problem_type == 'classification':
            scoring = 'accuracy'
            accuracy = accuracy_score(self.y_test, y_pred)
            metrics = {'accuracy': accuracy}
            headline = f"Accuracy={accuracy:.3f}"
        else:  # regression
            scoring = 'r2'
            mse = mean_squared_error(self.y_test, y_pred)
            r2 = r2_score(self.y_test, y_pred)
            metrics = {'mse': mse, 'r2': r2}
            headline = f"R²={r2:.3f}"

        cv_scores = cross_val_score(algorithm, self.X_train_scaled, self.y_train,
                                    cv=self.n_cv_folds, scoring=scoring)

        self.model_results = {
            'algorithm': self.current_algorithm,
            'model': algorithm,
            'predictions': y_pred,
            **metrics,
            'cv_mean': cv_scores.mean(),
            'cv_std': cv_scores.std(),
            'training_time': training_time
        }

        print(f"   ✅ {self.current_algorithm}: {headline}, CV={cv_scores.mean():.3f}±{cv_scores.std():.3f}")

        self.next(self.aggregate_results)

    @step
    def aggregate_results(self, inputs):
        """Join step: collect per-branch results and restore shared artifacts."""
        print("🔄 Aggregating results from parallel training...")

        self.all_model_results = {}
        self.training_summary = []

        for input_flow in inputs:
            algorithm = input_flow.current_algorithm
            results = input_flow.model_results
            self.all_model_results[algorithm] = results
            self.training_summary.append(results)

        # Each branch fitted its own entry of `algorithms`, so the copies may
        # differ between inputs. Pin one copy before merging; downstream it
        # only serves as the estimator template handed to GridSearchCV.
        self.algorithms = inputs[0].algorithms
        # A join step does not inherit artifacts from before the foreach;
        # carry them forward explicitly. The per-branch artifacts are
        # excluded because they legitimately differ between inputs.
        self.merge_artifacts(inputs, exclude=['current_algorithm', 'model_results'])

        print(f"✅ Aggregated results from {len(self.all_model_results)} models")
        self.next(self.model_selection)

    @step
    def model_selection(self):
        """Select the best model based on mean cross-validation score."""
        print("🏆 Selecting best model...")

        best_algorithm = max(self.all_model_results,
                             key=lambda x: self.all_model_results[x]['cv_mean'])
        best_score = self.all_model_results[best_algorithm]['cv_mean']

        self.best_model = {
            'algorithm': best_algorithm,
            'results': self.all_model_results[best_algorithm],
            'score': best_score
        }

        print(f"🏆 Best Model: {best_algorithm}")
        print(f"📊 Best Score: {best_score:.3f}")

        self.next(self.hyperparameter_tuning)

    @step
    def hyperparameter_tuning(self):
        """Grid-search the best algorithm, if a parameter grid is defined for it."""
        print("⚙️ Hyperparameter tuning for best model...")

        best_algorithm = self.best_model['algorithm']
        print(f"🔧 Tuning {best_algorithm}...")

        # Algorithms without an entry here are kept as trained.
        param_grids = {
            'logistic_regression': {'C': [0.1, 1.0, 10.0], 'solver': ['liblinear', 'lbfgs']},
            'random_forest': {'n_estimators': [50, 100], 'max_depth': [None, 10], 'min_samples_split': [2, 5]},
            'gradient_boosting': {'n_estimators': [50, 100], 'learning_rate': [0.1, 0.2], 'max_depth': [3, 5]},
            'svm': {'C': [0.1, 1.0, 10.0], 'gamma': ['scale', 'auto']},
            'ridge_regression': {'alpha': [0.1, 1.0, 10.0]},
            'lasso_regression': {'alpha': [0.1, 1.0, 10.0]},
            'svr': {'C': [0.1, 1.0, 10.0], 'gamma': ['scale', 'auto']}
        }

        param_grid = param_grids.get(best_algorithm, {})

        if param_grid:
            base_model = self.algorithms[best_algorithm]
            scoring = 'accuracy' if self.problem_type == 'classification' else 'r2'

            grid_search = GridSearchCV(
                base_model, param_grid, cv=self.n_cv_folds,
                scoring=scoring, n_jobs=-1
            )

            grid_search.fit(self.X_train_scaled, self.y_train)

            self.tuned_model = grid_search.best_estimator_
            self.best_params = grid_search.best_params_
            self.tuning_score = grid_search.best_score_

            print(f"✅ Best parameters: {self.best_params}")
            print(f"📈 Tuned CV score: {self.tuning_score:.3f}")
        else:
            print("ℹ️ No hyperparameters to tune for this algorithm")
            self.tuned_model = self.all_model_results[best_algorithm]['model']
            self.best_params = {}
            self.tuning_score = self.best_model['score']

        self.next(self.final_evaluation)

    @step
    def final_evaluation(self):
        """Score the tuned model on the held-out test split."""
        print("📊 Final Model Evaluation")

        y_pred_final = self.tuned_model.predict(self.X_test_scaled)

        # Results are stored as plain lists and floats via .tolist().
        if self.problem_type == 'classification':
            final_accuracy = accuracy_score(self.y_test, y_pred_final)
            conf_matrix = confusion_matrix(self.y_test, y_pred_final)

            self.final_results = {
                'accuracy': final_accuracy,
                'confusion_matrix': conf_matrix.tolist(),
                'predictions': y_pred_final.tolist(),
                'actual': self.y_test.tolist()
            }

            print(f"🎯 Final Accuracy: {final_accuracy:.3f}")
        else:
            final_mse = mean_squared_error(self.y_test, y_pred_final)
            final_r2 = r2_score(self.y_test, y_pred_final)

            self.final_results = {
                'mse': final_mse,
                'r2': final_r2,
                'rmse': np.sqrt(final_mse),
                'predictions': y_pred_final.tolist(),
                'actual': self.y_test.tolist()
            }

            print(f"🎯 Final R²: {final_r2:.3f}")

        self.next(self.generate_insights)

    @step
    def generate_insights(self):
        """Build human-readable insight lines about the tuned model."""
        print("💡 Generating Model Insights")

        insights = []

        # Feature importance (only for models that expose it)
        if hasattr(self.tuned_model, 'feature_importances_'):
            feature_importance = dict(zip(self.X.columns, self.tuned_model.feature_importances_))
            top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:5]

            insights.append("🔍 Top 5 Most Important Features:")
            for i, (feature, importance) in enumerate(top_features, 1):
                insights.append(f"   {i}. {feature}: {importance:.3f}")

        # Performance insights
        if self.problem_type == 'classification':
            accuracy = self.final_results['accuracy']
            if accuracy > 0.95:
                insights.append("🏆 Excellent model performance - ready for production!")
            elif accuracy > 0.90:
                insights.append("✅ Very good performance - minor optimizations possible")
            elif accuracy > 0.80:
                insights.append("⚠️ Good performance - consider feature engineering")
            else:
                insights.append("❌ Performance needs improvement")
        else:
            r2 = self.final_results['r2']
            if r2 > 0.90:
                insights.append("🏆 Excellent predictive power!")
            elif r2 > 0.75:
                insights.append("✅ Good predictive performance")
            else:
                insights.append("⚠️ Moderate predictive power - room for improvement")

        # Training efficiency insights
        fastest_model = min(self.training_summary, key=lambda x: x['training_time'])
        insights.append(f"⚡ Fastest training: {fastest_model['algorithm']} ({fastest_model['training_time']:.2f}s)")

        best_cv = max(self.training_summary, key=lambda x: x['cv_mean'])
        insights.append(f"🎯 Best cross-validation: {best_cv['algorithm']} ({best_cv['cv_mean']:.3f})")

        self.model_insights = insights

        for insight in insights:
            print(insight)

        self.next(self.end)

    @step
    def end(self):
        """Store the pipeline summary as an artifact and print it."""
        print("\n🎉 SUPERVISED LEARNING PIPELINE COMPLETE!")

        summary = {
            'dataset_info': {
                'type': self.dataset_type,
                'shape': f"{self.X.shape[0]} samples × {self.X.shape[1]} features",
                'problem_type': self.problem_type
            },
            'training_summary': {
                'algorithms_tested': len(self.all_model_results),
                'best_algorithm': self.best_model['algorithm'],
                'hyperparameter_tuning': bool(self.best_params),
                'total_training_time': sum(result['training_time'] for result in self.all_model_results.values())
            },
            'performance': self.final_results,
            'insights': self.model_insights
        }

        self.pipeline_summary = summary

        print("📊 Pipeline Summary:")
        print(f"   🗃️ Dataset: {summary['dataset_info']['shape']}")
        print(f"   🤖 Algorithms tested: {summary['training_summary']['algorithms_tested']}")
        print(f"   🏆 Best model: {summary['training_summary']['best_algorithm']}")
        print(f"   ⚙️ Hyperparameter tuning: {'Yes' if summary['training_summary']['hyperparameter_tuning'] else 'No'}")

        if self.problem_type == 'classification':
            print(f"   🎯 Final accuracy: {self.final_results['accuracy']:.3f}")
        else:
            print(f"   🎯 Final R²: {self.final_results['r2']:.3f}")

        print("\n✨ All results and models saved by Metaflow!")
        # Point at the Metaflow Client API, which is how a finished run is read.
        print("💾 Access results using: Flow('SupervisedLearningFlow').latest_run.data")
        print("🔄 Reproduce anytime: python flow.py run")

print("✅ SupervisedLearningFlow class defined!")
print("💡 To run: save as .py file and execute 'python pipeline.py run'")

## Part 4: LangChain Model Interpretation

In [None]:
# LangChain Model Interpretation System
def create_model_interpreter():
    """Build the prompt | LLM | string-parser chain used to explain a model.

    Returns:
        The composed chain, or ``None`` when LangChain is not installed or
        setting up the Ollama-backed chain raises.
    """
    if not LANGCHAIN_AVAILABLE:
        print("⚠️ LangChain not available - using fallback interpretation")
        return None

    # Placeholders filled in by the caller at invoke time.
    prompt_fields = ["model_type", "accuracy", "features", "dataset"]
    interpretation_prompt = PromptTemplate(
        template="""
        You are an expert data scientist explaining machine learning results to a business audience.
        
        Model Details:
        - Algorithm: {model_type}
        - Accuracy: {accuracy}
        - Key Features: {features}
        - Dataset: {dataset}
        
        Please provide:
        1. A simple explanation of how this model works
        2. What the accuracy score means in practical terms
        3. Which features are most important and why
        4. Recommendations for model deployment
        
        Keep the explanation clear and business-focused.
        """,
        input_variables=prompt_fields,
    )

    try:
        chain = interpretation_prompt | Ollama(model="llama3.2") | StrOutputParser()
    except Exception as err:
        print(f"⚠️ Could not initialize LangChain: {err}")
        return None
    else:
        return chain

def _fallback_interpretation(model_name, accuracy, top_features, dataset_name):
    """Return the static, template-based interpretation (no LLM involved)."""
    return f"""
        📊 Model Interpretation (Fallback):
        
        The {model_name} algorithm achieved {accuracy:.1%} accuracy on the {dataset_name} dataset.
        
        Key insights:
        • This model can correctly predict the outcome {accuracy:.1%} of the time
        • Most important features: {', '.join(top_features[:3])}
        • {'Excellent' if accuracy > 0.9 else 'Good' if accuracy > 0.8 else 'Fair'} performance for business use
        
        Recommendation: {'Ready for deployment' if accuracy > 0.9 else 'Consider additional tuning'}
        """


def interpret_model_results(model_name, accuracy, top_features, dataset_name):
    """Generate a natural-language interpretation of model results.

    Args:
        model_name: Display name of the algorithm.
        accuracy: Accuracy as a fraction (formatted as a percentage).
        top_features: Feature names, most important first; the LLM prompt
            uses the first five, the fallback text the first three.
        dataset_name: Display name of the dataset.

    Returns:
        The LLM's explanation when a chain is available and the call
        succeeds; otherwise the static fallback text.
    """
    interpreter = create_model_interpreter()

    if interpreter is None:
        return _fallback_interpretation(model_name, accuracy, top_features, dataset_name)

    try:
        interpretation = interpreter.invoke({
            "model_type": model_name,
            "accuracy": f"{accuracy:.1%}",
            "features": ", ".join(top_features[:5]),
            "dataset": dataset_name
        })
    except Exception as e:
        # Use the static text here. Calling interpret_model_results again
        # would rebuild the same failing chain and recurse without end.
        print(f"⚠️ LLM interpretation failed: {e}")
        return _fallback_interpretation(model_name, accuracy, top_features, dataset_name)

    return f"🧠 LLM Interpretation:\n{interpretation}"

# Example: interpret a hypothetical Random Forest result
example_inputs = {
    "model_name": "Random Forest",
    "accuracy": 0.94,
    "top_features": [
        "alcohol",
        "flavanoids",
        "color_intensity",
        "od280/od315_of_diluted_wines",
    ],
    "dataset_name": "Wine Quality",
}

print("🔍 Example Model Interpretation:")
example_interpretation = interpret_model_results(**example_inputs)
print(example_interpretation)

## Part 5: Advanced Evaluation and Hyperparameter Tuning

In [None]:
# Grid-search demo: tune a Random Forest on the scaled wine training split
print("⚙️ Advanced Hyperparameter Tuning")

# Search space: 2 x 2 x 2 combinations
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
    'min_samples_split': [2, 5]
}
rf_model = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=0,
)

print("🔄 Running grid search...")
grid_search.fit(X_train_scaled, y_train)

n_combinations = len(grid_search.cv_results_['params'])
print("\n🏆 Grid Search Results:")
print(f"   Best parameters: {grid_search.best_params_}")
print(f"   Best CV score: {grid_search.best_score_:.3f}")
print(f"   Total combinations tested: {n_combinations}")

# Score the refitted best estimator on the held-out test split
best_rf = grid_search.best_estimator_
y_pred_tuned = best_rf.predict(X_test_scaled)
tuned_accuracy = accuracy_score(y_test, y_pred_tuned)
baseline_accuracy = classification_results['Random Forest']['accuracy']

print("\n📊 Tuned Model Performance:")
print(f"   Test accuracy: {tuned_accuracy:.3f}")
print(f"   Improvement: {tuned_accuracy - baseline_accuracy:.3f}")

# Rank features by the tuned model's importances, highest first
feature_names = X_wine.columns
feature_importance = best_rf.feature_importances_
importance_df = pd.DataFrame(
    {'feature': feature_names, 'importance': feature_importance}
).sort_values(by='importance', ascending=False)

print("\n🔍 Top 5 Features (Tuned Model):")
top_rows = importance_df.head()
for feature_label, importance_value in zip(top_rows['feature'], top_rows['importance']):
    print(f"   {feature_label}: {importance_value:.3f}")

print("\n✅ Hyperparameter tuning complete!")

## Workshop Summary & Next Steps

In [None]:
# Workshop summary: a banner followed by five headed bullet lists
banner = "=" * 60
print("\n" + banner)
print("🎓 WEEK 3 WORKSHOP SUMMARY")
print(banner)

accomplishments = [
    "✅ Built complete Metaflow ML pipeline with parallel training",
    "✅ Implemented multiple supervised learning algorithms",
    "✅ Mastered cross-validation and model evaluation",
    "✅ Performed hyperparameter tuning with GridSearchCV",
    "✅ Integrated LangChain for model interpretation",
    "✅ Created comprehensive evaluation frameworks",
    "✅ Generated business-ready model insights"
]

skills = [
    "🌊 Metaflow pipeline development with @foreach and @resources",
    "🤖 Scikit-learn model training and evaluation",
    "📊 Advanced metrics: accuracy, precision, recall, F1, AUC, R²",
    "⚙️ Hyperparameter optimization techniques",
    "🧠 LLM-powered model interpretation",
    "📈 Cross-validation and model selection",
    "🔍 Feature importance analysis"
]

production_items = [
    "📋 Model versioning and artifact management",
    "🔄 Automated retraining pipelines",
    "📊 Performance monitoring and alerting",
    "🛡️ Model validation and testing",
    "📝 Documentation and interpretation",
    "🌐 Deployment and serving infrastructure"
]

week4_topics = [
    "🕸️ LangGraph for complex AI workflows",
    "🔄 Multi-agent systems and tool calling",
    "🧪 Advanced model architectures",
    "🌐 End-to-end deployment strategies",
    "📊 MLOps and monitoring systems",
    "🚀 Scaling to production workloads"
]

practice_items = [
    "🔄 Run the complete pipeline with different datasets",
    "⚙️ Experiment with hyperparameter ranges",
    "📊 Create custom evaluation metrics",
    "🧠 Try different LLM models for interpretation",
    "🎯 Apply to your own datasets",
    "📚 Review LangGraph documentation"
]

# (heading, bullets) pairs in display order
summary_sections = (
    ("\n🏆 What You've Accomplished:", accomplishments),
    ("\n🛠️ Key Skills Developed:", skills),
    ("\n🚀 Production Readiness Checklist:", production_items),
    ("\n🎯 Week 4 Preview - Advanced ML & LangGraph:", week4_topics),
    ("\n💡 Recommended Practice:", practice_items),
)

for heading, bullets in summary_sections:
    print(heading)
    for bullet in bullets:
        print(f"   {bullet}")

print("\n📚 Additional Resources:")
# Named `additional_resources` rather than `resources` so the list does not
# rebind the Metaflow `resources` decorator imported at the top of the file;
# rebinding it would break `@resources(...)` if the flow cell is re-run.
additional_resources = [
    "📖 Metaflow ML Best Practices: https://docs.metaflow.org/scaling/remote-tasks/introduction",
    "📖 Scikit-learn User Guide: https://scikit-learn.org/stable/user_guide.html",
    "📖 LangChain Model Integration: https://python.langchain.com/docs/integrations/llms/",
    "📖 Cross-validation Guide: https://scikit-learn.org/stable/modules/cross_validation.html",
    "📖 Hyperparameter Tuning: https://scikit-learn.org/stable/modules/grid_search.html"
]

for resource in additional_resources:
    print(f"   {resource}")

print("\n🎉 Excellent work! You're now ready for advanced ML and LangGraph!")
print("🏆 - INRIVA AI Academy Team")

print("\n📝 To save and run the complete pipeline:")
print("   1. Copy the SupervisedLearningFlow class to a .py file")
print("   2. Add required imports at the top")
print("   3. Run with: python your_pipeline.py run")
print("   4. View results with: python your_pipeline.py show")

print("\n✨ All pipeline artifacts are automatically versioned by Metaflow!")

## 📝 Pipeline Export Instructions

### Save as Python File
```python
# complete_ml_pipeline.py
# Imports cover both the classification (wine) and the regression (housing)
# branches of the flow.
from metaflow import FlowSpec, step, Parameter, resources
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, r2_score, mean_absolute_error
from datetime import datetime

# Copy the SupervisedLearningFlow class here

if __name__ == '__main__':
    SupervisedLearningFlow()
```

### Run the Pipeline
```bash
# Run with default parameters (wine dataset)
python complete_ml_pipeline.py run

# Run with housing dataset
python complete_ml_pipeline.py run --dataset_type housing

# Run with custom parameters
python complete_ml_pipeline.py run --test_size 0.3 --n_cv_folds 10
```

### View Results
```bash
# Show the flow's structure (its steps and their docstrings)
python complete_ml_pipeline.py show

# List all runs
python complete_ml_pipeline.py list
```

### Access Results Programmatically
```python
from metaflow import Flow

# Get latest run
run = Flow('SupervisedLearningFlow').latest_run

# Access pipeline summary
summary = run.data.pipeline_summary
print(f"Best model: {summary['training_summary']['best_algorithm']}")

# Access model results
model_results = run.data.all_model_results
for model, results in model_results.items():
    print(f"{model}: {results['cv_mean']:.3f}")
```

## 🎯 Next Steps

1. **Experiment** with different datasets and parameters
2. **Extend** the pipeline with additional algorithms
3. **Deploy** models using Metaflow's cloud capabilities
4. **Integrate** with your existing ML infrastructure

**Ready for Week 4: Advanced ML & LangGraph! 🚀**