In [None]:
# Example: Environment Configuration Management
import os
import warnings
from dataclasses import dataclass
from typing import Dict, Any

@dataclass
class EnvironmentConfig:
    """Settings bundle describing a single deployment environment.

    Instances are produced by ``get_environment_config``; the examples in the
    field comments below are the values used by the built-in environments.
    """
    # Environment name: "development", "staging" or "production".
    environment: str
    # Dataset identifier for this environment, e.g. "dev_dataset".
    data_source: str
    # Model registry host, e.g. "dev-registry.internal".
    model_registry_url: str
    # Resource requests as a mapping, e.g. {"cpu": "2", "memory": "4Gi"}.
    compute_resources: Dict[str, Any]
    # Whether monitoring is switched on (off only for development).
    monitoring_enabled: bool

def get_environment_config(env_name: str) -> EnvironmentConfig:
    """Return the configuration for the named deployment environment.

    The lookup ignores letter case and surrounding whitespace, so values such
    as ``"Production"`` or ``" staging "`` resolve to the intended environment
    instead of silently falling back.

    Args:
        env_name: Environment name; expected to be one of "development",
            "staging" or "production".

    Returns:
        The matching ``EnvironmentConfig``. An unrecognised name still yields
        the development configuration (the historical behaviour), but a
        ``RuntimeWarning`` is emitted so the fallback is visible.
    """
    default_env = "development"

    configs = {
        "development": EnvironmentConfig(
            environment="development",
            data_source="dev_dataset",
            model_registry_url="dev-registry.internal",
            compute_resources={"cpu": "2", "memory": "4Gi"},
            monitoring_enabled=False
        ),
        "staging": EnvironmentConfig(
            environment="staging",
            data_source="staging_dataset",
            model_registry_url="staging-registry.internal",
            compute_resources={"cpu": "4", "memory": "8Gi"},
            monitoring_enabled=True
        ),
        "production": EnvironmentConfig(
            environment="production",
            data_source="prod_dataset",
            model_registry_url="prod-registry.internal",
            compute_resources={"cpu": "8", "memory": "16Gi"},
            monitoring_enabled=True
        )
    }

    # Normalise only real strings; anything else (e.g. None) goes straight to
    # the lookup and ends up on the fallback path, as it did before.
    key = env_name.strip().lower() if isinstance(env_name, str) else env_name
    if key in configs:
        return configs[key]

    # Keep the fallback, but make it loud: running production code against
    # development settings because of a typo should not go unnoticed.
    valid_names = ", ".join(configs)
    warnings.warn(
        f"Unknown environment {env_name!r}; falling back to "
        f"{default_env!r}. Valid environments: {valid_names}",
        RuntimeWarning,
        stacklevel=2,
    )
    return configs[default_env]

# Example usage: pick the environment from the DEPLOYMENT_ENV variable
# (defaulting to "development" when it is unset) and show the resolved settings.
current_env = os.environ.get("DEPLOYMENT_ENV", "development")
config = get_environment_config(current_env)

print(f"Environment: {config.environment}")
print(f"Data Source: {config.data_source}")
print(f"Compute Resources: {config.compute_resources}")


In [None]:
# Example: Unit Test for Feature Engineering
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

def test_feature_scaling():
    """Unit test for feature scaling functionality"""
    # Sample data
    data = pd.DataFrame({
        'feature1': [1, 2, 3, 4, 5],
        'feature2': [10, 20, 30, 40, 50]
    })
    
    # Apply scaling
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    
    # Test assertions
    assert np.allclose(scaled_data.mean(axis=0), 0, atol=1e-7), "Mean should be close to 0"
    assert np.allclose(scaled_data.std(axis=0), 1, atol=1e-7), "Std should be close to 1"
    
    print("✅ Feature scaling test passed!")
    return True

# Run the test; an AssertionError propagates from here if either check fails.
# As the last expression in the cell, the returned True is echoed as the output.
test_feature_scaling()


In [None]:
# Example: Integration Test for Pipeline Components
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

def test_training_pipeline_integration():
    """Integration test for the complete training pipeline"""
    
    # Generate sample data
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Training step
    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, y_train)
    
    # Validation step
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    # Integration test assertions
    assert accuracy > 0.7, f"Model accuracy {accuracy:.3f} is below threshold 0.7"
    assert not np.any(np.isnan(y_pred)), "Predictions contain NaN values"
    assert len(y_pred) == len(y_test), "Prediction length mismatch"
    
    print(f"✅ Integration test passed! Accuracy: {accuracy:.3f}")
    return True

# Run the integration test; an AssertionError propagates from here on failure.
# As the last expression in the cell, the returned True is echoed as the output.
test_training_pipeline_integration()
