# ML Automation Framework - Quickstart

This notebook demonstrates the basic usage of the ML Automation Framework.

## Features Covered
1. Creating a sample dataset
2. Loading and validating configurations
3. Running a classification pipeline
4. Making predictions with the trained model
5. Viewing results in MLflow

In [None]:
# Install the framework in editable mode from the parent directory (run once).
# Uncomment the next line if `ml_framework` is not yet installed in this kernel's environment.
# %pip install -e ..

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Framework imports
from ml_framework.config.loader import load_config
from ml_framework.pipelines.classification import ClassificationPipeline
from ml_framework.utils.runtime import get_runtime

# Show which runtime the framework detected for this session
runtime = get_runtime()
print(f"Running on: {runtime.value}")

## 1. Create Sample Data

First, let's generate a synthetic customer-churn dataset for demonstration and save it as a Parquet file under `../data`.

In [None]:
# Build a synthetic customer-churn dataset.
# The global NumPy RNG is seeded so repeated runs produce the same rows.
np.random.seed(42)
n_samples = 1000

# The columns are drawn in this exact order; reordering them would change
# the values each column receives from the seeded stream.
feature_columns = {
    "tenure_months": np.random.randint(1, 72, size=n_samples),
    "monthly_charges": np.random.uniform(20, 100, size=n_samples),
    "total_charges": np.random.uniform(100, 5000, size=n_samples),
    "contract_type": np.random.choice(
        ["month-to-month", "one_year", "two_year"], size=n_samples
    ),
    "payment_method": np.random.choice(
        ["credit_card", "bank_transfer", "electronic_check"], size=n_samples
    ),
    "num_support_tickets": np.random.poisson(2, size=n_samples),
}
data = pd.DataFrame(feature_columns)

# Simplified churn logic: longer tenure lowers the probability, while higher
# monthly charges and more support tickets raise it. The result is clipped
# so that no customer is a certain churner or a certain stayer.
raw_score = (
    0.3
    - 0.005 * data["tenure_months"]
    + 0.005 * data["monthly_charges"]
    + 0.05 * data["num_support_tickets"]
)
churn_prob = raw_score.clip(lower=0.05, upper=0.95)

# One uniform draw per customer; a draw below the churn probability means churn (1)
uniform_draws = np.random.random(size=n_samples)
data["is_churned"] = (uniform_draws < churn_prob).astype(int)

print(f"Dataset shape: {data.shape}")
print(f"Churn rate: {data['is_churned'].mean():.2%}")
data.head()

In [None]:
# Persist the sample dataset as a Parquet file under ../data
data_path = Path('../data')
data_path.mkdir(exist_ok=True)  # no error if the directory is already there

# Build the target path once and reuse it for both the write and the message
parquet_file = data_path / 'churn_dataset.parquet'
data.to_parquet(parquet_file, index=False)
print(f"Data saved to: {parquet_file}")

## 2. Load Configuration

The framework uses YAML configs for reproducibility.

In [None]:
# Load and validate the example churn configuration
config_file = '../configs/classification/example_churn.yaml'
config = load_config(config_file)

# Echo the key settings, one per line, so the reader can confirm what will run
print(
    f"Pipeline: {config.name}",
    f"Type: {config.pipeline_type.value}",
    f"Model: {config.model.model_type.value}",
    f"Experiment: {config.mlflow.experiment_name}",
    sep="\n",
)

## 3. Run Pipeline

Execute the classification pipeline.

In [None]:
# Build the pipeline from the loaded config and execute it
pipeline = ClassificationPipeline(config)
results = pipeline.run()

# Summarise the run: the MLflow run id first, then every metric by name
print("\n=== Results ===")
print(f"MLflow Run ID: {results['run_id']}")
print("\nMetrics:")
metrics = results['metrics']
for name in sorted(metrics):
    value = metrics[name]
    print(f"  {name}: {value:.4f}")

## 4. Make Predictions

Use the trained model to make predictions.

In [None]:
# Three hypothetical customers to score, described with the same feature
# columns as the sample dataset
customer_features = {
    'tenure_months': [6, 24, 48],
    'monthly_charges': [80, 50, 30],
    'total_charges': [480, 1200, 1440],
    'contract_type': ['month-to-month', 'one_year', 'two_year'],
    'payment_method': ['electronic_check', 'credit_card', 'bank_transfer'],
    'num_support_tickets': [5, 1, 0],
}
new_customers = pd.DataFrame(customer_features)

# Score the raw features before any result columns are attached to the frame
predictions = pipeline.predict(new_customers)
probabilities = pipeline.predict_proba(new_customers)

# NOTE(review): indexing with [1] assumes predict_proba returns a
# DataFrame-like object whose column 1 holds the positive (churn) class
# probability; confirm against the pipeline implementation.
new_customers['predicted_churn'] = predictions
new_customers['churn_probability'] = probabilities[1].values

new_customers

## 5. View MLflow Results

Start the MLflow UI to explore experiments:

```bash
mlflow ui --port 5000
```

Then visit http://localhost:5000

In [None]:
# Optional: reload the trained model from the MLflow run recorded above
import mlflow

# "runs:/<run_id>/model" points at the artifact named "model" inside that run.
# NOTE(review): assumes the pipeline logged the model under the artifact path
# "model" using the sklearn flavor; confirm against the pipeline implementation.
model_uri = f"runs:/{results['run_id']}/model"
loaded_model = mlflow.sklearn.load_model(model_uri)

loaded_type = type(loaded_model).__name__
print(f"Model loaded from: {model_uri}")
print(f"Model type: {loaded_type}")