## Step 1: Setup and Create Synthetic Data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from tempfile import TemporaryDirectory

from foodspec import FoodSpec

# Notebook banner: title line followed by a 70-character rule.
print(
    "Phase 1: Unified FoodSpec API - Complete Workflow Demo",
    "=" * 70,
    sep="\n",
)

In [None]:
# Create synthetic Raman dataset
#
# Produces the inputs used by the rest of the notebook:
#   x         (n_samples, n_wavenumbers) array of intensities
#   wn        wavenumber axis, 500-2000 cm^-1
#   oil_types per-sample class label
#   metadata  DataFrame with sample_id, oil_type and a random batch number
print("\n1. Creating synthetic spectroscopy data...")
np.random.seed(42)  # fixed seed so the demo output is reproducible
n_samples = 30
n_wavenumbers = 200

wn = np.linspace(500, 2000, n_wavenumbers)

# Background shared by every sample: a linear ramp plus Gaussian noise.
x = np.random.randn(n_samples, n_wavenumbers) * 0.5 + np.linspace(0, 10, n_wavenumbers)

# Cycle through the classes so the label list always has exactly n_samples
# entries, even if n_samples is changed to something other than 30.
oil_classes = ["olive", "sunflower", "canola"]
oil_types = [oil_classes[i % len(oil_classes)] for i in range(n_samples)]

# Simulate Raman spectra (3 oil types with distinct signatures).
# Without a class-dependent term the labels would be unrelated to the spectra
# and the classifier below could only reach chance accuracy. Each oil type
# therefore gets one Gaussian band at its own position. The positions are
# illustrative only; they are not real Raman band assignments.
band_centers = {"olive": 800.0, "sunflower": 1250.0, "canola": 1700.0}
band_height = 3.0   # well above the 0.5 noise standard deviation
band_width = 40.0   # Gaussian sigma in cm^-1
labels = np.array(oil_types)
for oil, center in band_centers.items():
    # The 1-D band profile broadcasts across all rows of this class.
    x[labels == oil] += band_height * np.exp(-0.5 * ((wn - center) / band_width) ** 2)

metadata = pd.DataFrame({
    "sample_id": [f"oil_{i:03d}" for i in range(n_samples)],
    "oil_type": oil_types,
    "batch": np.random.randint(1, 4, n_samples),  # batch id in {1, 2, 3}
})

print(f"   Created {n_samples} samples × {n_wavenumbers} wavenumbers")
print(f"   Classes: {set(oil_types)}")
print(f"   Wavenumber range: {wn[0]:.0f} - {wn[-1]:.0f} cm⁻¹")

## Step 2: Initialize FoodSpec and Execute Chainable Workflow

In [None]:
# Build the FoodSpec object from the synthetic spectra and their metadata
print("\n2. Initializing FoodSpec...")
fs = FoodSpec(x, wavenumbers=wn, metadata=metadata, modality="raman", kind="oils")
print(f"   ✓ Initialized: {len(fs.data)} samples, {fs.data.x.shape[1]} wavenumbers")

# Options for each workflow step, collected up front so they are easy to tweak
qc_options = {"method": "isolation_forest", "threshold": 0.1}
preprocess_options = {
    "baseline_correction": "airpls",
    "baseline_lambda": 10000,
    "smoothing_method": "savgol",
    "smoothing_window": 11,
}
train_options = {
    "task": "classification",
    "target": "oil_type",
    "model_type": "rf",
    "cv_folds": 3,
}

# Run the steps in order: QC -> preprocessing -> training
print("\n3. Executing chainable workflow...")
print("   ├─ QC: outlier detection...")
fs.qc(**qc_options)

print("   ├─ Preprocessing: baseline correction + smoothing...")
fs.preprocess(**preprocess_options)

print("   ├─ Training: random forest classifier...")
fs.train(**train_options)

print("   └─ Workflow complete")

## Step 3: Access Metrics and Diagnostics

In [None]:
def _format_metric(value):
    """Return *value* as display text: floats to 4 decimals, anything else via str()."""
    return f"{value:.4f}" if isinstance(value, float) else f"{value}"


print("\n4. Workflow Summary:")
print(f"   {fs.workflow_summary}")

print("\n5. Metrics (Key Performance Indicators):")
if hasattr(fs, 'bundle') and hasattr(fs.bundle, 'metrics'):
    for key, val in fs.bundle.metrics.items():
        if isinstance(val, dict):
            # Grouped metrics: print the group name, then every entry.
            # Non-float entries (ints, lists, strings) are printed too, so
            # nothing is silently left out of the report.
            print(f"   {key}:")
            for k, v in val.items():
                print(f"     - {k}: {_format_metric(v)}")
        else:
            print(f"   {key}: {_format_metric(val)}")
else:
    print("   (Metrics available in fs.bundle.metrics after workflow)")

print("\n6. Diagnostics Available:")
if hasattr(fs, 'bundle') and hasattr(fs.bundle, 'diagnostics'):
    # Only the diagnostic names are listed here, not their contents.
    for key in fs.bundle.diagnostics.keys():
        print(f"   - {key}")
else:
    print("   (Diagnostics available in fs.bundle.diagnostics)")

## Step 4: Export and Save Results

In [None]:
# Export into a throwaway directory and list what was written.
# The directory is removed when the `with` block exits.
print("\n7. Exporting results...")
with TemporaryDirectory() as tmpdir:
    out_path = Path(tmpdir)
    fs.export(out_path)

    print(f"   Exported to: {out_path}")
    print("\n   Output files:")
    # Keep regular files only (directories are skipped), in sorted path order.
    exported_files = sorted(p for p in out_path.rglob("*") if p.is_file())
    for exported in exported_files:
        print(f"   ├─ {exported.relative_to(out_path)} ({exported.stat().st_size} bytes)")

## Step 5: Key Takeaways

### What We Learned:

1. **Chainable API:** Methods return `self` for intuitive workflow composition
2. **Built-in QC:** Automatic outlier detection protects downstream analyses
3. **Standardized Preprocessing:** Proven baseline correction, smoothing, normalization
4. **Reproducibility:** Full provenance tracking (metadata, parameters, versions)
5. **Export Format:** Organized output with metrics, diagnostics, model artifacts

### Workflow Advantages:

- **Simplicity:** No manual intermediate file management
- **Debugging:** Built-in diagnostics show what happened at each step
- **Best Practices:** Defaults follow spectroscopy best practices
- **Reproducibility:** Save/load workflow configuration as YAML/JSON
- **Auditable:** Provenance file documents entire history

### Customization:

You can customize any step:
```python
fs.qc(method="mahalanobis", threshold=2.0)  # Different outlier method
fs.preprocess(baseline_correction=None)       # Skip baseline
fs.train(model_type="svm", kernel="rbf")    # Different classifier
```

### Next Steps:

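1. Load your own data: `x, wn, metadata = load_your_spectra()` (spectra, wavenumber axis, and sample metadata — the same three inputs passed to `FoodSpec` in Step 2)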
2. Customize preprocessing parameters based on your instrument
3. Explore different models (RF, SVM, Deep Learning)
4. Compare cross-validation results
5. Export and archive results with full provenance