# Indoor Navigation ML Decision Models

This notebook demonstrates the full pipeline:
1. Loading annotated data
2. Extracting features
3. Training StepFilter and AnchorSelection models
4. Evaluating performance
5. Running inference on new routes

In [None]:
import sys, os
# Resolve the project root only once per kernel session. This cell changes the
# working directory, so recomputing the root from os.getcwd() on a re-run would
# climb two more levels each time and end up outside the project.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))

# Do not stack duplicate sys.path entries when the cell is executed again.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Later cells use paths relative to the project root (e.g. 'ml/checkpoints/...').
os.chdir(PROJECT_ROOT)
print('Project root:', PROJECT_ROOT)

## 1. Load Training Data

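Load human annotations from local submissions or Google Sheets,
then re-run route generation to capture numeric features.
If a cached dataset already exists at `ml/checkpoints/dataset_cache.json`, it is loaded instead;
delete that file to force the dataset to be rebuilt.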

In [None]:
from ml.data.data_loader import build_training_dataset
from ml.data.dataset import train_val_split, save_dataset_cache, load_dataset_cache

CACHE_PATH = 'ml/checkpoints/dataset_cache.json'

# Prefer the cached dataset; otherwise build it from local submissions
# (alternatively pass source='sheets', sheets_url='...') and cache the result.
cache_available = os.path.exists(CACHE_PATH)
if not cache_available:
    records = build_training_dataset(source='local', venue='zorlu')
    # Only a non-empty dataset is written to the cache.
    if records:
        save_dataset_cache(records, CACHE_PATH)
else:
    records = load_dataset_cache(CACHE_PATH)

print(f'Total route records: {len(records)}')

## 2. Inspect Features

Extract feature vectors from a single route to understand dimensions.

In [None]:
from ml.features.feature_extractor import FeatureExtractor

# A single extractor instance; the dataset cell further down reuses `fe`.
fe = FeatureExtractor()

if not records:
    print('No records available. Add data to submissions/ folder.')
else:
    # Inspect the first loaded route as a sample.
    rec = records[0]
    step_feats, anchor_feats = (
        fe.extract_step_features(rec),
        fe.extract_anchor_features(rec),
    )

    print(f'Route: {rec["route_id"]}')
    # Original author's note: expected shape (N_steps, 13) — TODO confirm.
    print(f'Step features shape: {step_feats.shape}')
    print(f'Anchor feature records: {len(anchor_feats)}')
    if anchor_feats:
        first_anchor = anchor_feats[0]
        # Original author's note: expected shape (3, 8) — TODO confirm.
        print(f'Per-anchor feature shape: {first_anchor["features"].shape}')
    print()
    print('Step labels:', rec.get('step_labels', {}))
    print('Anchor labels:', rec.get('anchor_labels', {}))

## 3. Create Datasets

In [None]:
from ml.data.dataset import StepFilterDataset, AnchorSelectionDataset

# Hold out 20% of the records for validation; the seed is fixed so the split
# should be repeatable (assuming train_val_split honours it).
train_recs, val_recs = train_val_split(records, val_ratio=0.2, seed=42)
print(f'Train: {len(train_recs)} routes, Val: {len(val_recs)} routes')

# Step-level datasets, built train first and then val.
train_sf, val_sf = (
    StepFilterDataset(split, fe) for split in (train_recs, val_recs)
)
print(f'\nStepFilter train: {len(train_sf)} steps {train_sf.label_distribution()}')
print(f'StepFilter val:   {len(val_sf)} steps {val_sf.label_distribution()}')

# Anchor-level datasets, built from the same two splits.
train_as, val_as = (
    AnchorSelectionDataset(split, fe) for split in (train_recs, val_recs)
)
print(f'\nAnchorSelector train: {len(train_as)} samples {train_as.label_distribution()}')
print(f'AnchorSelector val:   {len(val_as)} samples {val_as.label_distribution()}')

## 4. Train Models

In [None]:
from ml.training.train import train_step_filter, train_anchor_selector

# Train Step Filter
# Hyperparameters are gathered in one mapping so they are easy to spot and tune.
sf_hparams = dict(lr=1e-3, batch_size=32, epochs=100, patience=10)
sf_results = train_step_filter(train_recs, val_recs, **sf_hparams)

In [None]:
# Train Anchor Selector
# Hyperparameters are gathered in one mapping so they are easy to spot and tune.
as_hparams = dict(lr=1e-3, batch_size=32, epochs=100, patience=10)
as_results = train_anchor_selector(train_recs, val_recs, **as_hparams)

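## 5. Visualize Training History

The cell below prints a text summary of the best epoch for each model, read from the saved history JSON files; it does not draw any plots.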

In [None]:
import json

# Load histories
sf_hist_path = 'ml/checkpoints/step_filter_history.json'
as_hist_path = 'ml/checkpoints/anchor_selector_history.json'


def load_history(path):
    """Return the parsed JSON history stored at ``path``.

    Returns None when the file does not exist, so the caller can skip the
    summary for a model that has not been trained yet.
    """
    if not os.path.exists(path):
        return None
    # JSON files are read as UTF-8 regardless of the platform default encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def best_epoch(history, metric, pick=max):
    """Find the best entry of ``history`` according to ``metric``.

    Args:
        history: list of per-epoch dicts, each with an 'epoch' key and ``metric``.
        metric: name of the key to compare.
        pick: ``max`` for higher-is-better metrics, ``min`` for losses.

    Returns:
        ``(value, epoch)`` of the first best entry, or None if ``history`` is
        empty (a bare min()/max() would raise ValueError in that case).
    """
    if not history:
        return None
    best = pick(history, key=lambda entry: entry[metric])
    return best[metric], best['epoch']


sf_history = load_history(sf_hist_path)
if sf_history is not None:
    print('StepFilter Training Summary:')
    best_loss = best_epoch(sf_history, 'val_loss', pick=min)
    best_f1 = best_epoch(sf_history, 'f1', pick=max)
    if best_loss is None or best_f1 is None:
        print('  History is empty.')
    else:
        print(f'  Best val_loss: {best_loss[0]:.4f} at epoch {best_loss[1]}')
        print(f'  Best F1:       {best_f1[0]:.4f} at epoch {best_f1[1]}')

as_history = load_history(as_hist_path)
if as_history is not None:
    print('\nAnchorSelector Training Summary:')
    best_top1 = best_epoch(as_history, 'top1_accuracy', pick=max)
    if best_top1 is None:
        print('  History is empty.')
    else:
        print(f'  Best top-1 accuracy: {best_top1[0]:.4f} at epoch {best_top1[1]}')

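## 6. Run Inference on a New Route

The example below reuses the first loaded record as a stand-in for a new route. That record may have been part of the training split, so treat the output as a demonstration rather than a held-out evaluation.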

In [None]:
from ml.inference.pipeline import create_pipeline

pipeline = create_pipeline()

if not records:
    print('No data available for testing.')
else:
    # The first loaded record is used as the demo route.
    test_route = records[0]
    result = pipeline.predict(test_route)

    # One line per step with its keep/delete decision.
    print('Step Decisions:')
    for decision in result['all_step_decisions']:
        if decision['keep']:
            status = 'KEEP'
        else:
            status = 'DELETE'
        print(f"  Step {decision['step_number']:2d} [{decision['action']:<12}] -> {status} (p={decision['keep_prob']:.3f})")

    print(f"\nFiltered: {len(result['filtered_steps'])} / {len(test_route['steps'])} steps kept")

    # One line per anchor decision.
    print('\nAnchor Decisions:')
    for choice in result['anchor_decisions']:
        print(f"  Step {choice['step_number']:2d} -> Candidate {choice['selected_idx']} "
              f"{choice['selected_anchor']} (proba={choice['proba']})")

## 7. Integration Example

How to integrate with the Flask app (viewer_app.py):

```python
from ml.inference.pipeline import create_pipeline

# At app startup:
ml_pipeline = create_pipeline()

# After route generation in /api/route endpoint:
route_data = {
    'steps': route_info.get('steps', []),
    'turns': route_info.get('turns', []),
    'path_points': route_info.get('path_points', []),
    'total_distance': route_info['summary']['total_distance_meters'],
}
decisions = ml_pipeline.predict(route_data)

# Use decisions['filtered_steps'] instead of all steps
# Use decisions['anchor_decisions'] for anchor selection
```