# Pipeline Demo: New Features

This notebook demonstrates the improved pipeline with:
1. **Auto-detection** - Pipeline figures out which stages to run
2. **Organized outputs** - Clear directory structure
3. **Serialization** - Save/load intermediate results
4. **Batch processing** - Process multiple videos


In [None]:
import importlib
import datafawn
# Re-execute the already-imported top-level `datafawn` module so that a re-run
# of this cell picks up edits made to it without restarting the kernel.
# NOTE: importlib.reload() reloads only the module object passed to it;
# submodules that were already imported are not reloaded by this call.
importlib.reload(datafawn)

from pathlib import Path


## Setup: Create Pipeline Components


In [None]:
# Postprocessors
# Paw tracks handed to the error postprocessor as `bodyparts`.
rel_paws = [
    'front_left_paw_rel',
    'front_right_paw_rel',
    'back_left_paw_rel',
    'back_right_paw_rel',
]

# Passed to the distance check through `distance_kwargs`; presumably maps each
# reference body part to the paws measured against it - confirm with datafawn.
reference_map = {
    'back_base': ['front_left_paw', 'front_right_paw'],
    'tail_base': ['back_left_paw', 'back_right_paw'],
}

rel_pp = datafawn.RelativePawPositionPostprocessor()

# All three error checks are switched on; each one gets its own settings dict.
error_pp_settings = {
    'bodyparts': rel_paws,
    'use_velocity': True,
    'use_likelihood': True,
    'use_distance': True,
    'velocity_kwargs': {'threshold_pixels': 50, 'window_size': 5},
    'likelihood_kwargs': {'min_likelihood': 0.5},
    'distance_kwargs': {'reference_map': reference_map, 'max_distance': 300},
}
error_pp = datafawn.ErrorPostprocessor(**error_pp_settings)

# Event extractor
zeni_extractor = datafawn.ZeniExtractor(window_size=5, show_plots=False)

# Pipeline: no pose estimator is configured because this demo starts from
# pose data that already exists.
pipeline = datafawn.EventDetectionPipeline(
    postprocessors=[rel_pp, error_pp],
    event_extractors=[zeni_extractor],
)

print("Pipeline created!")


---
## 1. Auto-Detection: Start from Pose Data

When you provide `pose_data_path`, the pipeline automatically:
- ✅ Skips pose estimation
- ✅ Runs postprocessing
- ✅ Runs event extraction


In [None]:
# Example: run the pipeline on pose data that is already stored on disk
POSE_DATA_PATH = 'data_example/pose_estimates/deerrunning/deerrunning_superanimal_quadruped_hrnet_w32_fasterrcnn_resnet50_fpn_v2_.h5'

results = pipeline.run(pose_data_path=POSE_DATA_PATH, output_dir='demo_output/from_pose_data')

# Report, stage by stage, whether it ran or was skipped
print("Stages that ran:")
stages_run = results['metadata']['stages_run']
for stage, did_run in stages_run.items():
    if did_run:
        status = "✅"
    else:
        status = "⏭️ skipped"
    print(f"  {stage}: {status}")


In [None]:
# List every output recorded by the run together with its path
print("Output files created:")
output_paths = results['output_paths']
for output_name, output_path in output_paths.items():
    print(f"  {output_name}: {output_path}")


In [None]:
# Summarise how many events of each type were extracted per individual
print("Events extracted:")
events_by_key = results['events']
for individual_key, event_dict in events_by_key.items():
    # Each key is unpacked as a (scorer, individual) pair; only the
    # individual is shown in the heading.
    _scorer, individual = individual_key
    print(f"\n  {individual}:")
    for event_type, frames in event_dict.items():
        event_count = len(frames)
        print(f"    {event_type}: {event_count} events")


---
## 2. Auto-Detection: Start from Postprocessed Data

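If you already have postprocessed data, pass it as `postprocessed_data`: the pipeline then skips both pose estimation and postprocessing and starts at event extraction.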


In [None]:
# Reuse the postprocessed data produced by the previous run
postprocessed_data = results['postprocessed_data']

results2 = pipeline.run(postprocessed_data=postprocessed_data, output_dir='demo_output/from_postprocessed')

# Same per-stage report as before, this time for the second run
print("Stages that ran:")
for stage, did_run in results2['metadata']['stages_run'].items():
    status = "⏭️ skipped" if not did_run else "✅"
    print(f"  {stage}: {status}")


---
## 3. Serialization: Save and Load Results

Save results to files, then load them later to continue processing.


In [None]:
# Write the results of the first run (`results`) to a directory
save_dir = 'demo_output/saved_results'
saved_paths = pipeline.save_results(results, save_dir)

# save_results returns a mapping of names to paths; list it
print("Saved files:")
for saved_name, saved_path in saved_paths.items():
    print(f"  {saved_name}: {saved_path}")


In [None]:
# Later: Load results back
loaded = datafawn.EventDetectionPipeline.load_results('demo_output/saved_results')

print("Loaded data:")
# An entry may be absent or None, so look each one up once and print the
# text 'None' instead of reading .shape from a missing value.
for key in ('pose_data', 'postprocessed_data'):
    frame = loaded.get(key)
    shape = frame.shape if frame is not None else 'None'
    print(f"  {key} shape: {shape}")

# Treat a missing *or* None 'events' entry as "no events". A plain
# loaded.get('events', {}) still returns None when the key exists with a None
# value, and len(None) raises TypeError.
loaded_events = loaded.get('events') or {}
print(f"  events: {len(loaded_events)} individuals")


In [None]:
# Resume the pipeline from the events that were loaded back from disk
# (e.g., if you added a soundscape generator later)
events_from_disk = loaded['events']
results3 = pipeline.run(events=events_from_disk, output_dir='demo_output/from_loaded')

# Per-stage report for the resumed run
print("Continued from loaded events:")
stages_run_3 = results3['metadata']['stages_run']
for stage, did_run in stages_run_3.items():
    if did_run:
        status = "✅"
    else:
        status = "⏭️ skipped"
    print(f"  {stage}: {status}")


---
## 4. Batch Processing: Multiple Videos

Process multiple videos at once with organized output per video.


In [None]:
# Batch processing starts from videos, so it needs a pose estimator. The real
# calls are therefore left commented out; this is how they would look:

# import torch
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# dlc_estimator = datafawn.DeepLabCutPoseEstimator(
#     model_name='superanimal_quadruped',
#     device=device
# )

# full_pipeline = datafawn.EventDetectionPipeline(
#     pose_estimator=dlc_estimator,
#     postprocessors=[rel_pp, error_pp],
#     event_extractors=[zeni_extractor]
# )

# # Batch process multiple videos
# results_list = full_pipeline.run_batch(
#     video_paths=['videos/deer1.mp4', 'videos/deer2.mp4', 'videos/dog1.mp4'],
#     output_base_dir='batch_results'
# )

print("Batch processing example (commented out - requires pose estimator + GPU)")

# Illustrative directory tree, one entry per printed line, in display order.
output_tree_lines = (
    "\nOutput structure would be:",
    "batch_results/",
    "├── deer1/",
    "│   ├── pose_estimation/",
    "│   ├── postprocessing/",
    "│   ├── events/",
    "│   └── soundscapes/",
    "├── deer2/",
    "│   └── ...",
    "└── dog1/",
    "    └── ...",
)
for tree_line in output_tree_lines:
    print(tree_line)


---
## Summary: Input Types

| Input Provided | Stages Run |
|----------------|------------|
| `video_path` | Pose Est → Postproc → Events → Soundscape |
| `pose_data` or `pose_data_path` | Postproc → Events → Soundscape |
| `postprocessed_data` or `postprocessed_data_path` | Events → Soundscape |
| `events` or `events_path` | Soundscape only |


In [None]:
# Cleanup demo output
import shutil  # only used once the rmtree line below is uncommented

demo_output_dir = Path('demo_output')
if demo_output_dir.exists():
    # Deletion is opt-in: by default this cell only reports that the folder exists.
    # shutil.rmtree('demo_output')  # Uncomment to delete demo files
    print("Demo output in 'demo_output/' - uncomment above line to delete")
