# 🎬 SP1: Video Pipeline for 3D Object Detection

This notebook demonstrates the **video processing capabilities** of the SP1 pipeline.

## Features
- Process video files with 3D object detection
- Real-time webcam processing
- Depth map visualization
- Annotated video output

---

## 1. Setup Environment

In [None]:
# Report the PyTorch build and, when a CUDA device is visible, its name and
# total memory.
import torch

cuda_ready = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 to print it in GB.
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# Clone repository
import os

REPO_URL = "https://github.com/Zulqarnain-cc34/3d_detection.git"
REPO_NAME = "3d_detection"

if not os.path.exists(REPO_NAME):
    print("üì• Cloning repository...")
    !git clone {REPO_URL}
else:
    print(f"üìÅ Repository exists. Pulling latest...")
    %cd {REPO_NAME}
    !git pull
    %cd ..

%cd {REPO_NAME}
!git log -1 --oneline

In [None]:
# Install dependencies
print("üì¶ Installing dependencies...")
!pip install -q -r requirements.txt
print("‚úÖ Dependencies installed!")

## 2. Initialize Pipeline

In [None]:
# Import pipeline components
import sys
# Put the current directory first on the import path so the `src` package
# below is resolved from it.
sys.path.insert(0, '.')

from src.pipeline import SP1Pipeline
from src.video_pipeline import SP1VideoPipeline, VideoConfig

# Select device: first CUDA GPU when available, otherwise CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# Initialize the base SP1 pipeline
print("\n🚀 Initializing SP1 Pipeline...")
pipeline = SP1Pipeline(device=DEVICE)
print("✅ Pipeline ready!")

In [None]:
# Configure video pipeline
config = VideoConfig(
    detection_classes=[
        "person", "chair", "table", "couch", "tv",
        "laptop", "bottle", "cup", "book", "phone"
    ],
    confidence_threshold=0.25,
    process_every_n_frames=1,  # Process every frame (set to 2 or 3 for faster processing)
    show_depth_minimap=True,
    show_3d_overlay=True,
    show_fps=True,
    show_object_panel=True,
    depth_colormap="plasma"
)

# Create video pipeline: wraps the base pipeline together with the config above.
video_pipeline = SP1VideoPipeline(pipeline, config)
print("🎬 Video pipeline configured!")

## 3. Download Sample Video

In [None]:
import urllib.request
import os

# Create directories
os.makedirs("data", exist_ok=True)
os.makedirs("outputs", exist_ok=True)

# Download a sample video (indoor scene)
# You can replace this URL with your own video
SAMPLE_VIDEO_URL = "https://github.com/intel-iot-devkit/sample-videos/raw/master/head-pose-face-detection-female-and-male.mp4"
SAMPLE_VIDEO_PATH = "data/sample_video.mp4"

if not os.path.exists(SAMPLE_VIDEO_PATH):
    print("📥 Downloading sample video...")
    # Download to a temporary name and rename only on success. Otherwise an
    # interrupted download leaves a truncated file at SAMPLE_VIDEO_PATH that
    # the existence check above would accept on every later run.
    partial_path = SAMPLE_VIDEO_PATH + ".part"
    try:
        urllib.request.urlretrieve(SAMPLE_VIDEO_URL, partial_path)
        os.replace(partial_path, SAMPLE_VIDEO_PATH)
    finally:
        # Only still present when the download or rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"✅ Downloaded: {SAMPLE_VIDEO_PATH}")
else:
    print(f"📁 Using existing: {SAMPLE_VIDEO_PATH}")

# Show video info
import cv2
cap = cv2.VideoCapture(SAMPLE_VIDEO_PATH)
try:
    # Fail here with a clear message instead of printing all-zero properties
    # for a file that could not be opened.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {SAMPLE_VIDEO_PATH}")
    print("\nVideo properties:")
    print(f"  Resolution: {int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))}x{int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))}")
    print(f"  FPS: {cap.get(cv2.CAP_PROP_FPS):.1f}")
    print(f"  Frames: {int(cap.get(cv2.CAP_PROP_FRAME_COUNT))}")
finally:
    # Release the capture handle even if reading the properties raised.
    cap.release()

## 4. Process Video File

In [None]:
# Process the sample video
INPUT_VIDEO = SAMPLE_VIDEO_PATH
OUTPUT_VIDEO = "outputs/detected_video.mp4"

print(f"🎬 Processing: {INPUT_VIDEO}")
print(f"📤 Output: {OUTPUT_VIDEO}")
print()

# Process video (set max_frames for quick test)
stats = video_pipeline.process_video(
    input_path=INPUT_VIDEO,
    output_path=OUTPUT_VIDEO,
    max_frames=100,  # Process first 100 frames for demo (remove for full video)
    display=False    # No display in Colab (set True for local)
)

print("\n📊 Processing Stats:")
print(f"  Frames: {stats['frames_processed']}")
print(f"  Detections: {stats['total_detections']}")
# avg_fps is treated as optional; it is printed only when present in stats.
if 'avg_fps' in stats:
    print(f"  Avg FPS: {stats['avg_fps']:.1f}")

## 5. Display Output Video

In [None]:
from IPython.display import HTML
from base64 import b64encode

def show_video(video_path, width=800):
    """Build an inline HTML5 player for a video file.

    The file is read in full and embedded in the returned markup as a
    base64 ``data:`` URL, so the whole video becomes part of the notebook
    output.

    Args:
        video_path: Path of the video file to embed. It is always labelled
            with the ``video/mp4`` MIME type.
        width: Value written into the ``<video>`` element's ``width``
            attribute.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` element.
    """
    # The context manager closes the file handle deterministically instead
    # of leaving it open until garbage collection.
    with open(video_path, 'rb') as video_file:
        encoded = b64encode(video_file.read()).decode()
    data_url = "data:video/mp4;base64," + encoded
    return HTML(f"""
    <video width={width} controls>
        <source src="{data_url}" type="video/mp4">
    </video>
    """)

# Display the output video. The call is the cell's last expression, so the
# returned HTML object is rendered as the cell output.
print("🎥 Output Video with 3D Detection:")
show_video(OUTPUT_VIDEO)

## 6. Frame-by-Frame Processing (Generator)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Process frames using generator
SAMPLE_EVERY_N_FRAMES = 30   # keep one frame out of this many for display
MAX_DEMO_FRAMES = 150        # stop early so the demo stays quick

print("📸 Processing individual frames...\n")

frames_to_show = []
frame_count = 0  # stays 0 when the generator yields nothing

for frame_count, result in enumerate(video_pipeline.generate_frames(INPUT_VIDEO), start=1):
    # Save every SAMPLE_EVERY_N_FRAMES-th frame for display
    if frame_count % SAMPLE_EVERY_N_FRAMES == 0:
        frames_to_show.append({
            # Converted BGR -> RGB here so matplotlib can show it directly.
            'annotated': cv2.cvtColor(result.annotated_frame, cv2.COLOR_BGR2RGB),
            'depth': result.depth_colormap,
            'frame_num': result.frame_number,
            'num_objects': len(result.detections_3d),
            'fps': result.current_fps
        })
        print(f"Frame {result.frame_number}: {len(result.detections_3d)} objects, {result.current_fps:.1f} FPS")

    # Stop after MAX_DEMO_FRAMES frames for demo
    if frame_count >= MAX_DEMO_FRAMES:
        break

print(f"\n✅ Processed {frame_count} frames")

In [None]:
# Display sample frames: one row per sampled frame (at most four), with the
# annotated frame on the left and its depth map on the right.
if frames_to_show:
    n_frames = min(len(frames_to_show), 4)
    # squeeze=False keeps `axes` two-dimensional even when n_frames == 1.
    # With the default, a single row collapses to a 1-D array and the
    # axes[i, 0] indexing below raises IndexError.
    fig, axes = plt.subplots(n_frames, 2, figsize=(14, 4*n_frames), squeeze=False)

    for i, frame_data in enumerate(frames_to_show[:n_frames]):
        # Annotated frame (already converted to RGB when it was collected)
        axes[i, 0].imshow(frame_data['annotated'])
        axes[i, 0].set_title(f"Frame {frame_data['frame_num']} | {frame_data['num_objects']} objects | {frame_data['fps']:.1f} FPS")
        axes[i, 0].axis('off')

        # Depth map
        depth_rgb = cv2.cvtColor(frame_data['depth'], cv2.COLOR_BGR2RGB)
        axes[i, 1].imshow(depth_rgb)
        axes[i, 1].set_title(f"Depth Map - Frame {frame_data['frame_num']}")
        axes[i, 1].axis('off')

    plt.tight_layout()
    plt.savefig('outputs/frame_samples.png', dpi=150)
    plt.show()
    print("💾 Saved: outputs/frame_samples.png")

## 7. Upload Your Own Video (Optional)

In [None]:
# Upload a video through the Colab file picker, run it through the video
# pipeline, and show the annotated result inline.
import shutil

from google.colab import files
from IPython.display import display

print("📤 Upload your video file:")
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is processed.
    uploaded_file = next(iter(uploaded))
    custom_input = f"data/{uploaded_file}"
    custom_output = f"outputs/detected_{uploaded_file}"

    # Move to data folder
    shutil.move(uploaded_file, custom_input)

    print(f"\n🎬 Processing your video: {custom_input}")

    stats = video_pipeline.process_video(
        input_path=custom_input,
        output_path=custom_output,
        display=False
    )

    print(f"\n✅ Done! Output saved to: {custom_output}")
    # Only a cell's final top-level expression is rendered automatically.
    # This call sits inside the `if`, so its result was silently discarded
    # and must be displayed explicitly.
    display(show_video(custom_output))

## 8. Download Output

In [None]:
from google.colab import files

# Download the processed video via the Colab `files` helper imported above.
print("📥 Downloading output video...")
files.download(OUTPUT_VIDEO)

---

## 📝 API Reference

### VideoConfig Options

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `detection_classes` | List[str] | furniture list | Object classes to detect |
| `confidence_threshold` | float | 0.25 | Min detection confidence |
| `process_every_n_frames` | int | 1 | Frame skip (1=all, 2=every 2nd) |
| `show_depth_minimap` | bool | True | Show depth map corner overlay |
| `show_3d_overlay` | bool | True | Show 3D detection boxes |
| `show_fps` | bool | True | Show FPS counter |
| `show_object_panel` | bool | True | Show object list panel |
| `depth_colormap` | str | "plasma" | Colormap: plasma, viridis, magma, jet |
| `output_fps` | float | 30.0 | Output video FPS |

### SP1VideoPipeline Methods

```python
# Process video file
stats = video_pipeline.process_video(
    input_path='input.mp4',
    output_path='output.mp4',
    max_frames=None,      # None = all frames
    display=True,         # Show live preview
    callback=None         # Optional: callback(FrameResult) -> bool
)

# Run on webcam
stats = video_pipeline.run_webcam(
    camera_id=0,
    output_path='webcam_output.mp4'  # Optional
)

# Generator for custom processing
for result in video_pipeline.generate_frames('video.mp4'):
    # result.annotated_frame - BGR frame with overlays
    # result.depth_colormap - Depth visualization
    # result.detections_3d - List of BoundingBox3D
    pass
```