# 01 - Data Exploration: PianoVAM Dataset

This notebook explores the PianoVAM dataset for piano fingering detection.

**Contents:**
- Load dataset from HuggingFace
- Visualize sample videos and frames
- Parse and visualize MIDI data
- Explore hand skeleton annotations
- Understand data quality and characteristics


In [None]:
# Install dependencies (run once in Colab)
# !pip install -q datasets huggingface_hub opencv-python mido matplotlib seaborn tqdm torchcodec

import os
import sys

# Setup for Colab
if 'google.colab' in str(get_ipython()):
    if not os.path.exists('computer-vision'):
        !git clone https://github.com/esnylmz/computer-vision.git
    os.chdir('computer-vision')
    !pip install -q -e .
    # Install torchcodec for audio decoding (required by HuggingFace datasets)
    !pip install -q torchcodec
else:
    # Local development
    sys.path.insert(0, '..')

# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from tqdm.notebook import tqdm

print("Setup complete!")


## 1. Load PianoVAM Dataset

Load the dataset from HuggingFace and explore its structure.


In [None]:
# Import project modules
from src.data.dataset import PianoVAMDataset
from src.data.midi_utils import MidiProcessor
from src.data.video_utils import VideoProcessor
from src.utils.config import load_config

# Load configuration
config = load_config('configs/default.yaml')
print(f"Project: {config.project_name} v{config.version}")

# =============================================================================
# PianoVAM Dataset - Split Information
# =============================================================================
# Total: 106 samples across 3 splits
#   - train: 73 samples
#   - validation: 19 samples (column value: 'valid')
#   - test: 14 samples
#
# Our loader filters by the 'split' column in the data.
# You can use: 'train', 'validation', 'valid', 'val', 'test'
# =============================================================================

# Per-split cap that keeps this exploration notebook fast.
MAX_SAMPLES = 20


def load_split(split, max_samples=MAX_SAMPLES):
    """Build a PianoVAMDataset for one split, preferring streaming mode.

    Args:
        split: Split name passed straight through to PianoVAMDataset.
        max_samples: Value forwarded as the dataset's ``max_samples`` argument.

    Returns:
        The constructed PianoVAMDataset.

    Raises:
        Exception: Whatever the non-streaming construction raises, if the
            streaming attempt failed first and the retry fails as well.
    """
    try:
        dataset = PianoVAMDataset(split=split, streaming=True, max_samples=max_samples)
        mode_note = ""
    except Exception as e:
        # Best-effort fallback: report the streaming failure, then retry once
        # without streaming. A second failure is allowed to propagate.
        print(f"Error loading {split} dataset: {e}\n")
        print("Trying non-streaming mode...")
        dataset = PianoVAMDataset(split=split, streaming=False, max_samples=max_samples)
        mode_note = " (non-streaming)"
    print(f"{split.capitalize()} dataset ready{mode_note}\n")
    return dataset


print("\nLoading PianoVAM dataset splits...")
print("Using streaming=True for efficient loading")
print(f"Limiting to {MAX_SAMPLES} samples per split for exploration...\n")

# Every split goes through the same loader, so all three share the fallback
# that previously only the train split had.
train_dataset = load_split('train')
# The comment block above lists 'validation', 'valid' and 'val' as accepted names.
val_dataset = load_split('validation')
test_dataset = load_split('test')

print("=" * 60)
print("All splits loaded successfully!")
print("=" * 60)


In [None]:
# Explore a sample (using iteration for streaming mode)
def preview(text, limit=80):
    """Return `text` cut to `limit` characters, adding '...' only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


# Pull only the first item; a default of None turns an empty dataset into a
# readable error instead of a bare StopIteration.
sample = next(iter(train_dataset), None)
if sample is None:
    raise RuntimeError("train_dataset yielded no samples; cannot show an example")

print(f"Sample ID: {sample.id}")

# Descriptive metadata fields, printed in a fixed order.
for label, key in (
    ("Composer", "composer"),
    ("Piece", "piece"),
    ("Performer", "performer"),
    ("Skill Level", "skill_level"),
):
    print(f"{label}: {sample.metadata[key]}")

print(f"Duration: {sample.metadata['duration']:.1f}s")
print(f"\nKeyboard Corners: {sample.metadata['keyboard_corners']}")

print("\nPaths:")
# Attributes are read inside the loop so each line is printed before the next
# attribute is accessed.
for label, attr in (
    ("Video", "video_path"),
    ("MIDI", "midi_path"),
    ("Skeleton", "skeleton_path"),
):
    print(f"  {label}: {preview(getattr(sample, attr))}")
