# Simplified Test: 200 Balanced Samples with Unified Module

This notebook demonstrates the simplified API using the refactored modules.

In [1]:
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, '/shared_data0/weiqiuy/llm_cholec_organ/src')

from endopoint.datasets import build_dataset
from endopoint.fewshot import UnifiedFewShotSelector

print("✅ Modules imported")

✅ Modules imported


## Test Function Using New Pipeline Method

In [None]:
def test_dataset_simplified(dataset_name, dataset_config=None, force_regenerate=False):
    """Simplified test using the new pipeline method.
    
    All the balance analysis and configuration is now handled by the module.
    
    Args:
        dataset_name: Name of the dataset
        dataset_config: Optional dataset configuration
        force_regenerate: If True, delete cache and regenerate
    """
    print("\n" + "="*70)
    print(f"Testing: {dataset_name}")
    print("="*70)
    
    # Default configurations
    if dataset_config is None:
        configs = {
            "cholecseg8k_local": {
                "data_dir": "/shared_data0/weiqiuy/datasets/cholecseg8k"
            },
            "cholec_organs": {
                "data_dir": "/shared_data0/weiqiuy/real_drs/data/abdomen_exlib",
                "video_globs": "public",
                "gen_seed": 56,
                "train_val_seed": 0
            },
            "cholec_gonogo": {
                "data_dir": "/shared_data0/weiqiuy/real_drs/data/abdomen_exlib",
                "video_globs": "public",
                "gen_seed": 56,
                "train_val_seed": 0
            }
        }
        dataset_config = configs.get(dataset_name, {})
    
    # Load dataset
    dataset = build_dataset(dataset_name, **dataset_config)
    
    print(f"\n📊 Dataset Info:")
    print(f"  Tag: {dataset.dataset_tag}")
    print(f"  Train: {dataset.total('train')} examples")
    print(f"  Classes: {len(dataset.label_ids)}")
    
    # Create selector with 200 samples and 30% minimum quota
    output_dir = Path(f"/shared_data0/weiqiuy/llm_cholec_organ/data_info/{dataset_name}_balanced_200")
    
    # If force_regenerate, clear the cached balanced indices
    if force_regenerate:
        cache_file = output_dir / "balanced_test_indices_advanced_200.json"
        if cache_file.exists():
            print(f"⚠️ Removing old cache file: {cache_file.name}")
            cache_file.unlink()
            print("  Cache cleared - will regenerate with correct parameters")
    
    selector = UnifiedFewShotSelector(
        dataset=dataset,
        output_dir=output_dir,
        n_test_samples=200,  # 200 balanced samples
        n_pos_examples=1,
        n_neg_absent=1,
        n_neg_wrong=1,
        min_pixels=50,
        seed=42,
        cache_enabled=True
    )
    
    print(f"📁 Output directory: {output_dir}")
    print(f"\n🔧 Configuration:")
    print(f"  Test samples: 200")
    print(f"  Min quota for rare classes: 30% (60 samples)")
    print(f"  Cap for abundant classes: 70% of ideal distribution")
    
    # Run the complete pipeline with all analysis
    results = selector.run_balanced_selection_pipeline(
        split="train",
        visualize=True,  # Print detailed analysis
        save_summary=True  # Save summary to file
    )
    
    # Verify we got 200 samples
    actual_samples = len(results['test_indices'])
    if actual_samples != 200:
        print(f"\n⚠️ WARNING: Expected 200 samples but got {actual_samples}")
        print("  Consider running with force_regenerate=True to clear cache")
    
    # The pipeline already printed everything, just return results
    return results

print("✅ Simplified test function updated with 30% minimum quota (60 samples)")

## Test All Datasets

In [3]:
# Test CholecSeg8k - force regeneration to use correct 20% quota (40 samples)
results_cholecseg8k = test_dataset_simplified("cholecseg8k_local", force_regenerate=True)


Testing: cholecseg8k_local
Video-based split created:
  Total videos: 17
  Train: 13 videos, 5760 frames
  Validation: 1 videos, 560 frames
  Test: 3 videos, 1760 frames
  Train videos: video01, video09, video20, video52, video35, video48, video26, video43, video12, video27, video55, video18, video25
  Val videos: video28
  Test videos: video37, video17, video24

📊 Dataset Info:
  Tag: cholecseg8k_local
  Train: 5760 examples
  Classes: 12
⚠️ Removing old cache file: balanced_test_indices_advanced_200.json
  Cache cleared - will regenerate with correct parameters
📁 Output directory: /shared_data0/weiqiuy/llm_cholec_organ/data_info/cholecseg8k_local_balanced_200

🔧 Configuration:
  Test samples: 200
  Min quota for rare classes: 40% (80 samples)
  Cap for abundant classes: 70% of ideal distribution

🔄 Step 1: Computing presence matrix for train split...
Loaded cached presence matrix from presence_matrix_train.npy

📊 Class distribution in train set:
  Abdominal Wall                   50

In [4]:
# Test CholecOrgans - force regeneration to use correct 20% quota  
results_organs = test_dataset_simplified("cholec_organs", force_regenerate=True)


Testing: cholec_organs
Indexing training videos...


  Train videos: 100%|██████████| 96/96 [00:00<00:00, 555.34it/s]


Indexing test videos...


  Test videos: 100%|██████████| 25/25 [00:00<00:00, 538.57it/s]


Building examples list...


  Checking files: 100%|██████████| 1015/1015 [00:00<00:00, 3509.70it/s]


CholecOrgans dataset indexed:
  Total examples: 1015
  Train: 716 examples
  Validation: 80 examples
  Test: 219 examples

📊 Dataset Info:
  Tag: cholec_organs
  Train: 716 examples
  Classes: 3
⚠️ Removing old cache file: balanced_test_indices_advanced_200.json
  Cache cleared - will regenerate with correct parameters
📁 Output directory: /shared_data0/weiqiuy/llm_cholec_organ/data_info/cholec_organs_balanced_200

🔧 Configuration:
  Test samples: 200
  Min quota for rare classes: 40% (80 samples)
  Cap for abundant classes: 70% of ideal distribution

🔄 Step 1: Computing presence matrix for train split...
Loaded cached presence matrix from presence_matrix_train.npy

📊 Class distribution in train set:
  Liver                             715 samples ( 99.9%)
  Gallbladder                       713 samples ( 99.6%)
  Hepatocystic Triangle             711 samples ( 99.3%)

🔄 Step 2: Selecting 200 balanced test samples...
  Configuration:
    - Rare class boost: top 1 rarest classes
    - Mi

In [5]:
# Test CholecGoNoGo - force regeneration to use correct 20% quota
results_gonogo = test_dataset_simplified("cholec_gonogo", force_regenerate=True)


Testing: cholec_gonogo
Indexing training videos...


  Train videos: 100%|██████████| 96/96 [00:00<00:00, 577.35it/s]


Indexing test videos...


  Test videos: 100%|██████████| 25/25 [00:00<00:00, 567.19it/s]


Building examples list...


  Checking files: 100%|██████████| 1015/1015 [00:00<00:00, 6047.42it/s]


CholecGoNoGo dataset indexed:
  Total examples: 1015
  Train: 716 examples
  Validation: 80 examples
  Test: 219 examples

📊 Dataset Info:
  Tag: cholec_gonogo
  Train: 716 examples
  Classes: 2
⚠️ Removing old cache file: balanced_test_indices_advanced_200.json
  Cache cleared - will regenerate with correct parameters
📁 Output directory: /shared_data0/weiqiuy/llm_cholec_organ/data_info/cholec_gonogo_balanced_200

🔧 Configuration:
  Test samples: 200
  Min quota for rare classes: 40% (80 samples)
  Cap for abundant classes: 70% of ideal distribution

🔄 Step 1: Computing presence matrix for train split...
Loaded cached presence matrix from presence_matrix_train.npy

📊 Class distribution in train set:
  Go                                652 samples ( 91.1%)
  NoGo                              650 samples ( 90.8%)

🔄 Step 2: Selecting 200 balanced test samples...
  Configuration:
    - Rare class boost: top 1 rarest classes
    - Min quota for rare: 80 samples (40%)
    - Cap for abundant

## Summary Comparison

In [6]:
# Compare results across datasets
print("\n" + "="*70)
print("📊 Summary: 200 Balanced Test Samples per Dataset")
print("="*70)

all_results = [
    ("cholecseg8k_local", results_cholecseg8k),
    ("cholec_organs", results_organs),
    ("cholec_gonogo", results_gonogo)
]

print(f"\n{'Dataset':<20} {'Classes':<10} {'Train Size':<12} {'Test Size':<12} {'Balance Improvement'}")
print("-" * 80)

for dataset_name, res in all_results:
    metrics = res['balance_comparison']['metrics']
    improvement = metrics['balance_improvement_pct']
    
    # Color code based on improvement
    if improvement > 20:
        color = "🟢"
    elif improvement > 10:
        color = "🟡"
    elif improvement > 0:
        color = "🟠"
    else:
        color = "🔴"
    
    print(f"{dataset_name:<20} {len(res['test_indices']):<10} "
          f"{res['presence_matrix_shape'][0]:<12} {len(res['test_indices']):<12} "
          f"{color} {improvement:+6.1f}%")

print("\n✅ All datasets now have 200 balanced test samples!")
print("📁 Summaries saved in each dataset's output directory as 'pipeline_summary.json'")


📊 Summary: 200 Balanced Test Samples per Dataset

Dataset              Classes    Train Size   Test Size    Balance Improvement
--------------------------------------------------------------------------------
cholecseg8k_local    200        5760         200          🟢  +29.4%
cholec_organs        140        716          140          🟢 +100.0%
cholec_gonogo        151        716          151          🟢 +100.0%

✅ All datasets now have 200 balanced test samples!
📁 Summaries saved in each dataset's output directory as 'pipeline_summary.json'
