# OpenAI Badminton Analysis - Batch Processing

This notebook demonstrates batch processing of multiple badminton videos for shot classification.
It processes entire directories of pose data and generates comprehensive analysis reports.

In [None]:
#!pip install openai

from openai import OpenAI
import sys
import os
import csv
import json
from pathlib import Path
import time
from tqdm import tqdm

# Add the badminton package to the path
sys.path.insert(0, '/Users/chanakyd/work/vdark/badminton')

# Import prompt modules
import badminton.llm_analysis.bd_prompt as bd_prompt
import badminton.llm_analysis.shot_classification_prompt as scp

# Import VideoPoseDataset
from badminton.data.video_pose_dataset import VideoPoseDataset

# Import utility functions
from badminton.utilities.coco_keypoints import create_keypoints_dict

# Import analysis components
from badminton.data.pose_data_loader import PoseDataLoader
from badminton.features.pose_feature_extractor import PoseFeatureExtractor
from badminton.analysis.shot_descriptor import ShotDescriptor

## Setup OpenAI Client

Configure the OpenAI client for batch processing.

In [None]:
# OpenAI client setup.
# The key is read from the OPENAI_API_KEY environment variable so the real
# secret never has to be written into (and committed with) the notebook.
# The placeholder is only a fallback: export the variable or replace the
# placeholder before enabling real API calls.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here")
)

# Model configuration
model = "gpt-4"    # model name passed to client.chat.completions.create
max_tokens = 500   # passed as max_tokens on every classification request

print(f"OpenAI client configured with model: {model}")

## Batch Processing Configuration

Set up directories and parameters for batch processing.

In [None]:
# Define data directories (relative to the current working directory)
data_root = Path("VB_DATA/poses")
output_dir = Path("output/batch_analysis")
output_dir.mkdir(parents=True, exist_ok=True)

# Get all shot type directories: every sub-directory of data_root is treated
# as one shot type. A missing data_root produces an empty list plus a warning
# instead of an unhandled FileNotFoundError, and the listing is sorted because
# iterdir() yields entries in arbitrary order.
if data_root.is_dir():
    shot_directories = sorted(d for d in data_root.iterdir() if d.is_dir())
else:
    print(f"Warning: data directory not found: {data_root.resolve()}")
    shot_directories = []

print(f"Found {len(shot_directories)} shot type directories:")
for shot_dir in shot_directories:
    csv_files = list(shot_dir.glob("*.csv"))
    print(f"  {shot_dir.name}: {len(csv_files)} files")

## Processing Functions

Define functions for batch processing and analysis.

In [None]:
def process_single_file(csv_path, video_path=None, player='green'):
    """Process a single pose file and generate a shot description.

    Args:
        csv_path: Path to the pose CSV file; converted with ``str()`` before
            being handed to ``VideoPoseDataset``.
        video_path: Optional path to the matching video, forwarded unchanged.
        player: Player identifier forwarded to the dataset's description and
            analysis methods. Defaults to ``'green'``.

    Returns:
        dict: On success ``{'success': True, 'description': ...,
        'analysis': ..., 'frame_count': ...}``. On any failure
        ``{'success': False, 'error': '<ExceptionType>: <message>'}``.
    """
    try:
        vpd = VideoPoseDataset(poses_path=str(csv_path), video_path=video_path)
        return {
            'success': True,
            'description': vpd.get_shot_description_for_player(player=player),
            'analysis': vpd.analyze_shot_pattern(player=player),
            'frame_count': len(vpd),
        }
    except Exception as e:
        # Deliberate catch-all: one unreadable file must not abort the whole
        # batch. The exception type is included because str(e) alone can be
        # empty or ambiguous (e.g. a KeyError renders as just the key).
        return {
            'success': False,
            'error': f"{type(e).__name__}: {e}",
        }

def classify_with_openai(shot_description, file_info):
    """Classify a shot description using the OpenAI chat completions API.

    Relies on the module-level ``client``, ``model``, ``max_tokens`` and the
    ``scp`` prompt module.

    Args:
        shot_description: Text appended to the classification prompt.
        file_info: Metadata about the source file. Accepted for interface
            compatibility; not used by the current implementation.

    Returns:
        dict: On success ``{'success': True, 'classification': <reply text>,
        'tokens_used': <int or None>}``. On any failure
        ``{'success': False, 'error': '<ExceptionType>: <message>'}``.
    """
    try:
        # Generate prompt
        prompt = scp.SC_BASE_PROMPT + scp.SC_INPUT_PROMPT + shot_description

        # Make API call
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )

        # ``usage`` can be present but None; a bare hasattr() check would then
        # raise on ``.total_tokens`` and turn a successful classification
        # into a reported failure.
        usage = getattr(response, 'usage', None)
        return {
            'success': True,
            'classification': response.choices[0].message.content,
            'tokens_used': usage.total_tokens if usage is not None else None,
        }
    except Exception as e:
        # Deliberate catch-all so one failed request does not stop the batch;
        # the exception type is kept to make the error log actionable.
        return {
            'success': False,
            'error': f"{type(e).__name__}: {e}",
        }

print("Processing functions defined.")

## Batch Processing Execution

Process all files and generate classifications.

In [None]:
# Initialize results storage
all_results = []
processing_stats = {
    'total_files': 0,
    'successful_processing': 0,
    'successful_classification': 0,
    'total_tokens': 0,
    'errors': []
}

# Process each shot type directory
for shot_dir in tqdm(shot_directories, desc="Processing shot types"):
    shot_type = shot_dir.name
    # Sorted so files are processed (and reported) in a reproducible order.
    csv_files = sorted(shot_dir.glob("*.csv"))

    print(f"\nProcessing {shot_type}: {len(csv_files)} files")

    for csv_file in tqdm(csv_files, desc=f"Files in {shot_type}", leave=False):
        processing_stats['total_files'] += 1

        # Find corresponding video file (same name, .mp4 suffix, same folder)
        video_file = csv_file.with_suffix('.mp4')
        video_path = str(video_file) if video_file.exists() else None

        # Process pose data
        pose_result = process_single_file(csv_file, video_path)

        if not pose_result['success']:
            # Files that fail pose processing are recorded in the error log
            # only; they do not get an entry in all_results.
            processing_stats['errors'].append({
                'file': str(csv_file),
                'stage': 'pose_processing',
                'error': pose_result['error']
            })
            continue

        processing_stats['successful_processing'] += 1

        # Classify with OpenAI (uncomment to enable API calls)
        # classification_result = classify_with_openai(
        #     pose_result['description'],
        #     {'file': str(csv_file), 'shot_type': shot_type}
        # )

        # For demo purposes, create mock classification
        classification_result = {
            'success': True,
            'classification': f'Mock classification for {shot_type}',
            'tokens_used': 150
        }

        # Normalise the token count to an int: a real API result may carry
        # None, and a failed result has no 'tokens_used' key at all. Storing
        # None here would break the numeric per-shot-type totals later on.
        tokens_used = classification_result.get('tokens_used') or 0

        if classification_result['success']:
            processing_stats['successful_classification'] += 1
            processing_stats['total_tokens'] += tokens_used
        else:
            processing_stats['errors'].append({
                'file': str(csv_file),
                'stage': 'classification',
                'error': classification_result.get('error', '')
            })

        # Store results
        all_results.append({
            'file': str(csv_file),
            'shot_type': shot_type,
            'frame_count': pose_result.get('frame_count', 0),
            'pose_processing': pose_result['success'],
            'classification_success': classification_result['success'],
            'classification': classification_result.get('classification', ''),
            'tokens_used': tokens_used,
            'analysis_summary': pose_result.get('analysis', {})
        })

        # Add small delay to respect API rate limits
        time.sleep(0.1)

print("\nBatch processing complete!")
print(f"Total files processed: {processing_stats['total_files']}")
print(f"Successful pose processing: {processing_stats['successful_processing']}")
print(f"Successful classifications: {processing_stats['successful_classification']}")
print(f"Total tokens used: {processing_stats['total_tokens']}")
print(f"Errors encountered: {len(processing_stats['errors'])}")

## Results Analysis

Analyze the batch processing results and generate reports.

In [None]:
# Analyze results by shot type: one counter dict per shot type, keyed by name.
shot_type_stats = {}
for result in all_results:
    shot_type = result['shot_type']
    stats = shot_type_stats.setdefault(shot_type, {
        'total_files': 0,
        'successful_classifications': 0,
        'total_frames': 0,
        'total_tokens': 0
    })

    stats['total_files'] += 1
    stats['total_frames'] += result['frame_count']
    # tokens_used may be None when the API response carried no usage data;
    # count that as 0 instead of raising TypeError.
    stats['total_tokens'] += result['tokens_used'] or 0

    if result['classification_success']:
        stats['successful_classifications'] += 1

# Display statistics
print("\n=== BATCH PROCESSING STATISTICS ===")
print(f"{'Shot Type':<25} {'Files':<8} {'Success':<8} {'Frames':<8} {'Tokens':<8}")
print("-" * 65)

for shot_type, stats in shot_type_stats.items():
    # total_files is at least 1 for every entry, so the division is safe.
    success_rate = stats['successful_classifications'] / stats['total_files'] * 100
    print(f"{shot_type:<25} {stats['total_files']:<8} {success_rate:<7.1f}% {stats['total_frames']:<8} {stats['total_tokens']:<8}")

# Calculate overall statistics
total_files = sum(s['total_files'] for s in shot_type_stats.values())
total_success = sum(s['successful_classifications'] for s in shot_type_stats.values())
total_frames = sum(s['total_frames'] for s in shot_type_stats.values())
overall_success_rate = total_success / total_files * 100 if total_files > 0 else 0

print("-" * 65)
# The TOTAL row prints all four value columns so that the token total lines
# up under "Tokens" rather than under "Frames".
print(f"{'TOTAL':<25} {total_files:<8} {overall_success_rate:<7.1f}% {total_frames:<8} {processing_stats['total_tokens']:<8}")

## Save Results

Save the batch processing results to files for further analysis.

In [None]:
# All output files are written with an explicit UTF-8 encoding so the result
# does not depend on the platform's locale default (file paths may contain
# non-ASCII characters).

# Save detailed results to JSON
# NOTE(review): json.dump raises TypeError if 'analysis_summary' holds values
# that are not JSON-serialisable - confirm what analyze_shot_pattern returns.
results_file = output_dir / "batch_results.json"
with open(results_file, 'w', encoding='utf-8') as f:
    json.dump({
        'processing_stats': processing_stats,
        'shot_type_stats': shot_type_stats,
        'detailed_results': all_results
    }, f, indent=2)

# Save summary CSV: the column list is defined once and drives both the
# header row and the per-result rows, so the two cannot drift apart.
summary_file = output_dir / "batch_summary.csv"
summary_columns = ['file', 'shot_type', 'frame_count', 'classification_success', 'tokens_used']
with open(summary_file, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(summary_columns)
    writer.writerows(
        [row[column] for column in summary_columns] for row in all_results
    )

# Save error log if there are errors
if processing_stats['errors']:
    error_file = output_dir / "errors.json"
    with open(error_file, 'w', encoding='utf-8') as f:
        json.dump(processing_stats['errors'], f, indent=2)
    print(f"\nErrors saved to: {error_file}")

print("\nResults saved to:")
print(f"  Detailed results: {results_file}")
print(f"  Summary CSV: {summary_file}")
print("\nBatch processing complete!")

## Usage Notes

### Configuration
- **API Key**: Add your OpenAI API key in the setup section
- **Model**: Configure the model (gpt-4, gpt-3.5-turbo, etc.)
- **Rate Limits**: Adjust the delay between API calls (the `time.sleep(0.1)` at the end of the per-file loop) to match your account's rate limits

### Output Files
- **batch_results.json**: Complete results with all processing details
- **batch_summary.csv**: Summary table for easy analysis
- **errors.json**: Error log for debugging failed processing

### Customization
- Modify `process_single_file()` to extract different features
- Update `classify_with_openai()` to use different prompts
- Add additional analysis metrics as needed

### Performance
- Processing time depends on file count and API response times
- Monitor token usage to manage costs
- Use progress bars to track processing status