# Shot Classification Accuracy

For each of the videos in the respective folders inside the dataset directory:
- backfoot_defence
- drive
- frontfoot_defence
- misc

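Find the accuracy of attribute generation: each video's folder name is the ground-truth class, and the shot type predicted by the attribute extractor is recorded alongside it.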

In [1]:
import os
import pandas as pd
from tqdm import tqdm
from VLM.helpers import VideoProcessor
from VLM.get_attributes import GeminiVideoAttributeExtractor, OpenAIVideoAttributeExtractor

# Shared helpers for the evaluation: `video_processor` turns a video file into
# frames and `attribute_extractor` derives attributes from those frames.
video_processor = VideoProcessor()
attribute_extractor = OpenAIVideoAttributeExtractor()

# Sub-folders of the dataset directory, one per shot class. The folder name
# doubles as the ground-truth label for every video stored inside it.
shot_directories = ['backfoot_defence', 'drive', 'frontfoot_defence', 'misc']

def _list_video_files(directory, extensions=('.mp4', '.avi', '.mov')):
    """Return the sorted names of video files located directly inside `directory`.

    The extension check is case-insensitive so files such as `clip.MP4` are
    picked up, and the names are sorted because `os.listdir` returns entries
    in arbitrary order.
    """
    return sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(extensions)
    )


def create_classification_results(base_dir, output_csv='shot_classification_results.csv'):
    """Classify every video under `base_dir` and log the predictions to a CSV.

    For each folder named in the module-level `shot_directories`, every video
    is passed through `video_processor` and `attribute_extractor`. The folder
    name is written as the ground-truth class and the extractor's `shot_type`
    as the predicted class. Rows are appended one video at a time, so rows
    written before an error or interruption stay in the file.

    Args:
        base_dir: Directory containing one sub-folder per shot class.
        output_csv: Path of the CSV to write. An existing file is overwritten.

    Returns:
        None. Results are written to `output_csv` and a preview is printed.
    """
    columns = ['video_path', 'ground_truth_class', 'predicted_shot_class']

    # Create/overwrite the CSV file with headers only
    pd.DataFrame(columns=columns).to_csv(output_csv, index=False)

    # Scan each class folder exactly once so the progress-bar total always
    # matches the list of videos that is actually processed.
    videos = []
    for shot_type in shot_directories:
        shot_dir = os.path.join(base_dir, shot_type)

        # isdir (not exists) so a stray regular file with a class name is
        # reported as missing instead of crashing os.listdir.
        if not os.path.isdir(shot_dir):
            print(f"Directory not found: {shot_dir}")
            continue

        videos.extend(
            (shot_type, video_file, os.path.join(shot_dir, video_file))
            for video_file in _list_video_files(shot_dir)
        )

    with tqdm(total=len(videos), desc="Processing videos") as pbar:
        for shot_type, video_file, video_path in videos:
            try:
                # Process video and get attributes
                frames = video_processor.process_video(video_path, n_frames=5)
                attributes = attribute_extractor.get_attributes(frames)

                result = pd.DataFrame([{
                    'video_path': video_path,
                    'ground_truth_class': shot_type.lower(),
                    'predicted_shot_class': attributes.shot_type.lower()
                }], columns=columns)

                # Append immediately rather than collecting rows in memory
                result.to_csv(output_csv, mode='a', header=False, index=False)

            except Exception as e:
                # Best effort: report the failing video and move on to the next
                print(f"\nError processing {video_file}: {e}")

            finally:
                # Advance the bar for failures too, so it still reaches 100%
                pbar.update(1)

    # Load and display summary at the end
    results_df = pd.read_csv(output_csv)
    print(f"\nResults saved to {output_csv}")
    print("\nFirst few entries:")
    print(results_df.head())

    # # Calculate and display accuracy
    # accuracy = (results_df['ground_truth_class'] == results_df['predicted_shot_class']).mean() * 100
    # print(f"\nOverall accuracy: {accuracy:.2f}%")

# Kick off the evaluation. Point `dataset_path` at the folder that holds the
# per-class sub-directories before running this cell.
dataset_path = "dataset"
create_classification_results(base_dir=dataset_path)

Processing videos:   5%|▍         | 14/282 [01:20<25:47,  5.77s/it]


KeyboardInterrupt: 