In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/car-accident/pede_acci.mp4
/kaggle/input/car-accident/caps_van.mp4


In [2]:
import cv2
import torch
from PIL import Image
import numpy as np
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from tqdm.notebook import tqdm
import torchvision.transforms as transforms
import os
from datetime import timedelta

def process_video_gpu(video_path, confidence_threshold=0.5, batch_size=4, output_dir='accident_frames'):
    """
    Process a video file for accident detection using GPU acceleration and save accident frames.
    
    Args:
        video_path (str): Path to the video file
        confidence_threshold (float): Minimum confidence score to consider a detection
        batch_size (int): Number of frames to process simultaneously
        output_dir (str): Directory to save detected accident frames
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Check GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Initialize the model and processor
    processor = AutoImageProcessor.from_pretrained("hilmantm/detr-traffic-accident-detection")
    model = AutoModelForObjectDetection.from_pretrained("hilmantm/detr-traffic-accident-detection")
    model = model.to(device)
    model.eval()
    
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Error opening video file")
    
    # Get video properties
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Create video writer for output
    output_path = video_path.rsplit('.', 1)[0] + '_detected.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    # Create progress bar
    pbar = tqdm(total=total_frames, desc="Processing frames")
    
    detections_log = []
    frames_batch = []
    original_frames = []
    frame_indices = []  # Keep track of frame indices
    
    def process_batch(frames_batch, original_frames, frame_indices):
        # Prepare batch inputs
        inputs = processor(images=frames_batch, return_tensors="pt")
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Convert outputs to CPU for post-processing
        results = processor.post_process_object_detection(
            outputs, 
            threshold=confidence_threshold,
            target_sizes=[(height, width) for _ in range(len(frames_batch))]
        )
        
        # Process each frame's results
        for idx, (result, original_frame, frame_idx) in enumerate(zip(results, original_frames, frame_indices)):
            frame_detections = []
            accident_detected = False
            
            for score, label, box in zip(result["scores"], result["labels"], result["boxes"]):
                box = [int(i) for i in box.tolist()]
                label_name = model.config.id2label[label.item()]
                score_val = score.item()
                
                if score_val >= confidence_threshold:
                    frame_detections.append({
                        'label': label_name,
                        'score': score_val,
                        'box': {'xmin': box[0], 'ymin': box[1], 'xmax': box[2], 'ymax': box[3]}
                    })
                    
                    # Draw bounding box
                    cv2.rectangle(
                        original_frame,
                        (box[0], box[1]),
                        (box[2], box[3]),
                        (0, 255, 0),
                        2
                    )
                    
                    # Add label
                    label_text = f"{label_name}: {score_val:.2f}"
                    cv2.putText(
                        original_frame,
                        label_text,
                        (box[0], box[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 0),
                        2
                    )
                    
                    # Check if this is an accident detection
                    if 'accident' in label_name.lower():
                        accident_detected = True
            
            if frame_detections:
                timestamp = frame_idx / fps
                time_str = str(timedelta(seconds=int(timestamp)))
                
                detections_log.append({
                    'frame': frame_idx,
                    'timestamp': timestamp,
                    'time_str': time_str,
                    'detections': frame_detections
                })
                
                # Save frame if accident is detected
                if accident_detected:
                    frame_filename = os.path.join(
                        output_dir, 
                        f'accident_frame_{frame_idx}_time_{time_str.replace(":", "-")}.jpg'
                    )
                    cv2.imwrite(frame_filename, original_frame)
            
            # Write the processed frame
            out.write(original_frame)
            pbar.update(1)
        
        return detections_log
    
    # Process video in batches
    current_frame_idx = 0
    while cap.isOpened():
        batch_count = 0
        frames_batch = []
        original_frames = []
        frame_indices = []
        
        while batch_count < batch_size:
            ret, frame = cap.read()
            if not ret:
                break
                
            # Convert frame to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames_batch.append(frame_rgb)
            original_frames.append(frame)
            frame_indices.append(current_frame_idx)
            
            current_frame_idx += 1
            batch_count += 1
        
        if not frames_batch:
            break
            
        detections_log = process_batch(frames_batch, original_frames, frame_indices)
    
    # Clean up
    cap.release()
    out.release()
    pbar.close()
    
    # Print summary
    print(f"\nProcessing complete. Output saved to {output_path}")
    print(f"Accident frames saved to directory: {output_dir}")
    print(f"\nDetection Summary:")
    accident_frames = [log for log in detections_log if any('accident' in det['label'].lower() for det in log['detections'])]
    print(f"Total frames with accidents detected: {len(accident_frames)}")
    
    if accident_frames:
        print("\nAccident detections:")
        for log in accident_frames:
            print(f"\nFrame {log['frame']} (Timestamp: {log['time_str']}):")
            for det in log['detections']:
                if 'accident' in det['label'].lower():
                    print(f"- {det['label']} (confidence: {det['score']:.2f})")
    
    return detections_log, accident_frames



In [4]:
# Example usage
if __name__ == "__main__":
    video_path = "/kaggle/input/car-accident/caps_van.mp4"
    detections, accident_frames = process_video_gpu(
        video_path, 
        confidence_threshold=0.975, 
        batch_size=4,
        output_dir='accident_frames'
    )

Using device: cuda


Processing frames:   0%|          | 0/188 [00:00<?, ?it/s]


Processing complete. Output saved to /kaggle/input/car-accident/pede_acci_detected.mp4
Accident frames saved to directory: accident_frames

Detection Summary:
Total frames with accidents detected: 1

Accident detections:

Frame 187 (Timestamp: 0:00:06):
- accident (confidence: 0.57)
- accident (confidence: 0.54)


In [2]:
import cv2
import torch
from PIL import Image
import numpy as np
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from tqdm.notebook import tqdm
import torchvision.transforms as transforms
import os
from datetime import timedelta

def process_video_gpu(video_path, confidence_threshold=0.5, batch_size=4, output_dir='accident_frames'):
    """
    Process a video file for accident detection using GPU acceleration and save accident frames.
    Also saves cropped images of the first accident detection.
    
    Args:
        video_path (str): Path to the video file
        confidence_threshold (float): Minimum confidence score to consider a detection
        batch_size (int): Number of frames to process simultaneously
        output_dir (str): Directory to save detected accident frames
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Check GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Initialize the model and processor
    processor = AutoImageProcessor.from_pretrained("hilmantm/detr-traffic-accident-detection")
    model = AutoModelForObjectDetection.from_pretrained("hilmantm/detr-traffic-accident-detection")
    model = model.to(device)
    model.eval()
    
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Error opening video file")
    
    # Get video properties
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Create video writer for output
    output_path = video_path.rsplit('.', 1)[0] + '_detected.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    # Create progress bar
    pbar = tqdm(total=total_frames, desc="Processing frames")
    
    detections_log = []
    frames_batch = []
    original_frames = []
    frame_indices = []
    first_accident_saved = False  # Flag to track if we've saved the first accident crop
    
    def process_batch(frames_batch, original_frames, frame_indices, first_accident_saved):
        # Prepare batch inputs
        inputs = processor(images=frames_batch, return_tensors="pt")
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Convert outputs to CPU for post-processing
        results = processor.post_process_object_detection(
            outputs, 
            threshold=confidence_threshold,
            target_sizes=[(height, width) for _ in range(len(frames_batch))]
        )
        
        # Process each frame's results
        for idx, (result, original_frame, frame_idx) in enumerate(zip(results, original_frames, frame_indices)):
            frame_detections = []
            accident_detected = False
            
            for score, label, box in zip(result["scores"], result["labels"], result["boxes"]):
                box = [int(i) for i in box.tolist()]
                label_name = model.config.id2label[label.item()]
                score_val = score.item()
                
                if score_val >= confidence_threshold:
                    frame_detections.append({
                        'label': label_name,
                        'score': score_val,
                        'box': {'xmin': box[0], 'ymin': box[1], 'xmax': box[2], 'ymax': box[3]}
                    })
                    
                    # Draw bounding box
                    cv2.rectangle(
                        original_frame,
                        (box[0], box[1]),
                        (box[2], box[3]),
                        (0, 255, 0),
                        2
                    )
                    
                    # Add label
                    label_text = f"{label_name}: {score_val:.2f}"
                    cv2.putText(
                        original_frame,
                        label_text,
                        (box[0], box[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 0),
                        2
                    )
                    
                    # Check if this is an accident detection
                    if 'accident' in label_name.lower():
                        accident_detected = True
                        
                        # Save the first accident detection crop if we haven't already
                        if not first_accident_saved:
                            # Extract the region inside the bounding box
                            cropped_frame = original_frame[box[1]:box[3], box[0]:box[2]]
                            
                            # Save the cropped image
                            crop_filename = os.path.join(
                                output_dir,
                                f'croped_frame_{frame_idx}.jpg'
                            )
                            cv2.imwrite(crop_filename, cropped_frame)
                            first_accident_saved = True
                            print(f"\nCropped image saved to: {crop_filename}")
            
            if frame_detections:
                timestamp = frame_idx / fps
                time_str = str(timedelta(seconds=int(timestamp)))
                
                detections_log.append({
                    'frame': frame_idx,
                    'timestamp': timestamp,
                    'time_str': time_str,
                    'detections': frame_detections
                })
                
                # Save frame if accident is detected
                if accident_detected:
                    frame_filename = os.path.join(
                        output_dir, 
                        f'accident_frame_{frame_idx}_time_{time_str.replace(":", "-")}.jpg'
                    )
                    cv2.imwrite(frame_filename, original_frame)
            
            # Write the processed frame
            out.write(original_frame)
            pbar.update(1)
        
        return detections_log, first_accident_saved
    
    # Process video in batches
    current_frame_idx = 0
    while cap.isOpened():
        batch_count = 0
        frames_batch = []
        original_frames = []
        frame_indices = []
        
        while batch_count < batch_size:
            ret, frame = cap.read()
            if not ret:
                break
                
            # Convert frame to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames_batch.append(frame_rgb)
            original_frames.append(frame)
            frame_indices.append(current_frame_idx)
            
            current_frame_idx += 1
            batch_count += 1
        
        if not frames_batch:
            break
            
        detections_log, first_accident_saved = process_batch(frames_batch, original_frames, frame_indices, first_accident_saved)
    
    # Clean up
    cap.release()
    out.release()
    pbar.close()
    
    # Print summary
    print(f"\nProcessing complete. Output saved to {output_path}")
    print(f"Accident frames saved to directory: {output_dir}")
    print(f"\nDetection Summary:")
    accident_frames = [log for log in detections_log if any('accident' in det['label'].lower() for det in log['detections'])]
    print(f"Total frames with accidents detected: {len(accident_frames)}")
    
    if accident_frames:
        print("\nAccident detections:")
        for log in accident_frames:
            print(f"\nFrame {log['frame']} (Timestamp: {log['time_str']}):")
            for det in log['detections']:
                if 'accident' in det['label'].lower():
                    print(f"- {det['label']} (confidence: {det['score']:.2f})")
    
    return detections_log, accident_frames

# Example usage
if __name__ == "__main__":
    video_path = "/kaggle/input/car-accident/caps_van.mp4"
    detections, accident_frames = process_video_gpu(
        video_path, 
        confidence_threshold=0.975, 
        batch_size=4,
        output_dir='accident_frames'
    )

Using device: cuda


preprocessor_config.json:   0%|          | 0.00/274 [00:00<?, ?B/s]

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/166M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Processing frames:   0%|          | 0/813 [00:00<?, ?it/s]


Cropped image saved to: accident_frames/croped_frame_230.jpg

Processing complete. Output saved to /kaggle/input/car-accident/caps_van_detected.mp4
Accident frames saved to directory: accident_frames

Detection Summary:
Total frames with accidents detected: 10

Accident detections:

Frame 230 (Timestamp: 0:00:09):
- accident (confidence: 0.98)

Frame 231 (Timestamp: 0:00:09):
- accident (confidence: 0.98)

Frame 248 (Timestamp: 0:00:09):
- accident (confidence: 0.99)

Frame 249 (Timestamp: 0:00:09):
- accident (confidence: 0.99)

Frame 250 (Timestamp: 0:00:10):
- accident (confidence: 0.99)

Frame 251 (Timestamp: 0:00:10):
- accident (confidence: 0.99)

Frame 269 (Timestamp: 0:00:10):
- accident (confidence: 0.98)

Frame 270 (Timestamp: 0:00:10):
- accident (confidence: 0.98)

Frame 271 (Timestamp: 0:00:10):
- accident (confidence: 0.98)

Frame 272 (Timestamp: 0:00:10):
- accident (confidence: 0.98)
