In [2]:
import cv2
import numpy as np
import torch

In [3]:
def extract_scenes_gpu_torch(video_path, threshold=0.5, resolution=(256, 256)):
    """
    Extract scene-based keyframes using PyTorch and GPU acceleration with improved GPU utilization
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    # Read video
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Pre-load a larger number of frames to CPU memory first
    preload_batch = 32  # Increased from 8
    all_frames = []
    all_indices = []
    
    # Read frames into CPU memory
    for frame_idx in range(frame_count):
        ret, frame = cap.read()
        if not ret:
            break
            
        # Resize and normalize on CPU
        frame = cv2.resize(frame, resolution)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        all_frames.append(frame)
        all_indices.append(frame_idx)
    
    cap.release()
    
    # Now process on GPU in larger batches
    keyframes = []
    frame_indices = []
    prev_frame_tensor = None
    
    # Process in larger GPU batches
    batch_size = 64  # Significantly increased batch size
    
    for i in range(0, len(all_frames), batch_size):
        batch_frames = all_frames[i:i+batch_size]
        batch_indices = all_indices[i:i+batch_size]
        
        # Convert batch to tensor - this will be a larger tensor now
        batch_tensor = torch.tensor(np.array(batch_frames), 
                                  dtype=torch.float32, 
                                  device=device) / 255.0
        
        # Force GPU synchronization to ensure utilization is measured
        torch.cuda.synchronize()
        
        # First frame is always a keyframe
        if prev_frame_tensor is None:
            keyframes.append(batch_frames[0])
            frame_indices.append(batch_indices[0])
            prev_frame_tensor = batch_tensor[0].unsqueeze(0)
            continue
        
        # Add some more intensive operations to better utilize GPU
        # Calculate differences between all frames in batch and previous frame
        expanded_prev = prev_frame_tensor.expand(len(batch_frames), -1, -1, -1)
        
        # This operation should be more intensive for the GPU
        diffs = torch.norm(batch_tensor - expanded_prev, dim=(1,2,3))
        
        # Force synchronization again
        torch.cuda.synchronize()
        
        # Process results
        for j in range(len(batch_frames)):
            if diffs[j].item() > threshold:
                keyframes.append(batch_frames[j])
                frame_indices.append(batch_indices[j])
            
        # Update previous frame
        prev_frame_tensor = batch_tensor[-1].unsqueeze(0)
    
    return keyframes, frame_indices

In [4]:
extract_scenes_gpu_torch('E:/Multimedia/0/1430100-main-compressed.mp4')

RuntimeError: linalg.matrix_norm: dim must be a 2-tuple. Got 1 2 3

In [1]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"PyTorch CUDA version: {torch.version.cuda}")

CUDA available: True
PyTorch CUDA version: 12.6
