# Rename videos

In [None]:
import os

def rename_mp4_files(directory, label):
    """
    Renames all .mp4 files in the specified directory to follow the naming scheme:
    <id>_<label>.mp4, where id is a 5-digit number starting from 00001.

    IDs are assigned in lexicographic order of the current file names. The
    rename is done in two passes (current name -> unique temporary name ->
    final name) because ``os.rename`` may silently replace an existing
    destination; renaming directly could overwrite a file that is still
    waiting for its own new name. Files that already carry their target name
    are left untouched.

    :param directory: The path to the directory containing .mp4 files.
    :param label: The label to append to each file name.
    """
    import uuid  # local import: only needed to build collision-free temp names

    # Sort the files to maintain consistent numbering
    mp4_files = sorted(f for f in os.listdir(directory) if f.endswith(".mp4"))

    # Decide every target name up front so the two passes agree with each other
    plan = [
        (filename, f"{index:05d}_{label}.mp4")
        for index, filename in enumerate(mp4_files, start=1)
    ]

    # Pass 1: move every file that needs a new name out of the way
    token = uuid.uuid4().hex
    staged = {}
    for index, (filename, new_filename) in enumerate(plan, start=1):
        if filename == new_filename:
            continue
        temp_name = f".rename-{token}-{index:05d}.tmp"
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, temp_name),
        )
        staged[filename] = temp_name

    # Pass 2: every target name is now free (or already held by its rightful owner)
    for filename, new_filename in plan:
        if filename in staged:
            os.rename(
                os.path.join(directory, staged[filename]),
                os.path.join(directory, new_filename),
            )
        print(f"Renamed: {filename} -> {new_filename}")

    print("Renaming complete.")

# Rename every .mp4 in the "on" directory using the label "on".
# NOTE: this call runs as soon as the cell executes and renames files on disk.
rename_mp4_files("/workspaces/asl_detection/machine_learning/datasets/own_dataset/own_words/on", "on")


In [None]:
import os
import cv2
from collections import defaultdict

def count_frames(video_path):
    """
    Reports how many frames OpenCV says a video file contains.

    The number is taken from the capture's CAP_PROP_FRAME_COUNT property and
    converted to an int; this function does not read any frames itself.

    :param video_path: Path to the video file.
    :return: The frame count as an integer.
    """
    capture = cv2.VideoCapture(video_path)
    frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    # Release the capture handle before handing the result back
    capture.release()
    return frame_total

def rename_videos_with_frames(directory):
    """
    Renames all .mp4 files in the specified directory to follow the naming scheme:
    <video_id>_<label>_<frame_count>.mp4

    The label of a file is everything after the first underscore in its
    current name, minus the trailing ".mp4". Files without an underscore are
    skipped. Videos are ordered by label (alphabetically) and then by current
    file name, and the 5-digit video IDs run sequentially across ALL videos
    in that order (they do not restart for each label).

    Frame counts are read before anything is renamed, and the rename is done
    in two passes (current name -> unique temporary name -> final name)
    because ``os.rename`` may silently replace an existing destination;
    renaming directly could overwrite a file that is still waiting for its
    own new name.

    NOTE: running this again on files that already follow the scheme folds the
    old frame count into the label (e.g. label "on_45"), because the label is
    taken verbatim from the current name.

    :param directory: The path to the directory containing .mp4 files.
    """
    import uuid  # local import: only needed to build collision-free temp names

    suffix = ".mp4"

    # Collect (label, filename) pairs for every .mp4 that carries a label
    labelled = []
    for file in os.listdir(directory):
        if not file.endswith(suffix):
            continue
        # Strip only the trailing extension, then split at the first underscore
        _, separator, label = file[: -len(suffix)].partition("_")
        if not separator:
            continue
        labelled.append((label, file))

    # Labels alphabetically, then file names within each label
    labelled.sort()

    # Decide every target name up front, while all files are still in place
    plan = []
    for index, (label, filename) in enumerate(labelled, start=1):
        frame_count = count_frames(os.path.join(directory, filename))
        plan.append((filename, f"{index:05d}_{label}_{frame_count}.mp4"))

    # Pass 1: move every file that needs a new name out of the way
    token = uuid.uuid4().hex
    staged = {}
    for index, (filename, new_filename) in enumerate(plan, start=1):
        if filename == new_filename:
            continue
        temp_name = f".rename-{token}-{index:05d}.tmp"
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, temp_name),
        )
        staged[filename] = temp_name

    # Pass 2: every target name is now free (or already held by its rightful owner)
    for filename, new_filename in plan:
        if filename in staged:
            os.rename(
                os.path.join(directory, staged[filename]),
                os.path.join(directory, new_filename),
            )
        print(f"Renamed: {filename} -> {new_filename}")

    print("Renaming complete.")

# Rename every labelled .mp4 directly inside the own_words directory.
# NOTE: this call runs as soon as the cell executes and renames files on disk.
rename_videos_with_frames("/workspaces/asl_detection/machine_learning/datasets/own_dataset/own_words")

# Get max frames and video count

In [None]:
import os
import cv2

def get_video_statistics(directory, frame_threshold=102):
    """
    Prints the total number of .mp4 videos in the directory, the maximum frame count among them,
    and lists videos that have more than ``frame_threshold`` frames.

    Frame counts come from OpenCV's CAP_PROP_FRAME_COUNT property. Files are
    processed in sorted name order so the printed listing is deterministic.

    :param directory: The path to the directory containing .mp4 files.
    :param frame_threshold: Videos whose frame count is strictly greater than
        this value are listed. Defaults to 102.
    """
    mp4_files = sorted(f for f in os.listdir(directory) if f.endswith(".mp4"))

    total_videos = len(mp4_files)
    max_frames = 0
    videos_above_threshold = []

    for file in mp4_files:
        cap = cv2.VideoCapture(os.path.join(directory, file))
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always free the capture handle, even if reading the property fails
            cap.release()

        max_frames = max(max_frames, frame_count)

        if frame_count > frame_threshold:
            videos_above_threshold.append((file, frame_count))

    print(f"Total videos: {total_videos}")
    print(f"Maximum frames in a single video: {max_frames}")

    if videos_above_threshold:
        print(f"Videos with more than {frame_threshold} frames:")
        for video, frames in videos_above_threshold:
            print(f"{video}: {frames} frames")
    else:
        print(f"No videos with more than {frame_threshold} frames found.")
    
# Print the statistics for the .mp4 files directly inside the own_words directory.
get_video_statistics("/workspaces/asl_detection/machine_learning/datasets/own_dataset/own_words")