# Get all frames per video

In [None]:
import os
import cv2
import numpy as np

# Define paths
# base_path is the dataset root; raw_videos_path is the folder scanned (and renamed in) by this cell.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"

# Frame counting helper
def count_frames(video_path):
    """Return the frame count OpenCV reports for the video at ``video_path``.

    The value is read from ``cv2.CAP_PROP_FRAME_COUNT`` and converted to ``int``;
    the capture is released before returning.
    """
    capture = cv2.VideoCapture(video_path)
    total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.release()
    return total

# Store results as ("<video_id>_<label>", frame count) tuples
frame_counts = []

# Process the folder
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):  # Only MP4 videos are handled
        continue

    # Expected format: <video_id>_<label>_<frames>.mp4
    parts = filename.rsplit("_", 2)
    if len(parts) != 3 or not parts[2].replace(".mp4", "").isdigit():
        continue  # Names that do not follow the scheme are left untouched

    video_id, label = parts[0], parts[1]
    filepath = os.path.join(raw_videos_path, filename)
    actual_frames = count_frames(filepath)

    # Rename the file if the frame count stored in its name is incorrect
    new_filename = f"{video_id}_{label}_{actual_frames}.mp4"
    if filename != new_filename:
        new_filepath = os.path.join(raw_videos_path, new_filename)
        if os.path.exists(new_filepath):
            # os.rename would silently replace an existing file on POSIX,
            # so an already-present target is reported instead of overwritten.
            print(f"Skipped rename (target exists): {filename} -> {new_filename}")
        else:
            os.rename(filepath, new_filepath)
            print(f"Renamed: {filename} -> {new_filename}")

    frame_counts.append((f"{video_id}_{label}", actual_frames))

# Sort by frame count (descending)
frame_counts.sort(key=lambda x: x[1], reverse=True)

# Output results
for video, frames in frame_counts:
    print(f"{video} : {frames}")

# Check video availability

In [None]:
import os
import cv2
import numpy as np

# Dataset locations
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"

# Names of videos that fail the readability check
bad_videos = []

# Walk through the raw video folder
for filename in os.listdir(raw_videos_path):
    filepath = os.path.join(raw_videos_path, filename)
    if not filename.endswith(".mp4"):  # Only MP4 files are checked
        continue

    # Files under 1 KB are likely corrupted; they are flagged without being opened
    if os.path.getsize(filepath) < 1000:
        bad_videos.append(filename)
        continue

    cap = cv2.VideoCapture(filepath)
    if cap.isOpened():
        # Up to five reads; the first successful, non-empty frame marks the video as valid
        valid = False
        attempts = 0
        while attempts < 5 and not valid:
            ret, frame = cap.read()
            valid = bool(ret) and frame is not None
            attempts += 1
        if not valid:
            bad_videos.append(filename)
    else:
        bad_videos.append(filename)
    cap.release()

# Report the outcome
if not bad_videos:
    print("All videos are readable.")
else:
    print("\nVideos that cannot be opened:")
    for bad_video in bad_videos:
        print(bad_video)

# Count labels and videos per label

In [None]:
import os
from collections import defaultdict

# Dataset locations
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"

# Maps each label to the number of videos that carry it
label_counts = defaultdict(int)

# Scan the folder
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):  # Only MP4 files are counted
        continue
    # Expected format: <video_id>_<label>_...; the label is assumed to be the second field
    parts = filename.split("_")
    if len(parts) < 2:
        continue
    label = parts[1]
    label_counts[label] += 1

# Report the number of labels, then the per-label totals in descending order
print(f"Number of labels: {len(label_counts)}")
print("\nNumber of videos per label:")
ranked_labels = sorted(label_counts.items(), key=lambda item: item[1], reverse=True)
for label, count in ranked_labels:
    print(f"{label}: {count}")

# Get videos with more than 90 frames

In [None]:
import os

# Dataset locations
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"

# Filenames whose embedded frame count exceeds 90
videos_above_90 = []

# Scan the folder
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):  # Only MP4 files are considered
        continue
    # Expected format: <video_id>_<label>_<frames>.mp4; the frame count is the last field
    parts = filename.rsplit("_", 1)
    if len(parts) != 2:
        continue
    frames_text = parts[1].replace(".mp4", "")
    if not frames_text.isdigit():
        continue
    frame_count = int(frames_text)
    if frame_count > 90:
        videos_above_90.append(filename)

# Report the matches
if not videos_above_90:
    print("No videos with more than 90 frames found.")
else:
    print(f"Number of videos with more than 90 frames: {len(videos_above_90)}")
    print("\nVideos with more than 90 frames:")
    for video in videos_above_90:
        print(video)

# Count labels with fewer than 7 videos after excluding all videos with more than 90 frames

In [None]:
import os
from collections import defaultdict

# Dataset locations
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"

# Maps each label to the number of videos that carry it
label_counts = defaultdict(int)

# First pass: collect the filenames whose embedded frame count exceeds 90
videos_above_90 = set()
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):  # Only MP4 files are considered
        continue
    # Expected format: <video_id>_<label>_<frames>.mp4
    parts = filename.rsplit("_", 1)
    if len(parts) != 2:
        continue
    frames_text = parts[1].replace(".mp4", "")
    if not frames_text.isdigit():
        continue
    frame_count = int(frames_text)
    if frame_count > 90:
        videos_above_90.add(filename)

# Second pass: count labels over every MP4 that is not in the set above
for filename in os.listdir(raw_videos_path):
    if filename in videos_above_90 or not filename.endswith(".mp4"):
        continue
    # Expected format: <video_id>_<label>_...; the label is assumed to be the second field
    parts = filename.split("_")
    if len(parts) > 1:
        label = parts[1]
        label_counts[label] += 1

# Number of labels that are left with fewer than 7 videos
labels_less_than_7 = len([count for count in label_counts.values() if count < 7])

# Output the number of these labels
print(labels_less_than_7)

# Shorten videos with more than 90 frames (without end)

In [None]:
import os
import subprocess
import cv2
import mediapipe as mp

# Directories
# Raw WLASL videos are read from raw_videos_path; trimmed clips are written to
# shortened_videos_path, which is created here if it does not exist yet.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
os.makedirs(shortened_videos_path, exist_ok=True)

# Initialize MediaPipe Hands
# Configured for non-static (video) input, at most two hands, and a minimum
# detection confidence of 0.5.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Function to extract frame count from filename
def get_frame_count_from_filename(filename):
    """Return the frame count encoded at the end of ``filename``.

    Expected format: ``<video_id>_<label>_<frames>.mp4``. The text after the last
    underscore (with a trailing ``.mp4`` removed) is parsed as the frame count.

    Returns:
        The frame count as ``int``, or ``0`` when the name contains no underscore
        or the last field is not a plain decimal number.
    """
    suffix = ".mp4"
    # Strip the extension only at the very end, never inside the frame field
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    _, separator, frames_text = stem.rpartition("_")
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such as
    # superscript digits, which int() rejects with a ValueError.
    if separator and frames_text.isdecimal():
        return int(frames_text)
    return 0

# Process all videos with more than 90 frames (according to their filename)
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):
        continue

    frame_count = get_frame_count_from_filename(filename)
    if frame_count <= 90:
        print(f"Skipped: {filename} (only {frame_count} frames)")
        continue

    input_video = os.path.join(raw_videos_path, filename)
    output_video = os.path.join(shortened_videos_path, filename)

    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print(f"Error: Could not open video: {input_video}")
        cap.release()
        continue

    # Position (in seconds) reported by OpenCV when the first hand is found; None until then
    start_time = None

    # Detect first frame with hands.
    # A fresh detector is created for every video: with static_image_mode=False the
    # Hands instance keeps tracking state between frames, and that state must not
    # carry over from the previous video. The context manager also closes it again.
    try:
        with mp_hands.Hands(
            static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5
        ) as video_hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB input, OpenCV delivers BGR
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = video_hands.process(rgb_frame)

                if results.multi_hand_landmarks:
                    start_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # Convert to seconds
                    break
    finally:
        # Release the capture even if decoding or detection raised
        cap.release()

    if start_time is None:
        print(f"❌ No hands detected in {filename}, skipping...")
        continue

    # Use FFmpeg to trim video from first detected hand frame
    ffmpeg_command = [
        "ffmpeg", "-y",
        "-i", input_video,
        "-ss", str(start_time),  # Start from detected hand frame
        "-c:v", "libx264", "-preset", "fast", "-crf", "18",
        "-pix_fmt", "yuv420p",
        output_video
    ]

    print(f"✂️ Trimming {filename} from {start_time} seconds...")
    process = subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if process.returncode == 0:
        print(f"✅ Trimmed and saved: {output_video}")
    else:
        print(f"❌ Error processing {output_video}")
        print(process.stderr.decode())

print("Done! Videos have been trimmed based on first hand detection frame.")

# Shorten videos with more than 90 frames (with end)

In [None]:
import os
import subprocess
import cv2
import mediapipe as mp

# Directories
# Raw WLASL videos are read from raw_videos_path; trimmed clips are written to
# shortened_videos_path, which is created here if it does not exist yet.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
os.makedirs(shortened_videos_path, exist_ok=True)

# Initialize MediaPipe Hands
# Configured for non-static (video) input, at most two hands, and a minimum
# detection confidence of 0.5.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Function to extract frame count from filename
def get_frame_count_from_filename(filename):
    """Return the frame count encoded at the end of ``filename``.

    Expected format: ``<video_id>_<label>_<frames>.mp4``. The text after the last
    underscore (with a trailing ``.mp4`` removed) is parsed as the frame count.

    Returns:
        The frame count as ``int``, or ``0`` when the name contains no underscore
        or the last field is not a plain decimal number.
    """
    suffix = ".mp4"
    # Strip the extension only at the very end, never inside the frame field
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    _, separator, frames_text = stem.rpartition("_")
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such as
    # superscript digits, which int() rejects with a ValueError.
    if separator and frames_text.isdecimal():
        return int(frames_text)
    return 0

# Process all videos with more than 90 frames (according to their filename)
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):
        continue

    frame_count = get_frame_count_from_filename(filename)
    if frame_count <= 90:
        print(f"Skipped: {filename} (only {frame_count} frames)")
        continue

    input_video = os.path.join(raw_videos_path, filename)
    output_video = os.path.join(shortened_videos_path, filename)

    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print(f"Error: Could not open video: {input_video}")
        cap.release()
        continue

    # Positions (in seconds) reported by OpenCV for the first and the last frame
    # in which hands were detected; both stay None when no hand is ever found.
    start_time = None
    end_time = None

    # Detect first and last frame with hands.
    # A fresh detector is created for every video: with static_image_mode=False the
    # Hands instance keeps tracking state between frames, and that state must not
    # carry over from the previous video. The context manager also closes it again.
    try:
        with mp_hands.Hands(
            static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5
        ) as video_hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB input, OpenCV delivers BGR
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = video_hands.process(rgb_frame)
                current_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # Convert to seconds

                if results.multi_hand_landmarks:
                    if start_time is None:
                        start_time = current_time  # First frame where hands appear
                    end_time = current_time  # Last frame where hands were detected
    finally:
        # Release the capture even if decoding or detection raised
        cap.release()

    if start_time is None:
        print(f"❌ No hands detected in {filename}, skipping...")
        continue

    # end_time is always set together with start_time, so no fallback is needed.
    # Identical timestamps mean hands were seen in one frame only; trimming to that
    # range would yield a clip without usable content.
    if end_time <= start_time:
        print(f"❌ Hands detected in a single frame only in {filename}, skipping...")
        continue

    # Use FFmpeg to trim video from first detected hand frame to last detected hand frame
    ffmpeg_trim_command = [
        "ffmpeg", "-y",
        "-i", input_video,
        "-ss", str(start_time),  # Start from detected hand frame
        "-to", str(end_time),  # Stop when hands disappear
        "-c:v", "libx264", "-preset", "fast", "-crf", "18",
        "-pix_fmt", "yuv420p",
        output_video
    ]

    print(f"✂️ Trimming {filename} from {start_time} to {end_time} seconds...")
    process = subprocess.run(ffmpeg_trim_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if process.returncode == 0:
        print(f"✅ Trimmed and saved: {output_video} (from {start_time} to {end_time})")
    else:
        print(f"❌ Error processing {output_video}")
        print(process.stderr.decode())

print("Done! Videos have been trimmed from first to last detected hand frame.")

# Update the frame count in the filename of every shortened video

In [None]:
import os
import cv2
import numpy as np

# Define paths
# NOTE: despite its name, raw_videos_path points at the shortened-videos folder in this cell.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/own_dataset/shortened_videos"

# Frame counting helper
def count_frames(video_path):
    """Return the frame count OpenCV reports for the video at ``video_path``.

    The value is read from ``cv2.CAP_PROP_FRAME_COUNT`` and converted to ``int``;
    the capture is released before returning.
    """
    capture = cv2.VideoCapture(video_path)
    total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.release()
    return total

# Store results as ("<video_id>_<label>", frame count) tuples
frame_counts = []

# Process the folder
for filename in os.listdir(raw_videos_path):
    if not filename.endswith(".mp4"):  # Only MP4 videos are handled
        continue

    # Expected format: <video_id>_<label>_<frames>.mp4
    parts = filename.rsplit("_", 2)
    if len(parts) != 3 or not parts[2].replace(".mp4", "").isdigit():
        continue  # Names that do not follow the scheme are left untouched

    video_id, label = parts[0], parts[1]
    filepath = os.path.join(raw_videos_path, filename)
    actual_frames = count_frames(filepath)

    # Rename the file if the frame count stored in its name is incorrect
    new_filename = f"{video_id}_{label}_{actual_frames}.mp4"
    if filename != new_filename:
        new_filepath = os.path.join(raw_videos_path, new_filename)
        if os.path.exists(new_filepath):
            # os.rename would silently replace an existing file on POSIX,
            # so an already-present target is reported instead of overwritten.
            print(f"Skipped rename (target exists): {filename} -> {new_filename}")
        else:
            os.rename(filepath, new_filepath)
            print(f"Renamed: {filename} -> {new_filename}")

    frame_counts.append((f"{video_id}_{label}", actual_frames))

# Sort by frame count (descending)
frame_counts.sort(key=lambda x: x[1], reverse=True)

# Output results
for video, frames in frame_counts:
    print(f"{video} : {frames}")

# Get all duplicate videos

In [None]:
import os

# Folder locations
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"

# Video IDs (the text before the first underscore) of all shortened MP4 files
shortened_video_ids = set()
for shortened_name in os.listdir(shortened_videos_path):
    if shortened_name.endswith(".mp4"):
        shortened_video_ids.add(shortened_name.partition("_")[0])

# Raw MP4 files whose ID also occurs among the shortened videos
duplicate_count = 0
for raw_name in os.listdir(raw_videos_path):
    if not raw_name.endswith(".mp4"):
        continue
    if raw_name.partition("_")[0] in shortened_video_ids:
        duplicate_count += 1

# Print result
print(f"📊 Number of duplicate videos in raw_videos: {duplicate_count}")

# Remove duplicates from raw_videos

In [None]:
import os
import shutil

# Folder locations; the target folder for moved files is created on demand
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
unused_videos_path = f"{base_path}/wlasl/raw_videos_unused"
os.makedirs(unused_videos_path, exist_ok=True)

# Video IDs (the text before the first underscore) of all shortened MP4 files
shortened_video_ids = set()
for shortened_name in os.listdir(shortened_videos_path):
    if shortened_name.endswith(".mp4"):
        shortened_video_ids.add(shortened_name.partition("_")[0])

# Move every raw MP4 whose ID has a shortened counterpart into raw_videos_unused
duplicate_count = 0
for file in os.listdir(raw_videos_path):
    if not file.endswith(".mp4"):
        continue
    if file.partition("_")[0] not in shortened_video_ids:
        continue
    source = os.path.join(raw_videos_path, file)
    destination = os.path.join(unused_videos_path, file)
    shutil.move(source, destination)
    duplicate_count += 1

# Print result
print(f"📊 Moved {duplicate_count} duplicate videos from raw_videos to raw_videos_unused.")

--> start here

# Get video count per label and max frames

In [None]:
import os
import cv2
from collections import defaultdict

# Directories
# The three folders that process_videos() is run on further down in this cell.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
augmented_videos_path = f"{base_path}/own_dataset/videos_augmented"

# Count videos per label and determine maximum frames
# Both dictionaries are keyed by label and default to 0 for unseen labels.
label_counts = defaultdict(int)
label_max_frames = defaultdict(int)

def process_videos(folder, is_augmented=False):
    """Update the per-label video count and maximum frame count for ``folder``.

    Only ``.mp4`` files are considered. Each filename is split from the right on
    underscores (three splits for augmented videos, two otherwise) and the second
    piece is taken as the label. Results accumulate in the module-level
    ``label_counts`` and ``label_max_frames`` dictionaries.
    """
    split_limit = 3 if is_augmented else 2
    for video_file in os.listdir(folder):
        if not video_file.endswith(".mp4"):
            continue
        pieces = video_file.rsplit("_", split_limit)
        if len(pieces) < 3:
            continue
        label = pieces[1]  # The label is the second element

        # Frame count as reported by OpenCV for this file
        capture = cv2.VideoCapture(os.path.join(folder, video_file))
        n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()

        label_counts[label] += 1
        label_max_frames[label] = max(label_max_frames[label], n_frames)

# Gather the statistics from all three folders; only the last one holds augmented videos
for source_folder, augmented in (
    (raw_videos_path, False),
    (shortened_videos_path, False),
    (augmented_videos_path, True),
):
    process_videos(source_folder, is_augmented=augmented)

# Print one summary line per label, in sorted label order
for label in sorted(label_counts):
    print(f"Label: {label}, Videos: {label_counts[label]}, Maximum Frames: {label_max_frames[label]}")

# Rename augmented videos

In [None]:
import os
import cv2
from collections import defaultdict

# Directories
# The three folders that process_videos() is run on further down in this cell;
# files in augmented_videos_path are additionally renamed there.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
augmented_videos_path = f"{base_path}/own_dataset/videos_augmented"

# Count videos per label and determine maximum frames
# Both dictionaries are keyed by label and default to 0 for unseen labels.
label_counts = defaultdict(int)
label_max_frames = defaultdict(int)

def process_videos(folder, is_augmented=False):
    """Update the per-label video count and maximum frame count for ``folder``.

    Only ``.mp4`` files are considered. Each filename is split from the right on
    underscores (three splits for augmented videos, two otherwise) and the second
    piece is taken as the label. Results accumulate in the module-level
    ``label_counts`` and ``label_max_frames`` dictionaries.
    """
    split_limit = 3 if is_augmented else 2
    for video_file in os.listdir(folder):
        if not video_file.endswith(".mp4"):
            continue
        pieces = video_file.rsplit("_", split_limit)
        if len(pieces) < 3:
            continue
        label = pieces[1]  # The label is the second element

        # Frame count as reported by OpenCV for this file
        capture = cv2.VideoCapture(os.path.join(folder, video_file))
        n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()

        label_counts[label] += 1
        label_max_frames[label] = max(label_max_frames[label], n_frames)

# Count videos and find maximum frames in all three folders
process_videos(raw_videos_path)
process_videos(shortened_videos_path)
process_videos(augmented_videos_path, is_augmented=True)

# Verify and update filenames in augmented videos folder
for video_file in os.listdir(augmented_videos_path):
    if not video_file.endswith(".mp4"):
        continue

    # Remove the extension BEFORE splitting. Otherwise the last field would still
    # carry ".mp4" and the rebuilt name would end in ".mp4.mp4".
    stem = video_file[:-len(".mp4")]
    parts = stem.rsplit("_", 3)  # <video_id>_<label>_<frames>_<change>
    if len(parts) != 4:
        continue  # Names that do not follow the scheme are left untouched
    video_id, label, old_frames, change = parts

    # Frame count as reported by OpenCV for this file
    video_path = os.path.join(augmented_videos_path, video_file)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    # Rename file if frame count is incorrect
    if str(frame_count) != old_frames:
        new_filename = f"{video_id}_{label}_{frame_count}_{change}.mp4"
        new_filepath = os.path.join(augmented_videos_path, new_filename)
        os.rename(video_path, new_filepath)
        print(f"Renamed: {video_file} -> {new_filename}")

# Display results (statistics were collected before any file was renamed)
for label in sorted(label_counts.keys()):
    print(f"Label: {label}, Videos: {label_counts[label]}, Maximum Frames: {label_max_frames[label]}")

print("✅ All augmented videos have been verified and renamed if necessary.")

# Normalize frames

In [None]:
import os
import cv2
import torch
import numpy as np
from collections import defaultdict

# Directories
# Videos are read from the raw, shortened and augmented folders; the resulting
# .npy files are written to processed_folder, which is created if missing.
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
augmented_videos_path = f"{base_path}/own_dataset/videos_augmented"
processed_folder = f"{base_path}/own_dataset/videos_processed"
os.makedirs(processed_folder, exist_ok=True)

# Count videos per label and determine maximum frames
# max_frames is updated by process_videos() and later used as the common
# length every video is padded or trimmed to.
label_counts = defaultdict(int)
label_max_frames = defaultdict(int)
max_frames = 0

def process_videos(folder, is_augmented=False):
    """Update label statistics and the global maximum frame count for ``folder``.

    Only ``.mp4`` files are considered. Each filename is split from the right on
    underscores (three splits for augmented videos, two otherwise) and the second
    piece is taken as the label. Results accumulate in the module-level
    ``label_counts`` and ``label_max_frames`` dictionaries and in ``max_frames``.
    """
    global max_frames
    split_limit = 3 if is_augmented else 2
    for video_file in os.listdir(folder):
        if not video_file.endswith(".mp4"):
            continue
        pieces = video_file.rsplit("_", split_limit)
        if len(pieces) < 3:
            continue
        label = pieces[1]  # The label is the second element

        # Frame count as reported by OpenCV for this file
        capture = cv2.VideoCapture(os.path.join(folder, video_file))
        n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()

        label_counts[label] += 1
        label_max_frames[label] = max(label_max_frames[label], n_frames)
        max_frames = max(max_frames, n_frames)

# Gather the statistics from all three folders; only the last one holds augmented videos
for source_folder, augmented in (
    (raw_videos_path, False),
    (shortened_videos_path, False),
    (augmented_videos_path, True),
):
    process_videos(source_folder, is_augmented=augmented)

# Report the longest video found across all folders
print(f"📏 Maximum number of frames: {max_frames}")

# Function to extract frames as Torch tensors
def extract_frames(video_path, device="cuda"):
    """Decode every frame of a video into one float32 tensor.

    Each frame is resized to 224x224 and permuted from (H, W, C) to (C, H, W);
    no colour conversion is applied, so channels keep OpenCV's order.

    Args:
        video_path: Path of the video file to read.
        device: Torch device the frame tensors are created on.

    Returns:
        A tensor stacking all frames along dimension 0. If the video cannot be
        opened or yields no frame, a warning is printed and a single all-zero
        frame of shape (1, 3, 224, 224) is returned instead.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        if not cap.isOpened():
            print(f"⚠️ Warning: Could not open video: {video_path}")
            return torch.zeros((1, 3, 224, 224), dtype=torch.float32, device=device)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (224, 224))
            frame = torch.tensor(frame, dtype=torch.float32, device=device).permute(2, 0, 1)
            frames.append(frame)
    finally:
        # Release the capture on every path, including the early return above and
        # exceptions raised while resizing or moving a frame to the device.
        cap.release()

    if not frames:
        print(f"⚠️ Warning: No frames extracted for {video_path}")
        return torch.zeros((1, 3, 224, 224), dtype=torch.float32, device=device)

    return torch.stack(frames, dim=0)

# Function to pad frames
def pad_frames(frames, target_length, device="cuda"):
    """Pad with zero frames or trim so that ``frames`` has ``target_length`` frames.

    Args:
        frames: Tensor whose first dimension indexes frames.
        target_length: Number of frames the result must contain.
        device: Kept for backward compatibility. The padding is always allocated
            on the device of ``frames``, because ``torch.cat`` needs both inputs
            on the same device.

    Returns:
        ``frames[:target_length]`` if there are already enough frames, otherwise
        ``frames`` followed by float32 zero frames of the same per-frame shape.
    """
    num_frames = frames.shape[0]

    if num_frames >= target_length:
        return frames[:target_length]

    # Take the per-frame shape from the input instead of assuming (3, 224, 224)
    padding = torch.zeros(
        (target_length - num_frames, *frames.shape[1:]),
        dtype=torch.float32,
        device=frames.device,
    )
    return torch.cat((frames, padding), dim=0)

# Process videos: use the GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

source_folders = [shortened_videos_path, raw_videos_path, augmented_videos_path]
for folder in source_folders:
    mp4_files = [name for name in os.listdir(folder) if name.endswith(".mp4")]
    for video_file in mp4_files:
        video_path = os.path.join(folder, video_file)
        # The output keeps the video's name, with the extension swapped to .npy
        npy_path = os.path.join(processed_folder, video_file.replace(".mp4", ".npy"))

        try:
            # Decode the video, then pad or trim it to `max_frames` frames
            frames = extract_frames(video_path, device=device)
            padded_frames = pad_frames(frames, max_frames, device=device)

            # Move the result to host memory and store it as a NumPy array
            np.save(npy_path, padded_frames.cpu().numpy())

            print(f"✅ {video_file} processed and saved as {npy_path}")

        except Exception as e:
            # A failing video is reported and skipped so the remaining ones still run
            print(f"❌ Error processing {video_file}: {e}")

print("🚀 Normalization completed!")