# Remove unavailable videos and change names (the renaming step is commented out)

In [None]:
import json
import os
import cv2
from pathlib import Path

# Folder holding the raw WLASL clips and the annotation file describing them.
video_folder = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/raw_videos"
json_file = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/WLASL_v0.3.json"

with open(json_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Map each video id (stored as a string) to every gloss whose instances list it.
video_labels = {}
for entry in data:
    gloss = entry["gloss"]
    for instance in entry["instances"]:
        video_labels.setdefault(str(instance["video_id"]), []).append(gloss)

# From here on the folder is handled as a Path object.
video_folder = Path(video_folder)
if not video_folder.exists():
    raise FileNotFoundError(f"Der Ordner {video_folder} wurde nicht gefunden.")

# Snapshot of the directory entries, taken before anything is deleted.
video_files = [*video_folder.glob("*")]

# Bookkeeping lists that are printed once the clean-up loop has finished.
deleted_videos, renamed_videos = [], []

def is_video_valid(video_path):
    """Return whether OpenCV can open ``video_path`` and read its first frame.

    Args:
        video_path: Location of the video file. It is converted with ``str()``
            before being passed to ``cv2.VideoCapture``.

    Returns:
        ``False`` if the capture cannot be opened; otherwise the success flag
        returned by the first ``read()`` call.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return False
        ret, _ = cap.read()
        return ret
    finally:
        # Release on every exit path, including the early return above, so
        # that no capture handle is left open while scanning many files.
        cap.release()

# Go through the directory snapshot (regular files only) and remove every
# file that is empty or whose first frame cannot be read.
for video_file in filter(Path.is_file, video_files):
    video_id = video_file.stem  # only consumed by the disabled renaming step below

    unusable = video_file.stat().st_size == 0 or not is_video_valid(video_file)
    if unusable:
        deleted_videos.append(video_file.name)
        video_file.unlink()
        continue

    # Renaming to "<video_id>_<labels>" is intentionally disabled for now:
    # labels = video_labels.get(video_id, [])
    # if labels:
    #     new_name = f"{video_id}_{'_'.join(labels)}{video_file.suffix}"
    #     new_path = video_file.parent / new_name
    #     os.rename(video_file, new_path)
    #     renamed_videos.append((video_file.name, new_name))

# Summary of what happened ("Gelöschte" = deleted, "Umbenannte" = renamed).
print("Gelöschte Videos:", deleted_videos)
print("Umbenannte Videos:", renamed_videos)


# Add frame count to video names

In [None]:
import cv2
from pathlib import Path

# Folder whose videos get their frame count appended to the file name.
video_folder = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/raw_videos"

# pathlib is used for the file-system checks because this cell imports Path
# but not os; relying on os here only works if an earlier cell imported it.
if not Path(video_folder).exists():
    raise FileNotFoundError(f"Der Ordner {video_folder} wurde nicht gefunden.")

# Plain file names (not full paths) of all regular files in the folder.
video_files = [entry.name for entry in Path(video_folder).iterdir() if entry.is_file()]

def get_frame_count(video_path):
    """Return the frame count that OpenCV reports for ``video_path``.

    Args:
        video_path: Location of the video file. It is converted with ``str()``
            before being passed to ``cv2.VideoCapture``.

    Returns:
        ``CAP_PROP_FRAME_COUNT`` truncated to ``int``, or ``None`` if the
        capture could not be opened. The value is taken as reported; no
        frames are decoded to verify it.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release on every exit path, including the early return above.
        cap.release()

# (old name, new name) pairs of every file renamed by this cell.
renamed_videos = []

# Path handles all file operations here because this cell imports pathlib
# but not os.
folder = Path(video_folder)

for video_file in video_files:
    video_path = folder / video_file
    video_id, ext = video_path.stem, video_path.suffix

    frame_count = get_frame_count(video_path)
    if frame_count is None:
        # OpenCV could not open the file; leave its name untouched.
        continue

    if video_id.endswith(f"_{frame_count}"):
        # The name already ends with this frame count, i.e. the cell has been
        # run before. Skipping avoids names such as "<id>_45_45.mp4".
        continue

    new_name = f"{video_id}_{frame_count}{ext}"
    video_path.rename(folder / new_name)
    renamed_videos.append((video_file, new_name))

# "Umbenannte Videos mit Frames" = videos renamed with their frame count.
print("Umbenannte Videos mit Frames:", renamed_videos)

# Information about videos

In [None]:
import os
from collections import Counter

# Folder containing the renamed videos whose labels are counted.
video_folder = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/raw_videos"

if not os.path.exists(video_folder):
    raise FileNotFoundError(f"Der Ordner {video_folder} wurde nicht gefunden.")

# Names of all regular files directly inside the folder.
video_files = [
    name
    for name in os.listdir(video_folder)
    if os.path.isfile(os.path.join(video_folder, name))
]

# Only names made of exactly three underscore-separated fields contribute;
# the middle field is taken as the label.
split_stems = (os.path.splitext(name)[0].split("_") for name in video_files)
label_counter = Counter(fields[1] for fields in split_stems if len(fields) == 3)

# Number of distinct labels, followed by the ten most frequent ones.
print(f"Anzahl der verschiedenen Labels: {len(label_counter)}")
print("Top 10 häufigste Labels:")
for label, count in label_counter.most_common(10):
    print(f"{label}: {count}")


In [None]:
# Group labels by their number of videos
from collections import Counter

# Histogram over the per-label counts: key = number of videos a label has,
# value = how many labels have exactly that many videos.
label_distribution = Counter(label_counter.values())

print("Anzahl der Labels mit bestimmter Anzahl von Videos:")
for num_videos in sorted(label_distribution):
    num_labels = label_distribution[num_videos]
    print(f"{num_labels} Labels haben {num_videos} Videos")

# Delete videos whose label has fewer than min_videos_per_label videos

In [None]:
import os
from collections import Counter

# Folder whose videos are filtered by label frequency
video_folder = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/raw_videos"

# Names of all regular files directly inside the folder
video_files = [
    name
    for name in os.listdir(video_folder)
    if os.path.isfile(os.path.join(video_folder, name))
]

# Parse every file name once: names with at least three underscore-separated
# fields are mapped to their second field, which is used as the label.
file_labels = {}
for video_file in video_files:
    fields = os.path.splitext(video_file)[0].split("_")
    if len(fields) >= 3:
        file_labels[video_file] = fields[1]

# Number of videos per label
label_counter = Counter(file_labels.values())

# Labels with fewer videos than this are removed entirely
min_videos_per_label = 8

# Delete every parsed file whose label is below the threshold
deleted_count = 0
for video_file, label in file_labels.items():
    if label_counter[label] >= min_videos_per_label:
        continue
    os.remove(os.path.join(video_folder, video_file))
    deleted_count += 1

print(f"{deleted_count} videos were deleted because their label had fewer than {min_videos_per_label} videos.")

---> start here

# Count videos per label

In [None]:
import os
import glob
from collections import defaultdict
import pandas as pd

# Source directories passed to get_video_stats below; judging by their names
# they hold the original recordings and the augmented copies respectively.
VIDEO_DIR = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/full_dataset"
AUGMENTED_DIR = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/augmented_dataset"

def get_video_stats(directories):
    """Count ``.mp4`` files per label and find the largest frame count.

    Each file name is split from the right on at most three underscores. The
    first three resulting fields are read as video id, label and frame count,
    so both ``<id>_<label>_<frames>.mp4`` and
    ``<id>_<label>_<frames>_<suffix>.mp4`` are understood. Names with fewer
    than three fields are ignored. Names whose frame field is not an integer
    (for example because the label itself contains an underscore) are skipped
    with a warning instead of aborting the whole scan.

    Args:
        directories: Iterable of directory paths to scan (non-recursively).

    Returns:
        A ``(video_counts, max_frames)`` tuple: a ``defaultdict(int)`` mapping
        label to number of videos, and the largest frame count seen (``0`` if
        no file was counted).
    """
    video_counts = defaultdict(int)
    max_frames = 0

    for directory in directories:
        for file in glob.glob(os.path.join(directory, "*.mp4")):
            filename = os.path.basename(file)
            parts = filename.rsplit("_", 3)
            if len(parts) < 3:
                continue

            _video_id, label, frames_ext = parts[:3]
            try:
                # The frame field still carries ".mp4" when it is the last one.
                frames = int(frames_ext.split(".")[0])
            except ValueError:
                print(f"Skipping {filename}: frame field {frames_ext!r} is not an integer")
                continue

            video_counts[label] += 1
            max_frames = max(max_frames, frames)

    return video_counts, max_frames

# Get video statistics from both original and augmented datasets
video_counts, max_frames = get_video_stats([VIDEO_DIR, AUGMENTED_DIR])

# Print each label with its video count, most frequent label first
for label, count in sorted(video_counts.items(), key=lambda x: x[1], reverse=True):
    print(f"{label}: {count}")

# Report the largest frame count once
print(f"Maximum number of frames in any video: {max_frames}")

# Normalize frames

In [None]:
import os
import cv2
import torch
import numpy as np

# Input directories read by the processing loop, and the single output
# directory that receives one .npy file per video.
VIDEO_DIRS = [
    "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/full_dataset",
    "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/augmented_dataset"
]
PROCESSED_DIR = "/home/haggenmueller/asl_detection/machine_learning/datasets/processed_npy"
# Create the output directory up front; exist_ok keeps re-runs from failing.
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Target clip length. The value is hard-coded here, not computed in this cell
# (presumably taken from the frame statistics gathered earlier; confirm).
MAX_FRAMES = 102  # Fixed length for all videos
# Passed to cv2.resize as the target size and also used to shape zero-filled
# placeholder tensors. NOTE(review): cv2.resize reads this as (width, height)
# while the tensor shapes use it as (H, W); equivalent only for square sizes.
FRAME_SIZE = (128, 128)  # Resize frames to this size

# Function to extract frames as Torch tensors
def extract_frames(video_path, device="cuda"):
    """Decode all frames of a video into one float32 tensor.

    Every frame is resized to ``FRAME_SIZE`` with ``cv2.resize``, converted to
    a float32 tensor on ``device`` and permuted from ``[H, W, C]`` to
    ``[C, H, W]``. Pixel values are kept as decoded: no scaling is applied and
    the channel order is whatever OpenCV delivers.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        device: Torch device on which the frame tensors are created.

    Returns:
        The frames stacked along a new first dimension. If the video cannot be
        opened or yields no frames, a warning is printed and a single all-zero
        frame of shape ``(1, 3, *FRAME_SIZE)`` is returned instead.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        if not cap.isOpened():
            print(f"⚠️ Warning: Could not open video: {video_path}")
            return torch.zeros((1, 3, *FRAME_SIZE), dtype=torch.float32, device=device)  # Dummy frame

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, FRAME_SIZE)  # Resize frames
            frame = torch.tensor(frame, dtype=torch.float32, device=device).permute(2, 0, 1)  # [H, W, C] → [C, H, W]
            frames.append(frame)
    finally:
        # Release on every exit path: the early return above as well as any
        # exception raised while resizing or converting a frame. Without this
        # a failing video would leave its capture handle open.
        cap.release()

    if not frames:
        print(f"⚠️ Warning: No frames extracted for {video_path}")
        return torch.zeros((1, 3, *FRAME_SIZE), dtype=torch.float32, device=device)  # If empty, return dummy frame

    return torch.stack(frames, dim=0)

# Function for padding with GPU support
def pad_frames(frames, target_length, device="cuda"):
    """Zero-pad or trim a clip so that it has exactly ``target_length`` frames.

    Args:
        frames: Tensor whose first dimension indexes the frames.
        target_length: Number of frames the result must have.
        device: Torch device on which the zero padding is created; it has to
            match the device of ``frames`` for the concatenation to succeed.

    Returns:
        ``frames`` followed by float32 zero frames if it is shorter than
        ``target_length``, otherwise its first ``target_length`` frames.
    """
    num_frames = frames.shape[0]
    if num_frames >= target_length:
        return frames[:target_length]  # Trim if too long (or already exact)

    # Take the per-frame shape from the input instead of assuming three
    # channels and a fixed frame size, so padding always matches the clip.
    padding = torch.zeros(
        (target_length - num_frames, *frames.shape[1:]),
        dtype=torch.float32,
        device=device,
    )
    return torch.cat((frames, padding), dim=0)

# Process all videos (Original + Augmented)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

for video_dir in VIDEO_DIRS:
    for video_file in os.listdir(video_dir):
        if not video_file.endswith(".mp4"):
            continue

        video_path = os.path.join(video_dir, video_file)

        try:
            # Decode the clip and bring it to exactly MAX_FRAMES frames
            frames = extract_frames(video_path, device=device)
            padded_frames = pad_frames(frames, MAX_FRAMES, device=device)

            # Swap only the trailing extension; str.replace would also rewrite
            # any other ".mp4" occurring earlier in the file name.
            npy_name = os.path.splitext(video_file)[0] + ".npy"
            # NOTE(review): the output name depends on the file name only, so
            # equally named files from different input directories end up at
            # the same path and the later one overwrites the earlier one.
            npy_path = os.path.join(PROCESSED_DIR, npy_name)

            # Save as `.npy` file (convert back to CPU)
            np.save(npy_path, padded_frames.cpu().numpy())

            print(f"✅ {video_file} processed and saved as {npy_path}")

        except Exception as e:
            # Best-effort batch run: report the failure and continue with the
            # next video instead of aborting the whole conversion.
            print(f"❌ Error processing {video_file}: {e}")

print("🚀 GPU-based normalization completed!")