# Dataset Maker

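- The source dataset is pulled using the information in info/dataset.txt
- The directory structure of the extracted zip archives may differ depending on the operating system, so adjust the paths in the configuration section if needed
- Output is a Train, Validation, and Test split; each split holds one folder per crime class plus a NormalVideos folder, all containing mp4 videos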

In [5]:
import os
import shutil
import random
from pathlib import Path

# ========== CONFIGURE THESE ==========
# Root folder of the extracted dataset download. Set the UCF_CRIME_DIR
# environment variable to point at a different location without editing this
# cell; when it is unset or empty, the original hard-coded path is used.
base_dir = Path(
    os.environ.get("UCF_CRIME_DIR")
    or r"C:\Users\rayaa\Downloads\ucf_crime_dataset"
)
# The anomaly videos are spread over four "Anomaly-Videos-Part-<i>" folders.
anomaly_parts = [base_dir / f"Anomaly-Videos-Part-{i}" for i in range(1, 5)]
normal_train_dir = base_dir / "Training-Normal-Videos-Part-1"
normal_test_dir = base_dir / "Testing_Normal_Videos" / "Testing_Normal_Videos_Anomaly"

# Output root
output_root = base_dir / "ucf_crime_v2"
train_root = output_root / "Train"
val_root = output_root / "Validation"
test_root = output_root / "Test"

# Create destination dirs (missing parents included; existing dirs are kept)
for split_root in (train_root, val_root, test_root):
    split_root.mkdir(parents=True, exist_ok=True)

def move_videos(src_videos, dest_dir):
    """Copy every listed video into ``dest_dir``; the originals stay in place.

    Despite the name (kept so existing calls keep working), nothing is moved:
    each file is duplicated with ``shutil.copy`` under its own file name.
    A file that already exists in ``dest_dir`` with the same name is
    overwritten.

    Args:
        src_videos: Iterable of source file paths (``str`` or ``Path``).
        dest_dir: Destination directory (``str`` or ``Path``). It is created,
            together with any missing parents, even when ``src_videos`` is
            empty.
    """
    dest_dir = Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)
    for vid in src_videos:
        # Normalise to Path so plain string paths work as well.
        vid = Path(vid)
        shutil.copy(vid, dest_dir / vid.name)

# ========== STEP 1: Merge and split anomaly videos ==========
print("🔄 Merging anomaly video parts...")

# crime class name -> every .mp4 found for that class across all anomaly parts
class_to_videos = {}

for part_dir in anomaly_parts:
    if not part_dir.exists():
        # A missing part folder is skipped rather than treated as an error.
        continue
    for class_path in part_dir.iterdir():
        if not class_path.is_dir():
            continue
        crime_class = class_path.name
        class_to_videos.setdefault(crime_class, []).extend(class_path.glob("*.mp4"))

# Split and move each crime class
print("🚚 Splitting and moving anomaly classes...")

# Reproducible split: directory listing order is arbitrary and an unseeded
# shuffle differs on every run. Because move_videos copies into the existing
# output folders, a rerun with a different shuffle could leave the same video
# in more than one split. Sorting the inputs and using a fixed-seed RNG makes
# every run produce the same assignment.
split_rng = random.Random(42)
for crime_class, videos in sorted(class_to_videos.items()):
    videos.sort(key=lambda p: (p.name, str(p)))
    split_rng.shuffle(videos)

    # 80% train / 10% validation; int() truncates, so the remainder
    # (roughly 10%) ends up in the test split.
    n_total = len(videos)
    n_train = int(0.8 * n_total)
    n_val = int(0.1 * n_total)

    train_videos = videos[:n_train]
    val_videos = videos[n_train:n_train + n_val]
    test_videos = videos[n_train + n_val:]

    move_videos(train_videos, train_root / crime_class)
    move_videos(val_videos, val_root / crime_class)
    move_videos(test_videos, test_root / crime_class)
    print(f"  ✅ {crime_class}: {len(train_videos)} train, {len(val_videos)} val, {len(test_videos)} test")

# ========== STEP 2: Split Normal Training videos ==========
print("🚚 Processing Training_Normal_Videos...")

# Sort before shuffling with a fixed-seed RNG so the train/validation
# assignment is identical on every run. An unseeded shuffle over an
# arbitrarily ordered glob would change between runs and, since files are
# copied into existing output folders, could put one video in both splits.
normal_train_videos = sorted(normal_train_dir.glob("*.mp4"), key=lambda p: p.name)
random.Random(42).shuffle(normal_train_videos)

# 90% train; the remaining videos become the validation set.
n_total = len(normal_train_videos)
n_train = int(0.9 * n_total)

train_videos = normal_train_videos[:n_train]
val_videos = normal_train_videos[n_train:]

move_videos(train_videos, train_root / "NormalVideos")
move_videos(val_videos, val_root / "NormalVideos")

print(f"  ✅ NormalVideos: {len(train_videos)} train, {len(val_videos)} val")

# ========== STEP 3: Move Testing_Normal_Videos ==========
# Every .mp4 directly inside the normal-test folder goes to Test/NormalVideos;
# no shuffling or splitting is applied to this set.
print("🚚 Moving Testing_Normal_Videos...")
normal_test_videos = [*normal_test_dir.glob("*.mp4")]
move_videos(
    normal_test_videos,
    test_root / "NormalVideos",
)
print(f"  ✅ NormalVideos: {len(normal_test_videos)} test")

# Final summary line with the location of the assembled dataset.
print("\n🏁 All done! Final dataset at:", output_root)


🚚 Moving Testing_Normal_Videos...
  ✅ NormalVideos: 150 test

🏁 All done! Final dataset at: C:\Users\rayaa\Downloads\ucf_crime_dataset\ucf_crime_v2
