# Experiment 2 - Stage 1: Binary Crime Classification (OpenCV Data Ingestion)

## Overview
This notebook uses **Experiment 1's data ingestion approach** (direct MP4 loading with OpenCV) while keeping the original Experiment 2 model architecture.

### Changes from Original:
- ✅ **Data Ingestion**: MP4 videos loaded directly using OpenCV (instead of pre-extracted frames)
- ✅ **On-the-fly frame extraction**: Frames are sampled from each video when it is loaded, so no separate frame-extraction preprocessing step is required
- ⚠️ **Model**: Original simple 3D CNN (no attention mechanism)
- ⚠️ **Training**: Basic training loop (no gradient clipping or advanced features)

For advanced features (attention mechanism, gradient clipping), see: `experiment_2_enhanced.ipynb`

---

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import json
import shutil
import numpy as np
import cv2

In [None]:
class UCFCrimeBinaryDataset(Dataset):
    """
    Binary (normal vs. crime) clip dataset that decodes MP4 files with OpenCV
    (matching Experiment 1's data ingestion approach).

    ``root_dir`` must contain one sub-directory per category. Every ``.mp4``
    file directly inside a category directory becomes one sample. A category
    whose name is ``normalvideos`` (case-insensitive) is labelled 0; every
    other category is labelled 1.

    Args:
        root_dir: Directory holding the category sub-directories.
        clip_len: Number of frames returned for each video.
        transform: Optional callable applied to every frame (passed as a PIL
            image). When omitted, ``transforms.ToTensor()`` is used.
        frame_size: Side length in pixels that each frame is resized to.
    """
    def __init__(self, root_dir, clip_len=16, transform=None, frame_size=112):
        self.root_dir = root_dir
        self.clip_len = clip_len
        self.transform = transform
        self.frame_size = frame_size

        # List of (video_path, label) tuples, filled by _prepare_samples().
        self.samples = []
        self._prepare_samples()

    def _prepare_samples(self):
        """Scan ``root_dir`` for MP4 files and record ``(path, label)`` pairs.

        Directory listings are sorted because ``os.listdir`` returns entries in
        arbitrary order; sorting keeps sample indices stable between runs.
        """
        for category in sorted(os.listdir(self.root_dir)):
            category_path = os.path.join(self.root_dir, category)
            if not os.path.isdir(category_path):
                continue

            label = 0 if category.lower() == "normalvideos" else 1  # normal=0, crime=1

            # Only regular .mp4 entries count; anything else is ignored.
            for file in sorted(os.listdir(category_path)):
                if file.lower().endswith('.mp4'):
                    video_path = os.path.join(category_path, file)
                    self.samples.append((video_path, label))

    def _load_video_cv2(self, video_path):
        """Decode ``clip_len`` RGB frames of size ``frame_size`` from a video.

        Frames are sampled uniformly across the video when the container
        reports a positive frame count. When it does not, the first
        ``clip_len`` frames are read sequentially instead. If fewer than
        ``clip_len`` frames can be decoded, the last decoded frame is repeated.

        Returns:
            A list of ``clip_len`` numpy arrays in RGB channel order.

        Raises:
            ValueError: If the video cannot be opened or yields no frames.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            if not cap.isOpened():
                raise ValueError(f"Could not open video: {video_path}")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            if total_frames > 0:
                # Sample frames uniformly across the video
                frame_indices = np.linspace(0, total_frames - 1, self.clip_len, dtype=int)
            else:
                # Frame count unavailable: seeking would use negative
                # positions, so fall back to reading frames in order.
                frame_indices = None

            for step in range(self.clip_len):
                if frame_indices is not None:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_indices[step]))
                ret, frame = cap.read()
                if not ret:
                    # Stop at the first failed read; padding below fills the rest.
                    break
                # Convert BGR to RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Resize frame
                frame = cv2.resize(frame, (self.frame_size, self.frame_size))
                frames.append(frame)
        finally:
            # Always free the decoder, including when a conversion step raises.
            cap.release()

        if len(frames) == 0:
            raise ValueError(f"No frames extracted from: {video_path}")

        # Pad with last frame if needed
        while len(frames) < self.clip_len:
            frames.append(frames[-1])

        return frames

    def __len__(self):
        """Return the number of discovered video files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(clip, label, video_path)`` for sample ``idx``.

        ``clip`` is the per-frame tensors stacked along dim 1 and ``label`` is
        a ``torch.long`` scalar tensor. If loading or transforming fails for
        any reason, the failure is printed and an all-zero clip of shape
        ``(3, clip_len, frame_size, frame_size)`` is returned with the sample's
        real label, so iteration never aborts on a bad file.
        """
        video_path, label = self.samples[idx]

        try:
            # Load frames using OpenCV
            frames = self._load_video_cv2(video_path)

            # Pick the frame converter once instead of rebuilding it per frame.
            to_tensor = self.transform if self.transform else transforms.ToTensor()

            # Frames are numpy arrays; the transforms receive PIL images.
            imgs = [to_tensor(Image.fromarray(frame)) for frame in frames]

            # Stack as (C, T, H, W)
            clip_tensor = torch.stack(imgs, dim=1)
            return clip_tensor, torch.tensor(label, dtype=torch.long), video_path

        except Exception as e:
            # Deliberate best-effort: report the failure and substitute zeros.
            print(f"[LOAD FAILED] {video_path} | Error: {e}")
            # Return dummy tensor
            dummy = torch.zeros(3, self.clip_len, self.frame_size, self.frame_size)
            return dummy, torch.tensor(label, dtype=torch.long), video_path


In [None]:
class BinaryCrimeDetector(nn.Module):
    """Original simple 3D CNN for binary crime detection.

    The feature extractor is two stages. Each stage applies a spatial
    (1x3x3) convolution, then a temporal (3x1x1) convolution, batch
    normalisation and max pooling. The classifier averages over all remaining
    positions and maps the 64 channels to 2 logits.
    """
    def __init__(self):
        super().__init__()

        def stage(in_ch, out_ch, pool):
            # One spatial-then-temporal convolution stage ending in max pooling.
            return [
                nn.Conv3d(in_ch, out_ch, (1, 3, 3), padding=(0, 1, 1)),
                nn.ReLU(inplace=True),
                nn.Conv3d(out_ch, out_ch, (3, 1, 1), padding=(1, 0, 0)),
                nn.BatchNorm3d(out_ch),
                nn.ReLU(inplace=True),
                nn.MaxPool3d(pool),
            ]

        # The first stage pools only spatially; the second also halves time.
        self.features = nn.Sequential(
            *stage(3, 32, (1, 2, 2)),
            *stage(32, 64, (2, 2, 2)),
        )

        head = [nn.AdaptiveAvgPool3d(1), nn.Flatten(), nn.Linear(64, 2)]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the feature extractor and then the classifier on ``x``."""
        feature_maps = self.features(x)
        logits = self.classifier(feature_maps)
        return logits


In [None]:
# Clip geometry shared by both datasets.
CLIP_LEN, FRAME_SIZE = 16, 112

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Per-frame transform. Resizing is not done here; the dataset resizes frames
# with OpenCV before they reach this pipeline.
# NOTE(review): the mean/std values match the widely used ImageNet statistics;
# confirm that is the intended normalisation.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_to_tensor, _normalize])

# UPDATE THESE PATHS TO YOUR VIDEO DIRECTORIES.
# Each must hold category folders containing .mp4 files, not frame directories.
train_dir = r"C:\Users\rayaa\Downloads\ucf_crime_v2\Train"
test_dir  = r"C:\Users\rayaa\Downloads\ucf_crime_v2\Test"

train_data = UCFCrimeBinaryDataset(
    train_dir,
    clip_len=CLIP_LEN,
    transform=transform,
    frame_size=FRAME_SIZE,
)
test_data = UCFCrimeBinaryDataset(
    test_dir,
    clip_len=CLIP_LEN,
    transform=transform,
    frame_size=FRAME_SIZE,
)

# Training batches are shuffled; test batches keep dataset order.
train_loader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=0)
test_loader = DataLoader(test_data, batch_size=2, shuffle=False, num_workers=0)

print(f"Train clips: {len(train_data)}, Test clips: {len(test_data)}")
print(f"Using device: {DEVICE}")


Train clips: 1610, Test clips: 290


### Basic Training Loop
---


In [None]:
# Build the network on the selected device, with its loss and optimiser.
model = BinaryCrimeDetector()
model = model.to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

EPOCHS = 5
for epoch in range(EPOCHS):
    epoch_label = epoch + 1  # 1-based epoch number used in the progress output
    model.train()
    running_loss = 0
    progress = tqdm(train_loader, desc=f"Epoch {epoch_label}/{EPOCHS}")
    for batch_clips, batch_labels, _ in progress:
        batch_clips = batch_clips.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_clips), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    # Report the mean per-batch loss for this epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch_label} Loss: {mean_loss:.4f}")

# Save only the weights (state_dict) once all epochs have finished.
output_dir = r"C:\Users\rayaa\Downloads\ucf_crime_v2\checkpoints"
os.makedirs(output_dir, exist_ok=True)
checkpoint_path = os.path.join(output_dir, "binary_stage1_opencv_simple.pt")
torch.save(model.state_dict(), checkpoint_path)
print(f"\n✓ Model saved to: {output_dir}/binary_stage1_opencv_simple.pt")


### Simple Inference
---


In [None]:
# Switch to evaluation mode before scoring the test set.
model.eval()

# Creating this directory also creates ./stage1_output, which the JSON
# report below is written into.
anomaly_dir = "./stage1_output/anomaly_clips"
os.makedirs(anomaly_dir, exist_ok=True)

# One record for each test video predicted as class 1 ('crime').
anomaly_records = []

with torch.no_grad():
    for batch_clips, _, batch_paths in tqdm(test_loader, desc="Stage 1 Inference"):
        logits = model(batch_clips.to(DEVICE))
        class_probs = torch.softmax(logits, dim=1)
        predicted = class_probs.argmax(dim=1).cpu().numpy()
        crime_conf = class_probs[:, 1].cpu().numpy()  # prob of 'crime'

        # Keep only the videos whose predicted class is 1.
        anomaly_records.extend(
            {"video_path": path, "confidence": float(conf)}
            for path, pred, conf in zip(batch_paths, predicted, crime_conf)
            if pred == 1
        )

with open("./stage1_output/anomalies.json", "w") as f:
    json.dump(anomaly_records, f, indent=2)

print(f"Stage 1 complete — {len(anomaly_records)} crime clips detected")


Epoch 1/5:   0%|          | 0/403 [00:00<?, ?it/s]

Epoch 1/5: 100%|██████████| 403/403 [09:43<00:00,  1.45s/it]


Epoch 1 Loss: 0.6681


Epoch 2/5: 100%|██████████| 403/403 [03:52<00:00,  1.73it/s]


Epoch 2 Loss: 0.6361


Epoch 3/5: 100%|██████████| 403/403 [03:41<00:00,  1.82it/s]


Epoch 3 Loss: 0.6067


Epoch 4/5: 100%|██████████| 403/403 [03:32<00:00,  1.90it/s]


Epoch 4 Loss: 0.6061


Epoch 5/5: 100%|██████████| 403/403 [03:34<00:00,  1.88it/s]

Epoch 5 Loss: 0.5924



