# Crime Classification – CS 9548 Project
**Goal:** Exploring Machine Learning Techniques for Image Classification

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Download and Extract Dataset

In [2]:
!pip install kagglehub

import kagglehub
path = kagglehub.dataset_download("odins0n/ucf-crime-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/odins0n/ucf-crime-dataset?dataset_version_number=1...


100%|██████████| 11.0G/11.0G [08:39<00:00, 22.8MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1


## Create DataFrame of Images

In [8]:
import pathlib
import pandas as pd

# Get directories of train and test datasets
data_dir = pathlib.Path(path)
train_dir = data_dir / "Train"
test_dir = data_dir / "Test"

print("Data dir:", data_dir)
print(train_dir)
print(test_dir)

# Function to build dataframe
def build_image_df(root_dir):
    root_dir = pathlib.Path(root_dir)
    image_paths = list(root_dir.glob("*/*.png"))

    rows = []

    for p in image_paths:
        label = p.parent.name
        rows.append({"image": str(p), "label": label})

    return pd.DataFrame(rows)


# Build train and test dataframes
train = build_image_df(train_dir)
test = build_image_df(test_dir)

print(train['label'].value_counts())
print(test['label'].value_counts())

Data dir: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Train
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Test
label
NormalVideos     947768
Stealing          44802
Robbery           41493
Burglary          39504
Arrest            26397
Shoplifting       24835
Fighting          24684
Arson             24421
RoadAccidents     23486
Abuse             19076
Explosion         18753
Vandalism         13626
Assault           10360
Shooting           7140
Name: count, dtype: int64
label
NormalVideos     64952
Burglary          7657
Shooting          7630
Shoplifting       7623
Explosion         6510
Arrest            3365
Arson             2793
RoadAccidents     2663
Assault           2657
Stealing          1984
Fighting          1231
Vandalism         1111
Robbery            835
Abuse              297
Name: count, dtype: int64


## Encode Label as Integer

In [9]:
from sklearn.preprocessing import LabelEncoder

# Create and fit label encoder on training labels
le = LabelEncoder()
train['label_idx'] = le.fit_transform(train['label'])

# Apply the same encoding to test labels
test['label_idx'] = le.transform(test['label'])

# Number of classes and mapping
num_classes = len(le.classes_)
print("Number of classes:", num_classes)
print("Class name vs label_idx:")
for index, cls in enumerate(le.classes_):
    print(index, cls)

Number of classes: 14
Class name vs label_idx:
0 Abuse
1 Arrest
2 Arson
3 Assault
4 Burglary
5 Explosion
6 Fighting
7 NormalVideos
8 RoadAccidents
9 Robbery
10 Shooting
11 Shoplifting
12 Stealing
13 Vandalism


We can see that the dataset has a large class imbalance. To mitigate this, we can do some under/oversampling, as well as data augmentation.

## Add Video Column ID

In [10]:
def extract_video_id(path_str):
    stem = pathlib.Path(path_str).stem

    # split off the last chunk
    video_id = stem.rsplit("_", 1)[0]

    return video_id

train['video_id'] = train['image'].apply(extract_video_id)
test['video_id']  = test['image'].apply(extract_video_id)

## Split Train Into Train and Val

In [11]:
from sklearn.model_selection import train_test_split
import numpy as np

unique_videos = train['video_id'].unique()

video_labels = (
    train.groupby('video_id')['label_idx'].agg(lambda x: np.bincount(x).argmax()).reindex(unique_videos)
)

# 80% train 20% val
train_vids, val_vids = train_test_split(
    unique_videos,
    test_size=0.20,
    random_state=42,
    stratify=video_labels
)

train_df_raw = train[train['video_id'].isin(train_vids)].reset_index(drop=True)
val = train[train['video_id'].isin(val_vids)].reset_index(drop=True)

print("\nNum train videos:", len(train_vids))
print("Num val videos:", len(val_vids))
print("Train frames:", len(train_df_raw))
print("Val frames:", len(val))
print("Test frames:", len(test))


Num train videos: 1288
Num val videos: 322
Train frames: 1062923
Val frames: 203422
Test frames: 111308


## Under/Oversampling, Data Augmentation

In [14]:
TARGET = 10000

def balance_dataset(df):
    balanced_parts = []

    for cls, group in df.groupby("label"):
        count = len(group)

        if count < TARGET:
            # Oversample to 10,000
            group = group.sample(
                n=TARGET,
                replace=True,
                random_state=42
            )
        else:
            # Undersample to 10,000
            group = group.sample(
                n=TARGET,
                replace=False,
                random_state=42
            )

        balanced_parts.append(group)

    return pd.concat(balanced_parts).sample(frac=1, random_state=42).reset_index(drop=True)

train = balance_dataset(train_df_raw)

In [15]:
print(train['label'].value_counts())

label
Burglary         10000
Vandalism        10000
Explosion        10000
Robbery          10000
Arson            10000
NormalVideos     10000
Abuse            10000
Stealing         10000
Assault          10000
Fighting         10000
RoadAccidents    10000
Shooting         10000
Shoplifting      10000
Arrest           10000
Name: count, dtype: int64


## Set up PyTorch

In [16]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Set constants for PyTorch
IMG_SIZE = (64, 64)
BATCH_SIZE = 32

## Data Augmentation

In [17]:
# Data augmentation to training dataset
train_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.9, 1.1)),
    transforms.ColorJitter(contrast=0.1),
    transforms.ToTensor()
])

# Simple transforms to val/test dataset
eval_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

## Create Pipeline For PyTorch

In [18]:
class CrimeDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row['image']
        label = int(row['label_idx'])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Transform if needed (might not need for val/test)
        if self.transform:
            image = self.transform(image)

        return image, label

## Create DataLoaders

In [20]:
# First convert to CrimeDataset classes
train_dataset = CrimeDataset(train, train_transform)
val_dataset = CrimeDataset(val, eval_transform)
test_dataset = CrimeDataset(test, eval_transform)

# Transform to DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [21]:
images, labels = next(iter(train_loader))
print("\nSample batch shapes -> images:", images.shape, "labels:", labels.shape)


Sample batch shapes -> images: torch.Size([32, 3, 64, 64]) labels: torch.Size([32])


## Create CNN Model

In [22]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.flatten_dim = 128 * 8 * 8

        self.fc1 = nn.Linear(self.flatten_dim, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)

        x = F.relu(self.conv3(x))
        x = self.pool3(x)

        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = Net(num_classes=num_classes).to(device)


Using device: cuda


## Training and Testing

In [23]:
import torch.optim as optim

def train(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [24]:
def test(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.inference_mode():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
EPOCHS = 10

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = test(model, val_loader, criterion, device)

    print(
        f"Epoch {epoch}/{EPOCHS} | "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}"
    )

Epoch 1/10 | Train Loss: 0.9290, Train Acc: 0.7133 | Val Loss: 2.5391, Val Acc: 0.2456
Epoch 2/10 | Train Loss: 0.3913, Train Acc: 0.8801 | Val Loss: 2.4473, Val Acc: 0.3659
Epoch 3/10 | Train Loss: 0.2841, Train Acc: 0.9130 | Val Loss: 2.6583, Val Acc: 0.4196
Epoch 4/10 | Train Loss: 0.2350, Train Acc: 0.9274 | Val Loss: 2.6409, Val Acc: 0.4719
Epoch 5/10 | Train Loss: 0.2037, Train Acc: 0.9378 | Val Loss: 2.7676, Val Acc: 0.4730
Epoch 6/10 | Train Loss: 0.1813, Train Acc: 0.9440 | Val Loss: 2.6939, Val Acc: 0.4472
Epoch 7/10 | Train Loss: 0.1656, Train Acc: 0.9484 | Val Loss: 2.8119, Val Acc: 0.4786
Epoch 8/10 | Train Loss: 0.1540, Train Acc: 0.9527 | Val Loss: 2.7903, Val Acc: 0.5236
Epoch 9/10 | Train Loss: 0.1388, Train Acc: 0.9569 | Val Loss: 3.0680, Val Acc: 0.5200
Epoch 10/10 | Train Loss: 0.1342, Train Acc: 0.9586 | Val Loss: 3.0186, Val Acc: 0.4774


In [26]:
test_loss, test_acc = test(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")


Test Loss: 4.4159, Test Acc: 0.4000
