# Crime Classification – CS 9548 Project
**Goal:** Exploring Machine Learning Techniques for Image Classification

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Download and Extract Dataset

In [2]:
!pip install kagglehub

import kagglehub
path = kagglehub.dataset_download("odins0n/ucf-crime-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/odins0n/ucf-crime-dataset?dataset_version_number=1...


100%|██████████| 11.0G/11.0G [04:52<00:00, 40.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1


## Create DataFrame of Images

In [3]:
import pathlib
import pandas as pd

# Get directories of train and test datasets
data_dir = pathlib.Path(path)
train_dir = data_dir / "Train"
test_dir = data_dir / "Test"

print("Data dir:", data_dir)
print(train_dir)
print(test_dir)

# Function to build dataframe
def build_image_df(root_dir):
    root_dir = pathlib.Path(root_dir)
    image_paths = list(root_dir.glob("*/*.png"))

    rows = []

    for p in image_paths:
        label = p.parent.name
        rows.append({"image": str(p), "label": label})

    return pd.DataFrame(rows)


# Build train and test dataframes
train_df = build_image_df(train_dir)
test_df = build_image_df(test_dir)

print(train_df['label'].value_counts())
print(test_df['label'].value_counts())

Data dir: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Train
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Test
label
NormalVideos     947768
Stealing          44802
Robbery           41493
Burglary          39504
Arrest            26397
Shoplifting       24835
Fighting          24684
Arson             24421
RoadAccidents     23486
Abuse             19076
Explosion         18753
Vandalism         13626
Assault           10360
Shooting           7140
Name: count, dtype: int64
label
NormalVideos     64952
Burglary          7657
Shooting          7630
Shoplifting       7623
Explosion         6510
Arrest            3365
Arson             2793
RoadAccidents     2663
Assault           2657
Stealing          1984
Fighting          1231
Vandalism         1111
Robbery            835
Abuse              297
Name: count, dtype: int64


## Encode Label as Integer

In [4]:
from sklearn.preprocessing import LabelEncoder

# Create and fit label encoder on training labels
le = LabelEncoder()
train_df['label_idx'] = le.fit_transform(train_df['label'])

# Apply the same encoding to test labels
test_df['label_idx']  = le.transform(test_df['label'])

# Number of classes and mapping
num_classes = len(le.classes_)
print("Number of classes:", num_classes)
print("Class name vs label_idx:")
for index, cls in enumerate(le.classes_):
    print(index, cls)

Number of classes: 14
Class name vs label_idx:
0 Abuse
1 Arrest
2 Arson
3 Assault
4 Burglary
5 Explosion
6 Fighting
7 NormalVideos
8 RoadAccidents
9 Robbery
10 Shooting
11 Shoplifting
12 Stealing
13 Vandalism


We can see that the dataset has a large class imbalance. To mitigate this, we can do some under/oversampling, as well as data augmentation.

## Add Video Column ID

In [6]:
def extract_video_id(path_str):
    stem = pathlib.Path(path_str).stem

    # split off the last chunk
    video_id = stem.rsplit("_", 1)[0]

    return video_id

train_df['video_id'] = train_df['image'].apply(extract_video_id)
test_df['video_id']  = test_df['image'].apply(extract_video_id)

## Split Train Into Train and Val

In [7]:
from sklearn.model_selection import train_test_split
import numpy as np

unique_videos = train_df['video_id'].unique()

video_labels = (
    train_df.groupby('video_id')['label_idx'].agg(lambda x: np.bincount(x).argmax()).reindex(unique_videos)
)

# 80% train 20% val
train_vids, val_vids = train_test_split(
    unique_videos,
    test_size=0.20,
    random_state=42,
    stratify=video_labels
)

train_df_raw = train_df[train_df['video_id'].isin(train_vids)].reset_index(drop=True)
val_df = train_df[train_df['video_id'].isin(val_vids)].reset_index(drop=True)

print("\nNum train videos:", len(train_vids))
print("Num val videos:", len(val_vids))
print("Train frames:", len(train_df_raw))
print("Val frames:", len(val_df))
print("Test frames:", len(test_df))


Num train videos: 1288
Num val videos: 322
Train frames: 883856
Val frames: 382489
Test frames: 111308


# No Over/undersampling

In [8]:
train_df_used = train_df_raw

print("\nTrain label counts (unbalanced):")
print(train_df_used['label'].value_counts())
print("\nVal label counts:")
print(val_df['label'].value_counts())
print("\nTest label counts:")
print(test_df['label'].value_counts())



Train label counts (unbalanced):
label
NormalVideos     625905
Robbery           34936
Stealing          32803
Burglary          28297
Shoplifting       22300
Arrest            22250
Arson             21754
Fighting          20078
RoadAccidents     18275
Explosion         18146
Abuse             14442
Vandalism         10055
Assault            8419
Shooting           6196
Name: count, dtype: int64

Val label counts:
label
NormalVideos     321863
Stealing          11999
Burglary          11207
Robbery            6557
RoadAccidents      5211
Abuse              4634
Fighting           4606
Arrest             4147
Vandalism          3571
Arson              2667
Shoplifting        2535
Assault            1941
Shooting            944
Explosion           607
Name: count, dtype: int64

Test label counts:
label
NormalVideos     64952
Burglary          7657
Shooting          7630
Shoplifting       7623
Explosion         6510
Arrest            3365
Arson             2793
RoadAccidents     2663
A

In [9]:
# Compute class counts (sorted by label index)
class_counts = train_df_used['label_idx'].value_counts().sort_index().values
num_classes = len(class_counts)

# Inverse-frequency style weights
# weight_c = total_samples / (num_classes * count_c)

class_weights = class_counts.sum() / (num_classes * class_counts)
print("Class weights:", class_weights)

Class weights: [ 4.37145627  2.83741894  2.90211324  7.49882069  2.23106942  3.47914534
  3.14436555  0.10086606  3.45458667  1.80709215 10.18924652  2.83105701
  1.92459749  6.27872416]


## Set up PyTorch

In [10]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Set constants for PyTorch
IMG_SIZE = (64, 64)
BATCH_SIZE = 32

## Data Augmentation

In [11]:
# Data augmentation to training dataset
train_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.9, 1.1)),
    transforms.ColorJitter(contrast=0.1),
    transforms.ToTensor()
])

# Simple transforms to val/test dataset
eval_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

## Create Pipeline For PyTorch

In [12]:
class CrimeDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row['image']
        label = int(row['label_idx'])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Transform if needed (might not need for val/test)
        if self.transform:
            image = self.transform(image)

        return image, label

## Create DataLoaders

In [13]:
# First convert to CrimeDataset classes
train_dataset = CrimeDataset(train_df_used, train_transform)
val_dataset = CrimeDataset(val_df, eval_transform)
test_dataset = CrimeDataset(test_df, eval_transform)

# Transform to DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [14]:
images, labels = next(iter(train_loader))
print("\nSample batch shapes -> images:", images.shape, "labels:", labels.shape)


Sample batch shapes -> images: torch.Size([32, 3, 64, 64]) labels: torch.Size([32])


## Create CNN Model

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.flatten_dim = 128 * 8 * 8

        self.fc1 = nn.Linear(self.flatten_dim, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)

        x = F.relu(self.conv3(x))
        x = self.pool3(x)

        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)


model = Net(num_classes=num_classes).to(device)

# Move class weights to same device
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

# Class-weighted loss
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

# Add L2 regularization via weight_decay
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

Using device: cuda


## Training and Testing

In [17]:
import torch.optim as optim

def train(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [18]:
def test(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.inference_mode():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [19]:
EPOCHS = 10

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = test(model, val_loader, criterion, device)

    print(
        f"Epoch {epoch}/{EPOCHS} | "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}"
    )

Epoch 1/10 | Train Loss: 0.7112, Train Acc: 0.6996 | Val Loss: 1.6973, Val Acc: 0.5628
Epoch 2/10 | Train Loss: 0.3005, Train Acc: 0.8547 | Val Loss: 1.6444, Val Acc: 0.5499
Epoch 3/10 | Train Loss: 0.2643, Train Acc: 0.8698 | Val Loss: 1.7042, Val Acc: 0.5943
Epoch 4/10 | Train Loss: 0.2490, Train Acc: 0.8797 | Val Loss: 1.5557, Val Acc: 0.6285
Epoch 5/10 | Train Loss: 0.2429, Train Acc: 0.8872 | Val Loss: 1.4687, Val Acc: 0.6129
Epoch 6/10 | Train Loss: 0.2343, Train Acc: 0.8944 | Val Loss: 1.8157, Val Acc: 0.6669
Epoch 7/10 | Train Loss: 0.2313, Train Acc: 0.8990 | Val Loss: 1.9775, Val Acc: 0.5599
Epoch 8/10 | Train Loss: 0.2285, Train Acc: 0.9039 | Val Loss: 1.9020, Val Acc: 0.5519
Epoch 9/10 | Train Loss: 0.2255, Train Acc: 0.9059 | Val Loss: 1.3952, Val Acc: 0.6643
Epoch 10/10 | Train Loss: 0.2239, Train Acc: 0.9099 | Val Loss: 1.4501, Val Acc: 0.6530


In [20]:
test_loss, test_acc = test(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")


Test Loss: 3.1888, Test Acc: 0.4805
