# Crime Classification – CS 9548 Project
**Goal:** Exploring Machine Learning Techniques for Image Classification

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Download and Extract Dataset

In [None]:
!pip install kagglehub

import kagglehub
path = kagglehub.dataset_download("odins0n/ucf-crime-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/odins0n/ucf-crime-dataset?dataset_version_number=1...


100%|██████████| 11.0G/11.0G [08:24<00:00, 23.4MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1


## Create DataFrame of Images

In [30]:
import pathlib
import pandas as pd

# Get directories of train and test datasets
data_dir = pathlib.Path(path)
train_dir = data_dir / "Train"
test_dir = data_dir / "Test"

print("Data dir:", data_dir)
print(train_dir)
print(test_dir)

# Function to build dataframe
def build_image_df(root_dir):
    root_dir = pathlib.Path(root_dir)
    image_paths = list(root_dir.glob("*/*.png"))

    rows = []

    for p in image_paths:
        label = p.parent.name
        rows.append({"image": str(p), "label": label})

    return pd.DataFrame(rows)


# Build train and test dataframes
train_df = build_image_df(train_dir)
test_df = build_image_df(test_dir)

print(train_df['label'].value_counts())
print(test_df['label'].value_counts())

Data dir: /root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Train
/root/.cache/kagglehub/datasets/odins0n/ucf-crime-dataset/versions/1/Test
label
NormalVideos     947768
Stealing          44802
Robbery           41493
Burglary          39504
Arrest            26397
Shoplifting       24835
Fighting          24684
Arson             24421
RoadAccidents     23486
Abuse             19076
Explosion         18753
Vandalism         13626
Assault           10360
Shooting           7140
Name: count, dtype: int64
label
NormalVideos     64952
Burglary          7657
Shooting          7630
Shoplifting       7623
Explosion         6510
Arrest            3365
Arson             2793
RoadAccidents     2663
Assault           2657
Stealing          1984
Fighting          1231
Vandalism         1111
Robbery            835
Abuse              297
Name: count, dtype: int64


## Encode Label as Integer

In [31]:
from sklearn.preprocessing import LabelEncoder

# Create and fit label encoder on training labels
le = LabelEncoder()
train_df['label_idx'] = le.fit_transform(train_df['label'])

# Apply the same encoding to test labels
test_df['label_idx']  = le.transform(test_df['label'])

# Number of classes and mapping
num_classes = len(le.classes_)
print("Number of classes:", num_classes)
print("Class name vs label_idx:")
for index, cls in enumerate(le.classes_):
    print(index, cls)

Number of classes: 14
Class name vs label_idx:
0 Abuse
1 Arrest
2 Arson
3 Assault
4 Burglary
5 Explosion
6 Fighting
7 NormalVideos
8 RoadAccidents
9 Robbery
10 Shooting
11 Shoplifting
12 Stealing
13 Vandalism


We can see that the dataset has a large class imbalance. To mitigate this, we can do some under/oversampling, as well as data augmentation.

## Add Video Column ID

In [32]:
def extract_video_id(path_str):
    stem = pathlib.Path(path_str).stem

    # split off the last chunk
    video_id = stem.rsplit("_", 1)[0]

    return video_id

train_df['video_id'] = train_df['image'].apply(extract_video_id)
test_df['video_id']  = test_df['image'].apply(extract_video_id)

## Split Train Into Train and Val

In [33]:
from sklearn.model_selection import train_test_split
import numpy as np

unique_videos = train_df['video_id'].unique()

video_labels = (
    train_df.groupby('video_id')['label_idx'].agg(lambda x: np.bincount(x).argmax()).reindex(unique_videos)
)

# 80% train 20% val
train_vids, val_vids = train_test_split(
    unique_videos,
    test_size=0.20,
    random_state=42,
    stratify=video_labels
)

train_df_raw = train_df[train_df['video_id'].isin(train_vids)].reset_index(drop=True)
val_df = train_df[train_df['video_id'].isin(val_vids)].reset_index(drop=True)

print("\nNum train videos:", len(train_vids))
print("Num val videos:", len(val_vids))
print("Train frames:", len(train_df_raw))
print("Val frames:", len(val_df))
print("Test frames:", len(test_df))


Num train videos: 1288
Num val videos: 322
Train frames: 1047139
Val frames: 219206
Test frames: 111308


# No Over/undersampling

In [34]:
train_df_used = train_df_raw

# Compute class counts (sorted by label index)
class_counts = train_df_used['label_idx'].value_counts().sort_index().values
num_classes = len(class_counts)

# Inverse-frequency style weights
# weight_c = total_samples / (num_classes * count_c)

class_weights = class_counts.sum() / (num_classes * class_counts)
print("Class weights:", class_weights)



Class weights: [ 5.12931305  3.38963305  7.25115297  8.35238893  2.28098084  4.14587012
  3.52976134  0.09373604  4.24396521  2.32587981 12.88025536  3.62610379
  2.11436446  7.73561308]


In [35]:
print(train_df_used['label'].value_counts())

label
NormalVideos     797939
Stealing          35375
Burglary          32791
Robbery           32158
Arrest            22066
Fighting          21190
Shoplifting       20627
Explosion         18041
RoadAccidents     17624
Abuse             14582
Arson             10315
Vandalism          9669
Assault            8955
Shooting           5807
Name: count, dtype: int64


## Set up PyTorch

In [36]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Set constants for PyTorch
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

## Data Augmentation

In [37]:
# Data augmentation to training dataset
train_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.9, 1.1)),
    transforms.ColorJitter(contrast=0.1),
    transforms.ToTensor()
])

# Simple transforms to val/test dataset
eval_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

## Create Pipeline For PyTorch

In [38]:
class CrimeDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row['image']
        label = int(row['label_idx'])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Transform if needed (might not need for val/test)
        if self.transform:
            image = self.transform(image)

        return image, label

## Create DataLoaders

In [39]:
# First convert to CrimeDataset classes
train_dataset = CrimeDataset(train_df_used, train_transform)
val_dataset = CrimeDataset(val_df, eval_transform)
test_dataset = CrimeDataset(test_df, eval_transform)

# Transform to DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

In [40]:
images, labels = next(iter(train_loader))
print("\nSample batch shapes -> images:", images.shape, "labels:", labels.shape)


Sample batch shapes -> images: torch.Size([32, 3, 224, 224]) labels: torch.Size([32])


## Create ResNet18 Model

In [41]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Move class weights to same device
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

# Class-weighted loss
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

# Add L2 regularization via weight_decay
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

Using device: cuda


## Training and Testing

In [42]:
import torch.optim as optim

def train(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [43]:
def test(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.inference_mode():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [44]:
EPOCHS = 10

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = test(model, val_loader, criterion, device)

    print(
        f"Epoch {epoch}/{EPOCHS} | "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}"
    )

Epoch 1/10 | Train Loss: 0.5465, Train Acc: 0.8152 | Val Loss: 2.8190, Val Acc: 0.3609
Epoch 2/10 | Train Loss: 0.1849, Train Acc: 0.9194 | Val Loss: 2.7386, Val Acc: 0.3583
Epoch 3/10 | Train Loss: 0.1518, Train Acc: 0.9314 | Val Loss: 2.3686, Val Acc: 0.5070
Epoch 4/10 | Train Loss: 0.1392, Train Acc: 0.9378 | Val Loss: 2.7792, Val Acc: 0.4467
Epoch 5/10 | Train Loss: 0.1307, Train Acc: 0.9408 | Val Loss: 2.4532, Val Acc: 0.4441
Epoch 6/10 | Train Loss: 0.1264, Train Acc: 0.9425 | Val Loss: 2.5276, Val Acc: 0.4784
Epoch 7/10 | Train Loss: 0.1218, Train Acc: 0.9451 | Val Loss: 2.6224, Val Acc: 0.4469
Epoch 8/10 | Train Loss: 0.1191, Train Acc: 0.9455 | Val Loss: 2.6254, Val Acc: 0.4016
Epoch 9/10 | Train Loss: 0.1155, Train Acc: 0.9467 | Val Loss: 2.6037, Val Acc: 0.4247
Epoch 10/10 | Train Loss: 0.1146, Train Acc: 0.9468 | Val Loss: 2.6309, Val Acc: 0.4016


In [45]:
test_loss, test_acc = test(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")


Test Loss: 3.2868, Test Acc: 0.4576


In [46]:
print('hi')

hi
