In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.models.video import r3d_18
from torchvision import transforms
import cv2
from PIL import Image
import os


In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

class PreloadedDatasetWithStrideRAM(Dataset):
    """Clip dataset that decodes every frame once and keeps the result in RAM.

    For each category folder the ``.png`` files are listed, sorted by path,
    transformed, and cut into windows of ``sequence_length`` consecutive frames
    whose start indices are ``stride`` apart. Each window is stored as a list
    slice, so overlapping windows reference the same frame objects instead of
    duplicating them.

    NOTE(review): all frames of a category are treated as one sorted stream, so
    a window can mix frames from different source videos if a folder holds more
    than one, and plain path sorting only matches temporal order when the frame
    numbers in the file names are zero-padded — confirm against the dataset
    layout.
    """

    def __init__(self, root_dir, categories, sequence_length=16, stride=8, transform=None):
        """
        Args:
            root_dir (str): Root directory containing category folders.
            categories (list): List of category names; the list index is the label.
            sequence_length (int): Number of consecutive frames in each sequence.
            stride (int): Step size between the starts of consecutive sequences.
            transform (callable, optional): Transformation applied to each RGB
                frame. Defaults to ``transforms.ToTensor()`` so that frames can
                be stacked in ``__getitem__``.

        Raises:
            ValueError: If ``sequence_length`` or ``stride`` is smaller than 1.
        """
        # Values below 1 would otherwise produce empty clips (or an opaque
        # range() error) that only surface later inside torch.stack.
        if sequence_length < 1:
            raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")
        if stride < 1:
            raise ValueError(f"stride must be >= 1, got {stride}")
        if transform is None:
            # The advertised default used to crash on the first frame because
            # None was called as a function; fall back to a tensor conversion.
            transform = transforms.ToTensor()

        self.data = []  # One list of transformed frames per sequence
        self.labels = []  # Label of the sequence at the same index in self.data
        self.sequence_length = sequence_length
        self.transform = transform

        for label, category in enumerate(categories):
            category_path = os.path.join(root_dir, category)
            frame_files = sorted(
                os.path.join(category_path, f)
                for f in os.listdir(category_path)
                if f.endswith(".png")
            )
            print(f"Preloading category: {category} ({len(frame_files)} frames)")

            # Decode and transform every frame of this category exactly once.
            frames = [self._load_frame(path) for path in frame_files]

            # Trailing frames that do not fill a whole window are dropped.
            for start in range(0, len(frames) - sequence_length + 1, stride):
                self.data.append(frames[start:start + sequence_length])
                self.labels.append(label)

    def _load_frame(self, path):
        """Open one image, convert it to RGB, transform it, and close the file."""
        with Image.open(path) as image:
            return self.transform(image.convert("RGB"))

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(clip, label)`` for the sequence at ``idx``.

        The frames are stacked along dim 1, which yields (C, T, H, W) when each
        transformed frame is a (C, H, W) tensor.
        """
        sequence = self.data[idx]
        label = self.labels[idx]
        return torch.stack(sequence, dim=1), label


In [3]:
# Category folder names; a category's position in this list is its integer label.
categories = [
    "Abuse",
    "Arson",
    "Burglary",
    "Explosion",
    "Fighting",
    "RoadAccidents",
    "Robbery",
    "Shoplifting",
    "Stealing",
]

# Per-frame preprocessing: resize to 112x112, convert to a tensor, then apply
# Normalize with mean 0.5 / std 0.5 on each of the three channels.
# NOTE(review): the pretrained r3d_18 weights loaded later in this notebook were
# presumably trained with different per-channel statistics — confirm against the
# torchvision weights documentation, and keep train/test transforms in sync.
transform = transforms.Compose(
    [
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Decode the whole training set into RAM as 16-frame windows starting every 8 frames.
train_dataset = PreloadedDatasetWithStrideRAM(
    root_dir="Train",
    categories=categories,
    transform=transform,
    sequence_length=16,
    stride=8,
)

print(f"Total training sequences: {len(train_dataset)}")


Preloading category: Abuse (19076 frames)
Preloading category: Arson (24421 frames)
Preloading category: Burglary (39504 frames)
Preloading category: Explosion (18753 frames)
Preloading category: Fighting (24684 frames)
Preloading category: RoadAccidents (23486 frames)
Preloading category: Robbery (41493 frames)
Preloading category: Shoplifting (24835 frames)
Preloading category: Stealing (44802 frames)
Total training sequences: 32619


In [4]:
from torch.utils.data import DataLoader

# Mini-batch size; adjust to what the GPU memory allows.
batch_size = 32

# Shuffled loader over the preloaded dataset. num_workers=0 keeps batching in
# the main process, since the frames are already held in RAM.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

print("DataLoader created successfully.")


DataLoader created successfully.


In [None]:
import torch
import torch.nn as nn
from torchvision.models.video import r3d_18
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler

# Categories in the dataset; the list index is the class id.
categories = ["Abuse", "Arson", "Burglary", "Explosion",
              "Fighting", "RoadAccidents", "Robbery",
              "Shoplifting", "Stealing"]
num_classes = len(categories)

# Load the pretrained ResNet3D model. The weights are named explicitly because
# the boolean `pretrained=` argument is deprecated in torchvision; this is the
# weight set `pretrained=True` resolved to.
model = r3d_18(weights="KINETICS400_V1")

# Replace the classification head: dropout followed by a linear layer with one
# output per category.
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(model.fc.in_features, num_classes)
)

# Move the model to GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


# Train only layer2, layer3, layer4 and the new head; freeze everything else.
# Prefix matching (instead of substring matching) guarantees that a parameter is
# selected only because of the top-level module it belongs to.
trainable_prefixes = ("layer2.", "layer3.", "layer4.", "fc.")
for name, param in model.named_parameters():
    param.requires_grad = name.startswith(trainable_prefixes)

# Print trainable layers to verify
print("Trainable layers:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)


# Class imbalance handling: weight each class by total_frames / class_frames,
# so classes with fewer frames get a larger weight in the loss.
class_counts = [19076, 24421, 39504, 18753, 24684, 23486, 41493, 24835, 44802]  # Replace with your actual counts
class_weights = torch.tensor([sum(class_counts) / c for c in class_counts], dtype=torch.float).to(device)

# Use weighted CrossEntropyLoss
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Optimizer with weight decay; the new head gets a higher learning rate than
# the fine-tuned backbone stages.
optimizer = optim.AdamW([
    {"params": model.layer2.parameters(), "lr": 0.0001},  # Lower LR for layer2
    {"params": model.layer3.parameters(), "lr": 0.0001},  # Lower LR for layer3
    {"params": model.layer4.parameters(), "lr": 0.0001},  # Lower LR for layer4
    {"params": model.fc.parameters(), "lr": 0.0005},      # Higher LR for fc
], weight_decay=1e-3)


# Learning rate scheduler: multiply every group's LR by 0.1 every 5 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Mixed precision training. The device-generic torch.amp.GradScaler takes the
# device as its first argument; the torch.cuda.amp class imported above takes
# `init_scale` there, so passing 'cuda' to it set the initial scale to a string.
# Scaling is disabled when running on CPU.
scaler = torch.amp.GradScaler("cuda", enabled=(device.type == "cuda"))

print("Model setup complete with dropout, class weights, and weight decay.")


Trainable layers:
layer3.0.conv1.0.weight
layer3.0.conv1.1.weight
layer3.0.conv1.1.bias
layer3.0.conv2.0.weight
layer3.0.conv2.1.weight
layer3.0.conv2.1.bias
layer3.0.downsample.0.weight
layer3.0.downsample.1.weight
layer3.0.downsample.1.bias
layer3.1.conv1.0.weight
layer3.1.conv1.1.weight
layer3.1.conv1.1.bias
layer3.1.conv2.0.weight
layer3.1.conv2.1.weight
layer3.1.conv2.1.bias
layer4.0.conv1.0.weight
layer4.0.conv1.1.weight
layer4.0.conv1.1.bias
layer4.0.conv2.0.weight
layer4.0.conv2.1.weight
layer4.0.conv2.1.bias
layer4.0.downsample.0.weight
layer4.0.downsample.1.weight
layer4.0.downsample.1.bias
layer4.1.conv1.0.weight
layer4.1.conv1.1.weight
layer4.1.conv1.1.bias
layer4.1.conv2.0.weight
layer4.1.conv2.1.weight
layer4.1.conv2.1.bias
fc.1.weight
fc.1.bias
Model setup complete with dropout, class weights, and weight decay.


  scaler = GradScaler('cuda')


In [12]:
from tqdm import tqdm
import os
from torch.cuda.amp import autocast, GradScaler

# Mixed precision is only used on CUDA; on CPU both the scaler and autocast are
# disabled. The device-generic torch.amp API replaces the deprecated
# torch.cuda.amp entry points.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

# Training configuration
num_epochs = 20
save_dir = "checkpoints"
os.makedirs(save_dir, exist_ok=True)  # Ensure save directory exists

for epoch in range(1, num_epochs + 1):
    print(f"Starting Epoch {epoch}/{num_epochs}")
    model.train()
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    # Training loop
    progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch}")
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Mixed precision forward pass
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Scaled backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Update running metrics; read the loss from the device only once.
        batch_loss = loss.item()
        running_loss += batch_loss
        preds = outputs.max(dim=1).indices
        train_correct += (preds == labels).sum().item()
        train_total += labels.size(0)

        # Update progress bar
        progress_bar.set_postfix(loss=batch_loss)

    # Epoch metrics: mean of the per-batch losses and overall accuracy.
    train_loss = running_loss / len(train_loader)
    train_accuracy = train_correct / train_total
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Train Accuracy = {train_accuracy:.4f}")

    # Step the scheduler before saving, so the checkpoint holds the learning
    # rates and scheduler state that the next epoch would start with.
    scheduler.step()

    # Save model checkpoint (the scaler state is included so that a resumed run
    # can restore the loss scale as well).
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'scaler_state_dict': scaler.state_dict(),
        'epoch': epoch,
    }
    save_path = os.path.join(save_dir, f"resnet3d_checkpoint_epoch_{epoch}.pth")
    torch.save(checkpoint, save_path)
    print(f"Model checkpoint saved at {save_path}")

print("Training complete.")


  scaler = GradScaler()


Starting Epoch 1/20


  with autocast():
Training Epoch 1: 100%|██████████| 1020/1020 [06:16<00:00,  2.71it/s, loss=0.00395]


Epoch 1: Train Loss = 0.0374, Train Accuracy = 0.9886
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_1.pth
Starting Epoch 2/20


Training Epoch 2: 100%|██████████| 1020/1020 [06:27<00:00,  2.63it/s, loss=0.00354] 


Epoch 2: Train Loss = 0.0188, Train Accuracy = 0.9945
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_2.pth
Starting Epoch 3/20


Training Epoch 3: 100%|██████████| 1020/1020 [06:31<00:00,  2.61it/s, loss=0.00643] 


Epoch 3: Train Loss = 0.0135, Train Accuracy = 0.9954
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_3.pth
Starting Epoch 4/20


Training Epoch 4: 100%|██████████| 1020/1020 [06:05<00:00,  2.79it/s, loss=0.00127] 


Epoch 4: Train Loss = 0.0245, Train Accuracy = 0.9929
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_4.pth
Starting Epoch 5/20


Training Epoch 5: 100%|██████████| 1020/1020 [06:41<00:00,  2.54it/s, loss=0.0167]  


Epoch 5: Train Loss = 0.0025, Train Accuracy = 0.9993
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_5.pth
Starting Epoch 6/20


Training Epoch 6: 100%|██████████| 1020/1020 [06:05<00:00,  2.79it/s, loss=0.000141]


Epoch 6: Train Loss = 0.0008, Train Accuracy = 0.9998
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_6.pth
Starting Epoch 7/20


Training Epoch 7: 100%|██████████| 1020/1020 [06:36<00:00,  2.57it/s, loss=6.04e-5] 


Epoch 7: Train Loss = 0.0004, Train Accuracy = 0.9998
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_7.pth
Starting Epoch 8/20


Training Epoch 8: 100%|██████████| 1020/1020 [06:05<00:00,  2.79it/s, loss=4.6e-5]  


Epoch 8: Train Loss = 0.0004, Train Accuracy = 0.9999
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_8.pth
Starting Epoch 9/20


Training Epoch 9: 100%|██████████| 1020/1020 [06:42<00:00,  2.53it/s, loss=0.0028]  


Epoch 9: Train Loss = 0.0004, Train Accuracy = 0.9998
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_9.pth
Starting Epoch 10/20


Training Epoch 10: 100%|██████████| 1020/1020 [06:22<00:00,  2.67it/s, loss=1.97e-5] 


Epoch 10: Train Loss = 0.0003, Train Accuracy = 0.9999
Model checkpoint saved at checkpoints\resnet3d_checkpoint_epoch_10.pth
Starting Epoch 11/20


Training Epoch 11:   3%|▎         | 27/1020 [00:08<05:17,  3.12it/s, loss=7.86e-5] 


KeyboardInterrupt: 

TEST LOADER AND MODEL EVALUATION

In [9]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import torch
import torchvision.transforms as transforms

class TestDataset(Dataset):
    """Clip dataset that stores frame paths and decodes frames on access.

    For each category folder the ``.png`` files are listed, sorted by path, and
    cut into non-overlapping windows of ``sequence_length`` consecutive paths.
    Images are opened and transformed only when an item is requested.

    NOTE(review): all frames of a category are treated as one sorted stream, so
    a window can mix frames from different source videos if a folder holds more
    than one — confirm against the dataset layout.
    """

    def __init__(self, root_dir, categories, sequence_length=16, transform=None):
        """
        Args:
            root_dir (str): Root directory containing category folders.
            categories (list): List of category names (subfolder names); the
                list index is the label.
            sequence_length (int): Number of consecutive frames in each sequence.
            transform (callable, optional): Transformation applied to each RGB
                frame. Defaults to ``transforms.ToTensor()`` so that frames can
                be stacked in ``__getitem__``.

        Raises:
            ValueError: If ``sequence_length`` is smaller than 1.
        """
        # A negative length used to yield an empty dataset without any error.
        if sequence_length < 1:
            raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")
        if transform is None:
            # The advertised default used to crash in __getitem__ because None
            # was called as a function; fall back to a tensor conversion.
            transform = transforms.ToTensor()

        self.data = []  # One list of frame paths per sequence
        self.labels = []  # Label of the sequence at the same index in self.data
        self.sequence_length = sequence_length
        self.transform = transform

        for label, category in enumerate(categories):
            category_path = os.path.join(root_dir, category)
            frame_files = sorted(
                os.path.join(category_path, f)
                for f in os.listdir(category_path)
                if f.endswith(".png")
            )
            print(f"Loading category: {category} ({len(frame_files)} frames)")

            # Non-overlapping windows; trailing frames that do not fill a whole
            # window are dropped.
            for start in range(0, len(frame_files) - sequence_length + 1, sequence_length):
                self.data.append(frame_files[start:start + sequence_length])
                self.labels.append(label)

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, transform, and stack the frames of the sequence at ``idx``.

        Returns ``(clip, label)``; frames are stacked along dim 1, which yields
        (C, T, H, W) when each transformed frame is a (C, H, W) tensor.
        """
        sequence = self.data[idx]
        label = self.labels[idx]

        frames = []
        for frame_path in sequence:
            # The context manager closes the image file once the frame has been
            # transformed, so repeated access does not accumulate open handles.
            with Image.open(frame_path) as image:
                frames.append(self.transform(image.convert("RGB")))
        return torch.stack(frames, dim=1), label

# Category folder names of the test set; the list index is the integer label.
categories = [
    "Abuse",
    "Arson",
    "Burglary",
    "Explosion",
    "Fighting",
    "RoadAccidents",
    "Robbery",
    "Shoplifting",
    "Stealing",
]

# Root folder of the test frames.
test_root = "Test"

# Per-frame preprocessing for evaluation: resize to 112x112, convert to a
# tensor, then apply Normalize with mean 0.5 / std 0.5 on each channel.
test_transform = transforms.Compose(
    [
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Index the test frames as 16-frame sequences (frames are decoded on access).
test_dataset = TestDataset(
    root_dir=test_root,
    categories=categories,
    transform=test_transform,
    sequence_length=16,
)

# Unshuffled loader; lower batch_size if memory is tight.
batch_size = 32
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

print(f"Test dataset contains {len(test_dataset)} sequences.")


Loading category: Abuse (297 frames)
Loading category: Arson (2793 frames)
Loading category: Burglary (7657 frames)
Loading category: Explosion (6510 frames)
Loading category: Fighting (1231 frames)
Loading category: RoadAccidents (2663 frames)
Loading category: Robbery (835 frames)
Loading category: Shoplifting (7623 frames)
Loading category: Stealing (1984 frames)
Test dataset contains 1970 sequences.


In [10]:
# Evaluate the model on the test dataset
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0

# Autocast is requested through the device-generic torch.amp API. With the
# `autocast` name imported from torch.cuda.amp, the first positional argument is
# `enabled`, so `autocast('cuda')` only worked because the string is truthy.
# Mixed precision is switched off when running on CPU.
use_amp = device.type == "cuda"

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Mixed precision inference (optional)
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        test_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        test_correct += (preds == labels).sum().item()
        test_total += labels.size(0)

# Test metrics: mean of the per-batch losses and overall accuracy.
test_loss /= len(test_loader)
test_accuracy = test_correct / test_total
print(f"Test Loss = {test_loss:.4f}, Test Accuracy = {test_accuracy:.4f}")


Test Loss = 3.4985, Test Accuracy = 0.2563


LOADING MODEL AND EVALUATION

In [13]:
import torch
from torchvision.models.video import r3d_18

# Define the categories (used to size the classification head)
categories = ["Abuse", "Arson", "Burglary", "Explosion",
              "Fighting", "RoadAccidents", "Robbery",
              "Shoplifting", "Stealing"]
num_classes = len(categories)

# Build the architecture without pretrained weights, because the checkpoint
# supplies them. `weights=None` replaces the deprecated `pretrained=False`.
model = r3d_18(weights=None)
# The head must match the one used in training, otherwise the state-dict keys
# (fc.1.weight / fc.1.bias) will not line up. `torch.nn` is spelled out because
# this cell does not import `nn` itself.
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(model.fc.in_features, num_classes)
)

# Move model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Load the checkpoint. The file written by the training cell holds only state
# dicts and an epoch number, so `weights_only=True` is sufficient; it avoids
# unpickling arbitrary objects and silences the FutureWarning torch.load emits.
checkpoint_path = "checkpoints/resnet3d_checkpoint_epoch_10.pth"  # Replace with your file
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)

# Load model weights
model.load_state_dict(checkpoint['model_state_dict'])

# Set model to evaluation mode
model.eval()

print(f"Checkpoint loaded from {checkpoint_path}")


  checkpoint = torch.load(checkpoint_path, map_location=device)


Checkpoint loaded from checkpoints/resnet3d_checkpoint_epoch_10.pth


In [14]:
# Score the currently loaded model on the test loader.
model.eval()  # evaluation mode
test_loss, test_correct, test_total = 0.0, 0, 0

# Gradients are not needed while evaluating.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and batch loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss = test_loss + loss.item()

        # Predicted class = index of the largest output along dim 1
        preds = outputs.max(dim=1).indices
        test_correct = test_correct + int((preds == labels).sum())
        test_total = test_total + labels.shape[0]

# Mean of the per-batch losses and overall accuracy.
test_loss = test_loss / len(test_loader)
test_accuracy = test_correct / test_total

print(f"Test Loss = {test_loss:.4f}, Test Accuracy = {test_accuracy:.4f}")


Test Loss = 4.9459, Test Accuracy = 0.1756


ANALYSIS OF DATA AND METRICS

In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report
import numpy as np

# Collect predictions and true labels over the whole test loader.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        # Store predictions and true labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Pin the label set to every class id. Without it, sklearn infers the classes
# from the data, so a class missing from both labels and predictions would
# shrink the confusion matrix and make `target_names=categories` raise.
class_ids = list(range(len(categories)))

# Weighted-average metrics. zero_division=0 keeps the value sklearn already
# used for classes with no predictions or no samples, without the warning.
precision = precision_score(all_labels, all_preds, labels=class_ids, average='weighted', zero_division=0)
recall = recall_score(all_labels, all_preds, labels=class_ids, average='weighted', zero_division=0)
f1 = f1_score(all_labels, all_preds, labels=class_ids, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_ids)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

# Classification report for detailed per-class metrics
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=class_ids,
                            target_names=categories, zero_division=0))


Precision: 0.2978
Recall: 0.1756
F1-Score: 0.1659
Confusion Matrix:
[[  1   0   0   0   1   9   6   0   1]
 [ 52  48  30   0   2  30   6   0   6]
 [  7  13  48  45   1 155 112   0  97]
 [ 71  14  17  44  18 147  67   6  22]
 [  4   2   6   0  24   6  20  12   2]
 [  0   8   1   2  13 124   6   4   8]
 [  2   0   0   0   0  13  22   0  15]
 [ 14   1  17   2   2   5 382   1  52]
 [ 29   3   1   0   0  36  21   0  34]]

Classification Report:
               precision    recall  f1-score   support

        Abuse       0.01      0.06      0.01        18
        Arson       0.54      0.28      0.37       174
     Burglary       0.40      0.10      0.16       478
    Explosion       0.47      0.11      0.18       406
     Fighting       0.39      0.32      0.35        76
RoadAccidents       0.24      0.75      0.36       166
      Robbery       0.03      0.42      0.06        52
  Shoplifting       0.04      0.00      0.00       476
     Stealing       0.14      0.27      0.19       124

    