In [1]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F  # Import this for functional operations
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.cuda.amp import GradScaler, autocast


In [33]:
# **Dataset Class with Preloading**
class PreloadedDataset(Dataset):
    def __init__(self, root_dir, categories, sequence_length=8, transform=None):
        """
        Args:
            root_dir (str): Root directory containing category folders.
            categories (list): List of category names (subfolder names).
            sequence_length (int): Number of consecutive frames in each sequence.
            transform (callable, optional): Transform to apply to each frame.
        """
        self.data = []
        self.labels = []
        self.sequence_length = sequence_length
        self.transform = transform

        for label, category in enumerate(categories):
            category_path = os.path.join(root_dir, category)
            if not os.path.exists(category_path):
                print(f"Category folder does not exist: {category_path}")
                continue

            # Load all PNGs into memory
            print(f"Preloading category: {category}")
            frame_files = sorted([f for f in os.listdir(category_path) if f.endswith(".png")])
            for file in frame_files:
                img = Image.open(os.path.join(category_path, file)).convert("RGB")
                if self.transform:
                    img = self.transform(img)
                self.data.append(img)  # Add preprocessed image
                self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Create sequences on-the-fly from preloaded data
        start_idx = idx
        end_idx = start_idx + self.sequence_length

        # Handle edge cases by padding with zeros
        if end_idx > len(self.data):
            sequence = self.data[start_idx:] + [torch.zeros_like(self.data[0])] * (end_idx - len(self.data))
        else:
            sequence = self.data[start_idx:end_idx]

        # Stack into tensor of shape (C, T, H, W)
        sequence = torch.stack(sequence, dim=1)
        label = self.labels[idx]
        return sequence, label

In [35]:
# **Model Definition**
class Simple3DCNN(nn.Module):
    def __init__(self, num_classes):
        super(Simple3DCNN, self).__init__()
        self.conv1 = nn.Conv3d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool3d(kernel_size=2, stride=2)
        flattened_feature_size = 16 * 4 * 56 * 56 
        self.fc = nn.Linear(flattened_feature_size, num_classes)



    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x


In [10]:
# **Prepare Datasets and DataLoaders**
categories = ["Abuse", "Arson", "Assault", "Burglary", "Explosion", "Fighting",
              "NormalVideos", "RoadAccidents", "Robbery", "Shooting", "Shoplifting",
              "Stealing", "Vandalism"]

train_root = "Train"
test_root = "Test"

transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [None]:

train_dataset = PreloadedDataset(root_dir=train_root, categories=categories, sequence_length=8, transform=transform)
test_dataset = PreloadedDataset(root_dir=test_root, categories=categories, sequence_length=8, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)  # Preloading, so no workers
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

In [None]:
# **Model Setup**
num_classes = len(categories)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Simple3DCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

scaler = GradScaler()  # Mixed precision scaler


  scaler = GradScaler()  # Mixed precision scaler


In [7]:
# **Training Loop**
num_epochs = 5
save_dir = "models"  # Folder to save models
os.makedirs(save_dir, exist_ok=True)

In [None]:
for epoch in range(num_epochs):
    print(f"Starting Epoch {epoch + 1}/{num_epochs}")
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")

    for batch_idx, (inputs, labels) in enumerate(progress_bar):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Mixed precision training
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Update progress bar
        progress_bar.set_postfix(loss=loss.item())

        # Debug every 10 batches
        if batch_idx % 10 == 0:
            print(f"Batch {batch_idx + 1}/{len(train_loader)}, Loss: {loss.item():.4f}")

    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs} completed with Average Loss: {epoch_loss:.4f}")

    # Save model checkpoint
    torch.save(model.state_dict(), os.path.join(save_dir, f"simple_3dcnn_epoch_{epoch + 1}.pth"))
    print(f"Model saved for Epoch {epoch + 1}")

    # Step scheduler
    scheduler.step()

print("Training complete.")

MODEL EVALUATION

In [9]:
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 13.01%


LOADING MODEL


In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define your model architecture (same as during training)
class Simple3DCNN(nn.Module):
    def __init__(self, num_classes, sequence_length=8):
        super(Simple3DCNN, self).__init__()
        self.conv1 = nn.Conv3d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool3d(kernel_size=2, stride=2)
       
        # Dynamically compute flattened feature size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, sequence_length, 112, 112)  # (batch_size, channels, sequence_length, height, width)
            x = self.pool(torch.relu(self.conv1(dummy_input)))
            self.flattened_feature_size = x.numel()

        self.fc = nn.Linear(self.flattened_feature_size, num_classes)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        #print(f"Shape before flattening: {x.shape}")  # Debug
        x = x.view(x.size(0), -1)  # Flatten
        #print(f"Shape after flattening: {x.shape}")  # Debug
        x = self.fc(x)
        return x




# Define categories and device
categories = ["Abuse", "Arson", "Assault", "Burglary", "Explosion", "Fighting",
              "NormalVideos", "RoadAccidents", "Robbery", "Shooting", "Shoplifting",
              "Stealing", "Vandalism"]
num_classes = len(categories)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model
model = Simple3DCNN(num_classes=num_classes).to(device)

# Path to the saved model
saved_model_path = "model_final_simple\simple_3dcnn_epoch_5.pth"  # Replace with your actual saved file name

# Load the saved state dictionary
model.load_state_dict(torch.load(saved_model_path))

# Set the model to evaluation mode
model.eval()

print("Model loaded successfully and set to evaluation mode.")


Model loaded successfully and set to evaluation mode.


  saved_model_path = "model_final_simple\simple_3dcnn_epoch_5.pth"  # Replace with your actual saved file name
  model.load_state_dict(torch.load(saved_model_path))


VIDEO PROCESSING PIPELINE

In [10]:
import os
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import torch.nn.functional as F

In [11]:
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [12]:
# Function to extract frames from a video
def extract_frames(video_path, sequence_length=8):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        frame = transform(frame)
        frames.append(frame)
    cap.release()

    # Split frames into sequences
    frame_sequences = []
    for i in range(0, len(frames) - sequence_length + 1, sequence_length):
        frame_sequences.append(torch.stack(frames[i:i + sequence_length], dim=1))  # Shape: (C, T, H, W)

    return frame_sequences


In [13]:
# Load the trained model
model = Simple3DCNN(num_classes=13)
model.load_state_dict(torch.load("model_final_simple\simple_3dcnn_epoch_5.pth"))  # Update with your saved model path
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


  model.load_state_dict(torch.load("model_final_simple\simple_3dcnn_epoch_5.pth"))  # Update with your saved model path
  model.load_state_dict(torch.load("model_final_simple\simple_3dcnn_epoch_5.pth"))  # Update with your saved model path


In [14]:
# Prediction function
def predict_video(video_path):
    frame_sequences = extract_frames(video_path, sequence_length=8)
    predictions = []
    with torch.no_grad():
        for frame_sequence in frame_sequences:
            frame_sequence = frame_sequence.unsqueeze(0).to(device)  # Add batch dimension
            outputs = model(frame_sequence)
            _, predicted = torch.max(outputs, 1)
            predictions.append(predicted.item())
    # Return the most common predicted class
    return max(set(predictions), key=predictions.count)

In [15]:
# Run prediction
video_path = "output_folder_video\RoadAccidents001_x264.avi"  # Replace with your video path
predicted_class = predict_video(video_path)
print(f"Predicted Class: {predicted_class}")

Predicted Class: 11


  video_path = "output_folder_video\RoadAccidents001_x264.avi"  # Replace with your video path


In [16]:
category_mapping = ["Abuse", "Arson", "Assault", "Burglary", "Explosion",
                    "Fighting", "NormalVideos", "RoadAccidents", "Robbery",
                    "Shooting", "Shoplifting", "Stealing", "Vandalism"]
print(f"Predicted Category: {category_mapping[predicted_class]}")


Predicted Category: Stealing
