In [1]:
import os
import cv2 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm 
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
DATASET_PATH = "/kaggle/input/anti-theft-dataset/Shop DataSet"
CLASSES = ["non shop lifters", "shop lifters"]
LABEL_MAP = {label: i for i, label in enumerate(CLASSES)}


video_data = []
for class_name in CLASSES:
    class_dir = os.path.join(DATASET_PATH, class_name)
    if not os.path.isdir(class_dir):
        print(f"Warning: Directory not found at {class_dir}")
        continue
    for video_file in os.listdir(class_dir):
        video_path = os.path.join(class_dir, video_file)
        video_data.append({'path': video_path, 'label': LABEL_MAP[class_name]})


df = pd.DataFrame(video_data)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)


train_df, test_df = train_test_split(
    df, test_size=0.20, random_state=42, stratify=df['label']
)

print(f"Total videos: {len(df)}")
print(f"Training set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")
df.head()

Total videos: 855
Training set size: 684
Test set size: 171


Unnamed: 0,path,label
0,/kaggle/input/anti-theft-dataset/Shop DataSet/...,0
1,/kaggle/input/anti-theft-dataset/Shop DataSet/...,0
2,/kaggle/input/anti-theft-dataset/Shop DataSet/...,0
3,/kaggle/input/anti-theft-dataset/Shop DataSet/...,0
4,/kaggle/input/anti-theft-dataset/Shop DataSet/...,1


In [3]:
IMG_SIZE = 128
MAX_FRAMES = 20 

train_transform = transforms.Compose([
    transforms.ToTensor(),
    # Augmentations are applied here
    transforms.RandomHorizontalFlip(p=0.5), # Randomly flip the video horizontally
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # Resize and Normalize are applied last
    transforms.Resize((IMG_SIZE, IMG_SIZE), antialias=True),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# For validation/testing, we only resize and normalize, no augmentation
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((IMG_SIZE, IMG_SIZE), antialias=True),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extract_frames(video_path, transform):
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = np.linspace(0, total_frames - 1, MAX_FRAMES, dtype=int)

    for i in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(transform(frame))
        else:
            frames.append(torch.zeros((3, IMG_SIZE, IMG_SIZE)))

    cap.release()
    return torch.stack(frames)

In [4]:
class VideoDataset(Dataset):
    # Accept a transform
    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        video_path = self.df.iloc[idx]['path']
        label = self.df.iloc[idx]['label']
        
        # Use the passed transform
        frames = extract_frames(video_path, self.transform) # Shape: (Time, Channels, H, W)
        
        # --- THIS IS THE FIX ---
        # Permute the dimensions to (Channels, Time, H, W) for the 3D CNN
        frames = frames.permute(1, 0, 2, 3) 
        
        return frames, torch.tensor(label, dtype=torch.float32)

# Create Datasets with the correct transforms
train_dataset = VideoDataset(train_df, transform=train_transform)
test_dataset = VideoDataset(test_df, transform=test_transform)

# Create DataLoader objects (with num_workers for performance)
BATCH_SIZE = 8
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

In [5]:
class CNN3D(nn.Module):
    def __init__(self, num_classes=1):
        super(CNN3D, self).__init__()
        
        self.conv_layer1 = nn.Sequential(
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool3d((1, 2, 2)) # Reduce H, W but not Time
        )
        self.conv_layer2 = nn.Sequential(
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)) # Reduce T, H, W
        )
        self.conv_layer3 = nn.Sequential(
            nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)) # Reduce T, H, W
        )
        
        # Adaptive pooling to handle any remaining dimensions
        self.adaptive_pool = nn.AdaptiveAvgPool3d(1)
        
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        # x shape: (batch_size, C, time_steps, H, W)
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.conv_layer3(out)
        
        out = self.adaptive_pool(out)
        
        # Flatten the features for the fully connected layer
        out = out.view(out.size(0), -1) 
        
        out = self.fc(out)
        return out

# Instantiate the model and move it to the device
model = CNN3D().to(device)
print("Model created: 3D CNN (From Scratch)")
print(model)

Model created: 3D CNN (From Scratch)
CNN3D(
  (conv_layer1): Sequential(
    (0): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (conv_layer2): Sequential(
    (0): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (conv_layer3): Sequential(
    (0): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): ReLU()
    (2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (adaptive_pool): AdaptiveAvgPool3d(output_size=1)
  (fc): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [None]:
criterion = nn.BCEWithLogitsLoss() 
optimizer = optim.Adam(model.parameters(), lr=0.0001) 
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2)

def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train() 
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    for inputs, labels in tqdm(loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        preds = torch.sigmoid(outputs) > 0.5
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)
        
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc

def validate_one_epoch(model, loader, criterion, device):
    model.eval() 
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    with torch.no_grad(): 
        for inputs, labels in tqdm(loader, desc="Validation"):
            inputs, labels = inputs.to(device), labels.to(device).unsqueeze(1)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * inputs.size(0)
            preds = torch.sigmoid(outputs) > 0.5
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)
            
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


NUM_EPOCHS = 20
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
best_val_acc = 0.0

for epoch in range(NUM_EPOCHS):
    print(f"\n--- Epoch {epoch+1}/{NUM_EPOCHS} ---")
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate_one_epoch(model, test_loader, criterion, device)
    
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}   | Val Acc: {val_acc:.4f}")
    
    scheduler.step(val_loss)
    
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print("--- Best model saved! ---")


--- Epoch 1/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6859 | Train Acc: 0.5556
Val Loss: 0.6708   | Val Acc: 0.6199
--- Best model saved! ---

--- Epoch 2/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6712 | Train Acc: 0.6213
Val Loss: 0.6710   | Val Acc: 0.6199

--- Epoch 3/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6774 | Train Acc: 0.6199
Val Loss: 0.6691   | Val Acc: 0.6199

--- Epoch 4/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6707 | Train Acc: 0.6213
Val Loss: 0.6649   | Val Acc: 0.6199

--- Epoch 5/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6694 | Train Acc: 0.6213
Val Loss: 0.6644   | Val Acc: 0.6199

--- Epoch 6/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6683 | Train Acc: 0.6213
Val Loss: 0.6674   | Val Acc: 0.6199

--- Epoch 7/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6658 | Train Acc: 0.6213
Val Loss: 0.6681   | Val Acc: 0.6199

--- Epoch 8/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6702 | Train Acc: 0.6213
Val Loss: 0.6651   | Val Acc: 0.6199

--- Epoch 9/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6646 | Train Acc: 0.6199
Val Loss: 0.6661   | Val Acc: 0.6199

--- Epoch 10/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6651 | Train Acc: 0.6213
Val Loss: 0.6635   | Val Acc: 0.6199

--- Epoch 11/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6695 | Train Acc: 0.6213
Val Loss: 0.6670   | Val Acc: 0.6199

--- Epoch 12/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6648 | Train Acc: 0.6228
Val Loss: 0.6632   | Val Acc: 0.6199

--- Epoch 13/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6687 | Train Acc: 0.6213
Val Loss: 0.6649   | Val Acc: 0.6199

--- Epoch 14/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]

Validation:   0%|          | 0/22 [00:00<?, ?it/s]

Train Loss: 0.6667 | Train Acc: 0.6213
Val Loss: 0.6656   | Val Acc: 0.6199

--- Epoch 15/20 ---


Training:   0%|          | 0/86 [00:00<?, ?it/s]