In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import random

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
print(f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

Using device: cpu
GPU name: CPU


In [1]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive
Using device: cpu


## Stage 1: Dataset Extraction

In [2]:
# Simple one-liner extraction (if you just want to quickly extract)
import zipfile
zip_path = "/content/drive/MyDrive/project/Shop DataSet.zip"
extract_path = "/content/drive/MyDrive/project"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
    print(f"Extracted to {extract_path}")

Extracted to /content/drive/MyDrive/project


In [15]:
# Define dataset path
dataset_path = '/content/drive/MyDrive/project/Shop DataSet'

# Check dataset structure
def explore_dataset(path):
    print("Dataset structure:")
    for root, dirs, files in os.walk(path):
        level = root.replace(path, '').count(os.sep)
        indent = ' ' * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 2 * (level + 1)
        for file in files[:5]:  # Show first 5 files
            print(f"{subindent}{file}")
        if len(files) > 5:
            print(f"{subindent}... and {len(files) - 5} more files")

explore_dataset(dataset_path)

Dataset structure:
Shop DataSet/
  non shop lifters/
    shop_lifter_n_0.mp4
    shop_lifter_n_0_1.mp4
    shop_lifter_n_1.mp4
    shop_lifter_n_1_1.mp4
    shop_lifter_n_10.mp4
    ... and 526 more files
  shop lifters/
    shop_lifter_0.mp4
    shop_lifter_1.mp4
    shop_lifter_10.mp4
    shop_lifter_100.mp4
    shop_lifter_101.mp4
    ... and 319 more files


## Stage 2: Dataset Class and Data Loading

In [16]:
class VideoDataset(Dataset):
    def __init__(self, video_paths, labels, transform=None, sequence_length=16, frame_size=(112, 112)):
        self.video_paths = video_paths
        self.labels = labels
        self.transform = transform
        self.sequence_length = sequence_length
        self.frame_size = frame_size

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        # Read video frames
        frames = self.load_video_frames(video_path)

        # Apply transformations
        if self.transform:
            frames = torch.stack([self.transform(frame) for frame in frames])
        else:
            frames = torch.stack([transforms.ToTensor()(frame) for frame in frames])

        return frames, label

    def load_video_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_interval = max(total_frames // self.sequence_length, 1)

        frame_count = 0
        while len(frames) < self.sequence_length:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                # Resize frame
                frame = cv2.resize(frame, self.frame_size)
                # Convert BGR to RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)

            frame_count += 1

        cap.release()

        # If we don't have enough frames, duplicate the last frame
        while len(frames) < self.sequence_length:
            frames.append(frames[-1])

        return frames[:self.sequence_length]

# Define transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

##  Load and prepare dataset

In [21]:
def load_dataset(dataset_path):
    video_paths = []
    labels = []

    # Define class folders
    class_folders = ['non shop lifters', 'shop lifters']
    class_mapping = {'non shop lifters': 0, 'shop lifters': 1}

    for class_folder in class_folders:
        class_path = os.path.join(dataset_path, class_folder)
        if os.path.exists(class_path):
            for video_file in os.listdir(class_path):
                if video_file.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                    video_paths.append(os.path.join(class_path, video_file))
                    labels.append(class_mapping[class_folder])

    return video_paths, labels

# Load all video paths and labels
video_paths, labels = load_dataset(dataset_path)

print(f"Total videos: {len(video_paths)}")
print(f"Class distribution: {np.unique(labels, return_counts=True)}")

# Split dataset into train and validation
train_paths, val_paths, train_labels, val_labels = train_test_split(
    video_paths, labels, test_size=0.2, random_state=42, stratify=labels
)

print(f"Training samples: {len(train_paths)}")
print(f"Validation samples: {len(val_paths)}")

Total videos: 855
Class distribution: (array([0, 1]), array([531, 324]))
Training samples: 684
Validation samples: 171


 ## Create data loaders

In [22]:
# Create datasets
train_dataset = VideoDataset(train_paths, train_labels, transform=transform, sequence_length=16)
val_dataset = VideoDataset(val_paths, val_labels, transform=transform, sequence_length=16)

# Create data loaders
batch_size = 8

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Test one batch
for frames, labels in train_loader:
    print(f"Batch frames shape: {frames.shape}")  # Should be [batch_size, sequence_length, channels, height, width]
    print(f"Batch labels shape: {labels.shape}")
    break

Batch frames shape: torch.Size([8, 16, 3, 112, 112])
Batch labels shape: torch.Size([8])


## Stage 3: 3DCNN Model Architecture

In [23]:
class Simple3DCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(Simple3DCNN, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn1 = nn.BatchNorm3d(64)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn2 = nn.BatchNorm3d(128)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2))

        self.conv3 = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn3 = nn.BatchNorm3d(256)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2))

        self.conv4 = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn4 = nn.BatchNorm3d(512)
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2))

        self.global_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        # Input shape: [batch_size, channels, depth, height, width]
        x = x.permute(0, 2, 1, 3, 4)  # Change from [B, D, C, H, W] to [B, C, D, H, W]

        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)

        x = torch.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)

        x = torch.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)

        x = torch.relu(self.bn4(self.conv4(x)))
        x = self.pool4(x)

        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)

        return x

# Initialize model
model = Simple3DCNN(num_classes=2).to(device)
print(model)

# Test model with sample input
sample_input = torch.randn(2, 16, 3, 112, 112).to(device)  # [batch, sequence, channels, height, width]
sample_output = model(sample_input)
print(f"Sample output shape: {sample_output.shape}")

Simple3DCNN(
  (conv1): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn3): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn4): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, 

## Define 3DCNN model


In [24]:
def setup_data_loaders(data_dir, batch_size=8, frames_per_clip=16, img_size=112, use_opencv=True):
    # Get all video paths and labels
    video_paths, labels = get_video_paths_and_labels(data_dir)

    print(f"Total videos found: {len(video_paths)}")
    print(f"Class distribution: {np.bincount(labels)}")

    # Split data into training and validation
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        video_paths, labels, test_size=0.2, random_state=42, stratify=labels
    )

    print(f"Training samples: {len(train_paths)}")
    print(f"Validation samples: {len(val_paths)}")

    # Define transformations for PIL Images
    train_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
    ])

    # Create datasets
    train_dataset = VideoDataset(train_paths, train_labels, train_transform, frames_per_clip, use_opencv=use_opencv)
    val_dataset = VideoDataset(val_paths, val_labels, val_transform, frames_per_clip, use_opencv=use_opencv)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader

## Define training setup

In [25]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training function
def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(dataloader, desc='Training')
    for batch_idx, (frames, labels) in enumerate(pbar):
        frames = frames.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(frames)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        pbar.set_postfix({
            'Loss': f'{running_loss/(batch_idx+1):.4f}',
            'Acc': f'{100.*correct/total:.2f}%'
        })

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# Validation function
def validate_epoch(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Validation')
        for batch_idx, (frames, labels) in enumerate(pbar):
            frames = frames.to(device)
            labels = labels.to(device)

            outputs = model(frames)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            pbar.set_postfix({
                'Loss': f'{running_loss/(batch_idx+1):.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

## Train the model

In [26]:
# Training parameters
num_epochs = 20
best_val_acc = 0.0
train_losses = []
val_losses = []
train_accs = []
val_accs = []

print("Starting training...")
for epoch in range(num_epochs):
    print(f'\nEpoch {epoch+1}/{num_epochs}')
    print('-' * 50)

    # Training phase
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validation phase
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

    # Update learning rate
    scheduler.step()

    # Store metrics
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_3dcnn_model.pth')
        print(f'New best model saved with validation accuracy: {val_acc:.2f}%')

print(f'\nTraining completed. Best validation accuracy: {best_val_acc:.2f}%')

Starting training...

Epoch 1/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:50<00:00,  6.86s/it, Loss=0.4829, Acc=78.36%]
Validation: 100%|██████████| 22/22 [00:57<00:00,  2.62s/it, Loss=5.0423, Acc=61.99%]


Train Loss: 0.4829, Train Acc: 78.36%
Val Loss: 5.0423, Val Acc: 61.99%
New best model saved with validation accuracy: 61.99%

Epoch 2/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:45<00:00,  6.80s/it, Loss=0.1441, Acc=94.44%]
Validation: 100%|██████████| 22/22 [00:57<00:00,  2.60s/it, Loss=12.5060, Acc=38.01%]


Train Loss: 0.1441, Train Acc: 94.44%
Val Loss: 12.5060, Val Acc: 38.01%

Epoch 3/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:42<00:00,  6.77s/it, Loss=0.0566, Acc=98.39%]
Validation: 100%|██████████| 22/22 [00:57<00:00,  2.62s/it, Loss=6.1658, Acc=38.01%]


Train Loss: 0.0566, Train Acc: 98.39%
Val Loss: 6.1658, Val Acc: 38.01%

Epoch 4/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:41<00:00,  6.76s/it, Loss=0.0686, Acc=98.25%]
Validation: 100%|██████████| 22/22 [00:58<00:00,  2.65s/it, Loss=17.3077, Acc=38.01%]


Train Loss: 0.0686, Train Acc: 98.25%
Val Loss: 17.3077, Val Acc: 38.01%

Epoch 5/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:43<00:00,  6.78s/it, Loss=0.0504, Acc=98.68%]
Validation: 100%|██████████| 22/22 [00:57<00:00,  2.61s/it, Loss=0.1517, Acc=95.91%]


Train Loss: 0.0504, Train Acc: 98.68%
Val Loss: 0.1517, Val Acc: 95.91%
New best model saved with validation accuracy: 95.91%

Epoch 6/20
--------------------------------------------------


Training: 100%|██████████| 86/86 [09:43<00:00,  6.78s/it, Loss=0.0377, Acc=99.12%]
Validation: 100%|██████████| 22/22 [00:59<00:00,  2.72s/it, Loss=0.0014, Acc=100.00%]


Train Loss: 0.0377, Train Acc: 99.12%
Val Loss: 0.0014, Val Acc: 100.00%
New best model saved with validation accuracy: 100.00%

Epoch 7/20
--------------------------------------------------


Training:  94%|█████████▍| 81/86 [09:23<00:34,  6.96s/it, Loss=0.0308, Acc=99.23%]


KeyboardInterrupt: 

## Plot training results

In [None]:
# Plot training history
plt.figure(figsize=(15, 5))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accs, label='Training Accuracy')
plt.plot(val_accs, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.tight_layout()
plt.show()

## Save the final model

In [1]:
# Save the final model
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'train_losses': train_losses,
    'val_losses': val_losses,
    'train_accs': train_accs,
    'val_accs': val_accs,
    'best_val_acc': best_val_acc
}, 'final_3dcnn_model.pth')

print("Model saved successfully!")

# Save to Google Drive
import shutil
shutil.copy('final_3dcnn_model.pth', '/content/drive/MyDrive/project/final_3dcnn_model.pth')
shutil.copy('best_3dcnn_model.pth', '/content/drive/MyDrive/project/best_3dcnn_model.pth')
print("Models copied to Google Drive!")

NameError: name 'torch' is not defined