In [1]:
import os
from glob import glob
from PIL import Image
import torch
from torch.utils.data import Dataset

class YOLODataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None):
        self.transform = transform

        # Verify directories exist
        if not os.path.exists(image_dir):
            raise FileNotFoundError(f"Image directory not found: {image_dir}")
        if not os.path.exists(label_dir):
            raise FileNotFoundError(f"Label directory not found: {label_dir}")

        # Recursively find all images and labels
        self.image_paths = glob(os.path.join(image_dir, '**/*.jpg'), recursive=True)
        self.label_paths = glob(os.path.join(label_dir, '**/*.txt'), recursive=True)

        # Create dicts for matching files by basename (without extension)
        images_dict = {os.path.splitext(os.path.basename(p))[0]: p for p in self.image_paths}
        labels_dict = {os.path.splitext(os.path.basename(p))[0]: p for p in self.label_paths}

        # Find common keys present in both images and labels
        common_keys = sorted(set(images_dict.keys()) & set(labels_dict.keys()))

        if len(common_keys) == 0:
            raise RuntimeError(f"No matching image-label pairs found in {image_dir} and {label_dir}!")

        # Store pairs of matching image and label paths
        self.pairs = [(images_dict[k], labels_dict[k]) for k in common_keys]

        # Statistics: Count bounding boxes
        total_boxes = 0
        for _, label_path in self.pairs:
            with open(label_path, 'r') as f:
                boxes = [line.strip().split() for line in f.readlines() if line.strip() and len(line.strip().split()) == 5]
                total_boxes += len(boxes)
        print(f"[Stats] Training dataset: {len(self.pairs)} image-label pairs, {total_boxes} total bounding boxes")

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img_path, label_path = self.pairs[idx]

        # Load image with error handling
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None, None

        # Load labels in YOLO format: class, x_center, y_center, width, height (normalized)
        boxes = []
        try:
            with open(label_path, 'r') as f:
                for line in f.readlines():
                    if line.strip() == '':
                        continue
                    parts = line.strip().split()
                    if len(parts) != 5:
                        print(f"Skipping malformed label in {label_path}: {line.strip()}")
                        continue
                    cls, x, y, w, h = map(float, parts)
                    if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
                        print(f"Warning: Invalid box coordinates in {label_path}: {[cls, x, y, w, h]}")
                        continue
                    boxes.append([cls, x, y, w, h])
        except Exception as e:
            print(f"Error reading label file {label_path}: {e}")
            return None, None

        if len(boxes) == 0:
            boxes = torch.zeros((0, 5), dtype=torch.float32)
        else:
            boxes = torch.tensor(boxes, dtype=torch.float32)

        if self.transform:
            img = self.transform(img)

        return img, boxes

In [2]:
from torchvision import transforms
import torch

image_dir = "F:\human behaviour detection\widerface\WIDER_train\images"
label_dir = "F:\human behaviour detection\widerface\WIDER_train\labels"

# Basic transform for testing
transform = transforms.ToTensor()

try:
    dataset = YOLODataset(image_dir, label_dir, transform=transform)
except Exception as e:
    print(f"Failed to create dataset: {e}")
    raise

print(f"Total pairs: {len(dataset)}")

# Calculate average boxes per image
total_boxes = 0
for i in range(len(dataset)):
    _, boxes = dataset[i]
    if boxes is None:
        continue
    total_boxes += len(boxes)
avg_boxes = total_boxes / len(dataset) if len(dataset) > 0 else 0
print(f"[Stats] Average bounding boxes per image: {avg_boxes:.2f}")

if len(dataset) > 0:
    img, boxes = dataset[0]
    if img is None or boxes is None:
        print("Failed to load first sample.")
    else:
        size_info = img.shape if hasattr(img, 'shape') else "Unknown size attribute"
        print(f"Image type: {type(img)}, size: {size_info}")
        print(f"Labels shape: {boxes.shape}, example labels: {boxes[:5]}")
else:
    print("Dataset is empty, no samples to show.")

[Stats] Training dataset: 12876 image-label pairs, 159367 total bounding boxes
Total pairs: 12876
[Stats] Average bounding boxes per image: 12.38
Image type: <class 'torch.Tensor'>, size: torch.Size([3, 732, 1024])
Labels shape: torch.Size([2, 5]), example labels: tensor([[0.0000, 0.1240, 0.5273, 0.0098, 0.0164],
        [0.0000, 0.2461, 0.5260, 0.0117, 0.0191]])


In [3]:
from torchvision import transforms
from torch.utils.data import DataLoader
import torch

IMG_SIZE = 416

transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

try:
    dataset = YOLODataset(image_dir, label_dir, transform=transform)
except Exception as e:
    print(f"Failed to create dataset: {e}")
    raise

def custom_collate_fn(batch):
    # Filter out None entries (failed loads)
    batch = [item for item in batch if item[0] is not None and item[1] is not None]
    if len(batch) == 0:
        raise ValueError("Batch is empty after filtering failed loads.")
    images = torch.stack([item[0] for item in batch])
    labels = [item[1] for item in batch]
    return images, labels

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    dataloader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
        pin_memory=(device.type == 'cuda'),
        persistent_workers=False,
        collate_fn=custom_collate_fn
    )
except Exception as e:
    print(f"Failed to create DataLoader: {e}")
    raise

# Test one batch
try:
    images, labels = next(iter(dataloader))
    images = images.to(device)
    print(f"Batch images shape: {images.shape}")
    print(f"Number of label tensors: {len(labels)}")
    print(f"Example label shape for first image: {labels[0].shape}")
except Exception as e:
    print(f"Error loading batch: {e}")
    raise

[Stats] Training dataset: 12876 image-label pairs, 159367 total bounding boxes
Batch images shape: torch.Size([32, 3, 416, 416])
Number of label tensors: 32
Example label shape for first image: torch.Size([6, 5])


In [6]:
import torch
import torch.nn as nn

class TinyYOLOv2Debug(nn.Module):
    def __init__(self, num_classes=1, B=2, S=13):
        super(TinyYOLOv2Debug, self).__init__()
        self.S = S
        self.B = B
        self.C = num_classes
        self.anchors = torch.tensor([[1.5, 1.5], [3.0, 3.0]])  # Match inference and loss

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2)
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2)
        )
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
        )
        self.conv7 = nn.Sequential(
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1)
        )
        self.conv8 = nn.Sequential(
            nn.Conv2d(1024, 1024, 3, padding=1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1)
        )
        self.conv9 = nn.Conv2d(1024, self.B * (5 + self.C), 1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        print(f"After conv9 (final conv): {x.shape}")
        x = x.permute(0, 2, 3, 1)
        print(f"After permute: {x.shape}")
        return x

In [7]:
import torch
import torch.nn as nn

class YOLOLoss(nn.Module):
    def __init__(self, S=13, B=2, C=1, lambda_coord=5, lambda_noobj=0.5):
        super(YOLOLoss, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.mse = nn.MSELoss(reduction='mean')
        self.anchors = torch.tensor([[1.5, 1.5], [3.0, 3.0]])  # Match model and inference

    def compute_iou(self, box1, box2):
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2
        x1_min, y1_min = x1 - w1/2, y1 - h1/2
        x1_max, y1_max = x1 + w1/2, y1 + h1/2
        x2_min, y2_min = x2 - w2/2, y2 - h2/2
        x2_max, y2_max = x2 + w2/2, y2 + h2/2

        inter_xmin = torch.max(x1_min, x2_min)
        inter_ymin = torch.max(y1_min, y2_min)
        inter_xmax = torch.min(x1_max, x2_max)
        inter_ymax = torch.min(y1_max, y2_max)

        inter_area = torch.clamp(inter_xmax - inter_xmin, min=0) * torch.clamp(inter_ymax - inter_ymin, min=0)
        area1 = w1 * h1
        area2 = w2 * h2
        union_area = area1 + area2 - inter_area
        return inter_area / union_area

    def forward(self, predictions, targets):
        batch_size = predictions.size(0)
        device = predictions.device
        self.anchors = self.anchors.to(device)

        predictions = predictions.reshape(batch_size, self.S, self.S, self.B, 5 + self.C)

        coord_loss = 0
        conf_loss = 0
        class_loss = 0
        num_objects = 0

        for b in range(batch_size):
            target_tensor = torch.zeros(self.S, self.S, self.B, 5 + self.C, device=device)

            for box in targets[b]:
                cls, x, y, w, h = box
                i = int(x * self.S)
                j = int(y * self.S)
                if i >= self.S or j >= self.S or i < 0 or j < 0:
                    continue
                gt_box = torch.tensor([x, y, w, h], device=device)

                best_iou = 0
                best_k = 0
                for k in range(self.B):
                    anchor = self.anchors[k]
                    anchor_box = torch.tensor([x, y, anchor[0], anchor[1]], device=device)
                    iou = self.compute_iou(gt_box, anchor_box)
                    if iou > best_iou:
                        best_iou = iou
                        best_k = k

                target_tensor[j, i, best_k, 0] = 1
                target_tensor[j, i, best_k, 1:5] = torch.tensor([x * self.S - i, y * self.S - j, w, h], device=device)
                target_tensor[j, i, best_k, 5 + int(cls)] = 1
                num_objects += 1

            obj_mask = target_tensor[..., 0]
            noobj_mask = 1 - obj_mask

            pred_x = torch.sigmoid(predictions[b][..., 1])
            pred_y = torch.sigmoid(predictions[b][..., 2])
            pred_w = self.anchors[:, 0] * torch.exp(predictions[b][..., 3])
            pred_h = self.anchors[:, 1] * torch.exp(predictions[b][..., 4])
            pred_coords = torch.stack([pred_x, pred_y, pred_w, pred_h], dim=-1)

            if obj_mask.sum() > 0:
                coord_loss += self.lambda_coord * self.mse(
                    pred_coords[obj_mask == 1],
                    target_tensor[obj_mask == 1][:, 1:5]
                )
                class_loss += self.mse(
                    predictions[b][obj_mask == 1][:, 5:],
                    target_tensor[obj_mask == 1][:, 5:]
                )

            conf_loss += self.mse(
                torch.sigmoid(predictions[b][..., 0])[obj_mask == 1],
                target_tensor[..., 0][obj_mask == 1]
            )
            conf_loss += self.lambda_noobj * self.mse(
                torch.sigmoid(predictions[b][..., 0])[noobj_mask == 1],
                target_tensor[..., 0][noobj_mask == 1]
            )

        if num_objects == 0:
            print("[Debug] No objects in batch, returning conf_loss only")
            return conf_loss

        total_loss = (coord_loss + conf_loss + class_loss) / max(num_objects, 1)
        print(f"[Debug] Loss components - Coord: {coord_loss:.4f}, Conf: {conf_loss:.4f}, Class: {class_loss:.4f}, Total: {total_loss:.4f}")
        return total_loss

In [8]:
import torch
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

try:
    model = TinyYOLOv2Debug(num_classes=1, B=2, S=13).to(device)
    criterion = YOLOLoss(S=13, B=2, C=1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
except Exception as e:
    print(f"Error initializing model, criterion, or optimizer: {e}")
    raise

model.train()

try:
    images, labels = next(iter(dataloader))
except Exception as e:
    print(f"Error loading batch from dataloader: {e}")
    raise

images = images.to(device)
labels = [label.to(device) for label in labels]

assert images.size(0) == len(labels), "Batch size mismatch between images and labels"

try:
    outputs = model(images)
    print(f"Model output shape: {outputs.shape}")
except Exception as e:
    print(f"Error during model forward pass: {e}")
    raise

try:
    loss = criterion(outputs, labels)
    print(f"Initial loss (untrained): {loss.item():.4f}")
except Exception as e:
    print(f"Error computing loss: {e}")
    raise

try:
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
except Exception as e:
    print(f"Error during optimization step: {e}")
    raise

print("Completed one training step successfully.")

Using device: cpu
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
Model output shape: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 167.2659, Conf: 13.7766, Class: 34.9846, Total: 1.2415
Initial loss (untrained): 1.2415
Completed one training step successfully.


In [9]:
import os
from glob import glob
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class YOLOValidDataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None):
        self.transform = transform

        if not os.path.exists(image_dir):
            raise FileNotFoundError(f"Validation image directory not found: {image_dir}")
        if not os.path.exists(label_dir):
            raise FileNotFoundError(f"Validation label directory not found: {label_dir}")

        self.image_paths = glob(os.path.join(image_dir, '**/*.jpg'), recursive=True)
        self.label_paths = glob(os.path.join(label_dir, '**/*.txt'), recursive=True)

        images_dict = {os.path.splitext(os.path.basename(p))[0]: p for p in self.image_paths}
        labels_dict = {os.path.splitext(os.path.basename(p))[0]: p for p in self.label_paths}

        common_keys = sorted(set(images_dict.keys()) & set(labels_dict.keys()))

        if len(common_keys) == 0:
            raise RuntimeError(f"No matching validation image-label pairs found in {image_dir} and {label_dir}!")

        self.pairs = [(images_dict[k], labels_dict[k]) for k in common_keys]

        total_boxes = 0
        for _, label_path in self.pairs:
            with open(label_path, 'r') as f:
                boxes = [line.strip().split() for line in f.readlines() if line.strip() and len(line.strip().split()) == 5]
                total_boxes += len(boxes)
        print(f"[Stats] Validation dataset: {len(self.pairs)} image-label pairs, {total_boxes} total bounding boxes")

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img_path, label_path = self.pairs[idx]
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading validation image {img_path}: {e}")
            return None, None

        boxes = []
        try:
            with open(label_path, 'r') as f:
                for line in f.readlines():
                    if line.strip() == '':
                        continue
                    parts = line.strip().split()
                    if len(parts) != 5:
                        print(f"Skipping malformed label in {label_path}: {line.strip()}")
                        continue
                    cls, x, y, w, h = map(float, parts)
                    if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
                        print(f"Warning: Invalid box coordinates in {label_path}: {[cls, x, y, w, h]}")
                        continue
                    boxes.append([cls, x, y, w, h])
        except Exception as e:
            print(f"Error reading validation label file {label_path}: {e}")
            return None, None

        if len(boxes) == 0:
            boxes = torch.zeros((0, 5), dtype=torch.float32)
        else:
            boxes = torch.tensor(boxes, dtype=torch.float32)

        if self.transform:
            img = self.transform(img)

        return img, boxes

val_image_dir = r"F:\human behaviour detection\widerface\WIDER_val\images"
val_label_dir = r"F:\human behaviour detection\widerface\WIDER_val\labels"

print(f"Using validation images from: {val_image_dir}")
print(f"Using validation labels from: {val_label_dir}")

val_transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
])

try:
    valid_dataset = YOLOValidDataset(val_image_dir, val_label_dir, transform=val_transform)
except Exception as e:
    print(f"Failed to create validation dataset: {e}")
    raise

def valid_collate_fn(batch):
    batch = [item for item in batch if item[0] is not None and item[1] is not None]
    if len(batch) == 0:
        raise ValueError("Validation batch is empty after filtering failed loads.")
    images = torch.stack([item[0] for item in batch])
    labels = [item[1] for item in batch]
    return images, labels

try:
    valid_dataloader = DataLoader(
        valid_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=0,
        pin_memory=torch.cuda.is_available(),
        collate_fn=valid_collate_fn
    )
except Exception as e:
    print(f"Failed to create validation DataLoader: {e}")
    raise

print(f"Validation samples: {len(valid_dataset)}")
if len(valid_dataset) > 0:
    try:
        images, labels = next(iter(valid_dataloader))
        print(f"Validation batch images shape: {images.shape}")
        print(f"Validation batch labels count: {len(labels)}")
        if len(labels) > 0:
            print(f"Example label shape for first image: {labels[0].shape}")
        else:
            print("No labels in the first validation batch.")
    except Exception as e:
        print(f"Error loading validation batch: {e}")
        raise
else:
    print("Validation dataset is empty!")

Using validation images from: F:\human behaviour detection\widerface\WIDER_val\images
Using validation labels from: F:\human behaviour detection\widerface\WIDER_val\labels
[Stats] Validation dataset: 3226 image-label pairs, 39697 total bounding boxes
Validation samples: 3226
Validation batch images shape: torch.Size([32, 3, 416, 416])
Validation batch labels count: 32
Example label shape for first image: torch.Size([39, 5])


In [None]:
import time
import torch
import torch.optim as optim

EPOCHS = 12  # Reduced to 12 epochs for CPU
best_val_loss = float('inf')
patience = 5
counter = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

try:
    model = TinyYOLOv2Debug(num_classes=1, B=2, S=13).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    criterion = YOLOLoss(S=13, B=2, C=1).to(device)
except Exception as e:
    print(f"Error initializing training components: {e}")
    raise

# Print dataset stats
print(f"[Stats] Training batches: {len(dataloader)}")
print(f"[Stats] Validation batches: {len(valid_dataloader)}")

total_start_time = time.time()
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0.0
    start_time = time.time()

    try:
        for batch_idx, (images, targets) in enumerate(dataloader):
            images = images.to(device)
            targets = [t.to(device) for t in targets]

            optimizer.zero_grad()
            predictions = model(images)
            loss = criterion(predictions, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()

            if (batch_idx + 1) % 10 == 0:
                print(f"Epoch [{epoch+1}/{EPOCHS}], Step [{batch_idx+1}/{len(dataloader)}], Loss: {loss.item():.4f}")
    except Exception as e:
        print(f"Error during training epoch {epoch+1}: {e}")
        raise

    avg_loss = epoch_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{EPOCHS}] Training Loss: {avg_loss:.4f}")

    model.eval()
    val_loss = 0.0
    try:
        with torch.no_grad():
            for images, labels in valid_dataloader:
                images = images.to(device)
                labels = [label.to(device) for label in labels]
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
    except Exception as e:
        print(f"Error during validation epoch {epoch+1}: {e}")
        raise

    val_loss /= len(valid_dataloader)
    print(f"Epoch [{epoch+1}/{EPOCHS}] Validation Loss: {val_loss:.4f}")

    scheduler.step()

    if val_loss < best_val_loss:
        improvement = best_val_loss - val_loss if best_val_loss != float('inf') else 0
        best_val_loss = val_loss
        counter = 0
        try:
            torch.save(model.state_dict(), "C:/widerface/best_model.pth")
            print(f"Best model saved with validation loss: {best_val_loss:.4f} (Improvement: {improvement:.4f})")
        except Exception as e:
            print(f"Error saving model: {e}")
            raise
    else:
        counter += 1
        print(f"No improvement in validation loss. Patience counter: {counter}/{patience}")
        if counter >= patience:
            print("Early stopping triggered.")
            break

    print(f"Epoch [{epoch+1}/{EPOCHS}] completed in {(time.time() - start_time):.2f}s")

print(f"Training completed in {(time.time() - total_start_time):.2f}s. Best model saved at C:/widerface/best_model.pth")


Using device: cpu
[Stats] Training batches: 403
[Stats] Validation batches: 101
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 176.4161, Conf: 11.4210, Class: 60.5766, Total: 0.6353
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 3706.6863, Conf: 10.1005, Class: 27.8180, Total: 7.7208
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 1157.9519, Conf: 10.5094, Class: 30.8645, Total: 4.1788
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 42166.5430, Conf: 10.1773, Class: 31.3156, Total: 150.2065
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 1704.4755, Conf: 9.6820, Class: 40

cell 2 output 
[Stats] Training dataset: 12876 image-label pairs, 159367 total bounding boxes
Total pairs: 12876
[Stats] Average bounding boxes per image: 12.38
Image type: <class 'torch.Tensor'>, size: torch.Size([3, 732, 1024])
Labels shape: torch.Size([2, 5]), example labels: tensor([[0.0000, 0.1240, 0.5273, 0.0098, 0.0164],
        [0.0000, 0.2461, 0.5260, 0.0117, 0.0191]])
cell 3 output 
[Stats] Training dataset: 12876 image-label pairs, 159367 total bounding boxes
Batch images shape: torch.Size([32, 3, 416, 416])
Number of label tensors: 32
Example label shape for first image: torch.Size([6, 5])
cell 6 output
Using device: cpu
After conv9 (final conv): torch.Size([32, 12, 13, 13])
After permute: torch.Size([32, 13, 13, 12])
Model output shape: torch.Size([32, 13, 13, 12])
[Debug] Loss components - Coord: 167.2659, Conf: 13.7766, Class: 34.9846, Total: 1.2415
Initial loss (untrained): 1.2415
Completed one training step successfully.                                                                                                                   
cell 7 output 
Using validation images from: F:\human behaviour detection\widerface\WIDER_val\images
Using validation labels from: F:\human behaviour detection\widerface\WIDER_val\labels
[Stats] Validation dataset: 3226 image-label pairs, 39697 total bounding boxes
Validation samples: 3226
Validation batch images shape: torch.Size([32, 3, 416, 416])
Validation batch labels count: 32
Example label shape for first image: torch.Size([39, 5])

In [None]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

criterion = YOLOLoss(S=13, B=2, C=1).to(device)

val_loss = 0.0

try:
    with torch.no_grad():
        for images, labels in valid_dataloader:
            images = images.to(device)
            labels = [label.to(device) for label in labels]
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
except Exception as e:
    print(f"Error during validation: {e}")
    raise

val_loss /= len(valid_dataloader)
print(f"Final Validation Loss: {val_loss:.4f}")

In [None]:
import os
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes
import torchvision.transforms.functional as F
from torchvision import transforms

model_path = r"C:\widerface\best_model.pth"
test_image_dir = r"F:\human behaviour detection\widerface\WIDER_test\images"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    model = TinyYOLOv2Debug(num_classes=1, B=2, S=13)
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    print("Checkpoint keys:", checkpoint.keys())
    if 'model_state_dict' in checkpoint:
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()
except Exception as e:
    print(f"Error loading model: {e}")
    raise

transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
])

def decode_yolo_predictions(preds, conf_thresh=0.05, iou_thresh=0.5, img_size=416):
    batch_size, S, _, num_attribs = preds.shape
    B, C = 2, 1
    cell_size = img_size / S

    anchors = torch.tensor([[1.5, 1.5], [3.0, 3.0]], device=preds.device)

    preds = preds.view(batch_size, S, S, B, 5 + C)
    confidences = torch.sigmoid(preds[..., 0])
    box_coords = preds[..., 1:5]
    class_probs = torch.sigmoid(preds[..., 5:])
    conf_scores = confidences * class_probs[..., 0]

    all_boxes = []
    all_scores = []
    all_classes = []

    for b in range(batch_size):
        boxes = []
        scores = []
        classes = []

        for i in range(S):
            for j in range(S):
                for k in range(B):
                    if conf_scores[b, i, j, k] > conf_thresh:
                        x_center = (torch.sigmoid(box_coords[b, i, j, k, 0]) + j) * cell_size
                        y_center = (torch.sigmoid(box_coords[b, i, j, k, 1]) + i) * cell_size
                        anchor_w = anchors[k, 0]
                        anchor_h = anchors[k, 1]
                        w = anchor_w * torch.exp(box_coords[b, i, j, k, 2]) * cell_size
                        h = anchor_h * torch.exp(box_coords[b, i, j, k, 3]) * cell_size

                        w = torch.abs(w)
                        h = torch.abs(h)
                        if w <= 0 or h <= 0:
                            continue

                        x1 = x_center - w / 2
                        y1 = y_center - h / 2
                        x2 = x_center + w / 2
                        y2 = y_center + h / 2

                        boxes.append([x1, y1, x2, y2])
                        scores.append(conf_scores[b, i, j, k])
                        classes.append(0)

        if boxes:
            boxes = torch.tensor(boxes, device=preds.device)
            scores = torch.tensor(scores, device=preds.device)
            classes = torch.tensor(classes, device=preds.device, dtype=torch.long)

            if boxes.shape[0] > 0:
                boxes_normalized = boxes / img_size
                keep = torchvision.ops.nms(boxes_normalized, scores, iou_thresh)
                boxes = boxes[keep]
                scores = scores[keep]
                classes = classes[keep]
            else:
                boxes = torch.empty((0, 4), device=preds.device)
                scores = torch.empty((0,), device=preds.device)
                classes = torch.empty((0,), device=preds.device, dtype=torch.long)
        else:
            boxes = torch.empty((0, 4), device=preds.device)
            scores = torch.empty((0,), device=preds.device)
            classes = torch.empty((0,), device=preds.device, dtype=torch.long)

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_classes.append(classes)

    return all_boxes[0], all_scores[0], all_classes[0]

def visualize_prediction(image_path):
    try:
        image = Image.open(image_path).convert("RGB")
    except Exception as e:
        print(f"Error loading test image {image_path}: {e}")
        return

    orig_w, orig_h = image.size
    img_tensor = transform(image).unsqueeze(0).to(device)

    try:
        with torch.no_grad():
            preds = model(img_tensor)
    except Exception as e:
        print(f"Error during inference on {image_path}: {e}")
        return

    boxes, scores, classes = decode_yolo_predictions(preds, conf_thresh=0.05)

    print(f"[Stats] {os.path.basename(image_path)}: {len(boxes)} detections")

    if boxes.numel() == 0:
        print(f"No detections in: {os.path.basename(image_path)}")
        return

    scale_x = orig_w / 416
    scale_y = orig_h / 416
    boxes[:, 0] *= scale_x
    boxes[:, 2] *= scale_x
    boxes[:, 1] *= scale_y
    boxes[:, 3] *= scale_y

    img_draw = F.to_tensor(image).mul(255).to(torch.uint8)
    img_with_boxes = draw_bounding_boxes(img_draw, boxes=boxes, colors="red", width=2)

    plt.figure(figsize=(8, 8))
    plt.imshow(img_with_boxes.permute(1, 2, 0).cpu())
    plt.axis('off')
    plt.title(os.path.basename(image_path))
    plt.show()

if not os.path.exists(test_image_dir):
    raise FileNotFoundError(f"Test image directory not found: {test_image_dir}")

image_extensions = {'.jpg', '.jpeg', '.png'}
test_image_paths = [
    os.path.join(test_image_dir, f)
    for f in os.listdir(test_image_dir)
    if os.path.splitext(f)[1].lower() in image_extensions
]

total_detections = 0
for img_path in test_image_paths:
    visualize_prediction(img_path)
    boxes, _, _ = decode_yolo_predictions(model(transform(Image.open(img_path).convert("RGB")).unsqueeze(0).to(device)), conf_thresh=0.05)
    total_detections += len(boxes)

print(f"[Stats] Total detections across {len(test_image_paths)} test images: {total_detections}")

In [None]:
import cv2
import torch
import torchvision.transforms as T
from torchvision.utils import draw_bounding_boxes

transform = T.Compose([
    T.ToPILImage(),
    T.Resize((416, 416)),
    T.ToTensor(),
])

class TinyYOLOv2Debug(torch.nn.Module):
    def __init__(self, num_classes=1, B=2, S=13):
        super(TinyYOLOv2Debug, self).__init__()
        self.S = S
        self.B = B
        self.C = num_classes
        self.anchors = torch.tensor([[1.5, 1.5], [3.0, 3.0]])

        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 16, 3, padding=1),
            torch.nn.BatchNorm2d(16),
            torch.nn.LeakyReLU(0.1),
            torch.nn.MaxPool2d(2, 2)
        )
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(16, 32, 3, padding=1),
            torch.nn.BatchNorm2d(32),
            torch.nn.LeakyReLU(0.1),
            torch.nn.MaxPool2d(2, 2)
        )
        self.conv3 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.BatchNorm2d(64),
            torch.nn.LeakyReLU(0.1),
            torch.nn.MaxPool2d(2, 2)
        )
        self.conv4 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, 3, padding=1),
            torch.nn.BatchNorm2d(128),
            torch.nn.LeakyReLU(0.1),
            torch.nn.MaxPool2d(2, 2)
        )
        self.conv5 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, 3, padding=1),
            torch.nn.BatchNorm2d(256),
            torch.nn.LeakyReLU(0.1),
            torch.nn.MaxPool2d(2, 2)
        )
        self.conv6 = torch.nn.Sequential(
            torch.nn.Conv2d(256, 512, 3, padding=1),
            torch.nn.BatchNorm2d(512),
            torch.nn.LeakyReLU(0.1),
        )
        self.conv7 = torch.nn.Sequential(
            torch.nn.Conv2d(512, 1024, 3, padding=1),
            torch.nn.BatchNorm2d(1024),
            torch.nn.LeakyReLU(0.1)
        )
        self.conv8 = torch.nn.Sequential(
            torch.nn.Conv2d(1024, 1024, 3, padding=1),
            torch.nn.BatchNorm2d(1024),
            torch.nn.LeakyReLU(0.1)
        )
        self.conv9 = torch.nn.Conv2d(1024, self.B * (5 + self.C), 1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = x.permute(0, 2, 3, 1)
        return x

def decode_yolo_predictions(preds, conf_thresh=0.05, iou_thresh=0.5, img_size=416):
    batch_size, S, _, num_attribs = preds.shape
    B, C = 2, 1
    cell_size = img_size / S

    anchors = torch.tensor([[1.5, 1.5], [3.0, 3.0]], device=preds.device)

    preds = preds.view(batch_size, S, S, B, 5 + C)
    confidences = torch.sigmoid(preds[..., 0])
    box_coords = preds[..., 1:5]
    class_probs = torch.sigmoid(preds[..., 5:])
    conf_scores = confidences * class_probs[..., 0]

    all_boxes = []
    all_scores = []
    all_classes = []

    for b in range(batch_size):
        boxes = []
        scores = []
        classes = []

        for i in range(S):
            for j in range(S):
                for k in range(B):
                    if conf_scores[b, i, j, k] > conf_thresh:
                        x_center = (torch.sigmoid(box_coords[b, i, j, k, 0]) + j) * cell_size
                        y_center = (torch.sigmoid(box_coords[b, i, j, k, 1]) + i) * cell_size
                        anchor_w = anchors[k, 0]
                        anchor_h = anchors[k, 1]
                        w = anchor_w * torch.exp(box_coords[b, i, j, k, 2]) * cell_size
                        h = anchor_h * torch.exp(box_coords[b, i, j, k, 3]) * cell_size

                        w = torch.abs(w)
                        h = torch.abs(h)
                        if w <= 0 or h <= 0:
                            continue

                        x1 = x_center - w / 2
                        y1 = y_center - h / 2
                        x2 = x_center + w / 2
                        y2 = y_center + h / 2

                        boxes.append([x1, y1, x2, y2])
                        scores.append(conf_scores[b, i, j, k])
                        classes.append(0)

        if boxes:
            boxes = torch.tensor(boxes, device=preds.device)
            scores = torch.tensor(scores, device=preds.device)
            classes = torch.tensor(classes, device=preds.device, dtype=torch.long)

            if boxes.shape[0] > 0:
                boxes_normalized = boxes / img_size
                keep = torchvision.ops.nms(boxes_normalized, scores, iou_thresh)
                boxes = boxes[keep]
                scores = scores[keep]
                classes = classes[keep]
            else:
                boxes = torch.empty((0, 4), device=preds.device)
                scores = torch.empty((0,), device=preds.device)
                classes = torch.empty((0,), device=preds.device, dtype=torch.long)
        else:
            boxes = torch.empty((0, 4), device=preds.device)
            scores = torch.empty((0,), device=preds.device)
            classes = torch.empty((0,), device=preds.device, dtype=torch.long)

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_classes.append(classes)

    return all_boxes[0], all_scores[0], all_classes[0]

def realtime_inference(model, device, conf_thresh=0.05, iou_thresh=0.5):
    model.eval()
    try:
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            raise RuntimeError("Error: Could not open webcam")
    except Exception as e:
        print(f"Webcam error: {e}")
        return

    total_frames = 0
    total_detections = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            total_frames += 1
            frame_h, frame_w = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            input_tensor = transform(rgb_frame).unsqueeze(0).to(device)

            try:
                with torch.no_grad():
                    preds = model(input_tensor)
                    boxes, scores, classes = decode_yolo_predictions(preds, conf_thresh, iou_thresh)
            except Exception as e:
                print(f"Error during inference: {e}")
                continue

            total_detections += len(boxes)
            print(f"[Stats] Frame {total_frames}: {len(boxes)} detections")

            img_tensor = torch.from_numpy(rgb_frame).permute(2, 0, 1).to(device)
            img_tensor = img_tensor.to(torch.uint8)

            if boxes.numel() > 0:
                valid_mask = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
                boxes = boxes[valid_mask]
                scores = scores[valid_mask]
                classes = classes[valid_mask]

                scale_x = frame_w / 416
                scale_y = frame_h / 416
                boxes[:, 0] *= scale_x
                boxes[:, 2] *= scale_x
                boxes[:, 1] *= scale_y
                boxes[:, 3] *= scale_y

                if boxes.numel() > 0:
                    boxes = boxes.to(torch.int)
                    img_with_boxes = draw_bounding_boxes(img_tensor, boxes=boxes, colors="red", width=2)
                    display_frame = img_with_boxes.permute(1, 2, 0).cpu().numpy()
                    display_frame = cv2.cvtColor(display_frame, cv2.COLOR_RGB2BGR)
                else:
                    display_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
                    cv2.putText(display_frame, "No faces detected", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                display_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
                cv2.putText(display_frame, "No faces detected", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            fps = cap.get(cv2.CAP_PROP_FPS) or 30
            cv2.putText(display_frame, f"FPS: {fps:.1f}", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow('YOLOv2 Face Detection', display_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"[Stats] Total frames: {total_frames}, Total detections: {total_detections}")

if __name__ == "__main__":
    model_path = r"C:\widerface\best_model.pth"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = TinyYOLOv2Debug(num_classes=1, B=2, S=13)
    try:
        checkpoint = torch.load(model_path, map_location=device, weights_only=True)
        if 'model_state_dict' in checkpoint:
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            model.load_state_dict(checkpoint)
        print(f"Model loaded successfully from {model_path}")
    except Exception as e:
        print(f"Error loading model: {e}")
        exit()

    model.to(device)
    print(f"Running on device: {device}")

    realtime_inference(model, device, conf_thresh=0.05, iou_thresh=0.5)