In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/rcupdataset-yoloformat/README.dataset.txt
/kaggle/input/rcupdataset-yoloformat/README.roboflow.txt
/kaggle/input/rcupdataset-yoloformat/data.yaml
/kaggle/input/rcupdataset-yoloformat/valid/labels/rgb_raw_1623432272-690304_jpg.rf.d1d5c99dd8809f0de7cf4576921490a5.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/frame0298_jpg.rf.61ce4adf2e4323db102da52d0acc1be6.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/day5_dataset_053_png.rf.8828289ea4229729ce63f9a8bd284608.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/containers_017_png.rf.bbd9ebd099c339898d762436df60f095.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/rgb_raw_1624015147-642744_jpg.rf.f943ee09e9702d835a0df5d1a1453689.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/m20_100_014_jpg.rf.aa5244890c4764461777eaf08ad269d0.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/bearing_017_jpg.rf.910ec381ed11a6c84396a7f97872cc8e.txt
/kaggle/input/rcupdataset-yoloformat/valid/labels/frame_ver4_205

In [2]:
import yaml

# Load the data from the YAML file
with open('/kaggle/input/rcupdataset-yoloformat/data.yaml', 'r') as file:
    data = yaml.safe_load(file)

# Extract paths
train_path = data['train']
val_path = data['val']
test_path = data.get('test', None)  # test might be optional

print(f"Train images path: {train_path}")
print(f"Validation images path: {val_path}")
if test_path:
    print(f"Test images path: {test_path}")

Train images path: ../train/images
Validation images path: ../valid/images
Test images path: ../test/images


In [3]:
from torch.utils.data import Dataset, DataLoader
import os

print(f"Train image directory: {train_path}")
print(f"Validation image directory: {val_path}")

Train image directory: ../train/images
Validation image directory: ../valid/images


In [4]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import os

class YoloDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms
        self.image_files = sorted(os.listdir(image_dir))
        self.label_files = sorted(os.listdir(label_dir))

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Load image
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        img = Image.open(img_path).convert("RGB")

        # Load annotations in YOLO format
        label_path = os.path.join(self.label_dir, self.label_files[idx])
        boxes = []
        labels = []

        with open(label_path, "r") as file:
            for line in file:
                class_id, x_center, y_center, width, height = map(float, line.split())
                labels.append(int(class_id))

                # Convert from YOLO format to COCO format
                img_w, img_h = img.size
                x_min = (x_center - width / 2) * img_w
                y_min = (y_center - height / 2) * img_h
                box_w = width * img_w
                box_h = height * img_h
                boxes.append([x_min, y_min, x_min + box_w, y_min + box_h])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels}

        if self.transforms:
            img = self.transforms(img)

        return img, target

# Example usage:
image_dir = "/kaggle/input/rcupdataset-yoloformat/train/images"  # Directory with images
label_dir = "/kaggle/input/rcupdataset-yoloformat/train/labels"  # Directory with YOLO annotations
dataset = YoloDataset(image_dir, label_dir, transforms=F.to_tensor)


In [5]:
def get_model(num_classes):
    # Load a pre-trained Faster R-CNN model with ResNet-50 backbone
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    # Update the classifier head with the number of classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    return model

num_classes = 26  #25 classes + background
model = get_model(num_classes)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 227MB/s] 


In [1]:
from torch.utils.data import DataLoader
import torch.optim as optim

# DataLoader
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Model, optimizer, and training loop setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {losses.item()}")

print("Training complete!")


NameError: name 'dataset' is not defined

In [7]:
import torch
import numpy as np
from collections import defaultdict

class YOLOStyleMAP:
    def __init__(self, num_classes, iou_thresholds=[0.5]):
        self.num_classes = num_classes
        self.iou_thresholds = iou_thresholds
        self.reset()
    
    def reset(self):
        """Reset accumulated statistics"""
        self.stats = []  # List to store [true_positives, pred_scores, pred_labels]
    
    def box_iou(self, box1, box2):
        """
        Calculate IoU between two boxes
        boxes format: [x1, y1, x2, y2]
        """
        box1 = box1.cpu()
        box2 = box2.cpu()
        
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Get the intersection rectangle
        inter_rect_x1 = torch.max(b1_x1, b2_x1)
        inter_rect_y1 = torch.max(b1_y1, b2_y1)
        inter_rect_x2 = torch.min(b1_x2, b2_x2)
        inter_rect_y2 = torch.min(b1_y2, b2_y2)

        # Intersection area
        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, min=0) * \
                    torch.clamp(inter_rect_y2 - inter_rect_y1, min=0)

        # Union Area
        b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
        
        union = b1_area + b2_area - inter_area + 1e-6
        iou = inter_area / union
        
        return iou
    
    def update(self, predictions):
        """Update statistics with new predictions"""
        for pred in predictions:
            pred_boxes = pred['boxes']
            pred_scores = pred['scores']
            pred_labels = pred['labels']
            
            self.stats.append([pred_boxes, pred_scores, pred_labels])
    
    def compute(self):
        """Compute mAP for all classes"""
        maps = []
        for iou_threshold in self.iou_thresholds:
            ap_per_class = []
            
            for class_id in range(self.num_classes):
                true_positives = []
                scores = []
                
                for pred_boxes, pred_scores, pred_labels in self.stats:
                    # Get predictions for this class
                    class_mask = pred_labels == class_id
                    if not class_mask.any():
                        continue
                        
                    class_boxes = pred_boxes[class_mask]
                    class_scores = pred_scores[class_mask]
                    
                    # Sort by score
                    sorted_indices = torch.argsort(class_scores, descending=True)
                    class_boxes = class_boxes[sorted_indices]
                    class_scores = class_scores[sorted_indices]
                    
                    # Mark duplicates
                    duplicate_mask = torch.zeros(len(class_boxes), dtype=torch.bool)
                    for i in range(len(class_boxes)):
                        if duplicate_mask[i]:
                            continue
                        # Check IoU with other boxes
                        for j in range(i + 1, len(class_boxes)):
                            if self.box_iou(class_boxes[i], class_boxes[j]) > iou_threshold:
                                duplicate_mask[j] = True
                    
                    # Add non-duplicate detections to results
                    true_positives.extend([not dup for dup in duplicate_mask])
                    scores.extend(class_scores[~duplicate_mask].tolist())
                
                if not scores:
                    ap_per_class.append(0)
                    continue
                
                # Compute precision-recall curve
                true_positives = np.array(true_positives)
                scores = np.array(scores)
                
                # Sort by score
                sorted_indices = np.argsort(-scores)
                true_positives = true_positives[sorted_indices]
                
                # Compute cumulative true positives
                tp_cumsum = np.cumsum(true_positives)
                total_detections = len(true_positives)
                
                # Compute precision and recall
                precision = tp_cumsum / (np.arange(total_detections) + 1)
                recall = tp_cumsum / (tp_cumsum[-1] + 1e-6)
                
                # Compute average precision
                ap = 0
                for r in np.linspace(0, 1, 11):  # 11-point interpolation
                    if not scores.any():
                        continue
                    mask = recall >= r
                    if mask.any():
                        ap += np.max(precision[mask]) / 11
                
                ap_per_class.append(ap)
            
            maps.append(np.mean(ap_per_class))
        
        # Return mAP averaged over IoU thresholds
        return np.mean(maps)

# Modified evaluation code
def evaluate_model(model, data_loader, device, num_classes):
    # Initialize metric
    map_metric = YOLOStyleMAP(num_classes=num_classes)
    
    # Evaluation loop
    model.eval()
    with torch.no_grad():
        for images, *_ in data_loader:
            images = [img.to(device) for img in images]
            predictions = model(images)
            
            # Update metrics
            map_metric.update(predictions)
        
        # Compute final mAP
        map_score = map_metric.compute()
        
        torch.save(model.state_dict(), 'model.pth')
        
    return map_score

# Usage example:
num_classes = 25 
map_score = evaluate_model(model, data_loader, device, num_classes)
print(f"mAP@0.5: {map_score:.4f}")

torch.save(model.state_dict(), 'model.pth')

mAP@0.5: 0.8727


In [8]:
import torch
import numpy as np
from collections import defaultdict

class YOLOStyleMAP:
    def __init__(self, num_classes, iou_thresholds=None):
        # Set IoU thresholds from 0.5 to 0.95 with a step of 0.05 if not provided
        self.num_classes = num_classes
        self.iou_thresholds = iou_thresholds if iou_thresholds else np.arange(0.5, 1.0, 0.05)
        self.reset()
    
    def reset(self):
        """Reset accumulated statistics"""
        self.stats = []  # List to store [true_positives, pred_scores, pred_labels]
    
    def box_iou(self, box1, box2):
        """
        Calculate IoU between two boxes
        boxes format: [x1, y1, x2, y2]
        """
        box1 = box1.cpu()
        box2 = box2.cpu()
        
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Get the intersection rectangle
        inter_rect_x1 = torch.max(b1_x1, b2_x1)
        inter_rect_y1 = torch.max(b1_y1, b2_y1)
        inter_rect_x2 = torch.min(b1_x2, b2_x2)
        inter_rect_y2 = torch.min(b1_y2, b2_y2)

        # Intersection area
        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, min=0) * \
                    torch.clamp(inter_rect_y2 - inter_rect_y1, min=0)

        # Union Area
        b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
        
        union = b1_area + b2_area - inter_area + 1e-6
        iou = inter_area / union
        
        return iou
    
    def update(self, predictions):
        """Update statistics with new predictions"""
        for pred in predictions:
            pred_boxes = pred['boxes']
            pred_scores = pred['scores']
            pred_labels = pred['labels']
            
            self.stats.append([pred_boxes, pred_scores, pred_labels])
    
    def compute(self):
        """Compute mAP for all classes and IoU thresholds"""
        maps = []
        for iou_threshold in self.iou_thresholds:
            ap_per_class = []
            
            for class_id in range(self.num_classes):
                true_positives = []
                scores = []
                
                for pred_boxes, pred_scores, pred_labels in self.stats:
                    # Get predictions for this class
                    class_mask = pred_labels == class_id
                    if not class_mask.any():
                        continue
                        
                    class_boxes = pred_boxes[class_mask]
                    class_scores = pred_scores[class_mask]
                    
                    # Sort by score
                    sorted_indices = torch.argsort(class_scores, descending=True)
                    class_boxes = class_boxes[sorted_indices]
                    class_scores = class_scores[sorted_indices]
                    
                    # Mark duplicates
                    duplicate_mask = torch.zeros(len(class_boxes), dtype=torch.bool)
                    for i in range(len(class_boxes)):
                        if duplicate_mask[i]:
                            continue
                        # Check IoU with other boxes
                        for j in range(i + 1, len(class_boxes)):
                            if self.box_iou(class_boxes[i], class_boxes[j]) > iou_threshold:
                                duplicate_mask[j] = True
                    
                    # Add non-duplicate detections to results
                    true_positives.extend([not dup for dup in duplicate_mask])
                    scores.extend(class_scores[~duplicate_mask].tolist())
                
                if not scores:
                    ap_per_class.append(0)
                    continue
                
                # Compute precision-recall curve
                true_positives = np.array(true_positives)
                scores = np.array(scores)
                
                # Sort by score
                sorted_indices = np.argsort(-scores)
                true_positives = true_positives[sorted_indices]
                
                # Compute cumulative true positives
                tp_cumsum = np.cumsum(true_positives)
                total_detections = len(true_positives)
                
                # Compute precision and recall
                precision = tp_cumsum / (np.arange(total_detections) + 1)
                recall = tp_cumsum / (tp_cumsum[-1] + 1e-6)
                
                # Compute average precision
                ap = 0
                for r in np.linspace(0, 1, 11):  # 11-point interpolation
                    if not scores.any():
                        continue
                    mask = recall >= r
                    if mask.any():
                        ap += np.max(precision[mask]) / 11
                
                ap_per_class.append(ap)  
            maps.append(np.mean(ap_per_class))
        
        # Return mAP averaged over IoU thresholds
        return np.mean(maps)

# Modified evaluation code
def evaluate_model(model, data_loader, device, num_classes):
    # Initialize metric
    map_metric = YOLOStyleMAP(num_classes=num_classes)
    
    # Evaluation loop
    model.eval()
    with torch.no_grad():
        for images, *_ in data_loader:
            images = [img.to(device) for img in images]
            predictions = model(images)
            
            # Update metrics
            map_metric.update(predictions)
        
        # Compute final mAP
        map_score = map_metric.compute()
        
    return map_score

# Usage example:
num_classes = 25 
map_score = evaluate_model(model, data_loader, device, num_classes)
print(f"mAP@[0.5:0.95]: {map_score:.4f}")


mAP@[0.5:0.95]: 0.8727


In [9]:
model.eval()
with torch.no_grad():
    for images, _ in data_loader:
        images = [img.to(device) for img in images]
        predictions = model(images)

        for prediction in predictions:
            print("Boxes:", prediction["boxes"])
            print("Labels:", prediction["labels"])

Boxes: tensor([[122.8239, 111.5097, 168.2398, 175.6754],
        [217.9513, 159.8420, 246.1437, 204.1642],
        [ 85.6476, 233.4259, 107.5903, 264.6145],
        [243.0186, 235.7713, 266.1961, 263.9336],
        [147.4974, 203.4732, 175.6845, 248.3092],
        [  4.0426,  98.8138,  95.5285, 258.1229],
        [218.8875, 123.4639, 269.1006, 180.9919],
        [252.1228, 128.0131, 372.8555, 284.0926],
        [218.9099, 122.2912, 264.2052, 199.6346],
        [216.9163, 123.3130, 267.0181, 186.5845],
        [252.5060, 121.5478, 380.9397, 286.0861],
        [251.7303, 114.2923, 385.3769, 281.5461],
        [247.1301, 114.3035, 384.2124, 266.6703],
        [215.2848, 124.4963, 268.6215, 193.3953],
        [  0.0000,  96.4780,  90.4702, 272.6057],
        [248.8779, 133.1281, 387.1260, 263.7239],
        [255.5217, 119.9892, 381.1946, 281.3893],
        [254.5932, 114.7754, 378.2238, 277.9346]], device='cuda:0')
Labels: tensor([11, 15,  6,  6, 17, 24, 10, 23, 15, 18, 10, 24,  8, 17, 15,

In [None]:
/kaggle/working/model.pth