In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.2


In [3]:
import os
import glob
import xmltodict
import torch
import torch.nn as nn
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.ops import nms, box_iou
from sklearn.metrics import precision_recall_curve, average_precision_score
import seaborn as sns
from collections import defaultdict
import pandas as pd
from tqdm import tqdm

  check_for_updates()


In [4]:
# VOC Classes
VOC_CLASSES = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

class VOCDataset(Dataset):
    def __init__(self, root_dir, transforms=None):
        self.root_dir = root_dir
        self.transforms = transforms
        self.image_paths = sorted(glob.glob(os.path.join(root_dir, "JPEGImages", "*.jpg")))
        self.annotation_paths = sorted(glob.glob(os.path.join(root_dir, "Annotations", "*.xml")))

        self.class_names = VOC_CLASSES
        self.class_dict = {k: v for v, k in enumerate(self.class_names)}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        anno_path = self.annotation_paths[idx]

        img = Image.open(img_path).convert("RGB")
        img = np.array(img)

        with open(anno_path) as f:
            anno = xmltodict.parse(f.read())["annotation"]

        boxes = []
        labels = []

        objects = anno.get("object", [])
        if not isinstance(objects, list):
            objects = [objects]

        for obj in objects:
            label = self.class_dict[obj["name"]]
            bbox = obj["bndbox"]
            box = [
                float(bbox["xmin"]),
                float(bbox["ymin"]),
                float(bbox["xmax"]),
                float(bbox["ymax"])
            ]
            boxes.append(box)
            labels.append(label)

        if self.transforms:
            transformed = self.transforms(image=img, bboxes=boxes, class_labels=labels)
            img = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["class_labels"]

        target = {"boxes": torch.tensor(boxes, dtype=torch.float32),
                  "labels": torch.tensor(labels, dtype=torch.int64),
                  "image_path": img_path}

        return img, target

In [5]:



class YOLOResNet(nn.Module):
    def __init__(self, num_classes=20, S=7, B=2):
        super(YOLOResNet, self).__init__()
        self.S = S
        self.B = B
        self.C = num_classes

        resnet = models.resnet34(pretrained=False)
        self.backbone = nn.Sequential(
            *list(resnet.children())[:-2],
            nn.AdaptiveAvgPool2d((S, S))
        )
        self.head = nn.Sequential(
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(1024, self.C + self.B * 5, kernel_size=1)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x.permute(0, 2, 3, 1)


In [6]:
def load_model(model_path, device):
    """Load the trained model"""
    model = YOLOResNet(S=7)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model


In [7]:
def decode_yolo_output(output, conf_threshold=0.5, nms_threshold=0.4, img_size=448):
    """Decode YOLO output to bounding boxes"""
    batch_size = output.shape[0]
    all_boxes = []
    
    for b in range(batch_size):
        boxes = []
        scores = []
        labels = []
        
        pred = output[b]  # [7, 7, 30]
        
        for row in range(7):
            for col in range(7):
                cell = pred[row, col]
                
                # Get class probabilities
                class_probs = cell[10:]
                class_id = torch.argmax(class_probs).item()
                class_score = class_probs[class_id].item()
                
                # Check both bounding boxes in the cell
                for box_idx in range(2):
                    x, y, w, h, conf = cell[box_idx*5:(box_idx+1)*5]
                    
                    # Calculate final confidence
                    final_conf = conf.item() * class_score
                    
                    if final_conf > conf_threshold:
                        # Convert to absolute coordinates
                        center_x = (col + x.item()) / 7
                        center_y = (row + y.item()) / 7
                        width = w.item()
                        height = h.item()
                        
                        # Convert to corner coordinates
                        x1 = (center_x - width / 2) * img_size
                        y1 = (center_y - height / 2) * img_size
                        x2 = (center_x + width / 2) * img_size
                        y2 = (center_y + height / 2) * img_size
                        
                        # Clamp to image boundaries
                        x1 = max(0, min(x1, img_size))
                        y1 = max(0, min(y1, img_size))
                        x2 = max(0, min(x2, img_size))
                        y2 = max(0, min(y2, img_size))
                        
                        if x2 > x1 and y2 > y1:  # Valid box
                            boxes.append([x1, y1, x2, y2])
                            scores.append(final_conf)
                            labels.append(class_id)
        
        if len(boxes) > 0:
            boxes = torch.tensor(boxes)
            scores = torch.tensor(scores)
            labels = torch.tensor(labels)
            
            # Apply NMS
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]
        else:
            boxes = torch.empty((0, 4))
            scores = torch.empty(0)
            labels = torch.empty(0, dtype=torch.long)
        
        all_boxes.append({
            'boxes': boxes,
            'scores': scores,
            'labels': labels
        })
    
    return all_boxes


In [8]:
def calculate_iou(box1, box2):
    """Calculate IoU between two boxes"""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    if x2 <= x1 or y2 <= y1:
        return 0.0
    
    intersection = (x2 - x1) * (y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection
    
    return intersection / union if union > 0 else 0.0

In [9]:
def evaluate_detection_metrics(model, dataloader, device, iou_thresholds=[0.5, 0.75], conf_threshold=0.5):
    """Comprehensive evaluation of object detection metrics"""
    model.eval()
    
    # Storage for all predictions and ground truths
    all_predictions = []
    all_ground_truths = []
    
    # Per-class metrics storage
    class_metrics = defaultdict(lambda: {
        'tp': defaultdict(int), 'fp': defaultdict(int), 'fn': defaultdict(int),
        'predictions': [], 'ground_truths': []
    })
    
    print("🔄 Running evaluation...")
    
    with torch.no_grad():
        for batch_idx, (imgs, targets) in enumerate(tqdm(dataloader, desc="Evaluating")):
            imgs = torch.stack(imgs).to(device)
            outputs = model(imgs)
            
            # Decode predictions
            predictions = decode_yolo_output(outputs.cpu(), conf_threshold=conf_threshold)
            
            for i in range(len(predictions)):
                pred = predictions[i]
                target = targets[i]
                
                # Store for overall metrics
                all_predictions.append(pred)
                all_ground_truths.append(target)
                
                # Store per-class data
                for label in target['labels']:
                    class_id = label.item()
                    class_metrics[class_id]['ground_truths'].extend(target['boxes'][target['labels'] == label])
                
                for j, label in enumerate(pred['labels']):
                    class_id = label.item()
                    class_metrics[class_id]['predictions'].append({
                        'box': pred['boxes'][j],
                        'score': pred['scores'][j].item()
                    })
    
    # Calculate metrics for different IoU thresholds
    results = {}
    
    for iou_thresh in iou_thresholds:
        print(f"\n📊 Calculating metrics for IoU threshold: {iou_thresh}")
        
        # Overall metrics
        total_tp, total_fp, total_fn = 0, 0, 0
        all_precisions, all_recalls = [], []
        class_aps = []
        
        # Per-class evaluation
        class_results = {}
        
        for class_id in range(len(VOC_CLASSES)):
            class_name = VOC_CLASSES[class_id]
            
            # Get predictions and ground truths for this class
            pred_data = []
            gt_boxes = []
            
            # Collect all predictions for this class across all images
            for img_idx, pred in enumerate(all_predictions):
                mask = pred['labels'] == class_id
                if mask.sum() > 0:
                    for j in range(mask.sum()):
                        pred_data.append({
                            'image_id': img_idx,
                            'box': pred['boxes'][mask][j],
                            'score': pred['scores'][mask][j].item()
                        })
            
            # Collect all ground truths for this class
            for img_idx, target in enumerate(all_ground_truths):
                mask = target['labels'] == class_id
                if mask.sum() > 0:
                    for box in target['boxes'][mask]:
                        gt_boxes.append({
                            'image_id': img_idx,
                            'box': box,
                            'used': False
                        })
            
            # Sort predictions by confidence
            pred_data.sort(key=lambda x: x['score'], reverse=True)
            
            # Calculate TP, FP for this class
            tp, fp = 0, 0
            
            for pred in pred_data:
                best_iou = 0
                best_gt_idx = -1
                
                # Find best matching ground truth
                for gt_idx, gt in enumerate(gt_boxes):
                    if gt['image_id'] == pred['image_id'] and not gt['used']:
                        iou = calculate_iou(pred['box'], gt['box'])
                        if iou > best_iou:
                            best_iou = iou
                            best_gt_idx = gt_idx
                
                # Check if it's a true positive
                if best_iou >= iou_thresh:
                    tp += 1
                    gt_boxes[best_gt_idx]['used'] = True
                else:
                    fp += 1
            
            fn = len(gt_boxes) - tp
            
            # Calculate precision, recall, F1
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
            
            # Calculate AP using precision-recall curve
            if len(pred_data) > 0 and len(gt_boxes) > 0:
                # Create binary labels for AP calculation
                y_true = []
                y_scores = []
                
                for pred in pred_data:
                    best_iou = 0
                    for gt in gt_boxes:
                        if gt['image_id'] == pred['image_id']:
                            iou = calculate_iou(pred['box'], gt['box'])
                            best_iou = max(best_iou, iou)
                    
                    y_true.append(1 if best_iou >= iou_thresh else 0)
                    y_scores.append(pred['score'])
                
                if len(set(y_true)) > 1:  # Need both positive and negative samples
                    ap = average_precision_score(y_true, y_scores)
                else:
                    ap = 0.0
            else:
                ap = 0.0
            
            class_results[class_name] = {
                'tp': tp, 'fp': fp, 'fn': fn,
                'precision': precision, 'recall': recall, 'f1': f1, 'ap': ap,
                'num_predictions': len(pred_data), 'num_ground_truths': len(gt_boxes)
            }
            
            class_aps.append(ap)
            total_tp += tp
            total_fp += fp
            total_fn += fn
        
        # Overall metrics
        overall_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
        overall_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
        overall_f1 = 2 * overall_precision * overall_recall / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0
        mAP = np.mean(class_aps)
        
        results[f'iou_{iou_thresh}'] = {
            'overall': {
                'mAP': mAP,
                'precision': overall_precision,
                'recall': overall_recall,
                'f1': overall_f1,
                'total_tp': total_tp,
                'total_fp': total_fp,
                'total_fn': total_fn
            },
            'per_class': class_results
        }
    
    return results


In [10]:
def print_detailed_results(results):
    """Print detailed evaluation results"""
    print("\n" + "="*80)
    print("📈 COMPREHENSIVE OBJECT DETECTION EVALUATION RESULTS")
    print("="*80)
    
    for iou_key, metrics in results.items():
        iou_thresh = iou_key.split('_')[1]
        print(f"\n🎯 IoU Threshold: {iou_thresh}")
        print("-" * 50)
        
        overall = metrics['overall']
        print(f"📊 Overall Metrics:")
        print(f"   mAP:       {overall['mAP']:.4f}")
        print(f"   Precision: {overall['precision']:.4f}")
        print(f"   Recall:    {overall['recall']:.4f}")
        print(f"   F1-Score:  {overall['f1']:.4f}")
        print(f"   Total TP:  {overall['total_tp']}")
        print(f"   Total FP:  {overall['total_fp']}")
        print(f"   Total FN:  {overall['total_fn']}")
        
        print(f"\n📋 Per-Class Results:")
        print(f"{'Class':<15} {'AP':<8} {'Prec':<8} {'Rec':<8} {'F1':<8} {'TP':<5} {'FP':<5} {'FN':<5}")
        print("-" * 70)
        
        for class_name, class_metrics in metrics['per_class'].items():
            print(f"{class_name:<15} "
                  f"{class_metrics['ap']:<8.4f} "
                  f"{class_metrics['precision']:<8.4f} "
                  f"{class_metrics['recall']:<8.4f} "
                  f"{class_metrics['f1']:<8.4f} "
                  f"{class_metrics['tp']:<5} "
                  f"{class_metrics['fp']:<5} "
                  f"{class_metrics['fn']:<5}")


In [11]:







def create_evaluation_plots(results, save_dir="evaluation_plots"):
    """Create visualization plots for the evaluation results"""
    os.makedirs(save_dir, exist_ok=True)
    
    # Extract data for plotting
    iou_50_results = results['iou_0.5']['per_class']
    iou_75_results = results['iou_0.75']['per_class']
    
    # Prepare data
    classes = list(iou_50_results.keys())
    ap_50 = [iou_50_results[cls]['ap'] for cls in classes]
    ap_75 = [iou_75_results[cls]['ap'] for cls in classes]
    precision_50 = [iou_50_results[cls]['precision'] for cls in classes]
    recall_50 = [iou_50_results[cls]['recall'] for cls in classes]
    f1_50 = [iou_50_results[cls]['f1'] for cls in classes]
    
    # 1. AP comparison plot
    plt.figure(figsize=(15, 8))
    x = np.arange(len(classes))
    width = 0.35
    
    plt.bar(x - width/2, ap_50, width, label='AP@0.5', alpha=0.8)
    plt.bar(x + width/2, ap_75, width, label='AP@0.75', alpha=0.8)
    
    plt.xlabel('Classes')
    plt.ylabel('Average Precision')
    plt.title('Average Precision per Class (IoU 0.5 vs 0.75)')
    plt.xticks(x, classes, rotation=45, ha='right')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(f"{save_dir}/ap_comparison.png", dpi=300, bbox_inches='tight')
    plt.close()
    
    # 2. Precision-Recall-F1 plot
    plt.figure(figsize=(15, 8))
    x = np.arange(len(classes))
    width = 0.25
    
    plt.bar(x - width, precision_50, width, label='Precision', alpha=0.8)
    plt.bar(x, recall_50, width, label='Recall', alpha=0.8)
    plt.bar(x + width, f1_50, width, label='F1-Score', alpha=0.8)
    
    plt.xlabel('Classes')
    plt.ylabel('Score')
    plt.title('Precision, Recall, and F1-Score per Class (IoU@0.5)')
    plt.xticks(x, classes, rotation=45, ha='right')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(f"{save_dir}/precision_recall_f1.png", dpi=300, bbox_inches='tight')
    plt.close()
    
    # 3. Summary metrics heatmap
    metrics_data = []
    for cls in classes:
        metrics_data.append([
            iou_50_results[cls]['ap'],
            iou_50_results[cls]['precision'],
            iou_50_results[cls]['recall'],
            iou_50_results[cls]['f1']
        ])
    
    plt.figure(figsize=(8, 12))
    sns.heatmap(metrics_data, 
                xticklabels=['AP@0.5', 'Precision', 'Recall', 'F1'],
                yticklabels=classes,
                annot=True, fmt='.3f', cmap='Blues')
    plt.title('Detection Metrics Heatmap')
    plt.tight_layout()
    plt.savefig(f"{save_dir}/metrics_heatmap.png", dpi=300, bbox_inches='tight')
    plt.close()
    
    print(f"📊 Plots saved to {save_dir}/")



In [12]:
def save_results_to_csv(results, filename="detection_results.csv"):
    """Save results to CSV file"""
    data = []
    
    for iou_key, metrics in results.items():
        iou_thresh = iou_key.split('_')[1]
        
        # Add overall metrics
        overall = metrics['overall']
        data.append({
            'IoU_Threshold': iou_thresh,
            'Class': 'Overall',
            'mAP': overall['mAP'],
            'AP': overall['mAP'],
            'Precision': overall['precision'],
            'Recall': overall['recall'],
            'F1_Score': overall['f1'],
            'TP': overall['total_tp'],
            'FP': overall['total_fp'],
            'FN': overall['total_fn']
        })
        
        # Add per-class metrics
        for class_name, class_metrics in metrics['per_class'].items():
            data.append({
                'IoU_Threshold': iou_thresh,
                'Class': class_name,
                'mAP': '',
                'AP': class_metrics['ap'],
                'Precision': class_metrics['precision'],
                'Recall': class_metrics['recall'],
                'F1_Score': class_metrics['f1'],
                'TP': class_metrics['tp'],
                'FP': class_metrics['fp'],
                'FN': class_metrics['fn']
            })
    
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"💾 Results saved to {filename}")



In [13]:
def main():
    # Configuration
    MODEL_PATH = "/kaggle/input/resnetyoloobject-detection/pytorch/default/3/best_model_new.pth"  # Path to your trained model
    DATA_PATH = "/kaggle/input/pascal-voc-2007/VOCtest_06-Nov-2007/VOCdevkit/VOC2007"  # Path to VOC dataset
    CONF_THRESHOLD = 0.3
    IoU_THRESHOLDS = [0.5, 0.75, 0.9]
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"🔧 Using device: {device}")
    
    # Load model
    print("📥 Loading trained model...")
    model = load_model(MODEL_PATH, device)
    print("✅ Model loaded successfully!")
    
    # Setup dataset and dataloader
    transform = A.Compose([
        A.Resize(448, 448),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
    
    dataset = VOCDataset(DATA_PATH, transforms=transform)
    dataloader = DataLoader(dataset, batch_size=8, shuffle=False, 
                           collate_fn=lambda x: tuple(zip(*x)))
    
    print(f"📊 Dataset loaded: {len(dataset)} images")
    
    # Run evaluation
    results = evaluate_detection_metrics(
        model, dataloader, device, 
        iou_thresholds=IoU_THRESHOLDS,
        conf_threshold=CONF_THRESHOLD
    )
    
    # Print results
    print_detailed_results(results)
    
    # Create plots
    create_evaluation_plots(results)
    
    # Save to CSV
    save_results_to_csv(results)
    
    # Print summary
    print("\n" + "="*80)
    print("🎯 SUMMARY")
    print("="*80)
    for iou_key, metrics in results.items():
        iou_thresh = iou_key.split('_')[1]
        overall = metrics['overall']
        print(f"IoU@{iou_thresh}: mAP={overall['mAP']:.4f}, "
              f"Precision={overall['precision']:.4f}, "
              f"Recall={overall['recall']:.4f}, "
              f"F1={overall['f1']:.4f}")

if __name__ == "__main__":
    main()

🔧 Using device: cuda
📥 Loading trained model...




✅ Model loaded successfully!
📊 Dataset loaded: 4952 images
🔄 Running evaluation...


Evaluating: 100%|██████████| 619/619 [03:45<00:00,  2.74it/s]



📊 Calculating metrics for IoU threshold: 0.5

📊 Calculating metrics for IoU threshold: 0.75

📊 Calculating metrics for IoU threshold: 0.9

📈 COMPREHENSIVE OBJECT DETECTION EVALUATION RESULTS

🎯 IoU Threshold: 0.5
--------------------------------------------------
📊 Overall Metrics:
   mAP:       0.7629
   Precision: 0.6248
   Recall:    0.3212
   F1-Score:  0.4243
   Total TP:  4810
   Total FP:  2889
   Total FN:  10166

📋 Per-Class Results:
Class           AP       Prec     Rec      F1       TP    FP    FN   
----------------------------------------------------------------------
aeroplane       0.8253   0.6706   0.3666   0.4740   114   56    197  
bicycle         0.9110   0.5860   0.3239   0.4172   126   89    263  
bird            0.6836   0.5833   0.2795   0.3779   161   115   415  
boat            0.4587   0.3960   0.1501   0.2177   59    90    334  
bottle          0.5536   0.3588   0.0715   0.1193   47    84    610  
bus             0.9251   0.7890   0.3386   0.4738   86    23 