In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import io
from PIL import Image
from tqdm import tqdm
import glob
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class SingleFileDetectionDataset(Dataset):
    def __init__(self, file_path):
        self.df = pd.read_parquet(file_path)
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])
        
        # Define the class mapping
        self.class_mapping = {
            0: 0,  # background
            1: 1,  # person
            2: 2,  # bicycle
            3: 3,  # car
            4: 4,  # motorcycle
            75: 5, # clock
            68: 6, # cell phone
            10: 7, # traffic light
            12: 8, # stop sign
            6: 9,  # bus
            44: 10 # knife
        }

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # Decode the PNG image data
        image = Image.open(io.BytesIO(row['image']))
        
        # Convert to RGB if it's not already
        if image.mode != 'RGB':
            image = image.convert('RGB')
        
        # Apply the transform to convert to tensor
        image = self.transform(image)

        boxes = []
        labels = []
        for ann in row['annotations']:
            # Extract bounding box coordinates
            x, y, w, h = ann['bbox']
            
            # Convert to (x1, y1, x2, y2) format and ensure positive width and height
            x1 = round(x)
            y1 = round(y)
            x2 = round(x + max(1, w))  # Ensure width is at least 1
            y2 = round(y + max(1, h))  # Ensure height is at least 1
            
            # Append the corrected bounding box
            boxes.append([x1, y1, x2, y2])
            
            # Remap the class ID
            original_class_id = ann['category_id']
            remapped_class_id = self.class_mapping.get(original_class_id, 0)  # Default to background if not found
            labels.append(remapped_class_id)
        
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
        }
        return image, target

# The rest of the code remains the same

def collate_fn(batch):
    return tuple(zip(*batch))

def load_model(num_classes):
    model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True,trainable_backbone_layers=6)
    
    # Replace the classifier with a new one for our number of classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

def calculate_iou(box1, box2):
    # Calculate IoU between two bounding boxes
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    return intersection / union if union > 0 else 0

def train_one_epoch(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    num_batches = 0
    total_iou = 0
    num_boxes = 0
    
    for images, targets in tqdm(dataloader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        optimizer.zero_grad()
        
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        losses.backward()
        optimizer.step()
        
        total_loss += losses.item()
        num_batches += 1

        # Calculate IoU
        with torch.no_grad():
            predictions = model(images)
            for pred, target in zip(predictions, targets):
                pred_boxes = pred['boxes']
                true_boxes = target['boxes']
                for pred_box in pred_boxes:
                    ious = [calculate_iou(pred_box.cpu(), true_box.cpu()) for true_box in true_boxes]
                    if ious:
                        total_iou += max(ious)
                        num_boxes += 1
    
    avg_loss = total_loss / num_batches
    avg_iou = total_iou / num_boxes if num_boxes > 0 else 0
    return avg_loss, avg_iou

def main():
    num_classes = 11  # Number of classes in filtered_classes
    num_epochs = 40  # Increase total number of epochs
    batch_size = 4
    learning_rate = 0.0001
    start_epoch = 1  # Start from the 10th epoch
    
    # Load the model
    model = load_model(num_classes)
    
    # Load the saved state dict from the 10th epoch
    checkpoint_path = f'fasterrcnn_mobilenet_v3_epoch_{start_epoch}.pth'
    if os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path))
        print(f"Loaded checkpoint from {checkpoint_path}")
    else:
        print(f"No checkpoint found at {checkpoint_path}. Starting from scratch.")
        start_epoch = 0

    model = model.to(device)
    
    # Setup optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=learning_rate)
    
    # Get list of all parquet files
    all_files = glob.glob('./filter 640/filtered_dataset_train_640_*.parquet')
    
    # Training loop
    for epoch in range(start_epoch, num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        epoch_loss = 0
        epoch_iou = 0
        
        for file_path in all_files:
            print(f"Training on file: {file_path}")
            # Load dataset for current file
            train_dataset = SingleFileDetectionDataset(file_path)
            train_dataloader = DataLoader(
                train_dataset, 
                batch_size=batch_size, 
                shuffle=True, 
                num_workers=2, 
                collate_fn=collate_fn
            )
            
            # Train on current file
            loss, iou = train_one_epoch(model, train_dataloader, optimizer, device)
            epoch_loss += loss
            epoch_iou += iou
            print(f"File Loss: {loss:.4f}, File IoU: {iou:.4f}")
        
        avg_epoch_loss = epoch_loss / len(all_files)
        avg_epoch_iou = epoch_iou / len(all_files)
        print(f"Epoch Average Loss: {avg_epoch_loss:.4f}, Epoch Average IoU: {avg_epoch_iou:.4f}")
        
        # Save checkpoint
        torch.save(model.state_dict(), f'fasterrcnn_mobilenet_v3_epoch_{epoch+1}.pth')

if __name__ == "__main__":
    main()

Using device: cuda


In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import io
from PIL import Image
from tqdm import tqdm
import glob
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_320_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
import matplotlib.pyplot as plt
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from sklearn.metrics import precision_recall_curve, average_precision_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ... [Keep your existing SingleFileDetectionDataset, collate_fn, and load_model functions] ...

def evaluate(model, dataloader, device):
    model.eval()
    metric = MeanAveragePrecision()
    all_preds = []
    all_targets = []
    print(len(dataloader))
    
    with torch.no_grad():
        for images, targets in tqdm(dataloader):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)
            
            for i in range(len(outputs)):
                all_preds.append({
                    'boxes': outputs[i]['boxes'].cpu(),
                    'scores': outputs[i]['scores'].cpu(),
                    'labels': outputs[i]['labels'].cpu()
                })
                all_targets.append({
                    'boxes': targets[i]['boxes'].cpu(),
                    'labels': targets[i]['labels'].cpu()
                })
    
    metric.update(all_preds, all_targets)
    results = metric.compute()
    
    return results, all_preds, all_targets

def plot_precision_recall_curve(precisions, recalls, ap, class_id):
    plt.figure(figsize=(8, 6))
    plt.plot(recalls, precisions, label=f'Precision-Recall curve (AP = {ap:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve for Class {class_id}')
    plt.legend()
    plt.savefig(f'precision_recall_curve_class_{class_id}.png')
    plt.close()

def main():
    num_classes = 11
    batch_size = 2
    
    # Load the trained model
    model = load_model(num_classes)
    model.load_state_dict(torch.load('./run 2/fasterrcnn_mobilenet_v3_epoch_38.pth'))  # Load the last epoch's weights
    model = model.to(device)
    model.eval()
    
    # Load test dataset
    test_dataset = SingleFileDetectionDataset('/home/muhammadfasi/Downloads/FYP/scripts/testing/filtered_test_256.parquet')
    test_dataloader = DataLoader(
        test_dataset, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=2, 
        collate_fn=collate_fn
    )
    
    # Evaluate the model
    results, all_preds, all_targets = evaluate(model, test_dataloader, device)
    # Print mAP results
    print(f"mAP: {results['map']:.4f}")
    print(f"mAP (small): {results['map_small']:.4f}")
    print(f"mAP (medium): {results['map_medium']:.4f}")
    print(f"mAP (large): {results['map_large']:.4f}")
    
    # Calculate and plot precision-recall curve for each class
    for class_id in range(1, num_classes):  # Exclude background class
        y_true = []
        y_scores = []
        
        for pred, target in zip(all_preds, all_targets):
            pred_masks = pred['labels'] == class_id
            target_masks = target['labels'] == class_id
            
            y_true.extend(target_masks.tolist())
            y_scores.extend(pred['scores'][pred_masks].tolist())
        
        if len(y_true) > 0 and len(y_scores) > 0:
            precisions, recalls, thresholds = precision_recall_curve(y_true, y_scores)
            ap = average_precision_score(y_true, y_scores)
            plot_precision_recall_curve(precisions, recalls, ap, class_id)
    
    # Plot mAP
    plt.figure(figsize=(10, 6))
    plt.bar(['Overall', 'Small', 'Medium', 'Large'], 
            [results['map'], results['map_small'], results['map_medium'], results['map_large']])
    plt.title('Mean Average Precision (mAP)')
    plt.ylabel('mAP')
    plt.savefig('map_results.png')
    plt.close()

if __name__ == "__main__":
    main()

Using device: cuda




1591


100%|██████████| 1591/1591 [00:33<00:00, 47.79it/s]


mAP: 0.2244
mAP (small): 0.0910
mAP (medium): 0.3568
mAP (large): 0.4584


ValueError: Found input variables with inconsistent numbers of samples: [15853, 34083]