In [5]:
print("start import")
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchmetrics.detection import IntersectionOverUnion
from torchmetrics.detection import MeanAveragePrecision
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import cv2
import numpy as np
from tqdm import tqdm
import warnings
print("end import")

# Silence every Python warning for the rest of the process.
warnings.filterwarnings('ignore')
# NOTE(review): presumably meant to disable albumentations' online version
# check. albumentations is already imported above this line, so if the check
# runs at import time this assignment comes too late — confirm and, if so,
# set the variable before the import.
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'
# Let cuDNN benchmark convolution algorithms and reuse the fastest one.
torch.backends.cudnn.benchmark = True

class DigitDataset:
    """Map-style detection dataset pairing images with YOLO-format labels.

    Every file in ``image_dir`` ending in ``.jpg``, ``.jpeg`` or ``.png`` is a
    sample. Its annotations are read from ``label_dir/<image stem>.txt``, one
    object per line as ``class_id x_center y_center width height`` with
    coordinates normalised to ``[0, 1]``.

    Samples that cannot be loaded, or that contain no usable box, are replaced
    by the next loadable sample (wrapping around), so ``dataset[i]`` is not
    guaranteed to correspond to ``dataset.images[i]``.
    """

    def __init__(self, image_dir, label_dir, transforms=None):
        """Index the image files found in ``image_dir``.

        Args:
            image_dir: Directory containing the images.
            label_dir: Directory containing the matching ``.txt`` label files.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., class_labels=...)`` that
                returns a mapping with the same three keys.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms
        self.images = [
            f for f in sorted(os.listdir(image_dir))
            if f.endswith(('.jpg', '.jpeg', '.png'))
        ]
        print(f"Found {len(self.images)} images")

    def _label_path(self, image_name):
        """Return the label file path for ``image_name``, whatever its extension."""
        stem = os.path.splitext(image_name)[0]
        return os.path.join(self.label_dir, stem + '.txt')

    @staticmethod
    def _read_annotations(label_path, width, height):
        """Parse a YOLO label file into pixel-space ``xyxy`` boxes and labels.

        Blank lines are ignored. Boxes are clipped to the image and dropped
        when they are degenerate or narrower/shorter than 6 pixels. Class ids
        are shifted by +1, leaving 0 unused.

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
            OSError: If the label file cannot be opened.
        """
        boxes = []
        labels = []
        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                class_id, x, y, w, h = map(float, fields)

                x1 = max(0, (x - w/2) * width)
                y1 = max(0, (y - h/2) * height)
                x2 = min(width, (x + w/2) * width)
                y2 = min(height, (y + h/2) * height)

                if x2 <= x1 or y2 <= y1 or (x2 - x1) < 6 or (y2 - y1) < 6:
                    continue

                boxes.append([x1, y1, x2, y2])
                labels.append(int(class_id) + 1)
        return boxes, labels

    def _load_sample(self, idx):
        """Load sample ``idx``; return ``None`` when it has no usable box."""
        image_name = self.images[idx]
        image_path = os.path.join(self.image_dir, image_name)
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Failed to load image: {image_path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Apply CLAHE to the lightness channel only, so colours are preserved.
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = cv2.merge((clahe.apply(l), a, b))
        image = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

        height, width = image.shape[:2]
        boxes, labels = self._read_annotations(
            self._label_path(image_name), width, height
        )
        if not boxes:
            return None

        boxes = np.array(boxes, dtype=np.float32)
        labels = np.array(labels, dtype=np.int64)

        if self.transforms:
            transformed = self.transforms(
                image=image,
                bboxes=boxes,
                class_labels=labels
            )
            image = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['class_labels']

        target = {
            # reshape keeps the (N, 4) layout even when the transform removed
            # every box and left an empty sequence behind.
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64)
        }
        return image, target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for ``idx`` or the next loadable sample.

        ``target`` is a dict with ``'boxes'`` (float32, ``(N, 4)``, xyxy) and
        ``'labels'`` (int64).

        Raises:
            IndexError: If ``idx`` is outside the dataset.
            RuntimeError: If no sample in the whole dataset can be loaded.
        """
        total = len(self)
        if not -total <= idx < total:
            raise IndexError(f"index {idx} out of range for {total} images")

        # Best-effort loading: walk forward from idx, visiting each image at
        # most once, instead of recursing without bound.
        for offset in range(total):
            candidate = (idx + offset) % total
            try:
                sample = self._load_sample(candidate)
            except Exception as e:
                print(f"Error processing image {self.images[candidate]}: {str(e)}")
                continue
            if sample is not None:
                return sample

        raise RuntimeError(
            "No image in the dataset could be loaded with at least one valid box"
        )

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.images)

def get_transform():
    """Build the albumentations pipeline applied to every sample.

    The image is resized to 640x640, normalised with the given per-channel
    mean/std and converted to a tensor. Boxes are handled in ``pascal_voc``
    format, with labels carried in ``class_labels``, and ``min_visibility``
    set to 0.6.
    """
    box_settings = A.BboxParams(
        format='pascal_voc',
        label_fields=['class_labels'],
        min_visibility=0.6
    )
    steps = [
        A.Resize(640, 640, always_apply=True),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps, bbox_params=box_settings)


def evaluate_model(model, data_loader, device, confidence_threshold=0.5):
    """Run the detector over ``data_loader`` and accumulate IoU and mAP state.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict per image with ``'boxes'``,
            ``'labels'`` and ``'scores'``.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            each target is a dict with ``'boxes'`` and ``'labels'``.
        device: Device the images are moved to before inference.
        confidence_threshold: Predictions scoring at or below this value are
            discarded before being passed to the metrics.

    Returns:
        ``(IOU_metric, MAP_metric)`` with their state updated; the caller
        obtains the final values via ``.compute()``.
    """
    model.eval()
    IOU_metric = IntersectionOverUnion(class_metrics=True)
    MAP_metric = MeanAveragePrecision(iou_type="bbox", box_format="xyxy")

    num_preds = 0
    num_targets = 0
    print("\nEvaluating model...")
    with torch.no_grad():
        for images, targets in tqdm(data_loader):
            images = [image.to(device) for image in images]
            outputs = model(images)

            batch_preds = []
            batch_targets = []
            for pred, target in zip(outputs, targets):
                scores = pred['scores'].cpu()
                # Keep only detections above the confidence threshold.
                keep = scores > confidence_threshold
                batch_preds.append({
                    'boxes': pred['boxes'].cpu()[keep],
                    'labels': pred['labels'].cpu()[keep],
                    'scores': scores[keep]
                })
                # The metrics live on the CPU, so targets are never sent to
                # the inference device.
                batch_targets.append({
                    'boxes': target['boxes'].cpu(),
                    'labels': target['labels'].cpu()
                })

            if batch_preds:
                # update() only accumulates state; calling the metric object
                # would additionally compute a per-call value nobody reads.
                IOU_metric.update(batch_preds, batch_targets)
                MAP_metric.update(batch_preds, batch_targets)
            num_preds += len(batch_preds)
            num_targets += len(batch_targets)

    print("Len pred", num_preds)
    print("Len target", num_targets)

    return IOU_metric, MAP_metric

def save_predictions_to_txt(model, data_loader, device, output_dir, confidence_threshold=0.5):
    """Write the model's detections to one YOLO-format text file per image.

    Each output line is ``class_id x_center y_center width height``, with
    coordinates normalised by the size of the tensor fed to the model and the
    class id shifted back by -1.

    Args:
        model: Detection model returning one dict per image with ``'boxes'``
            (xyxy), ``'labels'`` and ``'scores'``.
        data_loader: Loader yielding ``(images, targets)`` batches in dataset
            order; ``data_loader.dataset.images`` supplies the file names.
        device: Device the images are moved to before inference.
        output_dir: Directory for the ``.txt`` files; created if missing.
        confidence_threshold: Detections scoring at or below this value are
            not written.

    NOTE(review): file names are taken from a running counter over
    ``dataset.images``. This assumes the loader is not shuffled and that the
    dataset returns exactly the image at each index; a dataset that
    substitutes another sample for an unreadable one would make the names
    drift from the predictions — confirm against the dataset in use.
    """
    os.makedirs(output_dir, exist_ok=True)

    model.eval()
    print("\nSaving predictions...")

    current_idx = 0  # Global image index across batches
    with torch.no_grad():
        for images, _ in tqdm(data_loader):
            images = [image.to(device) for image in images]
            outputs = model(images)

            for image, pred in zip(images, outputs):
                image_filename = data_loader.dataset.images[current_idx]
                output_filename = os.path.join(
                    output_dir, os.path.splitext(image_filename)[0] + '.txt'
                )

                # Normalise by the real input size rather than assuming 640x640.
                img_height, img_width = image.shape[-2:]

                pred_boxes = pred['boxes'].cpu().numpy()
                pred_scores = pred['scores'].cpu().numpy()
                pred_labels = pred['labels'].cpu().numpy()

                # Filter predictions by confidence
                mask = pred_scores > confidence_threshold
                pred_boxes = pred_boxes[mask]
                pred_labels = pred_labels[mask]

                with open(output_filename, 'w') as f:
                    for box, label in zip(pred_boxes, pred_labels):
                        # Convert corner (xyxy) boxes to normalised centre/size.
                        x1, y1, x2, y2 = box
                        width = x2 - x1
                        height = y2 - y1
                        x_center = (x1 + width / 2) / img_width
                        y_center = (y1 + height / 2) / img_height
                        width /= img_width
                        height /= img_height
                        f.write(f"{label-1} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

                current_idx += 1

    print(f"\nPredictions saved to {output_dir}")

def collate_batch(batch):
    """Transpose a list of ``(image, target)`` pairs into ``(images, targets)``.

    Defined at module level rather than as a lambda so that it can be pickled
    when DataLoader worker processes are started with the ``spawn`` method.
    """
    return tuple(zip(*batch))


def main():
    """Load the trained detector, evaluate it on the validation set and save
    its predictions as YOLO-format text files.

    Reads the checkpoint ``best_model.pth`` and the data under ``data/val``;
    writes predictions to ``data/output/labels``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # NOTE(review): weights='DEFAULT' fetches pretrained weights that are
    # overwritten by the checkpoint loaded below; weights=None would likely
    # avoid the download — confirm the resulting state-dict keys still match.
    model = fasterrcnn_resnet50_fpn_v2(
        weights='DEFAULT',
        box_score_thresh=0.01,
        box_nms_thresh=0.45,
        box_detections_per_img=3,
        rpn_pre_nms_top_n_train=2000,
        rpn_post_nms_top_n_train=1000,
        rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_test=500,
        rpn_score_thresh=0.01,
        rpn_nms_thresh=0.7,
        min_size=1024,
        max_size=1600
    )

    # Replace the box predictor with one sized for this task's classes.
    num_classes = 11
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    print("Loading model checkpoint...")
    checkpoint = torch.load('best_model.pth', map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)

    val_dataset = DigitDataset(
        image_dir='data/val/images',
        label_dir='data/val/labels',
        transforms=get_transform()
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=2,
        collate_fn=collate_batch,
        pin_memory=True,
        persistent_workers=True
    )

    IOU_metric, MAP_metric = evaluate_model(model, val_loader, device)
    mean_iou = IOU_metric.compute()['iou'].item()
    mAP = MAP_metric.compute()['map_50'].item()
    print("\nEvaluation Results:")
    print(f'Mean IoU: {mean_iou:.4f}')
    print(f'mAP @ 0.5: {mAP:.4f}')
    print(f'Number of validation samples: {len(val_dataset)}')

    save_predictions_to_txt(model, val_loader, device, 'data/output/labels')

if __name__ == '__main__':
    main()

start import
end import
Using device: cuda
Loading model checkpoint...
Found 762 images

Evaluating model...


100%|██████████| 96/96 [00:18<00:00,  5.19it/s]


Len pred 762
Len target 762

Evaluation Results:
Mean IoU: 0.7033
mAP @ 0.5: 0.9525
Number of validation samples: 762

Saving predictions...


100%|██████████| 96/96 [00:17<00:00,  5.36it/s]


Predictions saved to data/output/labels



