In [1]:
import numpy as np
import pandas as pd 
import os
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm

import random
import torch
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
import xml.etree.ElementTree as ET

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from PIL import Image

import albumentations as A 
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
def set_seeds(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

set_seeds();

In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
import os
import shutil
import csv
from random import sample
import xml.etree.ElementTree as ET
import cv2  # For reading image dimensions

def copy_images_and_create_xml(input_folder, dest_folder, annotation_csv, annotation_dest_folder, num_images=1300):
    """Copy a random sample of annotated cat images and write one XML per image.

    Args:
        input_folder: folder scanned for files whose name starts with "CAT_CAT".
        dest_folder: folder receiving the copied images (created if missing).
        annotation_csv: CSV with an ``image`` column plus the box columns read
            by ``is_bounding_box_valid`` / ``create_xml_file``.
        annotation_dest_folder: folder receiving the XML files (created if missing).
        num_images: maximum number of files to sample from ``input_folder``.

    Sampled files that have no CSV row, cannot be read by OpenCV, or have an
    invalid box are not copied, so fewer than ``num_images`` may be produced.
    """
    for folder in (dest_folder, annotation_dest_folder):
        os.makedirs(folder, exist_ok=True)

    # Index the CSV rows by image file name (a later row replaces an earlier one)
    with open(annotation_csv, newline='') as csvfile:
        annotations = {row['image']: row for row in csv.DictReader(csvfile)}

    candidates = []
    for entry in os.listdir(input_folder):
        if os.path.isfile(os.path.join(input_folder, entry)) and entry.startswith("CAT_CAT"):
            candidates.append(entry)

    # Never ask for more files than are available
    chosen = sample(candidates, min(num_images, len(candidates)))

    for file_name in chosen:
        record = annotations.get(file_name)
        if record is None:
            continue

        source_path = os.path.join(input_folder, file_name)
        img = cv2.imread(source_path)  # Needed only for the image dimensions
        if img is None:
            continue

        img_height, img_width = img.shape[:2]
        if not is_bounding_box_valid(record, img_width, img_height):
            print("not valid bb")
            continue

        destination_path = os.path.join(dest_folder, file_name)
        shutil.copy(source_path, destination_path)
        create_xml_file(file_name, record, destination_path, annotation_dest_folder)

def is_bounding_box_valid(annotation_data, img_width, img_height):
    """Tell whether an annotation box is non-empty and lies inside the image.

    Args:
        annotation_data: mapping with ``x``, ``y``, ``width`` and ``height``
            entries convertible to float (pixel units).
        img_width: image width in pixels.
        img_height: image height in pixels.

    Returns:
        True when, after normalising by the image size, the box satisfies
        ``0 <= min < max <= 1`` on both axes.
    """
    left = float(annotation_data['x'])
    top = float(annotation_data['y'])
    right = left + float(annotation_data['width'])
    bottom = top + float(annotation_data['height'])

    # Express the corners as fractions of the image size
    rel_left, rel_right = left / img_width, right / img_width
    rel_top, rel_bottom = top / img_height, bottom / img_height

    horizontal_ok = 0 <= rel_left < rel_right <= 1
    vertical_ok = 0 <= rel_top < rel_bottom <= 1
    return horizontal_ok and vertical_ok

def create_xml_file(image_name, annotation_data, image_path, dest_folder):
    """Write a Pascal-VOC-style XML annotation holding a single "cat" box.

    Args:
        image_name: image file name; the XML is named after it with the
            extension replaced by ``.xml``.
        annotation_data: mapping with ``x``, ``y``, ``width`` and ``height``
            entries convertible to float (pixel units).
        image_path: path stored in the ``<path>`` element.
        dest_folder: folder in which the XML file is written.
    """
    base_name = os.path.splitext(image_name)[0]

    x = float(annotation_data['x'])
    y = float(annotation_data['y'])
    xmin = int(x)
    ymin = int(y)
    # Add in float space before truncating: truncating x and width separately
    # can place the far edge more than a pixel short of x + width.
    xmax = int(x + float(annotation_data['width']))
    ymax = int(y + float(annotation_data['height']))

    root = ET.Element("annotation")
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = image_path
    obj = ET.SubElement(root, "object")
    ET.SubElement(obj, "name").text = "cat"
    bndbox = ET.SubElement(obj, "bndbox")
    ET.SubElement(bndbox, "xmin").text = str(xmin)
    ET.SubElement(bndbox, "ymin").text = str(ymin)
    ET.SubElement(bndbox, "xmax").text = str(xmax)
    ET.SubElement(bndbox, "ymax").text = str(ymax)

    tree = ET.ElementTree(root)
    xml_file_path = os.path.join(dest_folder, f"{base_name}.xml")
    tree.write(xml_file_path)

# Example usage
# Source images and their CSV annotations from the cat-faces-detection dataset
input_folder = '/kaggle/input/cat-faces-detection/result-dataset/result-dataset/data'
# Destination for the copied images
dest_folder = '/kaggle/working/imgs/'
annotation_csv = '/kaggle/input/cat-faces-detection/result-dataset/result-dataset/info.csv'
# Destination for the generated XML annotations
annotation_dest_folder = '/kaggle/working/anno/'
# Uses the default num_images=1300
copy_images_and_create_xml(input_folder, dest_folder, annotation_csv, annotation_dest_folder)


In [None]:
# Image and annotation folders of the dog-and-cat-detection dataset
IMG_DIR = '/kaggle/input/dog-and-cat-detection/images'
ANN_DIR = '/kaggle/input/dog-and-cat-detection/annotations'


In [None]:
def extract_annotations(annotation_path):
    """Read the first object's box and class name from an annotation XML.

    Args:
        annotation_path: path of the XML file to parse.

    Returns:
        ``([ymin, xmin, ymax, xmax], class_name)`` -- note the y-first order.
    """
    root = ET.parse(annotation_path).getroot()
    label = root.find("./object/name").text
    corners = {
        tag: int(root.find(f"./object/bndbox/{tag}").text)
        for tag in ("xmin", "ymin", "xmax", "ymax")
    }
    return [corners["ymin"], corners["xmin"], corners["ymax"], corners["xmax"]], label

In [None]:
import os
import xml.etree.ElementTree as ET

def count_labels_in_folder(folder_path):
    """Count how many XML annotations in a folder are labelled "cat" or "dog".

    Args:
        folder_path: folder whose ``.xml`` files are parsed with
            ``extract_annotations``.

    Returns:
        ``(cat_count, dog_count)``; files with any other label are ignored.
    """
    tally = {"cat": 0, "dog": 0}

    for entry in os.listdir(folder_path):
        if not entry.endswith(".xml"):
            continue
        _, label = extract_annotations(os.path.join(folder_path, entry))
        if label in tally:
            tally[label] += 1

    return tally["cat"], tally["dog"]

# Usage example
# NOTE(review): this is the same path as ANN_DIR defined earlier; the constant could be reused.
folder_path = "/kaggle/input/dog-and-cat-detection/annotations"
cat_count, dog_count = count_labels_in_folder(folder_path)
# Report the class balance of the annotation folder
print("Number of cats:", cat_count)
print("Number of dogs:", dog_count)


In [None]:
# def display_boxes(image_path, boxes, labels):
#     image = cv2.imread(image_path)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)    
#     fig, ax = plt.subplots(1, 1, figsize=(16, 8))
#     ax.set_axis_off()

#     alpha_box = 0.4
#     text_height = 25
#     text_width = 20
#     color = (0, 255, 0)

#     for box, label in zip(boxes, labels):
#         output = image.copy()
#         output = cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)

#         overlay_text = image.copy()

#         cv2.rectangle(overlay_text, (box[0], box[1]-7-text_height),
#                       (box[0]+text_width+2, box[1]), color, -1)

#         cv2.addWeighted(overlay_text, alpha_box, output, 1 - alpha_box, 0, output)
#         cv2.putText(output, str(label), (box[0], box[1]-5),
#                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

#     plt.imshow(image) 
#     print(image.shape)   


import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def display_boxes(image_path, bb, class_name):
    """Show an image with one labelled bounding box drawn on top.

    Args:
        image_path: path of the image file to load with OpenCV.
        bb: box as ``[ymin, xmin, ymax, xmax]`` (y-first, the order returned by
            the module-level ``extract_annotations``).
        class_name: text drawn just above the box.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR, matplotlib expects RGB

    fig, ax = plt.subplots()
    ax.imshow(image)

    # Rectangle takes the (x, y) corner followed by width and height
    rect = patches.Rectangle((bb[1], bb[0]), bb[3] - bb[1], bb[2] - bb[0], linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

    # Label slightly above the top-left corner of the box
    ax.text(bb[1], bb[0] - 10, class_name, color='red', fontsize=15)

    ax.axis('off')
    plt.show()

In [None]:
# # image_id = 'i.rf.f6f93b07ea662b0526a5bc10b5ff2677'
# # image_path = os.path.join(IMG_DIR, (image_id + '.jpg'))
# # image = cv2.imread(image_path)
# # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
# # boxes = df.loc[df['image_id'] == image_id, ['x_min', 'y_min', 'x_max', 'y_max']].values
# # labels = df.loc[df['image_id'] == image_id, 'class'].values

# img_path = '/kaggle/working/imgs/CAT_CAT_06-00001489_015.jpg'
# bb, class_name =extract_annotations('/kaggle/working/anno/CAT_CAT_06-00001489_015.xml')
# print(bb)
# display_boxes(img_path, bb, class_name)

# Datasets

In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
# image_ids = df['image_id'].unique()
# random.shuffle(image_ids)
# train_ids = image_ids[:300]
# valid_ids = image_ids[300:]

add_img = '/kaggle/working/imgs/'
# IMG_DIR entries stay bare file names; generated images carry their full path
images_list = list(os.listdir(IMG_DIR)) + [add_img + extra for extra in os.listdir(add_img)]
np.random.shuffle(images_list)
# 90 % of the shuffled list for training, the remainder for testing
split_index = int(0.9 * len(images_list))
train_image_list, test_image_list = images_list[:split_index], images_list[split_index:]
# Shuffle each subset once more
np.random.shuffle(train_image_list)
np.random.shuffle(test_image_list)

In [None]:


# train_df = df[df['image_id'].isin(train_ids)]
# valid_df = df[df['image_id'].isin(valid_ids)]
# valid_df.shape, train_df.shape

In [None]:
class CatsDogsDataset(Dataset):
    """Detection dataset yielding ``(image, target)`` pairs for cat/dog images.

    Each entry of ``image_paths`` is either a path containing ``"working"``
    (used as-is, annotation looked up in ``/kaggle/working/anno``) or a bare
    file name resolved against the module-level ``IMG_DIR`` (annotation looked
    up in ``annotation_dir``). Only the first object of each XML is used.
    """

    def __init__(self, image_paths, annotation_dir, transforms=None):
        """Store the sample list, the annotation folder and the optional transform."""
        super().__init__()
        self.image_paths = image_paths
        self.annotation_dir = annotation_dir
        self.transforms = transforms

    def extract_annotations(self, annotation_path):
        """Return ``([xmin, ymin, xmax, ymax], class_name)`` for the first XML object."""
        root = ET.parse(annotation_path).getroot()
        class_name = root.find("./object/name").text
        xmin = int(root.find("./object/bndbox/xmin").text)
        ymin = int(root.find("./object/bndbox/ymin").text)
        xmax = int(root.find("./object/bndbox/xmax").text)
        ymax = int(root.find("./object/bndbox/ymax").text)
        return [xmin, ymin, xmax, ymax], class_name

    def _resolve_paths(self, index):
        """Return ``(image_path, annotation_path)`` for the sample at ``index``."""
        entry = self.image_paths[index]
        if "working" in entry:
            image_path = entry
            annotation_root = '/kaggle/working/anno'
        else:
            image_path = IMG_DIR + '/' + entry
            annotation_root = self.annotation_dir
        # splitext strips only the extension, so dotted names ("a.b.jpg" -> "a.b")
        # map to the same stem that create_xml_file uses when naming the XML.
        stem = os.path.splitext(image_path.split('/')[-1])[0]
        return image_path, os.path.join(annotation_root, stem + '.xml')

    def __getitem__(self, index: int):
        """Load one sample.

        Returns:
            ``(image, target)`` where ``target`` holds ``boxes`` (N, 4) float32,
            ``labels`` (N,) int64, ``area`` (N,) float32 and ``iscrowd`` (N,)
            int64, all describing the boxes that remain after the transform
            (N is 0 or 1). Without transforms the image is an RGB float32 array
            scaled to [0, 1]. Returns ``None`` when the annotated box falls
            outside the image.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path, annotation_path = self._resolve_paths(index)

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        h, w, _ = image.shape

        bb, class_name = self.extract_annotations(annotation_path)
        xminn, yminn, xmaxn, ymaxn = bb

        bb_normalized = [xminn / w, yminn / h, xmaxn / w, ymaxn / h]
        if any(coord < 0 or coord > 1 for coord in bb_normalized):
            print(f"Skipping image {image_path} due to invalid bounding box.")
            return None  # the caller / collate function must drop this sample

        # Label 0 is left for the background: "cat" -> 1, anything else -> 2
        class_index = 1 if class_name == "cat" else 2

        boxes = [bb]
        labels = [class_index]

        if self.transforms:
            augmented = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = augmented['image']
            # Take boxes AND labels from the transform output so both stay the
            # same length when the transform drops a box.
            boxes = augmented['bboxes']
            labels = augmented['labels']

        # reshape keeps a well-formed (0, 4) tensor when no box is left
        boxes_tensor = torch.as_tensor(np.asarray(boxes, dtype=np.float32).reshape(-1, 4))
        labels_tensor = torch.as_tensor(np.asarray(labels, dtype=np.int64).reshape(-1))

        target = {
            'boxes': boxes_tensor,
            'labels': labels_tensor,
            # Area is computed from the final boxes so it matches a resized image
            'area': (boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1]),
            'iscrowd': torch.zeros(len(boxes_tensor), dtype=torch.int64)
        }

        return image, target

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.image_paths)

In [None]:
def get_train_transform():
    """Build the training augmentation pipeline (flip, brightness/contrast, resize, to-tensor).

    Boxes are expected in ``pascal_voc`` format with their class ids passed in
    the ``labels`` field.
    """
    augmentations = [
        A.HorizontalFlip(p=0.5),  # Flip the image horizontally 50% of the time
        # A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.5),  # Randomly apply affine transforms: translate, scale and rotate the image
        A.RandomBrightnessContrast(p=0.2),  # Randomly change brightness and contrast
        # A.GaussianBlur(blur_limit=(3, 5), p=0.2),  # Apply Gaussian Blur to 20% of images
        A.Resize(height=512, width=512, p=1.0),  # Resize all images to 512x512 for consistency
        ToTensorV2(p=1.0),  # Convert the image to a PyTorch tensor
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        label_fields=['labels'],
        min_visibility=0.3,  # Visibility threshold applied to transformed boxes
        min_area=0,  # No minimum box area is enforced
    )
    return A.Compose(augmentations, bbox_params=box_settings)



def get_valid_transform():
    """Build the validation pipeline: resize to 512x512 and convert to a tensor.

    Uses the same box settings as the training pipeline (``pascal_voc`` boxes,
    class ids in the ``labels`` field) but no random augmentation.
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),  # Same input size as in training
        ToTensorV2(p=1.0),  # Convert the image to a PyTorch tensor
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        label_fields=['labels'],
        min_visibility=0.3,  # Visibility threshold applied to transformed boxes
        min_area=0,  # No minimum box area is enforced
    )
    return A.Compose(steps, bbox_params=box_settings)



In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
# Training samples get the random augmentations; validation samples are only resized
train_dataset = CatsDogsDataset(train_image_list, ANN_DIR, get_train_transform())

valid_dataset = CatsDogsDataset(test_image_list, ANN_DIR, get_valid_transform())

In [None]:
def collate_fn(batch):
    """Transpose a list of ``(image, target)`` samples into ``(images, targets)``.

    ``None`` samples are dropped first: ``CatsDogsDataset.__getitem__`` returns
    ``None`` for an image whose box is invalid, and unpacking ``None`` inside
    ``zip`` would raise ``TypeError`` for the whole batch.

    Args:
        batch: list of samples produced by the dataset.

    Returns:
        A tuple of per-field tuples; the empty tuple when no sample is left.
    """
    kept = [item for item in batch if item is not None]
    return tuple(zip(*kept))

# Settings shared by both loaders; adjust num_workers to your system's specification
shared_loader_kwargs = dict(batch_size=10, collate_fn=collate_fn, num_workers=4)

# Only the training loader reshuffles its samples
train_data_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_kwargs)

valid_data_loader = DataLoader(valid_dataset, shuffle=False, **shared_loader_kwargs)

# Model

In [None]:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
# num_classes = 3  # + background

# # get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features

# # replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
import torch.nn as nn

def create_custom_model(num_classes, freeze_layers=True):
    """Build a Faster R-CNN detector on a pre-trained ResNet-50 FPN backbone.

    Args:
        num_classes: number of output classes, background included.
        freeze_layers: when True, every ResNet body parameter outside
            ``layer3`` / ``layer4`` is frozen; parameters outside the body
            (the FPN) stay trainable.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # Load a pre-trained ResNet-50 FPN backbone
    backbone = resnet_fpn_backbone('resnet50', pretrained=True)

    if freeze_layers:
        for name, parameter in backbone.named_parameters():
            # Restrict freezing to the ResNet body ("body.*"). A plain
            # "'layer3' not in name" test also matches the FPN parameters
            # ("fpn.*"), which would then never be trained.
            # NOTE(review): assumes torchvision's BackboneWithFPN naming (body / fpn).
            if name.startswith('body.') and 'layer3' not in name and 'layer4' not in name:
                parameter.requires_grad = False

    # Initialize the Faster R-CNN model with the custom backbone
    model = FasterRCNN(backbone, num_classes=num_classes)

    # Install a fresh box predictor sized for num_classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # NOTE(review): these layers are only attached as an attribute; nothing in
    # this file calls them, so they presumably never take part in the forward
    # pass. They are kept because their weights are part of the state_dict
    # that the saved checkpoints are loaded into.
    additional_layers = nn.Sequential(
        nn.Linear(in_features, 1024),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.Dropout(0.5)
    )
    model.roi_heads.additional_layers = additional_layers

    return model

# Configure your model
num_classes = 3  # background + the two labels produced by CatsDogsDataset (1 = cat, 2 = other)
model = create_custom_model(num_classes)


In [None]:
import torch
torch.cuda.empty_cache()



# Training

In [None]:
torch.cuda.empty_cache()

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimise only the parameters that are still trainable (frozen ones have requires_grad=False)
params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.00001, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.Adam(params, lr=0.00001, weight_decay=0.0005)


# Multiply the learning rate by 0.1 every 3 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)



In [None]:
class Averager:
    """Running mean of the values passed to :meth:`send`."""

    def __init__(self):
        # Sum of the values received so far and how many there were
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one value to the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the values received since the last reset (0 when there are none)."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every value received so far."""
        self.iterations = 0.0
        self.current_total = 0.0

In [None]:
import torch
torch.cuda.empty_cache()



In [None]:
# import matplotlib.pyplot as plt

# EPOCHES = 10
# PRINT_STEP = 10
# epoch_losses = []  # List to store average loss per epoch

# loss_hist = Averager()
# itr = 1
    
# for epoch in range(EPOCHES):
#     loss_hist.reset()
#     model.train()
    
#     for data in train_data_loader:
#         if data is None:
#             continue
        
#         images, targets = data

#         images = list(image.to(device) for image in images)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#         optimizer.zero_grad()
#         loss_dict = model(images, targets)
#         losses = sum(loss for loss in loss_dict.values())
#         loss_value = losses.item()

#         loss_hist.send(loss_value)

#         losses.backward()
#         optimizer.step()

#         if itr % PRINT_STEP == 0:
#             print(f"Iteration #{itr} loss: {loss_value}")

#         itr += 1

#     epoch_avg_loss = loss_hist.value
#     epoch_losses.append(epoch_avg_loss)
#     print(f"Epoch #{epoch} average loss: {epoch_avg_loss}")
    
#     # update the learning rate
#     if lr_scheduler is not None:
#         lr_scheduler.step()
        
#     model_save_path = f'model_epoch_{epoch+1}.pth'
#     torch.save(model.state_dict(), model_save_path)
#     print(f"Model saved to {model_save_path}")
        
        
# plt.figure(figsize=(10, 5))  
# plt.plot(epoch_losses, color='blue')  
# plt.title('Training Loss')  
# plt.xlabel('Epoch')  
# plt.ylabel('Average Loss')  
# plt.show()  

In [None]:
# torch.save(model.state_dict(), 'cats_dogs_detection.pth')

In [None]:
from torchvision.ops import nms

def calculate_iou(box1, box2):
    """Intersection over Union of two ``[x1, y1, x2, y2]`` boxes.

    Returns 0 when the union area is zero.
    """
    # Corners of the overlapping rectangle
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Clamp at zero so disjoint boxes give no overlap
    overlap = max(0, right - left) * max(0, bottom - top)

    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    union = _area(box1) + _area(box2) - overlap

    if union != 0:
        return overlap / union
    return 0

def apply_nms_and_score_threshold(outputs, score_thresh=0.85, nms_thresh=0.8):
    """Keep confident detections and suppress overlapping ones, per image.

    Args:
        outputs: iterable of prediction dicts with ``boxes``, ``scores`` and
            ``labels`` entries.
        score_thresh: detections must score strictly above this value.
        nms_thresh: IoU threshold passed to ``nms``.

    Returns:
        A list with one ``{'boxes', 'scores', 'labels'}`` dict per input.
    """
    def _postprocess(prediction):
        confident = prediction['scores'] > score_thresh
        boxes, scores, labels = (
            prediction[field][confident] for field in ('boxes', 'scores', 'labels')
        )
        # NMS runs over all remaining boxes together, whatever their label
        survivors = nms(boxes, scores, nms_thresh)
        return {
            'boxes': boxes[survivors],
            'scores': scores[survivors],
            'labels': labels[survivors]
        }

    return [_postprocess(prediction) for prediction in outputs]


In [None]:
# def calculate_accuracy_metrics(processed_outputs, targets):
#     true_positives = 0
#     false_positives = 0
#     false_negatives = 0

#     for output, target in zip(processed_outputs, targets):
#         target_boxes = target['boxes'].cpu()
#         target_labels = target['labels'].cpu()
#         output_boxes = output['boxes'].cpu()
#         output_labels = output['labels'].cpu()

#         detected = [False] * len(output_boxes)

#         for target_box, target_label in zip(target_boxes, target_labels):
#             if len(output_boxes) == 0:
#                 # Dacă nu există detectări, toate sunt false negatives
#                 false_negatives += len(target_boxes)
#                 break
#             ious = torch.tensor([calculate_iou(target_box, ob) for ob in output_boxes])
#             max_iou, max_index = ious.max(0)
            
#             if max_iou > 0.5 and target_label == output_labels[max_index]:
#                 true_positives += 1
#                 detected[max_index] = True
#             else:
#                 false_negatives += 1

#         false_positives += sum(not d for d in detected)

#     precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
#     recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
#     f1_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0

#     return {'precision': precision, 'recall': recall, 'f1_score': f1_score}

def calculate_accuracy_metrics(processed_outputs, targets):
    """Compute precision, recall and F1 for one batch of detections.

    A target counts as a true positive when its best-overlapping detection has
    IoU > 0.5 and the same label; otherwise it is a false negative. Detections
    never selected by any target are false positives.

    Args:
        processed_outputs: per-image dicts with ``boxes`` and ``labels`` tensors.
        targets: per-image dicts with ``boxes`` and ``labels`` tensors.

    Returns:
        ``{'precision', 'recall', 'f1_score'}``; each value is 0 when its
        denominator is zero.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for output, target in zip(processed_outputs, targets):
        target_boxes = target['boxes'].cpu()
        target_labels = target['labels'].cpu()
        output_boxes = output['boxes'].cpu()
        output_labels = output['labels'].cpu()

        if len(output_boxes) == 0:
            # No detections for this image: every target is a false negative
            false_negatives += len(target_boxes)
            continue

        detected = [False] * len(output_boxes)

        for target_box, target_label in zip(target_boxes, target_labels):
            # IoU of this target with every detection (at least one exists here)
            ious = torch.tensor([calculate_iou(target_box, ob) for ob in output_boxes])

            max_iou, max_index = ious.max(0)
            # NOTE(review): a detection may be the best match of several targets
            # and is then counted as a true positive for each of them.
            if max_iou > 0.5 and target_label == output_labels[max_index]:
                true_positives += 1
                detected[max_index] = True
            else:
                false_negatives += 1

        # Detections that were not matched to any target
        false_positives += sum(not d for d in detected)

    precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
    f1_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0

    return {'precision': precision, 'recall': recall, 'f1_score': f1_score}



In [None]:
import torch
torch.cuda.empty_cache()



In [None]:
# Rebuild the architecture and restore the epoch-1 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_1 (1).pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-2 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_2.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-3 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_3.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-4 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_4.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-5 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_5.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-6 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_6.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-7 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_7.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics

# Evaluate the currently loaded checkpoint on the validation loader.
# NOTE(review): `torchmetrics` is imported but not used in this cell.

model.to(device)
model.eval()  # Evaluation mode: the model is called without targets below

num_images_to_process = 10
images_processed = 0
# NOTE(review): the two counters above are never read in this cell.

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Unweighted mean of the per-batch metrics over all batches
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the architecture and restore the epoch-8 weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine as well; the evaluation cell moves the model to `device` afterwards.
model = create_custom_model(3)
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_8.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics  # NOTE(review): not referenced anywhere in this cell

# Evaluate the currently loaded checkpoint on the validation loader and report
# precision / recall / F1 averaged over batches.

model.to(device)
model.eval()  # Evaluation mode (not training mode): the forward pass below is used for predictions

# NOTE(review): these two values are never read in this cell; they are kept so
# the notebook's global names stay unchanged.
num_images_to_process = 10
images_processed = 0

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        # Only tensors are moved to the device; any other target value is passed through unchanged
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw (unfiltered) outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Average every metric over all batches.
# Fail with a clear message instead of a bare IndexError on all_metrics[0]
# when the validation loader produced no batches.
if not all_metrics:
    raise RuntimeError("No validation batches were evaluated; cannot average metrics.")
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the detector and restore the weights saved after epoch 9.
# NOTE(review): the argument 3 is presumably the number of classes - confirm
# against create_custom_model.
model = create_custom_model(3)
# map_location='cpu' keeps the checkpoint loadable on a runtime without CUDA
# even if it was saved from a GPU; the evaluation cell that follows moves the
# model to `device` itself.
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_9.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics  # NOTE(review): not referenced anywhere in this cell

# Evaluate the currently loaded checkpoint on the validation loader and report
# precision / recall / F1 averaged over batches.

model.to(device)
model.eval()  # Evaluation mode (not training mode): the forward pass below is used for predictions

# NOTE(review): these two values are never read in this cell; they are kept so
# the notebook's global names stay unchanged.
num_images_to_process = 10
images_processed = 0

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        # Only tensors are moved to the device; any other target value is passed through unchanged
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw (unfiltered) outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Average every metric over all batches.
# Fail with a clear message instead of a bare IndexError on all_metrics[0]
# when the validation loader produced no batches.
if not all_metrics:
    raise RuntimeError("No validation batches were evaluated; cannot average metrics.")
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


In [None]:
# Rebuild the detector and restore the weights saved after epoch 10.
# NOTE(review): the argument 3 is presumably the number of classes - confirm
# against create_custom_model.
model = create_custom_model(3)
# map_location='cpu' keeps the checkpoint loadable on a runtime without CUDA
# even if it was saved from a GPU; the evaluation cell that follows moves the
# model to `device` itself.
model.load_state_dict(torch.load('/kaggle/input/modele-epoci/model_epoch_10.pth', map_location='cpu'))



In [None]:
import torch
import torchmetrics  # NOTE(review): not referenced anywhere in this cell

# Evaluate the currently loaded checkpoint on the validation loader and report
# precision / recall / F1 averaged over batches.

model.to(device)
model.eval()  # Evaluation mode (not training mode): the forward pass below is used for predictions

# NOTE(review): these two values are never read in this cell; they are kept so
# the notebook's global names stay unchanged.
num_images_to_process = 10
images_processed = 0

all_metrics = []
with torch.no_grad():
    for images, targets in valid_data_loader:
        images = list(img.to(device) for img in images)
        # Only tensors are moved to the device; any other target value is passed through unchanged
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        outputs = model(images)  # Assumed to be the model's raw (unfiltered) outputs
        processed_outputs = apply_nms_and_score_threshold(outputs)

        # Compute the metrics for this batch
        metrics = calculate_accuracy_metrics(processed_outputs, targets)
        all_metrics.append(metrics)

# Average every metric over all batches.
# Fail with a clear message instead of a bare IndexError on all_metrics[0]
# when the validation loader produced no batches.
if not all_metrics:
    raise RuntimeError("No validation batches were evaluated; cannot average metrics.")
mean_metrics = {k: sum(m[k] for m in all_metrics) / len(all_metrics) for k in all_metrics[0]}
print("Average Precision:", mean_metrics['precision'])
print("Average Recall:", mean_metrics['recall'])
print("Average F1 Score:", mean_metrics['f1_score'])


**Test: draw the model's predictions on validation images**

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

# Inference setup: model on the target device, switched to evaluation mode
model.to(device)
model.eval()

# Stop once this many validation images have been drawn
num_images_to_display = 15
images_processed = 0

with torch.no_grad():  # gradients are not needed for plotting predictions
    for images, targets in valid_data_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for i, image in enumerate(images):
            # Reorder axes with permute(1, 2, 0), copy to the CPU and clip to [0, 1] for imshow
            image_np = np.clip(image.permute(1, 2, 0).cpu().numpy(), 0, 1)

            fig, ax = plt.subplots(1)
            ax.imshow(image_np)

            # Predictions for this image as NumPy arrays
            prediction = outputs[i]
            pred_boxes = prediction['boxes'].cpu().numpy()
            pred_labels = prediction['labels'].cpu().numpy()
            pred_scores = prediction['scores'].cpu().numpy()

            # Draw only detections whose score is above 0.5
            confident = pred_scores > 0.5
            for (x_min, y_min, x_max, y_max), label, score in zip(
                pred_boxes[confident], pred_labels[confident], pred_scores[confident]
            ):
                outline = patches.Rectangle(
                    (x_min, y_min),
                    x_max - x_min,
                    y_max - y_min,
                    linewidth=1,
                    edgecolor='r',
                    facecolor='none',
                )
                ax.add_patch(outline)
                ax.text(x_min, y_min, f'{label} ({score:.2f})', bbox=dict(facecolor='white', alpha=0.5))

            ax.axis('off')
            plt.show()

            images_processed += 1
            if images_processed >= num_images_to_display:
                break
        else:
            # Batch exhausted without reaching the limit: go on to the next batch
            continue
        # The inner loop hit the display limit, so stop iterating batches too
        break


# Visualization: ground-truth boxes for one validation sample

In [None]:
# Pull a single batch from the validation loader for manual inspection
images, targets = next(iter(valid_data_loader))

images = [img.to(device) for img in images]
# Move only tensor values to the device; anything else in a target dict has no
# .to() and is passed through unchanged, matching the evaluation loops above.
targets = [
    {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()}
    for t in targets
]

In [None]:
# Example: show the target boxes of one image from the batch fetched above.
# Change img_index to inspect a different image; it must be smaller than the batch size.
img_index = 1

# NOTE(review): the global name `sample` shadows `random.sample`, which an
# earlier cell imports and calls inside copy_images_and_create_xml; re-running
# that function after this cell would fail. Name kept to avoid breaking other cells.
sample = images[img_index].permute(1, 2, 0).cpu().numpy()
boxes = targets[img_index]['boxes'].cpu().numpy().astype(np.int32)
labels = targets[img_index]['labels'].cpu().numpy().astype(np.int32)

display_boxes(sample, boxes, labels)