## Packages

In [None]:
import torch
import time
import supervision as sv
from transformers import DetrForObjectDetection, DetrImageProcessor, DetrConfig
from torch.utils.data import DataLoader
import os
import torchvision
import shapely.geometry as sg
import random
import cv2
import numpy as np
import time
from tqdm import tqdm
import csv
import matplotlib.pyplot as plt
import random

In [None]:
from pytorch_lightning import Trainer
import pytorch_lightning as pl

## Data

We load the images and their COCO annotations, whose only label is "lesion", and fine-tune a pre-trained DETR model to detect the lesions.


In [None]:
# Suffix appended to the './Procesado' and './data' directory names.
# The commented-out line is the empty-suffix alternative.
TYPE = "_Balanced"
# TYPE = ""

# COCO annotation file looked up inside each split's image directory.
ANNOTATION_FILE_NAME = "annotation_uniclass.json"

# Folder name used when saving/loading the fine-tuned model; it follows the
# annotation flavour selected above.
MODEL_PATH = 'DETR-uniclass' if 'uniclass' in ANNOTATION_FILE_NAME else 'DETR'

In [None]:
# Root folder of the processed dataset variant selected by TYPE.
dataset = f'./Procesado{TYPE}/'

# Every split keeps its images under <split>/images/.
TRAIN_DIRECTORY, VAL_DIRECTORY, TEST_DIRECTORY = (
    os.path.join(dataset, f"{split}/images/")
    for split in ("train", "valid", "test")
)

class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset whose samples are run through an image processor.

    The annotation file named by ``ANNOTATION_FILE_NAME`` is read from inside
    ``image_directory_path``.

    Args:
        image_directory_path: Folder holding the images and the annotation file.
        image_processor: Processor called with ``images=``, ``annotations=``
            and ``return_tensors="pt"`` for every sample.
        train: Accepted for call-site compatibility; it is not used.
    """

    def __init__(
        self,
        image_directory_path: str,
        image_processor,
        train: bool = True
    ):
        annotation_file_path = os.path.join(image_directory_path, ANNOTATION_FILE_NAME)
        super().__init__(image_directory_path, annotation_file_path)
        self.image_processor = image_processor

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at position ``idx``."""
        image, raw_annotations = super().__getitem__(idx)
        # The processor expects the annotations wrapped together with the image id.
        coco_target = {'image_id': self.ids[idx], 'annotations': raw_annotations}
        encoding = self.image_processor(images=image, annotations=coco_target, return_tensors="pt")
        # Remove only the leading axis (the processor is called with a single
        # image, so this is presumably the batch axis). A bare squeeze() would
        # also drop any other axis that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]

        return pixel_values, target

# Image processor belonging to the same checkpoint that the model is later loaded from.
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# One dataset per split, built in train / valid / test order.
TRAIN_DATASET, VAL_DATASET, TEST_DATASET = (
    CocoDetection(image_directory_path=directory, image_processor=image_processor, train=is_train)
    for directory, is_train in (
        (TRAIN_DIRECTORY, True),
        (VAL_DIRECTORY, False),
        (TEST_DIRECTORY, False),
    )
)

for split_label, split_dataset in (
    ("training", TRAIN_DATASET),
    ("validation", VAL_DATASET),
    ("test", TEST_DATASET),
):
    print(f"Number of {split_label} examples:", len(split_dataset))

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Number of training examples: 1605
Number of validation examples: 810
Number of test examples: 107


In [None]:
image_ids = TRAIN_DATASET.coco.getImgIds()
image_id = random.choice(image_ids)
print('Image #{}'.format(image_id))

# load image and annotatons
image = TRAIN_DATASET.coco.loadImgs(image_id)[0]
annotations = TRAIN_DATASET.coco.imgToAnns[image_id]
image_path = os.path.join(TRAIN_DATASET.root, image['file_name'])
image_path = os.path.join(TRAIN_DATASET.root, image['file_name'])
image = cv2.imread(image_path)

# annotate
detections = sv.Detections.from_coco_annotations(coco_annotation=annotations)

# we will use id2label function for training
categories = TRAIN_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}

labels = [
    f"{id2label[class_id]}"
    for _, _, class_id, _
    in detections
]

box_annotator = sv.BoxAnnotator()
frame = box_annotator.annotate(scene=image, detections=detections, labels=labels)

%matplotlib inline
sv.show_frame_in_notebook(image, (8, 8))

## Model

In [None]:
def collate_fn(batch):
    """Turn a list of ``(pixel_values, target)`` samples into one model batch.

    The images are padded together through ``image_processor.pad``; the
    per-image targets are passed along as a plain list.

    Returns:
        dict with the keys ``'pixel_values'``, ``'pixel_mask'`` and ``'labels'``.
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])

    padded = image_processor.pad(images, return_tensors="pt")
    return dict(
        pixel_values=padded['pixel_values'],
        pixel_mask=padded['pixel_mask'],
        labels=targets,
    )

# Every loader shares the collate function and the batch size. Only the
# training loader shuffles; the test loader keeps DataLoader's default
# worker setting.
_loader_kwargs = dict(collate_fn=collate_fn, batch_size=8)

TRAIN_DATALOADER = DataLoader(dataset=TRAIN_DATASET, num_workers=16, shuffle=True, **_loader_kwargs)
VAL_DATALOADER = DataLoader(dataset=VAL_DATASET, num_workers=16, **_loader_kwargs)
TEST_DATALOADER = DataLoader(dataset=TEST_DATASET, **_loader_kwargs)

In [None]:
class Detr(pl.LightningModule):
    """LightningModule that fine-tunes ``facebook/detr-resnet-50`` on the notebook's labels.

    Args:
        lr: Learning rate for trainable parameters whose name does not
            contain ``"backbone"``.
        lr_backbone: Learning rate for trainable parameters whose name
            contains ``"backbone"``.
        weight_decay: Weight decay passed to AdamW.
    """

    def __init__(self, lr, lr_backbone, weight_decay):
        super().__init__()
        # replace COCO classification head with custom head
        # we specify the "no_timm" variant here to not rely on the timm library
        # for the convolutional backbone
        self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50",
                                                             revision="no_timm",
                                                             num_labels=len(id2label),
                                                             ignore_mismatched_sizes=True)
        # see https://github.com/PyTorchLightning/pytorch-lightning/pull/1896
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay

    def forward(self, pixel_values, pixel_mask):
        """Run the wrapped model without labels and return its outputs."""
        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

        return outputs

    def common_step(self, batch, batch_idx):
        """Run the model with labels on one batch.

        Returns:
            ``(loss, loss_dict)`` taken from the model outputs.
        """
        pixel_values = batch["pixel_values"]
        pixel_mask = batch["pixel_mask"]
        # Targets arrive as a list of dicts; move every tensor to the module's device.
        labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

        loss = outputs.loss
        loss_dict = outputs.loss_dict

        return loss, loss_dict

    def _log_losses(self, total_key, component_prefix, loss, loss_dict, batch_size):
        """Log the total loss and each loss component with an explicit batch size."""
        # Passing batch_size stops Lightning from guessing it from the batch
        # dict (it warns that the collection is ambiguous).
        self.log(total_key, loss, batch_size=batch_size)
        for name, value in loss_dict.items():
            self.log(component_prefix + name, value.item(), batch_size=batch_size)

    def training_step(self, batch, batch_idx):
        """Compute and log the training losses; return the total loss."""
        loss, loss_dict = self.common_step(batch, batch_idx)
        # logs metrics for each training_step,
        # and the average across the epoch
        self._log_losses("training_loss", "train_", loss, loss_dict,
                         batch["pixel_values"].shape[0])

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation losses; return the total loss."""
        loss, loss_dict = self.common_step(batch, batch_idx)
        self._log_losses("validation_loss", "validation_", loss, loss_dict,
                         batch["pixel_values"].shape[0])

        return loss

    def configure_optimizers(self):
        """Build AdamW with a separate learning rate for the backbone parameters."""
        param_dicts = [
              {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
              {
                  "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                  "lr": self.lr_backbone,
              },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                  weight_decay=self.weight_decay)

        return optimizer

    def train_dataloader(self):
        """Return the notebook-level training dataloader."""
        return TRAIN_DATALOADER

    def val_dataloader(self):
        """Return the notebook-level validation dataloader."""
        return VAL_DATALOADER

In [None]:
# The backbone gets a smaller learning rate than the rest of the network.
model = Detr(lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4)

# Smoke test: push one training batch through the freshly built model.
batch = next(iter(TRAIN_DATALOADER))
pixel_values, pixel_mask = batch['pixel_values'], batch['pixel_mask']
outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)

Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Load the TensorBoard notebook extension and point it at the lightning_logs/ directory.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

In [None]:
# Training is bounded by a step budget, with a gradient clipping value of 0.1.
trainer = Trainer(
    max_steps=10000,
    gradient_clip_val=0.1,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Time the whole fit; the dataloaders come from the model's own hooks.
inicio = time.time()
trainer.fit(model)
elapsed_minutes = round((time.time() - inicio) / 60, 0)
print("Entrenamiento time(min.): ", elapsed_minutes)

You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: /home/eva/Clean_Articulo/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                   | Params
-------------------------------------------------
0 | model | DetrForObjectDetection | 41.5 M
-------------------------------------------------
18.0 M    Trainable params
23.5 M    Non-trainable params
41.5 M    Total params
166.007   Total estimated model params size (MB)


Sanity Checking: |                                                                      | 0/? [00:00<?, ?it/s]

/home/eva/anaconda3/lib/python3.9/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 8. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Training: |                                                                             | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

/home/eva/anaconda3/lib/python3.9/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

Validation: |                                                                           | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=10000` reached.


Entrenamiento time(min.):  86.0
472030:28:25


In [None]:
# Save only the wrapped transformers model (not the Lightning module) so it
# can be reloaded with from_pretrained.
model.model.save_pretrained(f'./DETR_Results/{MODEL_PATH}{TYPE}')

In [None]:
# loading model
# From here on `model` is the plain DetrForObjectDetection reloaded from disk,
# no longer the Lightning wrapper used for training.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DetrForObjectDetection.from_pretrained(f'./DETR_Results/{MODEL_PATH}{TYPE}')
model.to(device)

DetrForObjectDetection(
  (model): DetrModel(
    (backbone): DetrConvModel(
      (conv_encoder): DetrConvEncoder(
        (model): ResNetBackbone(
          (embedder): ResNetEmbeddings(
            (embedder): ResNetConvLayer(
              (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
              (normalization): DetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
            (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          )
          (encoder): ResNetEncoder(
            (stages): ModuleList(
              (0): ResNetStage(
                (layers): Sequential(
                  (0): ResNetBottleNeckLayer(
                    (shortcut): ResNetShortCut(
                      (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                      (normalization): DetrFrozenBatchNorm2d()
                    )
                    (layer): Seq

In [None]:
def calculate_iou_matrix(boxes_true, boxes_pred):
    """Compute the pairwise IoU between ground-truth and predicted boxes.

    Args:
        boxes_true: Either one box ``[x, y, width, height]`` or a sequence of
            such boxes.
        boxes_pred: Sequence of corner-format boxes
            ``[x_min, y_min, x_max, y_max]``.

    Returns:
        ``numpy.ndarray`` of shape ``(n_true, n_pred)`` where entry ``[i, j]``
        is the IoU of true box ``i`` and predicted box ``j``. A single flat
        ``boxes_true`` yields exactly one row, so ``result[0]`` holds its IoU
        against every prediction. Pairs whose union has no area get IoU 0.
    """
    # reshape(-1, 4) accepts a flat box, a list of boxes, and empty input alike.
    true_xywh = np.asarray(boxes_true, dtype=np.float64).reshape(-1, 4)
    pred_xyxy = np.asarray(boxes_pred, dtype=np.float64).reshape(-1, 4)

    # Convert the true boxes to corners. min/max ordering keeps the area
    # non-negative even if a width/height or a corner pair is reversed.
    true_xa = true_xywh[:, 0]
    true_ya = true_xywh[:, 1]
    true_xb = true_xa + true_xywh[:, 2]
    true_yb = true_ya + true_xywh[:, 3]
    true_x1, true_x2 = np.minimum(true_xa, true_xb), np.maximum(true_xa, true_xb)
    true_y1, true_y2 = np.minimum(true_ya, true_yb), np.maximum(true_ya, true_yb)

    pred_x1 = np.minimum(pred_xyxy[:, 0], pred_xyxy[:, 2])
    pred_x2 = np.maximum(pred_xyxy[:, 0], pred_xyxy[:, 2])
    pred_y1 = np.minimum(pred_xyxy[:, 1], pred_xyxy[:, 3])
    pred_y2 = np.maximum(pred_xyxy[:, 1], pred_xyxy[:, 3])

    true_area = (true_x2 - true_x1) * (true_y2 - true_y1)
    pred_area = (pred_x2 - pred_x1) * (pred_y2 - pred_y1)

    # Broadcast true boxes along rows and predictions along columns.
    overlap_w = np.minimum(true_x2[:, None], pred_x2[None, :]) - np.maximum(true_x1[:, None], pred_x1[None, :])
    overlap_h = np.minimum(true_y2[:, None], pred_y2[None, :]) - np.maximum(true_y1[:, None], pred_y1[None, :])
    intersection_area = np.clip(overlap_w, 0, None) * np.clip(overlap_h, 0, None)
    union_area = true_area[:, None] + pred_area[None, :] - intersection_area

    # Leave IoU at 0 wherever the union is empty (or not a number) instead of dividing by zero.
    iou_matrix = np.zeros_like(union_area)
    np.divide(intersection_area, union_area, out=iou_matrix, where=union_area > 0)
    return iou_matrix

In [None]:
# Visual check on one random test image: ground truth next to the detection
# that overlaps it best.
random.seed(123)
CONFIDENCE_TRESHOLD = 0.1
# utils
categories = TEST_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
box_annotator = sv.BoxAnnotator()

# select random image
image_ids = TEST_DATASET.coco.getImgIds()
image_id = random.choice(image_ids)
print('Image #{}'.format(image_id))

# load image and annotations
image = TEST_DATASET.coco.loadImgs(image_id)[0]
annotations = TEST_DATASET.coco.imgToAnns[image_id]
image_path = os.path.join(TEST_DATASET.root, image['file_name'])
image = cv2.imread(image_path)

# Annotate ground truth
gt_detections = sv.Detections.from_coco_annotations(coco_annotation=annotations)
labels = [f"{id2label[class_id]}" for _, _, class_id, _ in gt_detections]
frame_ground_truth = box_annotator.annotate(scene=image.copy(), detections=gt_detections, labels=labels)

# Only the first annotation's box is used as the IoU reference.
true_boxes=annotations[0]['bbox']

# Annotate detections
with torch.no_grad():

    # load image and predict
    inputs = image_processor(images=image, return_tensors='pt').to(device)
    outputs = model(**inputs)

    # post-process
    target_sizes = torch.tensor([image.shape[:2]]).to(device)
    results = image_processor.post_process_object_detection(
        outputs=outputs,
        threshold=CONFIDENCE_TRESHOLD,
        target_sizes=target_sizes
    )[0]

if len(results['boxes']) != 0:
    # Keep the boxes at full precision; a float16 cast rounds large pixel coordinates.
    pred_boxes = results['boxes'].cpu().numpy()

    iou = calculate_iou_matrix(true_boxes,pred_boxes)
    iou_max = int(np.argmax(iou[0]))
    print(iou[0])
    print('IoU: ',round(iou[0][iou_max],3))
    # Slice out the best detection; the tensors stay on whatever device the
    # model ran on, so this also works with the CPU fallback.
    best_result = {key: results[key][iou_max:iou_max + 1] for key in ('scores', 'labels', 'boxes')}
    detections = sv.Detections.from_transformers(transformers_results=best_result)
    labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]
    frame_detections = box_annotator.annotate(scene=image.copy(), detections=detections, labels=labels)

    # Combine both images side by side and display
    fig, axs = plt.subplots(1, 2, figsize=(20, 10))
    axs[0].imshow(cv2.cvtColor(frame_ground_truth, cv2.COLOR_BGR2RGB))
    axs[0].axis('off')
    axs[0].set_title('Ground Truth')

    axs[1].imshow(cv2.cvtColor(frame_detections, cv2.COLOR_BGR2RGB))
    axs[1].axis('off')
    axs[1].set_title('Detections')
    plt.show()
else:
    print("Sin detetección...")

## Predictions

In [None]:
#Save the bbox test predictions
# exist_ok=True makes the cell safe to re-run: existing folders are left alone
# and every folder is still attempted. makedirs creates the intermediate
# './data.../DETR' and './data.../DETR/test' levels on its way down.
for output_dir in (
    './data'+TYPE+'/DETR/test/Cáncer',   #For ViT
    './data'+TYPE+'/DETR/test/Control',
    './Procesado'+TYPE+'/test/DETR',     #For SAM
):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# For every test image: run the detector, keep the detection that overlaps the
# reference box best, save the image masked to that box (or the untouched image
# when nothing is detected) and record one CSV row.
random.seed(123)
inicio=time.time()

CONFIDENCE_TRESHOLD = 0.1

writepredsdetectDict = [] # Dict Results

categories = TEST_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
box_annotator = sv.BoxAnnotator()
image_ids = TEST_DATASET.coco.getImgIds()

for image_id in tqdm(image_ids):
    # load image and annotations
    image = TEST_DATASET.coco.loadImgs(image_id)[0]
    annotations = TEST_DATASET.coco.imgToAnns[image_id]
    image_name=image['file_name']
    image_path = os.path.join(TEST_DATASET.root, image_name)
    image = cv2.imread(image_path)

    # Ground truth: labels of every annotation, but only the first box is the IoU reference
    gt_detections = sv.Detections.from_coco_annotations(coco_annotation=annotations)
    gt_labels = [f"{id2label[class_id]}" for _, _, class_id, _ in gt_detections]
    true_boxes=annotations[0]['bbox']

    # Annotate detections
    with torch.no_grad():

        # load image and predict
        inputs = image_processor(images=image, return_tensors='pt').to(device)
        outputs = model(**inputs)

    # post-process
    target_sizes = torch.tensor([image.shape[:2]]).to(device)
    results = image_processor.post_process_object_detection(outputs=outputs,
                                                            threshold=CONFIDENCE_TRESHOLD,
                                                            target_sizes=target_sizes)[0]

    # NOTE(review): any file name containing an uppercase 'C' goes to Cáncer;
    # confirm this matches the naming convention of the Control images.
    if 'C' in image_name:
        new_dir='./data'+TYPE+'/DETR/test/Cáncer/'
    else:
        new_dir='./data'+TYPE+'/DETR/test/Control/'

    if len(results['boxes']) != 0:
        # Keep the boxes at full precision; a float16 cast rounds large pixel coordinates.
        pred_boxes = results['boxes'].cpu().numpy()

        iou = calculate_iou_matrix(true_boxes,pred_boxes)[0]
        iou_max = int(np.argmax(iou))

        # Slice out the best detection; the tensors stay on the device the
        # model ran on, so this also works with the CPU fallback.
        best_result = {key: results[key][iou_max:iou_max + 1] for key in ('scores', 'labels', 'boxes')}
        detections = sv.Detections.from_transformers(transformers_results=best_result)
        labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]

        #---------We save the result for segmentation---------
        #We cut ROI
        bbox=results['boxes'][iou_max].tolist()
        # Clamp at 0: a negative coordinate would otherwise be read as an
        # index counted from the end of the axis and produce a wrong mask.
        x_min, y_min, x_max, y_max = (max(int(coord), 0) for coord in bbox)
        my_mask=np.zeros(image.shape[:2],dtype=np.uint8)
        my_mask[y_min:y_max,x_min:x_max]=255
        my_mask=cv2.cvtColor(my_mask, cv2.COLOR_GRAY2BGR)
        # Apply the mask to the image
        output_image = cv2.bitwise_and(image, my_mask)

        prediction_fields = {'predicion':bbox,
                             'IoU': round(iou[iou_max],3),
                             'etiqueta':labels,
                             'score':round(results['scores'][iou_max].item(),3)}
    else:
        # No detection above the threshold: keep the image untouched.
        output_image = image
        prediction_fields = {'predicion':None,
                             'IoU': None,
                             'etiqueta':None,
                             'score':None}

    # The writes are synchronous, so no pause is needed between them.
    cv2.imwrite(new_dir+str(image_name), output_image)
    cv2.imwrite('./Procesado'+TYPE+'/test/DETR/'+str(image_name), output_image)

    #-----------We store data---------
    writepredsdetectDict.append({'modelo':MODEL_PATH,
                                 'imagen':image_name,
                                 'set':'Test',
                                 'clase':gt_labels,
                                 'true_box':true_boxes,
                                 **prediction_fields})
print('TEST PREDICTIONS COMPLETE')

#-------We proceed to store the results in file
file_name='resultados.csv'
archivo='./data'+TYPE+'/DETR/'+file_name
# Append to an existing results file; write the header only when creating it.
write_header = not os.path.isfile(archivo)
with open(archivo, 'a', newline='') as csvfile:
    fieldnames = ['modelo', 'imagen','set', 'clase','true_box','predicion','IoU','etiqueta','score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if write_header:
        writer.writeheader()
    writer.writerows(writepredsdetectDict)
print('*****END*****')
print('Tiempo de ejecución: ',round((time.time()-inicio)/60,0))

  return lib.intersection(a, b, **kwargs)
100%|███████████████████████████████████████████████████████████████████████| 107/107 [07:18<00:00,  4.10s/it]

TEST PREDICTIONS COMPLETE
*****END*****
Tiempo de ejecución:  7.0





In [None]:
#To save valid predictions
# exist_ok=True makes the cell safe to re-run: existing folders are left alone
# and every folder is still attempted. makedirs creates the intermediate
# './data.../DETR/valid' level on its way down.
for output_dir in (
    './data'+TYPE+'/DETR/valid/Cáncer',
    './data'+TYPE+'/DETR/valid/Control',
    './Procesado'+TYPE+'/valid/DETR',
):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# For every validation image: run the detector, keep the detection that
# overlaps the reference box best, save the image masked to that box (or the
# untouched image when nothing is detected) and record one CSV row.
random.seed(123)
inicio=time.time()

CONFIDENCE_TRESHOLD = 0.1

writepredsdetectDict = [] # Dict Results

categories = VAL_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
box_annotator = sv.BoxAnnotator()
image_ids = VAL_DATASET.coco.getImgIds()

for image_id in tqdm(image_ids):
    # load image and annotations
    image = VAL_DATASET.coco.loadImgs(image_id)[0]
    annotations = VAL_DATASET.coco.imgToAnns[image_id]
    image_name=image['file_name']
    image_path = os.path.join(VAL_DATASET.root, image_name)
    image = cv2.imread(image_path)

    # Ground truth: labels of every annotation, but only the first box is the IoU reference
    gt_detections = sv.Detections.from_coco_annotations(coco_annotation=annotations)
    gt_labels = [f"{id2label[class_id]}" for _, _, class_id, _ in gt_detections]
    true_boxes=annotations[0]['bbox']

    # Annotate detections
    with torch.no_grad():

        # load image and predict
        inputs = image_processor(images=image, return_tensors='pt').to(device)
        outputs = model(**inputs)

    # post-process
    target_sizes = torch.tensor([image.shape[:2]]).to(device)
    results = image_processor.post_process_object_detection(outputs=outputs,
                                                            threshold=CONFIDENCE_TRESHOLD,
                                                            target_sizes=target_sizes)[0]

    # NOTE(review): any file name containing an uppercase 'C' goes to Cáncer;
    # confirm this matches the naming convention of the Control images.
    if 'C' in image_name:
        new_dir='./data'+TYPE+'/DETR/valid/Cáncer/'
    else:
        new_dir='./data'+TYPE+'/DETR/valid/Control/'

    if len(results['boxes']) != 0:
        # Keep the boxes at full precision; a float16 cast rounds large pixel coordinates.
        pred_boxes = results['boxes'].cpu().numpy()

        iou = calculate_iou_matrix(true_boxes,pred_boxes)[0]
        iou_max = int(np.argmax(iou))

        # Slice out the best detection; the tensors stay on the device the
        # model ran on, so this also works with the CPU fallback.
        best_result = {key: results[key][iou_max:iou_max + 1] for key in ('scores', 'labels', 'boxes')}
        detections = sv.Detections.from_transformers(transformers_results=best_result)
        labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]

        #---------We save the result for segmentation---------
        # We cut ROI
        bbox=results['boxes'][iou_max].tolist()
        # Clamp at 0: a negative coordinate would otherwise be read as an
        # index counted from the end of the axis and produce a wrong mask.
        x_min, y_min, x_max, y_max = (max(int(coord), 0) for coord in bbox)
        my_mask=np.zeros(image.shape[:2],dtype=np.uint8)
        my_mask[y_min:y_max,x_min:x_max]=255
        my_mask=cv2.cvtColor(my_mask, cv2.COLOR_GRAY2BGR)
        # Apply the mask to the image
        output_image = cv2.bitwise_and(image, my_mask)

        prediction_fields = {'predicion':bbox,
                             'IoU': round(iou[iou_max],3),
                             'etiqueta':labels,
                             'score':round(results['scores'][iou_max].item(),3)}
    else:
        # No detection above the threshold: keep the image untouched.
        output_image = image
        prediction_fields = {'predicion':None,
                             'IoU': None,
                             'etiqueta':None,
                             'score':None}

    # The writes are synchronous, so no pause is needed between them.
    cv2.imwrite(new_dir+str(image_name), output_image)
    cv2.imwrite('./Procesado'+TYPE+'/valid/DETR/'+str(image_name), output_image)

    #-----------We store data---------
    writepredsdetectDict.append({'modelo':MODEL_PATH,
                                 'imagen':str(image_name),
                                 'set':'Valid',
                                 'clase':gt_labels,
                                 'true_box':true_boxes,
                                 **prediction_fields})
print('VALID PREDICTIONS COMPLETE')

#-------We proceed to store the results in file
file_name='resultados.csv'
archivo='./data'+TYPE+'/DETR/'+file_name
# Append to an existing results file; write the header only when creating it.
write_header = not os.path.isfile(archivo)
with open(archivo, 'a', newline='') as csvfile:
    fieldnames = ['modelo', 'imagen','set', 'clase','true_box','predicion','IoU','etiqueta','score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if write_header:
        writer.writeheader()
    writer.writerows(writepredsdetectDict)
print('*****END*****')
print('Tiempo de ejecución: ',round((time.time()-inicio)/60,0))

100%|███████████████████████████████████████████████████████████████████████| 810/810 [55:15<00:00,  4.09s/it]

VALID PREDICTIONS COMPLETE
*****END*****
Tiempo de ejecución:  55.0





In [None]:
# Output folders for the training-split predictions.
# exist_ok=True makes the cell safe to re-run: existing folders are left alone
# and every folder is still attempted. makedirs creates the intermediate
# './data.../DETR/train' level on its way down.
for output_dir in (
    './data'+TYPE+'/DETR/train/Cáncer',
    './data'+TYPE+'/DETR/train/Control',
    './Procesado'+TYPE+'/train/DETR',
):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Run the detector over every image of the train split. For each image:
#   * keep the prediction with the highest IoU against the first ground-truth box,
#   * save the image masked to that box (or the untouched image when nothing was
#     detected) into the per-class folder and into ./Procesado.../train/DETR/,
#   * collect one record per image and finally dump all records to resultados.csv.
random.seed(123)
inicio=time.time()

CONFIDENCE_TRESHOLD = 0.1

writepredsdetectDict = [] # One dict per image; written to the CSV at the end

categories = TRAIN_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
box_annotator = sv.BoxAnnotator()
image_ids = TRAIN_DATASET.coco.getImgIds()

for image_id in tqdm(image_ids):
    # Load image and annotations
    image = TRAIN_DATASET.coco.loadImgs(image_id)[0]
    annotations = TRAIN_DATASET.coco.imgToAnns[image_id]
    image_name=image['file_name']
    image_path = os.path.join(TRAIN_DATASET.root, image['file_name'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cv2.imread could not read ' + str(image_path))

    # Ground truth
    gt_detections = sv.Detections.from_coco_annotations(coco_annotation=annotations)
    gt_labels = [f"{id2label[class_id]}" for _, _, class_id, _ in gt_detections]
    # Only the first annotation's box is used; an image without annotations
    # raises IndexError here.
    # NOTE(review): presumably a COCO [x, y, width, height] box -- confirm that
    # calculate_iou_matrix expects this format.
    true_boxes=annotations[0]['bbox']

    # Destination folder depends on whether the file name contains 'C'.
    if 'C' in image_name:
        new_dir='./data'+TYPE+'/DETR/train/Cáncer/'
    else:
        new_dir='./data'+TYPE+'/DETR/train/Control/'

    # Inference (no gradients needed)
    with torch.no_grad():
        inputs = image_processor(images=image, return_tensors='pt').to(device)
        outputs = model(**inputs)

    # Post-process: rescale boxes to the image size and drop low-score predictions
    target_sizes = torch.tensor([image.shape[:2]]).to(device)
    results = image_processor.post_process_object_detection(outputs=outputs,
                                                            threshold=CONFIDENCE_TRESHOLD,
                                                            target_sizes=target_sizes)[0]

    if len(results['boxes']) != 0:
        # NOTE(review): the float16 cast reduces box-coordinate precision before
        # the IoU computation; left unchanged here.
        pred_boxes = results['boxes'].cpu().data.numpy().astype(np.float16)

        # First row of the IoU matrix; pick the prediction that overlaps it most.
        iou = calculate_iou_matrix(true_boxes,pred_boxes)[0]
        iou_max = np.argmax(iou)

        # Build the single-detection result on the same device used for
        # inference instead of a hard-coded 'cuda:0'.
        best_result={'scores':torch.tensor([results['scores'][iou_max].item()], device=device),
                     'labels':torch.tensor([results['labels'][iou_max].item()], device=device),
                     'boxes' :torch.tensor([results['boxes'][iou_max].tolist()], device=device)}
        detections = sv.Detections.from_transformers(transformers_results=best_result)
        labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]

        #-------We save the result for segmentation-------
        # Cut the ROI: white rectangle over the selected box, black elsewhere.
        bbox=results['boxes'][iou_max].tolist()
        # Clamp the start indices at 0: a negative coordinate would otherwise be
        # read as a from-the-end index and produce an empty (all black) ROI.
        # End indices past the image border are already handled by slicing.
        x_start = max(int(bbox[0]), 0)
        y_start = max(int(bbox[1]), 0)
        my_mask=np.zeros((image.shape[0],image.shape[1]),dtype=np.uint8)
        my_mask[y_start:int(bbox[3]),x_start:int(bbox[2])]=255
        my_mask=cv2.cvtColor(my_mask, cv2.COLOR_GRAY2BGR)
        # Apply the mask to the image
        output_image = cv2.bitwise_and(image, my_mask)

        record = {'modelo':MODEL_PATH,
                  'imagen':str(image_name),
                  'set':'Train',
                  'clase':gt_labels,
                  'true_box':true_boxes,
                  'predicion':bbox,
                  'IoU': round(iou[iou_max],3),
                  'etiqueta':labels,
                  'score':round(results['scores'][iou_max].item(),3)}
    else:
        # No prediction above the threshold: export the image unchanged.
        output_image = image
        record = {'modelo':MODEL_PATH,
                  'imagen':str(image_name),
                  'set':'Train',
                  'clase':gt_labels,
                  'true_box':true_boxes,
                  'predicion':None,
                  'IoU': None,
                  'etiqueta':None,
                  'score':None}

    # cv2.imwrite is a blocking call, so no pause is needed between the writes.
    cv2.imwrite(new_dir+str(image_name), output_image)
    cv2.imwrite('./Procesado'+TYPE+'/train/DETR/'+str(image_name), output_image)

    #-----------We store data---------
    writepredsdetectDict.append(record)
print('TRAIN PREDICTIONS COMPLETE')

#-------We proceed to store the results in file
file_name='resultados.csv'
archivo='./data'+TYPE+'/DETR/'+file_name
# Append to an existing CSV; otherwise create it and write the header once.
if os.path.isfile(archivo):
    modo = 'a+'
else:
    modo = 'w'
with open(archivo, modo, newline='') as csvfile:
    fieldnames = ['modelo', 'imagen','set', 'clase','true_box','predicion','IoU','etiqueta','score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if modo=='w':
        writer.writeheader()
    writer.writerows(writepredsdetectDict)
print('*****END*****')
# Elapsed wall-clock time in whole minutes
print('Tiempo de ejecución: ',round((time.time()-inicio)/60,0))

100%|███████████████████████████████████████████████████████████████████| 1605/1605 [1:49:26<00:00,  4.09s/it]

TRAIN PREDICTIONS COMPLETE
*****END*****
Tiempo de ejecución:  109.0



