In [1]:
import os
import numpy as np
import torch
from PIL import Image
import xml.etree.ElementTree as ET
from torchvision import transforms
from tqdm import tqdm 
from torchmetrics.detection.mean_ap import MeanAveragePrecision


TWOJ_KOD = None 

def create_bbox_coords(bbox):
    xmin = float(bbox.find('xmin').text)
    ymin = float(bbox.find('ymin').text)
    xmax = float(bbox.find('xmax').text)
    ymax = float(bbox.find('ymax').text)
    return [xmin, ymin, xmax, ymax]

def create_mask(plasmodium_img, bbox):
    xmin, ymin, xmax, ymax = create_bbox_coords(bbox)
    mask = np.zeros((plasmodium_img.size[1], plasmodium_img.size[0]), dtype=np.uint8)
    mask[int(ymin):int(ymax), int(xmin):int(xmax)] = 1 
    return mask 

    
class MalariaPlasmodiumDataset(torch.utils.data.Dataset):
    # Będziemy czytać pliki jpg i odpowiadające im pliki XML 
    # z katalogu directory_root 
    # Podamy też transformacje jakie chcemy przeprowadzać na zwracanych wartościach 
    
    def __init__(self, directory_root, images_transforms=None):

        # Przypisujemy parametetry konstruktora do self 
        # Chcemy aby nasz przyszły obiekt wiedzial o tym gdzie szukać plików oraz 
        # jakie transformacje wykonywać na przeczytanych JPG 
        self.directory_root = directory_root        
        self.images_transforms = images_transforms

        # Listujemy wszystkie pliki które mają rozszerzenie "JPG" 
        self.all_image_files = sorted([img for img in os.listdir(directory_root) if img.endswith(".jpg")])

    def __getitem__(self, idx: int):
        # "magiczna" metoda __getitem__ jest wykorzystywana kiedy chcemy aby nasz obiekt był dostępny poprzez operator [int] 
        # podobnie jak lista czy dict 
        single_plasmodium_img_path = self.get_single_plasmodium_path(idx)
        single_annotation_file_path = single_plasmodium_img_path.replace(".jpg", ".xml")
        plasmodium_img = Image.open(single_plasmodium_img_path).convert("RGB") 
        
        # czytamy xml file
        annotations = ET.parse(single_annotation_file_path)
        boxes = []
        masks = []        
        
        for detected_plasmodium in annotations.findall('object'):            
            bbox = detected_plasmodium.find('bndbox')
            # dodajemy bboxes
            boxes.append(
                create_bbox_coords(bbox)
            )
            # dodajemy maski 
            masks.append(
                create_mask(
                    plasmodium_img, bbox
                )
            )
        image_id = torch.tensor([idx])
        if boxes:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            masks = torch.as_tensor(np.array(masks), dtype=torch.uint8)  
            labels = torch.ones((len(boxes),), dtype=torch.int64) 
        else:
            boxes =  torch.empty(0, 4)
            masks = torch.zeros(0, plasmodium_img.height, plasmodium_img.width, dtype=torch.uint8)
            labels = torch.zeros(0, dtype=torch.int64)
        
        if self.images_transforms is not None:
            transformed_plasmodium_img = self.images_transforms(plasmodium_img)
        else:
            transformed_plasmodium_img = plasmodium_img            
        # zapisujemy target dla jednego pliku img 
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["masks"] = masks
        
        return transformed_plasmodium_img, target

    def get_single_plasmodium_path(self, idx):
        single_plasmodium_img_path = os.path.join(self.directory_root, self.all_image_files[idx])
        return single_plasmodium_img_path

    
    def __len__(self):
        # magiczna metoda __len__ jest używana gdy na instancji wykonujemy len() 
        return len(self.all_image_files)

In [2]:
import torchvision.transforms as T
from PIL import Image, ImageDraw

def draw_bounding_boxes(image_path, bboxes, scores=None, color=(255, 0, 0), return_pt = False):    
    img_pil = Image.open(image_path).convert("RGBA")
    new = Image.new('RGBA', img_pil.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(new)

    for i, box in enumerate(bboxes):
        xmin, ymin, xmax, ymax = box        
        if scores is not None:          
            alpha = int(255 * scores[i])  # Convert score to an alpha value.                      
            color_with_alpha = color + (alpha,)
        else:       
            color_with_alpha = color + (255,)
        draw.rectangle([xmin, ymin, xmax, ymax], outline=color_with_alpha, width=2)

    out = Image.alpha_composite(img_pil, new).convert("RGB")
    return T.ToTensor()(out) if return_pt else out 


In [3]:
images_transforms = transforms.Compose([
    transforms.ToTensor(), # chcemy najpierw 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2
])

dataset = MalariaPlasmodiumDataset(
    "/home/kamil/Downloads/plasmodium-phonecamera/train/", images_transforms=images_transforms
)

# TODO: podzielmy sobie nasz dataset na 3 randomowe rozłączne subsety w proporcjach 0.8,0.1
# torch.utils.data.random_split <= nasz przyjaciel 
train_set, val_set = TWOJ_KOD, TWOJ_KOD

In [4]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


train_data_loader = torch.utils.data.DataLoader(
    train_set, batch_size=1, 
    shuffle=True, num_workers=2,
    collate_fn=lambda x: tuple(zip(*x))
)


val_data_loader = torch.utils.data.DataLoader(
    val_set, batch_size=1, 
    shuffle=True, num_workers=2,
    collate_fn=lambda x: tuple(zip(*x))
)

model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2()

num_classes = 2  # 1 zarodziec + tło
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)


TypeError: object of type 'NoneType' has no len()

In [None]:

device = torch.device('cpu')
model.to(device)

from torch.utils.tensorboard import SummaryWriter
# Parameters
params = [p for p in model.parameters() if p.requires_grad] # only some parameters are trainable 
optimizer = TWOJ_KOD # TODO: otwórz optimizer Adam (lr=0.0005)

writer = SummaryWriter() 


# TODO: initialize best_eval_metric_result 
best_eval_metric_result = TWOJ_KOD

num_epochs = 60
for epoch in range(num_epochs):
    # train 
    model.train() 
    for images, targets in tqdm(train_data_loader, desc=f"Training epoch {epoch}"):     
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # evaluation
    model.eval()    
    eval_metric = TWOJ_KOD #TODO utwórz mAP metric z tensormetrics  [iou_type="bbox", iou_thresholds = [0.5]]
    with torch.no_grad():
        for images, targets in tqdm(val_data_loader, desc="Evaluation..."):        
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            output = model(images)
    
            # MAP metric update 
            eval_metric.update(output, targets)    
        # metryka po całej epoce 
        result = eval_metric.compute()            
        writer.add_scalar("map@validation_set", result['map'].detach().numpy(), epoch)
        # Zapiszmy obecnie najlepszy model 
        if result['map'].detach().numpy() > best_eval_metric_result:           
            pass 
            # TODO: zapis model.state_dict() jako 'best_model.pth'
            # TODO: nadpisz best_eval_metric_result             
            

    # Po wyjściu z pętli walidacji powyżej zmienne targets i outputs nadal istnieją - skorzystamy z nich 
    # by wyświetlić jak wyglądają przykładowe detekcje po tej epoce 
    
    bboxes_true = targets[0]['boxes']
    bboxes_predicted = output[0]['boxes']
    scores = output[0]['scores']
    img_id = targets[0]['image_id']
    img = val_set.dataset.get_single_plasmodium_path(targets[0]['image_id'])

    # zapiszmy zdjecie z predykcjami obok prawdziwych zarodźców w tensorboard 
    img_tensor = torch.cat([
        draw_bounding_boxes(img, bboxes_true, return_pt=True), 
        draw_bounding_boxes(img, bboxes_predicted, scores, color=(0,255,0), return_pt=True)
        ], dim=2
    )
    # TODO: add_image to tensorboard to see current detections 
    TWOJ_KOD
