In [None]:
import ast
import glob
import os
import random
import time

import numpy as np
import pandas as pd
import PIL
import PIL.Image
import PIL.ImageDraw

from sklearn.model_selection import StratifiedKFold

import cv2
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

import torch
from torch.utils.data.dataset import Dataset
import torch.cuda.amp as amp
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import albumentations as A
from albumentations.pytorch import ToTensorV2

from map_boxes import mean_average_precision_for_boxes

In [None]:
# --- Locations -----------------------------------------------------------
# Both directories currently resolve to the parent of the working directory.
RESIZE_DIR = "../"
DATA_DIR = "../"

# --- Run settings --------------------------------------------------------
MODEL_NAME = "tf_efficientdet_d0"
SEED = 420
FOLDS = 5
NUM_CLASSES = 4
BATCHSIZE = 8
# Passed as `size` to the dataset, which reads it as (height, width).
SIZE = (512, 512)

In [None]:
class XRayDatasetFromDFOpacityAnnotations(Dataset):
    """Object-detection dataset backed by a DataFrame of X-ray box annotations.

    Every row of ``df`` is read for ``path`` (image file), ``boxes`` (a
    string-encoded list of ``{"x", "y", "width", "height"}`` dicts, or NaN when
    the image has no annotation) and ``width`` / ``height`` (the dimensions the
    box coordinates are divided by when rescaling; presumably the original
    image size -- TODO confirm).

    Boxes are rescaled once, at construction time, to ``size`` and cached in
    ``id_bbox_map`` in pascal_voc order ``[x_min, y_min, x_max, y_max]``.

    Labels are 1-based positions in ``label_list``: ``"opacity"`` -> 1 and
    ``"none"`` -> 2.  An un-annotated image gets one full-image box labelled
    ``"none"``.  If a sample ends up with no boxes at all (e.g. after the
    transforms), ``__getitem__`` substitutes a small dummy box labelled ``-1``.
    """

    def __init__(
        self,
        df,
        train=True,
        predict=True,
        augment=True,
        data_dir=os.path.join(DATA_DIR, "train"),
        size=(384, 384),
    ):
        """Build the transform pipeline and pre-compute the scaled boxes.

        Args:
            df: Annotation table; its index values are used as image ids.
            train: With ``predict``, controls whether Normalize + ToTensorV2
                are appended (either flag being true is enough).
            predict: See ``train``.
            augment: When true, ``A.RandomBrightnessContrast`` is applied.
            data_dir: Stored as ``path_suffix``; not otherwise read by this
                class (images are opened from ``row["path"]``).
            size: Target ``(height, width)`` that boxes are scaled to.
        """
        self.df = df
        # "none" must be listed: `_preprocess_boxes` looks it up for rows
        # without annotations and would otherwise raise ValueError.
        self.label_list = ["opacity", "none"]
        self.ids = df.index.sort_values()
        self.path_suffix = data_dir
        self._augment = augment
        self._train = train
        self._predict = predict
        self._size = size
        self._transform_list = []

        if self._augment:
            # Only a photometric augmentation is enabled; no geometric
            # transforms (flips / shift-scale-rotate) are applied.
            self._transform_list.append(A.RandomBrightnessContrast(p=0.5))

        if self._train or self._predict:
            self._transform_list.extend(
                [
                    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                    ToTensorV2(),
                ]
            )

        # Always define the attribute so `__getitem__` can test it directly.
        self._transforms = None
        if self._transform_list:
            self._transforms = A.Compose(
                self._transform_list,
                bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"]),
            )
        self._preprocess_boxes()

    @staticmethod
    def _parse_boxes(raw_boxes):
        """Decode the string stored in the ``boxes`` column into Python objects.

        Uses ``ast.literal_eval`` rather than ``eval`` so text coming from a
        CSV can only produce literals and can never execute code.
        """
        return ast.literal_eval(raw_boxes)

    def _preprocess_boxes(self):
        """Fill ``id_bbox_map`` with ``(boxes, labels)`` for every image id.

        Annotated rows: each box is scaled to ``size``, converted to
        ``[x_min, y_min, x_max, y_max]`` and clamped to the image rectangle.
        Boxes that collapse to zero area after clamping are dropped.
        Un-annotated rows: a single full-image box labelled ``"none"``.
        """
        self.id_bbox_map = {}
        scaled_w = self._size[1]
        scaled_h = self._size[0]
        opacity_label = self.label_list.index("opacity") + 1
        none_label = self.label_list.index("none") + 1
        opacity_count = 0
        none_count = 0
        for img_id in self.ids:
            row = self.df.loc[img_id]
            all_boxes = []
            all_cats = []
            if pd.notna(row["boxes"]):
                for box in self._parse_boxes(row["boxes"]):
                    # Corners are computed from the raw annotation first and
                    # clamped afterwards.  Clamping only the origin (and
                    # keeping the full width/height) would push the far edge
                    # outwards, and an edge beyond the image is rejected by
                    # the bbox validation of the transform pipeline.
                    x_min = box["x"] * scaled_w / row["width"]
                    y_min = box["y"] * scaled_h / row["height"]
                    x_max = x_min + box["width"] * scaled_w / row["width"]
                    y_max = y_min + box["height"] * scaled_h / row["height"]

                    x_min = min(max(x_min, 0), scaled_w)
                    y_min = min(max(y_min, 0), scaled_h)
                    x_max = min(max(x_max, 0), scaled_w)
                    y_max = min(max(y_max, 0), scaled_h)

                    if x_max <= x_min or y_max <= y_min:
                        # Entirely outside the image or zero-sized.
                        continue

                    all_boxes.append([x_min, y_min, x_max, y_max])
                    all_cats.append(opacity_label)
                    opacity_count += 1
            else:
                # setting the entire image as a negative "detection".
                all_boxes.append([0, 0, scaled_w, scaled_h])
                all_cats.append(none_label)
                none_count += 1
            self.id_bbox_map[img_id] = (all_boxes, all_cats)
        print("Opacity detections: {}".format(opacity_count))
        print("None Count: {}".format(none_count))

    def __len__(self):
        """Number of image ids in the dataset."""
        return len(self.ids)

    def draw_bbox_idx(self, idx):
        """Open the image at position ``idx`` and draw its annotated boxes.

        Boxes are rescaled from the row's ``width`` / ``height`` to the size of
        the file that is actually opened.  The image id and the opened image's
        ``(width, height)`` are printed.

        Returns:
            The ``PIL.Image`` with one rectangle per annotated box (unchanged
            when the row has no boxes).
        """
        img_id = self.ids[idx]
        row = self.df.loc[img_id]
        print(img_id)
        image = PIL.Image.open(row["path"])
        scaled_w = image.width
        scaled_h = image.height
        print((scaled_w, scaled_h))
        if pd.notna(row["boxes"]):
            draw = PIL.ImageDraw.Draw(image)
            for box in self._parse_boxes(row["boxes"]):
                x_min = box["x"] / row["width"] * scaled_w
                y_min = box["y"] / row["height"] * scaled_h
                x_max = (box["x"] + box["width"]) / row["width"] * scaled_w
                y_max = (box["y"] + box["height"]) / row["height"] * scaled_h
                draw.rectangle([x_min, y_min, x_max, y_max])
        return image

    def _yolo_to_voc_format(self, yolo_bboxes):
        """Convert yolo-format boxes to pascal_voc pixel boxes.

        Args:
            yolo_bboxes: 2-D tensor whose columns are
                ``(x_center, y_center, width, height)``, normalised to [0, 1]
                by the image dimensions.

        Returns:
            Tensor of the same shape holding ``(x_min, y_min, x_max, y_max)``
            in pixels of ``size``.
        """
        scaled_w = self._size[1]
        scaled_h = self._size[0]
        bboxes_voc = torch.zeros_like(yolo_bboxes)
        # x_min = (x_c - width / 2) * scaled_w
        bboxes_voc[:, 0] = (yolo_bboxes[:, 0] - yolo_bboxes[:, 2] / 2) * scaled_w
        bboxes_voc[:, 1] = (yolo_bboxes[:, 1] - yolo_bboxes[:, 3] / 2) * scaled_h
        bboxes_voc[:, 2] = bboxes_voc[:, 0] + yolo_bboxes[:, 2] * scaled_w
        bboxes_voc[:, 3] = bboxes_voc[:, 1] + yolo_bboxes[:, 3] * scaled_h

        return bboxes_voc

    def draw_bbox_img(self, image, bboxes, label):
        """Draw ``bboxes`` on ``image`` and return the result as a PIL image.

        Args:
            image: Array accepted by ``PIL.Image.fromarray``.
            bboxes: Iterable of ``(x_min, y_min, x_max, y_max)`` boxes in the
                pixel coordinates of ``image``.
            label: Sized collection; only its length is used (printed).
        """
        image = PIL.Image.fromarray(image)
        draw = PIL.ImageDraw.Draw(image)
        for bbox in bboxes:
            draw.rectangle([bbox[0], bbox[1], bbox[2], bbox[3]])
        print(f"Number of boxes{len(label)}")
        return image

    def __getitem__(self, idx):
        """Load one sample and return it as a dict.

        Returns:
            Dict with ``"image"`` (output of the transform pipeline, or the RGB
            array when no transform is configured), ``"bboxes"`` (float32
            tensor, one ``[x_min, y_min, x_max, y_max]`` row per box) and
            ``"labels"`` (long tensor, one entry per box).

        Raises:
            FileNotFoundError: If a ``.jpg`` path cannot be read by OpenCV.
        """
        img_id = self.ids[idx]
        boxes, labels = self.id_bbox_map[img_id]
        row = self.df.loc[img_id]

        path = row["path"]
        # ideally, we'd clean up the df,
        # but may be we use it to produce predictions as well.
        if path.endswith(".jpg"):
            pixels = cv2.imread(path)
            if pixels is None:
                # cv2.imread signals failure by returning None; fail here with
                # the offending path instead of inside cvtColor.
                raise FileNotFoundError(f"Could not read image: {path!r}")
        else:
            pixels = dicom2array(path, size=self._size)
        img = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)

        if self._transforms is not None:
            image_and_labels = self._transforms(image=img, bboxes=boxes, labels=labels)
        else:
            image_and_labels = {"image": img, "bboxes": boxes, "labels": labels}

        # `len(...) == 0` works whether the boxes come back as a list or as an
        # array (truth-testing a multi-row array raises).
        if len(image_and_labels["bboxes"]) == 0:
            # Small random box around the image centre, flagged with label -1.
            # `size` is (height, width), so x uses index 1 and y uses index 0.
            half_w = torch.rand(1).item() * 10
            half_h = torch.rand(1).item() * 10
            center_x = self._size[1] / 2
            center_y = self._size[0] / 2
            image_and_labels["bboxes"] = [
                [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h]
            ]
            image_and_labels["labels"] = [-1]

        image_and_labels["bboxes"] = torch.tensor(image_and_labels["bboxes"]).to(dtype=torch.float32)
        image_and_labels["labels"] = torch.tensor(image_and_labels["labels"]).to(dtype=torch.long)

        return image_and_labels


In [None]:
# Load the formatted predictions table (path is relative to the working directory).
submission_df = pd.read_csv('ImagePredictionsFormatted.csv')

In [None]:
# Number of distinct image ids present in the predictions table.
len(submission_df.imageid.unique())

In [None]:
# Number of distinct image ids that keep at least one row with conf >= 0.2.
len(submission_df[submission_df["conf"] >= 0.2].imageid.unique())

In [None]:
# Column names of the confidence-filtered frame (row filtering leaves the columns unchanged).
submission_df[submission_df["conf"] >= 0.2].columns

In [None]:
# Density of `conf`, one curve per `label`, restricted to rows with conf >= 0.2.
sns.kdeplot(data=submission_df[submission_df["conf"] >= 0.2], x="conf", hue="label")

In [None]:
# Load the validation split (the file name suggests fold 4 -- TODO confirm).
# NOTE(review): absolute, machine-specific path; this cell only runs on the
# author's machine. Consider deriving it from a configurable directory.
validation_df = pd.read_csv('C:/Users/adars/Workspaces/covid19-contest-working-dir/efficient-det/validation_fold-4')

In [None]:
# Wrap the validation frame in the dataset. With train=False and predict=False
# the Normalize/ToTensorV2 steps are not added; only the augmentation transform
# (enabled by augment=True) is composed.
validation_ds = XRayDatasetFromDFOpacityAnnotations(df=validation_df, train=False, predict=False, augment=True, size=SIZE)

In [None]:
# `idx` is used here as a row label (`.loc`) and further down as a position in
# the dataset; the two agree only if the frame's index is 0..n-1 -- TODO confirm.
idx = 101
# File name of that row's `path`, without directory or extension.
img_id = os.path.splitext(os.path.basename(validation_df.loc[idx]["path"]))[0]

In [None]:
# Display the image id derived above.
img_id

In [None]:
# Show the image file for `idx` with its annotated boxes drawn on it.
validation_ds.draw_bbox_idx(idx)

In [None]:
# Fetch the processed sample for `idx` and keep its image for drawing predictions on.
data = validation_ds[idx]
image = data["image"]

In [None]:
# Boxes returned by the dataset for this sample (float32 tensor).
data["bboxes"]

In [None]:
# Load the saved predictions table.
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable directory.
preds_df = pd.read_csv('C:/Users/adars/Workspaces/covid19-contest-working-dir/efficient-det/best_mean_ap_preds.csv')

In [None]:
# All predictions whose ImageID contains `img_id`.
# regex=False matches the id literally (no regex metacharacter surprises);
# na=False keeps the mask boolean when an ImageID is missing.
preds_df[preds_df["ImageID"].str.contains(img_id, regex=False, na=False)]

In [None]:
# Predictions for `img_id` at or above the confidence cut-off.
# The row mask is built once and reused for all three extractions.
# regex=False matches the id literally; na=False keeps the mask boolean when an
# ImageID is missing.
conf_threshold = 0.35
is_selected = preds_df["ImageID"].str.contains(img_id, regex=False, na=False) & (
    preds_df["Conf"] >= conf_threshold
)
selected_preds = preds_df.loc[is_selected]

# Double brackets keep each result 2-D, one row per prediction.
boxes = selected_preds[["XMin", "YMin", "XMax", "YMax"]].values
labels = selected_preds[["LabelName"]].values
confs = selected_preds[["Conf"]].values

In [None]:
# Confidences of the selected predictions.
confs

In [None]:
# Draw the selected predicted boxes on the sample image; `labels` is only used for the printed count.
validation_ds.draw_bbox_img(image, boxes, labels)