In [None]:
#!wget https://www.cbica.upenn.edu/MICCAI_BraTS2020_TrainingData

In [None]:
#!cp /content/MICCAI_BraTS2020_TrainingData /content/drive/MyDrive

In [None]:
#@title
from PIL import Image                                      # (pip install Pillow)
import numpy as np                                         # (pip install numpy)
from skimage import measure                                # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon         # (pip install Shapely)
import os
import json

def create_sub_masks(mask_image, category_colors):
    """Build one boolean sub-mask per category.

    Args:
        mask_image: array of label values.
        category_colors: mapping ``category key -> list of label values`` that
            belong to that category.

    Returns:
        dict mapping every key of ``category_colors`` to a boolean array (same
        shape as ``mask_image``) that is True where the pixel value is one of
        the category's label values.
    """
    return {
        category: np.isin(mask_image, label_values)
        for category, label_values in category_colors.items()
    }

def create_sub_mask_annotation(sub_mask):
    """Trace the outlines of a binary sub-mask and turn them into polygons.

    Args:
        sub_mask: 2-D boolean/0-1 array for a single category.

    Returns:
        ``(polygons, segmentations)`` where ``polygons`` is the list of
        simplified shapely geometries whose area is at least 10, and
        ``segmentations`` is always an empty list (callers build their own
        segmentation from the polygons).
    """
    polygons = []
    segmentations = []  # intentionally left empty, see docstring

    # There can be several contours per mask (disconnected regions, holes).
    for contour in measure.find_contours(sub_mask, 0.5, positive_orientation="low"):
        # Flip (row, col) -> (x, y) and shift every coordinate by -1.
        # NOTE(review): the -1 looks like a leftover from a pipeline that padded
        # the mask by one pixel; confirm it is still wanted with the padding
        # used in this notebook.
        xy_points = contour[:, ::-1] - 1

        simplified = Polygon(xy_points).simplify(0.5, preserve_topology=False)
        if simplified.area < 10:
            # Too small to be useful; do not store it.
            continue

        polygons.append(simplified)

    return polygons, segmentations

def create_category_annotation(category_dict):
    """Convert a ``name -> id`` mapping into a list of COCO category entries.

    Each entry uses the name both as ``"name"`` and as ``"supercategory"``.
    """
    return [
        {"supercategory": name, "id": category_id, "name": name}
        for name, category_id in category_dict.items()
    ]

def create_image_annotation(file_name, width, height, image_id):
    """Return the COCO "images" entry for one image.

    Note the argument order is ``width, height`` while the resulting dict
    stores ``height`` before ``width``.
    """
    return dict(file_name=file_name, height=height, width=width, id=image_id)

def create_annotation_format(polygon, segmentation, image_id, category_id, annotation_id):
    """Return the COCO "annotations" entry for one geometry.

    Args:
        polygon: object exposing ``bounds`` as ``(min_x, min_y, max_x, max_y)``
            and ``area`` (e.g. a shapely geometry).
        segmentation: stored unchanged under ``"segmentation"``.
        image_id, category_id, annotation_id: ids stored in the entry.

    Returns:
        dict whose ``"bbox"`` is the tuple ``(min_x, min_y, width, height)``
        derived from ``polygon.bounds`` and whose ``"area"`` is
        ``polygon.area``; ``"iscrowd"`` is always 0.
    """
    left, top, right, bottom = polygon.bounds

    return {
        "segmentation": segmentation,
        "area": polygon.area,
        "iscrowd": 0,
        "image_id": image_id,
        "bbox": (left, top, right - left, bottom - top),
        "category_id": category_id,
        "id": annotation_id,
    }

def get_coco_json_format():
    """Return a fresh skeleton of the top-level COCO JSON structure.

    ``"images"``, ``"categories"`` and ``"annotations"`` start as a list holding
    one empty dict; callers are expected to overwrite them.
    """
    coco_format = {"info": {}, "licenses": []}
    for section in ("images", "categories", "annotations"):
        coco_format[section] = [{}]
    return coco_format

In [None]:
!unzip /content/drive/MyDrive/MICCAI_BraTS2020_TrainingData

Archive:  /content/drive/MyDrive/MICCAI_BraTS2020_TrainingData
   creating: MICCAI_BraTS2020_TrainingData/
   creating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/BraTS20_Training_001_flair.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/BraTS20_Training_001_seg.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/BraTS20_Training_001_t1.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/BraTS20_Training_001_t1ce.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_001/BraTS20_Training_001_t2.nii.gz  
   creating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_002/
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_002/BraTS20_Training_002_flair.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_002/BraTS20_Training_002_seg.nii.gz  
  inflating: MICCAI_BraTS2020_TrainingData/BraTS20_Training_002/BraTS20_Tr

In [None]:
!pip -q install SimpleITK==2.1.1
!pip -q install nibabel

[K     |████████████████████████████████| 48.4 MB 20 kB/s 
[?25h

In [None]:
import zipfile  # For file extraction
import glob  # For retrieving files/pathnames matching a specified pattern
import re # specifies a set of strings that matches it
import SimpleITK as sitk
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, IntSlider, ToggleButtons

In [None]:
# glob returns files in arbitrary (filesystem) order. The five lists are paired
# by position below, so each one is sorted to guarantee that entry i of every
# list belongs to the same patient directory.
t1 = sorted(glob.glob('/content/MICCAI_BraTS2020_TrainingData/*/*t1.nii.gz'))
t2 = sorted(glob.glob('/content/MICCAI_BraTS2020_TrainingData/*/*t2.nii.gz'))
flair = sorted(glob.glob('/content/MICCAI_BraTS2020_TrainingData/*/*flair.nii.gz'))
t1ce = sorted(glob.glob('/content/MICCAI_BraTS2020_TrainingData/*/*t1ce.nii.gz'))
seg = sorted(glob.glob('/content/MICCAI_BraTS2020_TrainingData/*/*seg.nii.gz'))  # Ground Truth
# Raw string: "\w" and "\." are regex escapes, not Python string escapes.
pattern = re.compile(r'/content/MICCAI_BraTS2020_TrainingData/.*_(\w*)\.nii\.gz')

# A missing file in one modality would silently shift every later pairing, so
# fail loudly instead of building mismatched cases.
if len({len(files) for files in (t1, t2, t1ce, flair, seg)}) != 1:
    raise ValueError('modalities have different numbers of files; cannot pair them by position')
if any(
    len({item.rsplit('/', 1)[0] for item in items}) != 1
    for items in zip(t1, t2, t1ce, flair, seg)
):
    raise ValueError('modality files paired from different patient directories')

# One dict per patient: modality name (captured by the regex) -> file path.
data_paths = [
    {pattern.findall(item)[0]: item for item in items}
    for items in zip(t1, t2, t1ce, flair, seg)
]

print('number of training examples' ,len(data_paths))
print(data_paths[0])

number of training examples 369
{'t1': '/content/MICCAI_BraTS2020_TrainingData/BraTS20_Training_356/BraTS20_Training_356_t1.nii.gz', 't2': '/content/MICCAI_BraTS2020_TrainingData/BraTS20_Training_356/BraTS20_Training_356_t2.nii.gz', 't1ce': '/content/MICCAI_BraTS2020_TrainingData/BraTS20_Training_356/BraTS20_Training_356_t1ce.nii.gz', 'flair': '/content/MICCAI_BraTS2020_TrainingData/BraTS20_Training_356/BraTS20_Training_356_flair.nii.gz', 'seg': '/content/MICCAI_BraTS2020_TrainingData/BraTS20_Training_356/BraTS20_Training_356_seg.nii.gz'}


In [None]:
def read_img_sitk(img_path):
  """Read the file at ``img_path`` with SimpleITK and return the image object."""
  return sitk.ReadImage(img_path)
def read_img_nii(img_path):
  """Load the file at ``img_path`` with nibabel and return its data as a NumPy array."""
  nifti = nib.load(img_path)
  voxels = nifti.get_fdata()
  return np.array(voxels)
# Load the first segmentation volume with both readers and compare the shapes
# they report.
first_seg_path = data_paths[0]['seg']
np_img = read_img_nii(first_seg_path)
sitk_img = read_img_sitk(first_seg_path)

np_shape, sitk_shape = np_img.shape, sitk_img.GetSize()
print("Shape of np_img : ", np_shape)
print("Shape of sitk_img : ", sitk_shape)

Shape of np_img :  (240, 240, 155)
Shape of sitk_img :  (240, 240, 155)


In [None]:

@interact
def explore_3dimage(layer = [l for l in range(156)] , modality=['t1', 't2', 't1ce', 'flair','seg'] , view = ['axial' , 'sagittal' , 'coronal'], patient= [l for l in range(370)] ): 
    """Interactively display one 2-D slice of a patient volume.

    Args:
        layer: index of the slice along the axis selected by ``view``. Values
            beyond the end of that axis are clamped to the last slice (the
            dropdown offers more entries than some axes have).
        modality: key into the per-patient dict of ``data_paths``.
        view: ``'sagittal'``, ``'coronal'`` or ``'axial'``.
        patient: index into ``data_paths``.
    """
    # The volumes load with shape (240, 240, 155) and the rest of the notebook
    # iterates slices along the last axis, so the last axis is the axial one.
    # NOTE(review): axes 0/1 are taken as sagittal/coronal, assuming the usual
    # (x, y, z) orientation of these files; confirm.
    slice_axis = {'sagittal': 0, 'coronal': 1, 'axial': 2}

    if modality not in ('t1', 't2', 't1ce', 'flair', 'seg') or view not in slice_axis:
      print("Error")
      return
    if not 0 <= patient < len(data_paths):
      # The dropdown lists more indices than there are cases.
      print(f"Error: patient must be in [0, {len(data_paths) - 1}]")
      return

    image = read_img_nii(data_paths[patient][modality])
    axis = slice_axis[view]
    # Clamp instead of raising IndexError on the out-of-range dropdown entries.
    layer = min(layer, image.shape[axis] - 1)
    array_view = np.take(image, layer, axis=axis)

    print(array_view.shape)
    plt.figure(figsize=(10, 5))
    plt.imshow(array_view, cmap='gray')
    # plt.title('Explore Layers of Brain', fontsize=10)
    plt.axis('off')

# Processing images

In [None]:
!rm -r /content/images

In [None]:
import os

# exist_ok keeps the cell re-runnable when the directory is already there.
os.makedirs('images/', exist_ok=True)

In [None]:
from tqdm import tqdm

import time
import multiprocessing
from sklearn.preprocessing import MinMaxScaler


In [None]:
def images_annotations_info(mask, img_path,image_id,images,annotations,annotation_id):
    """Append the COCO image entry and polygon annotations for one mask.

    NOTE: a function with the same name is defined again further down in this
    notebook; once that cell runs, this definition is shadowed.

    Args:
        mask: 2-D label array (rows x columns).
        img_path: file name stored in the image entry.
        image_id: id to use for this image.
        images: list that receives the image entry (mutated in place).
        annotations: list that receives the annotation entries (mutated in place).
        annotation_id: id to use for the first annotation created here.

    Returns:
        ``(images, annotations, annotation_id, image_id)`` with both counters
        advanced past the ids consumed by this call.

    Reads the globals ``category_colors`` and ``multipolygon_ids``.
    """
    # NumPy shapes are (rows, columns), i.e. (height, width).
    height, width = mask.shape

    # "images" info
    images.append(create_image_annotation(img_path, width, height, image_id))

    sub_masks = create_sub_masks(mask, category_colors)

    for category_id, sub_mask in sub_masks.items():
        # "annotations" info; the second return value is always empty, so the
        # segmentation is rebuilt from the polygons below.
        polygons, _ = create_sub_mask_annotation(sub_mask)

        if category_id in multipolygon_ids:
            # Combine the polygons to calculate the bounding box and area
            multi_poly = MultiPolygon(polygons)
            if multi_poly.area > 1:
                # One flat [x0, y0, x1, y1, ...] ring per polygon part.
                segmentations = [
                    np.array(part.exterior.coords).ravel().tolist()
                    for poly in polygons
                    for part in getattr(poly, "geoms", [poly])
                ]
                annotations.append(
                    create_annotation_format(multi_poly, segmentations, image_id, category_id, annotation_id)
                )
                annotation_id += 1
        else:
            for polygon in polygons:
                segmentation = [np.array(polygon.exterior.coords).ravel().tolist()]
                annotations.append(
                    create_annotation_format(polygon, segmentation, image_id, category_id, annotation_id)
                )
                annotation_id += 1

    image_id += 1

    return images,annotations, annotation_id,image_id


In [None]:
def myScaler(case):
  """Min-max scale ``case`` per last-axis channel and return it as float16.

  The array is flattened to ``(-1, case.shape[-1])`` so that every entry of the
  last axis is scaled as its own feature, then restored to the original shape.
  """
  n_channels = case.shape[-1]
  flat = case.reshape(-1, n_channels)
  scaled = MinMaxScaler().fit_transform(flat)
  return scaled.reshape(case.shape).astype('float16')

In [None]:
!pip install -q geopandas

[K     |████████████████████████████████| 1.0 MB 6.4 MB/s 
[K     |████████████████████████████████| 6.3 MB 40.0 MB/s 
[K     |████████████████████████████████| 16.7 MB 354 kB/s 
[?25h

In [None]:
def bb_intersection_over_union(boxA, boxB, box_format="auto"):
  """Intersection over union of two axis-aligned boxes.

  Args:
      boxA, boxB: 4-element sequences (list or tuple). They are never modified.
      box_format: ``"xyxy"`` for ``(x1, y1, x2, y2)``, ``"xywh"`` for
          ``(x, y, width, height)``, or ``"auto"`` (default). In auto mode both
          boxes are read as xywh when either of them cannot be a valid xyxy box
          (third value smaller than the first, or fourth smaller than the
          second); otherwise both are read as xyxy. Auto mode cannot recognise
          an xywh box whose width/height are not smaller than its x/y, so pass
          the format explicitly when it is known.

  Returns:
      IoU as a float, using the inclusive-pixel (+1) convention for widths and
      heights. Returns 0.0 when the union is not positive.

  Raises:
      ValueError: if ``box_format`` is not one of the three accepted values.
  """
  if box_format not in ("auto", "xyxy", "xywh"):
    raise ValueError(f"unknown box_format: {box_format!r}")

  def _cannot_be_xyxy(box):
    return box[0] > box[2] or box[1] > box[3]

  if box_format == "auto":
    as_xywh = _cannot_be_xyxy(boxA) or _cannot_be_xyxy(boxB)
  else:
    as_xywh = box_format == "xywh"

  def _corners(box):
    # Work on local values so the caller's box is left untouched.
    x1, y1, third, fourth = box[0], box[1], box[2], box[3]
    if as_xywh:
      return x1, y1, x1 + third, y1 + fourth
    return x1, y1, third, fourth

  ax1, ay1, ax2, ay2 = _corners(boxA)
  bx1, by1, bx2, by2 = _corners(boxB)

  # determine the (x, y)-coordinates of the intersection rectangle
  xA = max(ax1, bx1)
  yA = max(ay1, by1)
  xB = min(ax2, bx2)
  yB = min(ay2, by2)
  # compute the area of intersection rectangle
  interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
  # compute the area of both rectangles
  boxAArea = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
  boxBArea = (bx2 - bx1 + 1) * (by2 - by1 + 1)
  # intersection divided by (sum of both areas - intersection)
  union = boxAArea + boxBArea - interArea
  if union <= 0:
    return 0.0
  return interArea / float(union)

In [None]:
# adapted code from https://gis.stackexchange.com/questions/271733/geopandas-dissolve-overlapping-polygons

import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon
from sklearn.cluster import DBSCAN
from scipy.sparse.csgraph import connected_components



def processPolygons(polys, eps_distance=15, min_samples=1):
  """Group polygons into instances and return a cluster label per geometry.

  Steps:
    1. Polygons that intersect each other are merged into one MultiPolygon.
    2. The resulting geometries are grouped when the IoU of their bounding
       boxes is at least 0.5 (connected components of that relation).
    3. The groups are dissolved and clustered with DBSCAN on their pairwise
       distances.

  Args:
      polys: list of shapely polygons.
      eps_distance: DBSCAN ``eps`` (maximum distance between neighbours).
      min_samples: DBSCAN ``min_samples``.

  Returns:
      ``(df2, cluster_ids)`` where ``df2`` is a GeoDataFrame with columns
      ``geometry``, ``group`` and ``cluster`` (one row per geometry from
      step 1) and ``cluster_ids`` is the sorted array of distinct cluster
      labels.
  """
  df = gpd.GeoDataFrame(geometry=polys)
  overlap_matrix = df['geometry'].apply(lambda x: df['geometry'].intersects(x)).values.astype(int)

  _, overlap_ids = connected_components(overlap_matrix)
  df['group'] = overlap_ids

  # Step 1: one geometry per set of mutually intersecting polygons.
  multi_poly_list = []
  for group_id in np.sort(np.unique(overlap_ids)):
    members = df[df['group'] == group_id]['geometry'].tolist()
    if len(members) > 1:
      multi_poly_list.append(MultiPolygon(members))
    else:
      multi_poly_list += members

  df2 = gpd.GeoDataFrame(geometry=multi_poly_list)

  def bbox_iou(p1, p2):
    # 1 when the two geometries are the same object or their bounding boxes
    # overlap with IoU >= 0.5, else 0.
    if p1 is p2:
      return 1
    score = bb_intersection_over_union(p1.bounds, p2.bounds)
    return 1 if score >= 0.5 else 0

  # Step 2: connected components of the bounding-box overlap relation.
  bbox_overlap_matrix = df2['geometry'].apply(lambda x: df2['geometry'].apply(lambda y: bbox_iou(x, y)) ).values.astype(int)
  _, bbox_group_ids = connected_components(bbox_overlap_matrix)
  df2['group'] = bbox_group_ids

  # preparation for dbscan: one row per group, indexed by the group id
  dissolved_df = df2[['geometry', 'group']].dissolve(by='group')

  def myDistance(p1, p2):
    # The self-distance is reported as 1 rather than 0; with eps >= 1 this
    # still makes every geometry its own neighbour.
    if p1 is p2:
      return 1
    return p1.distance(p2)

  distance_matrix = dissolved_df['geometry'].apply(lambda x: dissolved_df['geometry'].apply(lambda y: myDistance(x, y)) ).values.astype(float)

  # Step 3: dbscan on the precomputed distances
  dbscan = DBSCAN(eps=eps_distance, min_samples=min_samples, metric='precomputed')
  clusters = dbscan.fit(distance_matrix)

  # labels_ holds one label per dissolved *group*, not per row of df2, so the
  # labels are mapped back through the group id instead of being attached by
  # position (which misaligns them and leaves NaNs once groups were merged).
  group_to_cluster = pd.Series(clusters.labels_, index=dissolved_df.index)
  df2['cluster'] = df2['group'].map(group_to_cluster)

  return df2, np.sort(np.unique(clusters.labels_))

In [None]:

from pycocotools import mask as _mask

def images_annotations_info(
    mask, img_path, image_id, images, annotations, annotation_id
):
    """Append the COCO image entry and RLE annotations for one mask.

    This redefines (and therefore replaces) the function of the same name
    defined earlier in the notebook.

    For every category in the global ``category_colors`` the sub-mask is traced
    into polygons, the polygons are clustered with ``processPolygons`` and each
    cluster becomes one annotation whose segmentation is a compressed RLE with
    the ``counts`` stored as a str so the result is JSON serialisable.

    Args:
        mask: 2-D label array (rows x columns).
        img_path: file name stored in the image entry.
        image_id: id to use for this image.
        images: list that receives the image entry (mutated in place).
        annotations: list that receives the annotation entries (mutated in place).
        annotation_id: id to use for the first annotation created here.

    Returns:
        ``(images, annotations, annotation_id, image_id)`` with both counters
        advanced past the ids consumed by this call.
    """
    # NumPy shapes are (rows, columns), i.e. (height, width).
    height, width = mask.shape

    # "images" info
    images.append(create_image_annotation(img_path, width, height, image_id))

    sub_masks = create_sub_masks(mask, category_colors)

    for category_id, sub_mask in sub_masks.items():
        # "annotations" info
        polygons, _ = create_sub_mask_annotation(sub_mask)
        if len(polygons) == 0:
            continue
        df, cluster_ids = processPolygons(polygons)

        for cluster_id in cluster_ids:
            # explode() splits MultiPolygons so every row is a single polygon.
            sub_df = df[df['cluster'] == cluster_id].explode(ignore_index=True)
            geometries = list(sub_df['geometry'])
            if not geometries:
                continue

            rings = [np.array(geo.exterior.coords).ravel().tolist() for geo in geometries]
            # Rasterise at the real mask size instead of a hard-coded 256x256.
            rles = _mask.frPyObjects(rings, height, width)

            if len(geometries) > 1:
                geometry = MultiPolygon(geometries)
                stacked_masks = _mask.decode(rles)
                if stacked_masks.ndim > 2:
                    # Keep pixels covered by an odd number of polygons, so a
                    # polygon nested inside another one cuts a hole.
                    m = np.add.reduce(stacked_masks, axis=2) % 2
                else:
                    m = stacked_masks
                # encode() works on Fortran-ordered uint8 arrays.
                segmentation = _mask.encode(np.asfortranarray(m.astype(np.uint8)))
                # NOTE(review): the stored area is the sum of the polygon areas
                # (nested polygons are added, not subtracted); confirm whether
                # the mask area should be used instead.
            else:
                geometry = geometries[0]
                assert len(rles) == 1, rles
                segmentation = rles[0]

            # bytes are not JSON serialisable
            segmentation['counts'] = segmentation['counts'].decode('utf8')

            annotations.append(
                create_annotation_format(geometry, segmentation, image_id, category_id, annotation_id)
            )
            annotation_id += 1

    image_id += 1

    return images, annotations, annotation_id, image_id


In [None]:
def pad_image(image):
  """Zero-pad every channel of ``image`` by 8 pixels on each side.

  The result is always a float ``(256, 256, 4)`` array; each channel of
  ``image`` is padded separately and written into the matching output channel,
  so a padded channel must come out as 256 x 256.
  """
  border = [(8, 8), (8, 8)]
  padded = np.zeros((256, 256, 4))
  for channel in range(image.shape[2]):
    padded[:, :, channel] = np.pad(
        image[:, :, channel], border, mode='constant', constant_values=0
    )
  return padded

def pad_mask(mask):
      """Return ``mask`` zero-padded by 8 pixels on every side (dtype preserved)."""
      border = [(8, 8), (8, 8)]
      return np.pad(mask, border, mode='constant', constant_values=0)

In [None]:
import random

# The position in data_paths decides whether a case ends up in train.json or
# val.json, so the shuffle is seeded to make the split reproducible.
random.seed(42)
random.shuffle(data_paths)

In [None]:
# Build the dataset: for every case, every slice that contains at least one
# labelled pixel is saved as a padded 4-channel .npz file and annotated in
# COCO format. Annotations are written to train.json / val.json.

# Slice count is taken from the third axis of the first segmentation volume
# and reused for every case.
nb_slices = read_img_nii(data_paths[0]['seg']).shape[2]
np_data = []  # NOTE(review): not used anywhere else in this cell
case_id = 0
save_dir = "/content/images"



# Index of the case after which train.json is written; the remaining cases go
# to val.json.
val = int(len(data_paths)*0.8)
# Label ids of the dataset
category_ids = {
    "wholeTumor": 1,
    # "tumorCore": 2,
    # "enhancing": 3
}

# Define which colors match which categories in the images
category_colors = {

    1: [1,2,4],
    # 2: [1,4],
    # 3: [4],
}

# Define the ids that are a multipolygon.
multipolygon_ids = [1]

annotation_id = 0
image_id = 0
annotations = []
images = []

coco_format = get_coco_json_format()
coco_format["categories"] = create_category_annotation(category_ids)

import torch  # NOTE(review): torch is not used in this cell
for case in tqdm(data_paths):

  # Fourth '/'-separated component of the t1 path; for the paths printed
  # earlier in the notebook this is the per-patient directory name.
  patient_id = case['t1'].split('/')[3]
  case_t1 = (read_img_nii(case['t1']))
  case_t2 = (read_img_nii(case['t2']))
  case_t1ce = (read_img_nii(case['t1ce']))
  case_flair = (read_img_nii(case['flair']))
  case_seg = read_img_nii(case['seg'])

  for scan_slice in range(nb_slices):

    seg_mask = case_seg[:,:,scan_slice].astype(int)
    # Only slices with at least one non-zero label are exported.
    if  seg_mask.any():
      # Channel order of the saved array: t1, t2, t1ce, flair.
      image = np.stack([
          case_t1[:,:,scan_slice],
          case_t2[:,:,scan_slice],
          case_t1ce[:,:,scan_slice],
          case_flair[:,:,scan_slice]], axis=2).astype('int')

      # image=myScaler(image)
      image_name = f"image{patient_id}_{scan_slice}.npz"


      # The mask is padded the same way as the image so both share one frame.
      mask = pad_mask(seg_mask)

      # Appends to images/annotations and returns the advanced id counters.
      images,annotations, annotation_id,image_id = images_annotations_info(mask, image_name,image_id,images,annotations,annotation_id)


      image = pad_image(image)
      savin_path = f'{save_dir}/{image_name}'
      with open(savin_path, 'wb') as f:
        np.savez_compressed(f, image)



  # Runs after case index `val` has been processed, so train.json holds cases
  # 0..val inclusive (val + 1 cases).
  # NOTE(review): confirm the boundary case is meant to belong to the training
  # split. If the loop never reaches index `val`, train.json is not written.
  if case_id == val:
    coco_format["images"], coco_format["annotations"] = images,annotations
    with open("train.json","w") as outfile:
        json.dump(coco_format, outfile)
    # Start over for the validation split; ids restart at 0.
    coco_format["images"], coco_format["annotations"] = [],[]
    images,annotations = [],[]
    annotation_id = 0
    image_id = 0





  case_id += 1

# Everything collected after the train dump is the validation split.
coco_format["images"], coco_format["annotations"] = images,annotations
with open("val.json","w") as outfile:
    json.dump(coco_format, outfile)

100%|██████████| 369/369 [34:58<00:00,  5.69s/it]


In [None]:
!tar -czf "bratsV13.tar.gz" images train.json val.json

In [None]:
!cp /content/bratsV13.tar.gz /content/drive/MyDrive

In [None]:
!du ./* -sh

2.5G	./bratsV13.tar.gz
14G	./drive
2.7G	./images
3.1G	./MICCAI_BraTS2020_TrainingData
55M	./sample_data
9.8M	./train.json
2.5M	./val.json


# Visualization

In [None]:
# Decode and display the segmentation of 100 randomly chosen annotations from
# coco_format (random.sample needs at least 100 annotations to draw from).
for d in random.sample(coco_format["annotations"],100):
  plt.imshow(_mask.decode(d['segmentation']))
  plt.show()

# Some tests

## bar plot of the area

In [None]:
test = np.load('/content/images/image2_109.npz')['arr_0']

In [None]:
import matplotlib.pyplot as plt
plt.imshow(test[:,:,1])
plt.show()

In [None]:
# Per slice (empty slices included): square root of the number of pixels whose
# label is in the given set.
# NOTE(review): the label sets [1,2,4] and [1,4] are called wholeTumor and
# tumorCore in a later cell of this notebook; confirm that the list names used
# here (edema / enhance / necrotic) are the intended ones.
edema, enhance, necrotic = [], [], []
label_sets = ((edema, [1,2,4]), (enhance, [1,4]), (necrotic, [1]))

for case in data_paths:
  case_seg = read_img_nii(case['seg'])
  for mask in range(case_seg.shape[2]):
    seg_slice = case_seg[:,:,mask]
    for target, labels in label_sets:
      target.append(np.sqrt(np.isin(seg_slice, labels).sum()))

In [None]:
import plotly.express as px

fig = px.histogram({'edema':edema,'enhancing':enhance,'necrotic':necrotic}, barmode='group', nbins=10)
fig.show()

## bar plot of the distance between polygons

In [None]:
import itertools

def processMask(mask):
  """Return the largest pairwise distance between the contour polygons of ``mask``.

  Contours are traced at level 0.5, converted from (row, col) to (x, y) with
  every coordinate shifted by -1, and polygons with an area below 1 are
  dropped. Returns ``0.`` when fewer than two polygons remain.
  """
  polygons = []
  for contour in measure.find_contours(mask, 0.5, positive_orientation="low"):
      # (row, col) -> (x, y), shifted by -1
      poly = Polygon(contour[:, ::-1] - 1)
      if poly.area < 1:
          # Skip degenerate outlines.
          continue
      polygons.append(poly)

  distances = [p1.distance(p2) for p1, p2 in itertools.combinations(polygons, 2)]
  return max(distances) if distances else 0.

In [None]:
from tqdm import tqdm
# For every non-empty slice, record the largest distance between the contour
# polygons of each label set.
wholeTumor, tumorCore, enhance = [], [], []
for case in tqdm(data_paths):
  case_seg = read_img_nii(case['seg'])
  for scan_slice in range(case_seg.shape[2]):
    mask = case_seg[:,:,scan_slice]
    if not mask.any():
      continue
    wholeTumor.append(processMask(np.isin(mask, [1,2,4])))
    tumorCore.append(processMask(np.isin(mask, [1,4])))
    enhance.append(processMask(np.isin(mask , [4])))

100%|██████████| 369/369 [03:10<00:00,  1.93it/s]


In [None]:
import plotly.express as px

fig = px.histogram({'wholeTumor':wholeTumor,'enhancing':enhance,'tumorCore':tumorCore}, barmode='group', nbins=0)
fig.show()

In [None]:
!tar -xf /content/drive/MyDrive/bratsV8.tar.gz

tar: /content/drive/MyDrive/bratsV8.tar.gz: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now


In [None]:
import json
with open('train.json') as f:
  test_train = json.load(f)

In [None]:
test_train.keys()

dict_keys(['info', 'licenses', 'images', 'categories', 'annotations'])

In [None]:
from pycocotools.coco import COCO
coco = COCO('train.json')

loading annotations into memory...
Done (t=0.81s)
creating index...
index created!


In [None]:
coco.getCatIds()

[1, 2, 3]

In [None]:
from itertools import product, permutations

In [None]:
# For every image, walk all (category 3, category 2, category 1) annotation
# triples: n counts the triples, y counts those whose category-3 and
# category-1 bounding boxes are identical.
cat_ids = coco.getCatIds()
n = 0
y = 0
for id in coco.getImgIds():
  en_anns, tc_anns, wt_anns = (
      coco.getAnnIds(imgIds=[id], catIds=category) for category in (3, 2, 1)
  )
  l = list(product(en_anns, tc_anns, wt_anns))

  for c in l:
    n += 1

    first = coco.loadAnns(c[0])
    assert len(first) == 1
    if first[0]['bbox'] == coco.loadAnns(c[2])[0]['bbox']:
      y += 1

In [None]:
print(f'number of tries {n}, cases where wt == enhancing {y}, percent {y/n}')

number of tries 18297, cases where wt == enhancing 237, percent 0.01295294310542712


In [None]:
print(f'number of tries {n}, cases where tc == wt {y}, percent {y/n}')

number of tries 18297, cases where tc == wt 443, percent 0.02421161939115702


In [None]:
print(f'number of tries {n}, cases where tc == enhancing == wt {y}, percent {y/n}')

number of tries 18297, cases where tc == enhancing == wt 190, percent 0.010384215991692628


In [None]:
print(f'number of tries {n}, cases where tc == enhancing {y}, percent {y/n}')

number of tries 14339, cases where tc == enhancing 6290, percent 0.4386637840853616


## bar plot of iou between boxes

In [None]:
!tar -xf /content/drive/MyDrive/bratsV11.tar.gz

In [None]:
from pycocotools.coco import COCO
from itertools import product, permutations, combinations
coco = COCO('train.json')

loading annotations into memory...
Done (t=0.33s)
creating index...
index created!


In [None]:
def bb_intersection_over_union(boxA, boxB, box_format="auto"):
  """Intersection over union of two axis-aligned boxes.

  Args:
      boxA, boxB: 4-element sequences (list or tuple). They are never modified.
      box_format: ``"xyxy"`` for ``(x1, y1, x2, y2)``, ``"xywh"`` for
          ``(x, y, width, height)``, or ``"auto"`` (default). In auto mode both
          boxes are read as xywh when either of them cannot be a valid xyxy box
          (third value smaller than the first, or fourth smaller than the
          second); otherwise both are read as xyxy. Auto mode cannot recognise
          an xywh box whose width/height are not smaller than its x/y, so pass
          the format explicitly when it is known.

  Returns:
      IoU as a float, using the inclusive-pixel (+1) convention for widths and
      heights. Returns 0.0 when the union is not positive.

  Raises:
      ValueError: if ``box_format`` is not one of the three accepted values.
  """
  if box_format not in ("auto", "xyxy", "xywh"):
    raise ValueError(f"unknown box_format: {box_format!r}")

  def _cannot_be_xyxy(box):
    return box[0] > box[2] or box[1] > box[3]

  if box_format == "auto":
    as_xywh = _cannot_be_xyxy(boxA) or _cannot_be_xyxy(boxB)
  else:
    as_xywh = box_format == "xywh"

  def _corners(box):
    # Work on local values so the caller's box is left untouched.
    x1, y1, third, fourth = box[0], box[1], box[2], box[3]
    if as_xywh:
      return x1, y1, x1 + third, y1 + fourth
    return x1, y1, third, fourth

  ax1, ay1, ax2, ay2 = _corners(boxA)
  bx1, by1, bx2, by2 = _corners(boxB)

  # determine the (x, y)-coordinates of the intersection rectangle
  xA = max(ax1, bx1)
  yA = max(ay1, by1)
  xB = min(ax2, bx2)
  yB = min(ay2, by2)
  # compute the area of intersection rectangle
  interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
  # compute the area of both rectangles
  boxAArea = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
  boxBArea = (bx2 - bx1 + 1) * (by2 - by1 + 1)
  # intersection divided by (sum of both areas - intersection)
  union = boxAArea + boxBArea - interArea
  if union <= 0:
    return 0.0
  return interArea / float(union)

In [None]:

bb_intersection_over_union(coco.loadAnns(100)[0]['bbox'], coco.loadAnns(100)[0]['bbox'])

1.0

In [None]:
import pycocotools.mask as _mask
# IoU between every pair of category-1 boxes that share an image.
iou_list = []
for id in coco.getImgIds():
  wt_anns = coco.getAnnIds(imgIds=[id], catIds=1)

  # The annotations store bbox as (x, y, width, height). Convert to corner
  # form on fresh lists: the IoU helper then gets an unambiguous format and the
  # boxes held by `coco` are never modified.
  corner_boxes = [
      [bx, by, bx + bw, by + bh]
      for bx, by, bw, bh in (ann['bbox'] for ann in coco.loadAnns(wt_anns))
  ]

  for bbox1, bbox2 in combinations(corner_boxes, 2):
    iou_list.append( bb_intersection_over_union(bbox1, bbox2) )


In [None]:
max(iou_list)

0.43872714962762355

In [None]:
import plotly.express as px

fig = px.histogram({'iou':iou_list}, nbins=10)
fig.show()

## bar plot of the area using the bbox

In [None]:


for d in random.sample(test_train["annotations"],100):
  print(d['area'])
  plt.imshow(_mask.decode(d['segmentation']))
  plt.show()

In [None]:
# Area of every annotation loaded from train.json.
area = [d['area'] for d in test_train["annotations"]]

In [None]:
import plotly.express as px

fig = px.histogram({'area':area}, nbins=0)
fig.show()

## overlapping boxes
Trying to find instances with the same box coordinates, to show that Mask R-CNN can't handle all three brain tumor classes.