# Setup

In [None]:
# OpenSlide
!apt update && apt install -y openslide-tools
!pip install openslide-python

In [None]:
# ASAP
!wget https://github.com/computationalpathologygroup/ASAP/releases/download/ASAP-2.1/ASAP-2.1-py38-Ubuntu2004.deb
!apt install libdcmtk14
!apt install libopenslide0
!ls
!dpkg -i ASAP-2.1-py38-Ubuntu2004.deb

# add ASAP to pythonpath
import sys
asap_path = "/opt/ASAP/bin"
if asap_path not in sys.path:
    sys.path.append(asap_path);
print(sys.path);

# Patch Extraction

In [None]:
from PIL import Image
from PIL import TiffImagePlugin
import numpy as np
import cv2
import openslide
import multiresolutionimageinterface as mir
import os
import random

# Input and output directories (all paths end with '/' because file names are
# appended by plain string concatenation)
NORMAL_PATH = '/content/drive/MyDrive/Thesis/Normal/'
TUMOR_PATH = '/content/drive/MyDrive/Thesis/Tumor/'
TUMOR_ANN_PATH = '/content/drive/MyDrive/Thesis/Tumor_Annotations/'
TUMOR_MASK_PATH = '/content/drive/MyDrive/Thesis/Tumor_Masks/'
PATCH_POS_PATH = '/content/drive/MyDrive/Thesis/Patches_Positive/'
PATCH_NEG_PATH = '/content/drive/MyDrive/Thesis/Patches_Negative/'

MAG_FACTOR = 256          # downsample factor of the pyramid level used to find contours
PATCHES_PER_BBOX = 20     # number of samples per bounding box
ADAPTIVE_QUANT = 0        # adapt number of samples based on box size
PATCH_SIZE = 256          # side length of sampled patches, in level-0 pixels
THRESH = 0.2              # minimum fraction (0-1) of a patch that must be tumor (for tumor patches only)

# os.listdir returns entries in arbitrary order; sorting makes the order in
# which slides are processed reproducible between runs.
tumor_wsis = sorted(os.listdir(TUMOR_PATH))
normal_wsis = sorted(os.listdir(NORMAL_PATH))

def read_wsi(wsi_path, mag_factor):
  """Open a whole-slide image and load one downsampled pyramid level.

  Args:
    wsi_path: path of the slide file, passed to openslide.OpenSlide.
    mag_factor: downsample factor of the level to load (e.g. 256).

  Returns:
    (wsi_full_size, wsi_scaled): the open slide handle (the caller is
    responsible for closing it) and the complete selected level converted
    to a NumPy array.

  Raises:
    ValueError: if no pyramid level has a downsample within 1% of mag_factor.
  """
  wsi_full_size = openslide.OpenSlide(wsi_path)
  mag_options = wsi_full_size.level_downsamples

  # level_downsamples holds floats that can deviate slightly from the nominal
  # factor (e.g. 4.0002 instead of 4), so an exact-match lookup may fail for a
  # level that is in fact the requested one. Pick the closest level instead;
  # an exact match, when present, is still selected (distance 0, first wins).
  mag_level = min(range(len(mag_options)),
                  key=lambda level: abs(mag_options[level] - mag_factor))
  if abs(mag_options[mag_level] - mag_factor) > 0.01 * mag_factor:
    # do not leak the open slide when the request cannot be satisfied
    wsi_full_size.close()
    raise ValueError('%r has no level with downsample %r (available: %r)'
                     % (wsi_path, mag_factor, tuple(mag_options)))

  wsi_scaled = np.array(wsi_full_size.read_region((0, 0), mag_level,
                                        wsi_full_size.level_dimensions[mag_level]))
  return wsi_full_size, wsi_scaled

def get_tissue_contours(wsi_scaled):
  """Find the outer contours of tissue regions in a downsampled slide image.

  The image is converted to HSV, thresholded to the range 20..200 on every
  channel, cleaned with a morphological close (20x20 kernel) followed by an
  open (5x5 kernel), and the external contours of the result are returned.

  Args:
    wsi_scaled: image array accepted by cv2.cvtColor with COLOR_BGR2HSV.

  Returns:
    The contours reported by cv2.findContours (RETR_EXTERNAL,
    CHAIN_APPROX_SIMPLE).
  """
  # NOTE(review): COLOR_BGR2HSV reads the first channel as blue. If the input
  # comes from an RGB(A) source, red and blue are swapped before thresholding -
  # confirm the thresholds below were tuned with that in mind.
  hsv = cv2.cvtColor(wsi_scaled, cv2.COLOR_BGR2HSV)
  hsv_lower = np.array([20, 20, 20])
  hsv_upper = np.array([200, 200, 200])
  mask = cv2.inRange(hsv, hsv_lower, hsv_upper)

  # close first (fill small holes), then open (drop small specks)
  for morph_op, kernel_side in ((cv2.MORPH_CLOSE, 20), (cv2.MORPH_OPEN, 5)):
    kernel = np.ones((kernel_side, kernel_side), dtype=np.uint8)
    mask = cv2.morphologyEx(np.array(mask), morph_op, kernel)

  found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  tissue_contours, _ = found
  return tissue_contours

def get_tumor_contours(mask_img):
  """Find the outer contours of the regions marked in a tumor mask image.

  Args:
    mask_img: mask image array accepted by cv2.cvtColor with COLOR_BGR2GRAY.

  Returns:
    The contours reported by cv2.findContours (RETR_EXTERNAL,
    CHAIN_APPROX_SIMPLE) on the grayscale mask.
  """
  gray_mask = cv2.cvtColor(mask_img, cv2.COLOR_BGR2GRAY)
  found = cv2.findContours(np.array(gray_mask), cv2.RETR_EXTERNAL,
                           cv2.CHAIN_APPROX_SIMPLE)
  tumor_contours, _ = found
  return tumor_contours

def get_bbox(contours):
  """Return the bounding boxes of the given contours, dropping tiny ones.

  Args:
    contours: iterable of contours accepted by cv2.boundingRect.

  Returns:
    List of (x, y, w, h) boxes, in input order, whose width or height
    exceeds 10 pixels.
  """
  all_boxes = list(map(cv2.boundingRect, contours))
  kept_boxes = []
  for box in all_boxes:
    # a box is kept as soon as one of its sides is longer than 10 pixels
    if box[2] > 10 or box[3] > 10:
      kept_boxes.append(box)
  return kept_boxes



## Sample Tumor Patches

In [None]:
# Upper bound on sampling attempts per bounding box. Without it the sampling
# loop never ends for a box that cannot yield PATCHES_PER_BBOX patches above
# THRESH (e.g. a thin tumor region).
MAX_ATTEMPTS_PER_BBOX = PATCHES_PER_BBOX * 50


def _sample_tumor_patches_in_bbox(wsi_full_size, mask_full_size, bbox, first_index):
  """Save up to PATCHES_PER_BBOX tumor patches sampled from one bounding box.

  Args:
    wsi_full_size: open slide handle the patches are read from (level 0).
    mask_full_size: open slide handle of the matching tumor mask.
    bbox: (x, y, w, h) in pixels of the MAG_FACTOR-downsampled level.
    first_index: number used as the file name of the first saved patch;
      following patches use consecutive numbers.

  Returns:
    Number of patches saved (less than PATCHES_PER_BBOX when the attempt
    budget MAX_ATTEMPTS_PER_BBOX is used up first).
  """
  x, y, w, h = bbox
  saved = 0

  # NOTE(review): positions lie on a MAG_FACTOR grid and are drawn with
  # replacement, so the same patch can be saved more than once for small boxes.
  for _ in range(MAX_ATTEMPTS_PER_BBOX):
    if saved >= PATCHES_PER_BBOX:
      break

    # randrange excludes the upper bound: the box covers x .. x+w-1 and
    # y .. y+h-1, so x+w / y+h would already lie outside of it
    rand_x = random.randrange(x, x + w)
    rand_y = random.randrange(y, y + h)

    # convert downsampled coordinates to level-0 coordinates
    real_x = rand_x * MAG_FACTOR
    real_y = rand_y * MAG_FACTOR

    # extract mask for selected patch
    mask_patch = mask_full_size.read_region((real_x, real_y), 0, (PATCH_SIZE, PATCH_SIZE))
    mask_patch_np = np.array(mask_patch)[:, :, 0]
    mask_patch.close()

    # check what proportion of patch is tumor
    # assumes tumor pixels are stored as 255 in the first channel - TODO confirm;
    # with a 0/1 mask this fraction can never exceed THRESH
    tumor_percent = (mask_patch_np.sum() / 255) / (PATCH_SIZE * PATCH_SIZE)

    # save patch only if threshold for tumor proportion is met
    if tumor_percent > THRESH:
      wsi_patch = wsi_full_size.read_region((real_x, real_y), 0, (PATCH_SIZE, PATCH_SIZE))
      wsi_patch.save(PATCH_POS_PATH + str(first_index + saved), 'PNG')
      wsi_patch.close()
      saved += 1

  return saved


# One counter for all slides: restarting it for every slide would make each
# slide overwrite the patch files written for the previous one.
patch_index = 0

for tumor_wsi in tumor_wsis:
  wsi_full_size, wsi_scaled = read_wsi(TUMOR_PATH + tumor_wsi, MAG_FACTOR)
  try:
    mask_full_size, mask_scaled = read_wsi(TUMOR_MASK_PATH + tumor_wsi.replace('tumor', 'mask'),
                        MAG_FACTOR)
    try:
      # not used by the tumor sampling below; kept so the name stays defined
      tissue_contours = get_tissue_contours(wsi_scaled)
      tumor_contours = get_tumor_contours(mask_scaled)

      # get tumor patches
      for bbox in get_bbox(tumor_contours):
        patches_saved = _sample_tumor_patches_in_bbox(wsi_full_size, mask_full_size,
                                                      bbox, patch_index)
        patch_index += patches_saved
        if patches_saved < PATCHES_PER_BBOX:
          print('%s: only %d of %d patches found in bbox %s'
                % (tumor_wsi, patches_saved, PATCHES_PER_BBOX, bbox))
    finally:
      # release the mask handle even if sampling fails
      mask_full_size.close()
  finally:
    # release the slide handle even if the mask cannot be opened
    wsi_full_size.close()

## Sample Non-Tumor Patches from Normal WSIs