# Setup

In [None]:
# OpenSlide
# Install the OpenSlide system tools via apt, then the Python bindings
# (`openslide-python`) that provide the `openslide` module imported below.
!apt update && apt install -y openslide-tools
!pip install openslide-python

In [None]:
# ASAP
!wget https://github.com/computationalpathologygroup/ASAP/releases/download/ASAP-2.1/ASAP-2.1-py38-Ubuntu2004.deb
!apt install libdcmtk14
!apt install libopenslide0
!ls
!dpkg -i ASAP-2.1-py38-Ubuntu2004.deb

# add ASAP to pythonpath
import sys
asap_path = "/opt/ASAP/bin"
if asap_path not in sys.path:
    sys.path.append(asap_path);
print(sys.path);

# Patch Extraction

In [None]:
from PIL import Image
from PIL import TiffImagePlugin
import numpy as np
import cv2
import openslide
import multiresolutionimageinterface as mir
import os
import random

# Input and output directories (the paths look like a mounted Google Drive in
# Colab -- presumably mounted before this cell runs; verify).
NORMAL_PATH = '/content/drive/MyDrive/Thesis/Normal/'
TUMOR_PATH = '/content/drive/MyDrive/Thesis/Tumor/'
TUMOR_ANN_PATH = '/content/drive/MyDrive/Thesis/Tumor_Annotations/'
TUMOR_MASK_PATH = '/content/drive/MyDrive/Thesis/Tumor_Masks/'
PATCH_POS_PATH = '/content/drive/MyDrive/Thesis/Patches_Positive/'
PATCH_NEG_PATH = '/content/drive/MyDrive/Thesis/Patches_Negative/'

MAG_FACTOR = 256          # downsample factor of the low-res level used to find regions; also scales sampled points back to level-0 coordinates
PATCHES_PER_BBOX = 20     # number of patches saved per bounding box
ADAPTIVE_QUANT = 0        # adapt number of samples based on box size (not referenced by the code visible in this notebook section)
PATCH_SIZE = 256          # side length, in level-0 pixels, of the square patches that are read and saved
THRESH = 0.2              # minimum fraction of a patch that must be tumor (tumor patches) or tissue (normal patches)

LOWER_BOUND = np.array([20, 20, 20])    # per-channel lower bound passed to cv2.inRange on the HSV image when thresholding tissue
UPPER_BOUND = np.array([200, 200, 200]) # matching per-channel upper bound

# File names of the slides to process (os.listdir returns them in arbitrary order).
tumor_wsis = os.listdir(TUMOR_PATH)
normal_wsis = os.listdir(NORMAL_PATH)

def read_wsi(wsi_path, mag_factor):
  """Open a whole-slide image and load one downsampled pyramid level.

  Args:
    wsi_path: path of a slide file that OpenSlide can open.
    mag_factor: downsample factor (relative to level 0) of the level to load.
      The slide must contain a level with this downsample, because callers
      multiply low-resolution coordinates by the same factor.

  Returns:
    (wsi_full_size, wsi_scaled): the open slide handle (the caller is
    responsible for closing it) and the whole requested level as a NumPy
    array built from the image returned by ``read_region`` (an RGBA PIL
    image according to the OpenSlide documentation).

  Raises:
    ValueError: if no pyramid level matches ``mag_factor``.
  """
  wsi_full_size = openslide.OpenSlide(wsi_path)
  try:
    mag_options = wsi_full_size.level_downsamples

    # Downsamples are floats derived from the level dimensions, so they are
    # not guaranteed to be exactly 2**k; an exact `.index()` lookup can miss
    # the level. Accept the closest level within 1% of the requested factor.
    mag_level = None
    if len(mag_options) > 0:
      closest = min(range(len(mag_options)),
                    key=lambda lvl: abs(mag_options[lvl] - mag_factor))
      if abs(mag_options[closest] - mag_factor) <= 0.01 * mag_factor:
        mag_level = closest
    if mag_level is None:
      raise ValueError('{}: no pyramid level with downsample {} (available: {})'
                       .format(wsi_path, mag_factor, tuple(mag_options)))

    wsi_scaled = np.array(
        wsi_full_size.read_region((0, 0), mag_level,
                                  wsi_full_size.level_dimensions[mag_level]))
  except Exception:
    # Do not leak the slide handle when the level lookup or the read fails.
    wsi_full_size.close()
    raise
  return wsi_full_size, wsi_scaled

def get_tissue_contours(wsi_scaled, lower_bound, upper_bound):
  """Return the outer contours of the tissue regions in a downsampled slide.

  The image is converted with ``cv2.COLOR_BGR2HSV``, thresholded and cleaned
  up by ``extract_tissue``, and the external contours of the resulting binary
  mask are returned.

  Args:
    wsi_scaled: image array of the downsampled slide.
    lower_bound: per-channel lower HSV bound forwarded to ``extract_tissue``.
    upper_bound: per-channel upper HSV bound forwarded to ``extract_tissue``.

  Returns:
    The contours found by ``cv2.findContours`` (external contours only).
  """
  # NOTE(review): read_wsi builds this array from a PIL image, which is
  # presumably RGB(A)-ordered, while the conversion code is BGR2HSV -- confirm
  # the bounds were tuned with that channel order in mind.
  tissue_mask = extract_tissue(cv2.cvtColor(wsi_scaled, cv2.COLOR_BGR2HSV),
                               lower_bound, upper_bound)
  found = cv2.findContours(tissue_mask, cv2.RETR_EXTERNAL,
                           cv2.CHAIN_APPROX_SIMPLE)
  contours, _hierarchy = found
  return contours

def extract_tissue(hsv_img, lower_bound, upper_bound):
  """Build a cleaned-up binary tissue mask from an HSV image.

  Pixels whose channels all lie within [lower_bound, upper_bound] are selected
  with ``cv2.inRange``; the mask is then closed with a 20x20 kernel and opened
  with a 5x5 kernel.

  Args:
    hsv_img: HSV image array.
    lower_bound: per-channel lower bound for ``cv2.inRange``.
    upper_bound: per-channel upper bound for ``cv2.inRange``.

  Returns:
    The mask produced by the final morphological opening.
  """
  # (operation, kernel) pairs, applied in order: closing first, then opening.
  morph_steps = (
      (cv2.MORPH_CLOSE, np.ones((20, 20), dtype=np.uint8)),
      (cv2.MORPH_OPEN, np.ones((5, 5), dtype=np.uint8)),
  )

  mask = cv2.inRange(hsv_img, lower_bound, upper_bound)
  for operation, kernel in morph_steps:
    mask = cv2.morphologyEx(np.array(mask), operation, kernel)

  return mask

def get_tumor_contours(mask_img):
  """Return the outer contours of the regions in a downsampled tumor mask.

  Args:
    mask_img: multi-channel mask image array; it is reduced to a single
      channel with ``cv2.COLOR_BGR2GRAY`` before contour detection.

  Returns:
    The contours found by ``cv2.findContours`` (external contours only).
  """
  gray_mask = np.array(cv2.cvtColor(mask_img, cv2.COLOR_BGR2GRAY))
  found = cv2.findContours(gray_mask, cv2.RETR_EXTERNAL,
                           cv2.CHAIN_APPROX_SIMPLE)
  contours, _hierarchy = found
  return contours

def get_bbox(contours):
  """Return the bounding boxes of ``contours`` that are larger than 10x10.

  Args:
    contours: iterable of contours accepted by ``cv2.boundingRect``.

  Returns:
    List of ``(x, y, w, h)`` rectangles, in contour order, keeping only boxes
    whose width and height are both greater than 10.
  """
  kept = []
  for contour in contours:
    rect = cv2.boundingRect(contour)
    width, height = rect[2], rect[3]
    if width > 10 and height > 10:
      kept.append(rect)
  return kept


## Sample Patches From Tumor WSIs

In [None]:
# get tumor patches from tumor WSIs
def get_tumor_patches_const(wsi_full_size, mask_full_size, MAG_FACTOR, img_idx,
                            tumor_contours, PATCHES_PER_BBOX, PATCH_SIZE, THRESH,
                            max_attempts=None):
  """Save randomly sampled tumor patches for every tumor bounding box.

  For each bounding box of ``tumor_contours`` (as filtered by ``get_bbox``),
  random points are drawn inside the box, scaled to level-0 coordinates and
  used as the top-left corner of a PATCH_SIZE x PATCH_SIZE patch. A patch is
  saved as PNG under ``PATCH_POS_PATH`` when the tumor fraction of the
  corresponding mask patch exceeds ``THRESH``.

  Args:
    wsi_full_size: open slide handle exposing ``read_region`` (as returned by
      ``read_wsi``).
    mask_full_size: open handle of the matching tumor mask.
    MAG_FACTOR: downsample factor of the level the contours were computed on.
    img_idx: string identifying the slide; used as file-name prefix.
    tumor_contours: tumor contours in downsampled coordinates.
    PATCHES_PER_BBOX: number of patches to save per bounding box.
    PATCH_SIZE: side length of the square patches, in level-0 pixels.
    THRESH: minimum tumor fraction (0..1) a patch must exceed to be saved.
    max_attempts: maximum number of random draws per bounding box; defaults
      to ``100 * PATCHES_PER_BBOX``. Prevents an endless loop when a box
      contains (almost) no position that satisfies ``THRESH``.

  Returns:
    0 (kept for compatibility with existing callers).
  """
  if max_attempts is None:
    max_attempts = 100 * PATCHES_PER_BBOX
  patch_area = PATCH_SIZE * PATCH_SIZE
  patch_idx = 0

  for bbox in get_bbox(tumor_contours):
    # tumor bounding box
    x, y, w, h = bbox
    patch_in_box = 0
    attempts = 0

    # sample until the desired number of patches is reached or the attempt
    # budget for this box is exhausted
    while patch_in_box < PATCHES_PER_BBOX and attempts < max_attempts:
      attempts += 1

      # random point within the downsampled bounding box; randrange excludes
      # x+w / y+h, which lie just outside the box
      rand_x = random.randrange(x, x + w)
      rand_y = random.randrange(y, y + h)

      # same point in full-size (level 0) coordinates
      real_x = rand_x * MAG_FACTOR
      real_y = rand_y * MAG_FACTOR

      # extract mask for selected patch
      mask_patch = mask_full_size.read_region((real_x, real_y), 0,
                                              (PATCH_SIZE, PATCH_SIZE))
      mask_patch_np = np.array(mask_patch)[:, :, 0]
      mask_patch.close()

      # proportion of the patch that is tumor
      # (assumes tumor pixels are stored as 255 in the mask -- TODO confirm)
      tumor_percent = (mask_patch_np.sum() / 255) / patch_area

      # save patch only if threshold for tumor proportion is met
      if tumor_percent > THRESH:
        wsi_patch = wsi_full_size.read_region((real_x, real_y), 0,
                                              (PATCH_SIZE, PATCH_SIZE))
        try:
          wsi_patch.save(PATCH_POS_PATH + img_idx + '_' + str(patch_idx), 'PNG')
        finally:
          wsi_patch.close()
        patch_idx += 1
        patch_in_box += 1

    if patch_in_box < PATCHES_PER_BBOX:
      print('image {}: tumor bbox {} yielded only {}/{} patches after {} attempts'
            .format(img_idx, bbox, patch_in_box, PATCHES_PER_BBOX, attempts))

  return 0

In [None]:
# get normal patches from tumor WSIs
def get_normalT_patches_const(wsi_full_size, mask_full_size, MAG_FACTOR, img_idx,
                            tumor_contours, PATCHES_PER_BBOX, PATCH_SIZE, 
                            THRESH, lower_bound, upper_bound,
                            tissue_contours=None, max_attempts=None):
  """Save randomly sampled normal (tumor-free) patches from a tumor slide.

  For each tissue bounding box, random points are drawn inside the box,
  scaled to level-0 coordinates and used as the top-left corner of a
  PATCH_SIZE x PATCH_SIZE patch. A patch is saved as PNG under
  ``PATCH_NEG_PATH`` when its mask patch contains no tumor pixel and its
  tissue fraction (computed with ``extract_tissue``) exceeds ``THRESH``.

  Args:
    wsi_full_size: open slide handle exposing ``read_region`` (as returned by
      ``read_wsi``).
    mask_full_size: open handle of the matching tumor mask.
    MAG_FACTOR: downsample factor of the level the contours were computed on.
    img_idx: string identifying the slide; used as file-name prefix.
    tumor_contours: unused; kept so existing positional calls keep working.
    PATCHES_PER_BBOX: number of patches to save per bounding box.
    PATCH_SIZE: side length of the square patches, in level-0 pixels.
    THRESH: minimum tissue fraction (0..1) a patch must exceed to be saved.
    lower_bound: lower HSV bound forwarded to ``extract_tissue``.
    upper_bound: upper HSV bound forwarded to ``extract_tissue``.
    tissue_contours: tissue contours in downsampled coordinates. When omitted,
      the module-level variable ``tissue_contours`` is used, which is what
      this function silently relied on before this parameter existed.
    max_attempts: maximum number of random draws per bounding box; defaults
      to ``100 * PATCHES_PER_BBOX``. Prevents an endless loop when a box
      holds no tumor-free position with enough tissue.

  Returns:
    0 (kept for compatibility with existing callers).

  Raises:
    NameError: if ``tissue_contours`` is neither passed nor defined globally.
  """
  if tissue_contours is None:
    # Backward compatibility: fall back to the notebook-level variable.
    tissue_contours = globals().get('tissue_contours')
    if tissue_contours is None:
      raise NameError("name 'tissue_contours' is not defined; pass it explicitly")
  if max_attempts is None:
    max_attempts = 100 * PATCHES_PER_BBOX
  patch_area = PATCH_SIZE * PATCH_SIZE
  patch_idx = 0

  for bbox in get_bbox(tissue_contours):
    # tissue bounding box
    x, y, w, h = bbox
    patch_in_box = 0
    attempts = 0

    # sample until the desired number of patches is reached or the attempt
    # budget for this box is exhausted
    while patch_in_box < PATCHES_PER_BBOX and attempts < max_attempts:
      attempts += 1

      # random point within the downsampled bounding box; randrange excludes
      # x+w / y+h, which lie just outside the box
      rand_x = random.randrange(x, x + w)
      rand_y = random.randrange(y, y + h)

      real_x = rand_x * MAG_FACTOR
      real_y = rand_y * MAG_FACTOR

      # extract mask for selected patch
      mask_patch = mask_full_size.read_region((real_x, real_y), 0,
                                              (PATCH_SIZE, PATCH_SIZE))
      mask_patch_np = np.array(mask_patch)[:, :, 0]
      mask_patch.close()

      # only continue if patch has no cancerous tissue
      if mask_patch_np.sum() != 0:
        continue

      # extract selected patch
      wsi_patch = wsi_full_size.read_region((real_x, real_y), 0,
                                            (PATCH_SIZE, PATCH_SIZE))
      try:
        hsv_patch = cv2.cvtColor(np.array(wsi_patch), cv2.COLOR_BGR2HSV)
        tissue_patch = extract_tissue(hsv_patch, lower_bound, upper_bound)

        # proportion of the patch that is tissue (mask values are 0 or 255)
        tissue_percent = (tissue_patch.sum() / 255) / patch_area

        # save patch only if threshold for tissue proportion is met
        if tissue_percent > THRESH:
          wsi_patch.save(PATCH_NEG_PATH + img_idx + '_T' + str(patch_idx), 'PNG')
          patch_idx += 1
          patch_in_box += 1
      finally:
        # close the patch whether or not it was saved
        wsi_patch.close()

    if patch_in_box < PATCHES_PER_BBOX:
      print('image {}: tissue bbox {} yielded only {}/{} patches after {} attempts'
            .format(img_idx, bbox, patch_in_box, PATCHES_PER_BBOX, attempts))

  return 0

In [None]:
# Sample tumor and normal patches from every tumor slide.
for tumor_wsi in tumor_wsis:
  wsi_full_size = None
  mask_full_size = None
  try:
    # load in mask and WSI
    wsi_full_size, wsi_scaled = read_wsi(TUMOR_PATH + tumor_wsi, MAG_FACTOR)
    mask_full_size, mask_scaled = read_wsi(
        TUMOR_MASK_PATH + tumor_wsi.replace('tumor', 'mask'), MAG_FACTOR)

    # extract tissue and tumor contours
    # NOTE: keep the module-level name `tissue_contours`;
    # get_normalT_patches_const reads it from the notebook namespace.
    tissue_contours = get_tissue_contours(wsi_scaled, LOWER_BOUND, UPPER_BOUND)
    tumor_contours = get_tumor_contours(mask_scaled)

    # slide identifier: the part of the file name between the first '_' and
    # the first '.'
    img_idx = tumor_wsi.split('.', 1)[0].split('_', 1)[1]

    # getting tumor patches
    get_tumor_patches_const(wsi_full_size, mask_full_size, MAG_FACTOR, img_idx,
                            tumor_contours, PATCHES_PER_BBOX, PATCH_SIZE, THRESH)

    # getting normal patches
    get_normalT_patches_const(wsi_full_size, mask_full_size, MAG_FACTOR, img_idx,
                              tumor_contours, PATCHES_PER_BBOX, PATCH_SIZE,
                              THRESH, LOWER_BOUND, UPPER_BOUND)
  finally:
    # release the slide handles before moving on to the next slide, even if
    # processing this one failed
    for slide in (wsi_full_size, mask_full_size):
      if slide is not None:
        slide.close()

## Sample Patches from Normal WSIs

In [None]:
def get_normalN_patches_const(wsi_full_size, MAG_FACTOR, img_idx, 
                              tissue_contours, PATCHES_PER_BBOX, PATCH_SIZE, 
                              THRESH, lower_bound, upper_bound,
                              max_attempts=None):
  """Save randomly sampled tissue patches from a normal slide.

  For each bounding box of ``tissue_contours`` (as filtered by ``get_bbox``),
  random points are drawn inside the box, scaled to level-0 coordinates and
  used as the top-left corner of a PATCH_SIZE x PATCH_SIZE patch. A patch is
  saved as PNG under ``PATCH_NEG_PATH`` when its tissue fraction (computed
  with ``extract_tissue``) exceeds ``THRESH``.

  Args:
    wsi_full_size: open slide handle exposing ``read_region`` (as returned by
      ``read_wsi``).
    MAG_FACTOR: downsample factor of the level the contours were computed on.
    img_idx: string identifying the slide; used as file-name prefix.
    tissue_contours: tissue contours in downsampled coordinates.
    PATCHES_PER_BBOX: number of patches to save per bounding box.
    PATCH_SIZE: side length of the square patches, in level-0 pixels.
    THRESH: minimum tissue fraction (0..1) a patch must exceed to be saved.
    lower_bound: lower HSV bound forwarded to ``extract_tissue``.
    upper_bound: upper HSV bound forwarded to ``extract_tissue``.
    max_attempts: maximum number of random draws per bounding box; defaults
      to ``100 * PATCHES_PER_BBOX``. Prevents an endless loop when a box
      holds no position with enough tissue.

  Returns:
    0 (kept for compatibility with existing callers).
  """
  if max_attempts is None:
    max_attempts = 100 * PATCHES_PER_BBOX
  patch_area = PATCH_SIZE * PATCH_SIZE
  patch_idx = 0

  for bbox in get_bbox(tissue_contours):
    # tissue bounding box
    x, y, w, h = bbox
    patch_in_box = 0
    attempts = 0

    # sample until the desired number of patches is reached or the attempt
    # budget for this box is exhausted
    while patch_in_box < PATCHES_PER_BBOX and attempts < max_attempts:
      attempts += 1

      # random point within the downsampled bounding box; randrange excludes
      # x+w / y+h, which lie just outside the box
      rand_x = random.randrange(x, x + w)
      rand_y = random.randrange(y, y + h)

      real_x = rand_x * MAG_FACTOR
      real_y = rand_y * MAG_FACTOR

      # extract selected patch
      wsi_patch = wsi_full_size.read_region((real_x, real_y), 0,
                                            (PATCH_SIZE, PATCH_SIZE))
      try:
        hsv_patch = cv2.cvtColor(np.array(wsi_patch), cv2.COLOR_BGR2HSV)
        tissue_patch = extract_tissue(hsv_patch, lower_bound, upper_bound)

        # proportion of the patch that is tissue (mask values are 0 or 255)
        tissue_percent = (tissue_patch.sum() / 255) / patch_area

        # save patch only if threshold for tissue proportion is met
        if tissue_percent > THRESH:
          wsi_patch.save(PATCH_NEG_PATH + img_idx + '_N' + str(patch_idx), 'PNG')
          patch_idx += 1
          patch_in_box += 1
      finally:
        # The image must stay open until after save(); closing it first (as
        # the previous version did) makes save() fail on a closed image.
        wsi_patch.close()

    if patch_in_box < PATCHES_PER_BBOX:
      print('image {}: tissue bbox {} yielded only {}/{} patches after {} attempts'
            .format(img_idx, bbox, patch_in_box, PATCHES_PER_BBOX, attempts))

  return 0

In [None]:
# Sample normal patches from every normal slide.
for normal_wsi in normal_wsis:
  wsi_full_size = None
  try:
    wsi_full_size, wsi_scaled = read_wsi(NORMAL_PATH + normal_wsi, MAG_FACTOR)
    tissue_contours = get_tissue_contours(wsi_scaled, LOWER_BOUND, UPPER_BOUND)

    # slide identifier: the part of the file name between the first '_' and
    # the first '.'
    img_idx = normal_wsi.split('.', 1)[0].split('_', 1)[1]

    get_normalN_patches_const(wsi_full_size, MAG_FACTOR, img_idx,
                              tissue_contours, PATCHES_PER_BBOX, PATCH_SIZE,
                              THRESH, LOWER_BOUND, UPPER_BOUND)
  finally:
    # release the slide handle before moving on to the next slide, even if
    # processing this one failed
    if wsi_full_size is not None:
      wsi_full_size.close()