# Easy Patch Sampling

DESCRIPTION: This code extracts patches from a histology slide image  
INPUT: a folder of images  
OUTPUT:  
1) `all_images` : a dictionary of patches by file  
2) `OUT_DIR/%s` : a folder full of patches, arranged by original image  

Note: A slide's level count tells you how many zoom levels it offers. This notebook uses the highest zoom available for each slide and extracts patches at that level.

In [None]:
import openslide
import os
import import_ipynb
import matplotlib.pyplot as plt
from tqdm import tqdm
from patch_functions import extract_patches, determine_quality, pad

**User-adjusted Hyperparameters:**

In [None]:
# File extension of the histology slides.
FILETYPE = '.svs'

# Location of the slide files (all images should be in one folder).
FILE_DIR = '../../../Data/Raw/Histology/HE_IMAGES/'

# Name of the output folder that receives the patches.
OUT_DIR = '../../../Data/Processed/patches5000'

# Pixel size of each tile, handed to extract_patches.
TILE_SIZE = 5_000

# How much whitespace is allowed in a patch, in [0, 1]; handed to
# determine_quality. (0.35 is noted as an alternative value.)
WHITESPACE_CUTOFF = 0.85

## Save patches to image

In [None]:
def saveimage(all_images):
    """Write every patch in ``all_images`` to disk as a PNG file.

    Each key gets its own folder ``OUT_DIR/<key minus its last 4 characters>``
    and its patches are saved there as ``patch<index>.png``. The index is
    formatted by ``pad`` to the number of digits in that key's patch count.

    Args:
        all_images: dict mapping a slide file name to a list of image objects
            that expose ``save(path, format)`` (presumably PIL images --
            confirm against ``extract_patches``).

    Raises:
        OSError: if a folder cannot be created or a patch cannot be written.
    """
    # makedirs also creates missing parent folders, and exist_ok makes
    # re-runs safe without probing the folder through a bare ``except``.
    os.makedirs(OUT_DIR, exist_ok=True)

    for key, patches in all_images.items():
        # Drop the 4-character extension (e.g. '.svs') to name the folder.
        slide_dir = '%s/%s' % (OUT_DIR, key[:-4])
        os.makedirs(slide_dir, exist_ok=True)

        # Pad width depends only on the patch count, so compute it once.
        width = len(str(len(patches)))
        for i, img in enumerate(patches):
            name = 'patch' + pad(i, width)
            img.save('%s/%s.png' % (slide_dir, name), 'PNG')

## Run Code

In [None]:
# extract all histology slides in FILE_DIR
# Match on the file extension only: a substring test would also accept names
# that merely contain FILETYPE somewhere (e.g. 'slide.svs.bak').
files = [x for x in os.listdir(FILE_DIR) if x.endswith(FILETYPE)]

In [None]:
# find files that already are processed
# OUT_DIR may not exist yet on a first run; treat that as "nothing processed"
# instead of failing with FileNotFoundError.
exists = os.listdir(OUT_DIR) if os.path.isdir(OUT_DIR) else []

In [None]:
# extract patches and remove ones with too much whitespace
fail = []
# Defined up front so later cells can reference it even if every file is skipped.
all_images = {}

for file in files:
    # Skip slides whose output folder is already present in OUT_DIR.
    # NOTE(review): a slide that failed part-way through saving also leaves a
    # folder behind and will be skipped here -- delete it to reprocess.
    if file[:-4] in exists:
        continue
    try:
        # Reset per slide so only the current slide's patches stay in memory.
        all_images = {}

        # The context manager closes the slide handle even if extraction fails.
        with openslide.OpenSlide(os.path.join(FILE_DIR, file)) as m:
            print(file, '| Image size:', m.dimensions)

            print('* Extracting patches...')
            d = extract_patches(m, TILE_SIZE)

            print('* Removing patches with too much whitespace...')
            # Filter while the slide is still open in case `d` reads lazily.
            all_images[file] = [
                img for img in d if determine_quality(img, WHITESPACE_CUTOFF)
            ]

        print(file, ':', len(all_images[file]))
        saveimage(all_images)
    except Exception as err:
        # Record the failure and keep going, but show what went wrong.
        # `Exception` (not a bare except) lets Ctrl-C interrupt the run.
        print('FAILED:', file, '|', repr(err))
        fail.append(file)
            

In [None]:
# Report every slide whose name was collected in `fail`, one per line.
# Only the header line is printed when `fail` is empty.
print('The following files failed to compute:')
for filename in fail:
    print(filename)

## Visualize

Show up to the first 100 accepted patches for the first slide in `all_images`.

In [None]:
# Take the first slide name held in `all_images`; raises IndexError if the
# dictionary is empty.
filename = list(all_images)[0]
# Trailing expression: the notebook displays the number of accepted patches.
len(all_images[filename])

In [None]:
# Show up to GRID * GRID accepted patches of the first slide in `all_images`.
GRID = 10
fig, axes = plt.subplots(GRID, GRID, figsize=(15, 15))

filename = list(all_images)[0]
# Slicing is safe for any patch count, including zero.
patches = all_images[filename][:GRID * GRID]

# axes.T.ravel() walks the grid column by column (top to bottom, then left to
# right), the same fill order as indexing axes[i, j] with j in the outer loop.
for ax, patch in zip(axes.T.ravel(), patches):
    ax.imshow(patch)

# Hide the frame and ticks on every cell, including any left without a patch.
for ax in axes.ravel():
    ax.axis('off')

plt.show()