In [48]:
import csv
import os
import glob
import re
from pandas import DataFrame, Series
from openslide import open_slide
from PIL import Image
import timeit
import time

In [63]:
# Index every whole-slide image (*.svs) found in the validation and training folders.
train_paths = [
    "../data/valid/Celiac", "../data/valid/EE", "../data/valid/Normal",
    "../data/train/Celiac", "../data/train/EE", "../data/train/Normal",
]
images = {}            # slide name (file name without extension) -> path of the .svs file
images_by_folder = {}  # folder path -> slide names found in that folder, in glob order
for folder in train_paths:
    names_in_folder = []
    images_by_folder[str(folder)] = names_in_folder
    for slide_path in glob.glob(os.path.join(folder, '*.svs')):
        slide_name = os.path.splitext(os.path.basename(slide_path))[0]
        # A name seen again in a later folder overwrites the earlier path here.
        images[slide_name] = slide_path
        names_in_folder.append(slide_name)

{'../data/valid/Celiac': ['C07-37_06',
  'C11-56_02',
  'C03-10_05',
  'C03-07_03',
  'C03-05_01',
  'C16-76_03',
  'C03-08_02',
  'C07-35_01',
  'C15-74_04',
  'C04-18_04',
  'C05-26_01',
  'C04-23_01',
  'C05-26_03',
  'C16-77_04',
  'C16-77_02',
  'C07-37_07',
  'C12-61_01',
  'C14-70_01',
  'C03-05_04',
  'C09-46_01',
  'C07-35_06',
  'C06-30_03',
  'C06-27_02',
  'C10-48_04',
  'C07-40_04',
  'C03-12_03',
  'C14-70_04',
  'C06-29_02',
  'C17-83_02',
  'C10-47_02',
  'C10-49_03',
  'C09-46_02',
  'C16-79_01',
  'C06-30_02',
  'C04-18_02',
  'C14-70_05',
  'C06-33_04',
  'C07-40_03',
  'C17-81_01',
  'C12-63_02',
  'C12-60_01',
  'C13-66_02',
  'C04-24_00',
  'C03-09_01'],
 '../data/valid/EE': ['130366_6717_001',
  '1',
  '34',
  '84',
  '20',
  '73',
  '130379_6731_002',
  '130377_6729_001',
  '51',
  '130375_6727_001'],
 '../data/valid/Normal': ['N16-47_01',
  'N16-39_00',
  'N16-36_01',
  'N14-19_02',
  'N14-15_03',
  'N17-58_04',
  'N15-30_01',
  'N17-50_02',
  'N15-23_03',
  'N

In [24]:
# Collect the patch-coordinate files (*.txt) and bucket them by the disease
# prefix their file name starts with.
patches_dir = '../data/patches_r_0/'
patches = list(glob.glob(os.path.join(patches_dir, '*.txt')))
patches_by_disease = {
    prefix: [
        patch_file
        for patch_file in patches
        if os.path.basename(patch_file).startswith(str(prefix))
    ]
    for prefix in ['EE', 'C', 'N']
}

In [124]:
# Pair every slide with the patch-coordinate files that belong to it.
# Result: `image_patch_data`, one row per (slide, patch file) match.

# Slide folder -> disease prefix used at the start of the patch file names.
path_disease_map = {
    "../data/valid/Celiac": 'C',
    "../data/valid/EE": 'EE',
    "../data/valid/Normal": 'N',
    "../data/train/Celiac": 'C',
    "../data/train/EE": 'EE',
    "../data/train/Normal": 'N'
}
columns = ['image_name', 'image_path', 'patch_path']


def _patch_matches_image(patch, image_name, disease):
    """Return True when the patch file `patch` belongs to the slide `image_name`.

    Args:
        patch: path of a patch-coordinate file.
        image_name: slide name (file name without extension).
        disease: disease prefix of the slide's folder ('EE', 'C' or 'N').

    Raises:
        IndexError: for an 'EE' patch whose base name has no second
            '_'-separated field, or whose second field contains no '-'.
    """
    name_lower = image_name.lower()
    if disease != 'EE':
        # Case-insensitive substring match against the whole patch path.
        return name_lower in patch.lower()

    # EE slide names can be very short (e.g. '1', '34'), so a plain substring
    # test is not enough; compare against the name embedded in the second
    # '_'-separated field of the patch file name instead.
    name_field = os.path.basename(patch).lower().split('_')[1].split('-')
    image_from_patch = name_field[1]
    if name_lower in patch.lower() and image_from_patch == name_lower:
        return True
    # Fallback for long slide names (more than 6 characters): match the part
    # of the slide name before its first '_' against the text before the '-'.
    # NOTE(review): presumably meant for names like '130366_6717_001' - confirm.
    return len(image_name) > 6 and name_lower.split('_')[0] in name_field[0]


# Collect the matches in a plain list and build the frame once:
# DataFrame.append copies the whole frame on every call (quadratic) and no
# longer exists in pandas >= 2.0.
matched_rows = []
for train_path in train_paths:
    disease = path_disease_map[str(train_path)]
    patches_for_folder = patches_by_disease[disease]
    for image_name in images_by_folder[train_path]:
        for patch in patches_for_folder:
            if _patch_matches_image(patch, image_name, disease):
                matched_rows.append((image_name, images[image_name], patch))

image_patch_data = DataFrame(matched_rows, columns=columns)
        

In [132]:
# Read the (x, y) coordinates listed in every patch file and group them by slide.
# image_path_coord_map: slide path -> list of (x, y) tuples. The coordinates are
# kept as the strings read from the file.
image_path_coord_map = {
    image_path: [] for image_path in image_patch_data['image_path'].unique()
}

# Only two columns are needed, so iterate over them directly instead of iterrows().
for image_path, patch_path in zip(image_patch_data['image_path'],
                                  image_patch_data['patch_path']):
    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(patch_path, 'r', newline='') as input_file:
        for row in csv.reader(input_file, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; unpacking it would
                # raise ValueError, so skip it.
                continue
            x, y = row
            image_path_coord_map[image_path].append((x, y))

In [139]:
# Output folder for each slide: the slide path with every 'data' replaced by
# 'data_segments', minus the trailing file name (everything after the last '/').
path_change_map = {
    slide_path: slide_path.replace('data', 'data_segments').rpartition('/')[0]
    for slide_path in list(image_path_coord_map.keys())
}

In [150]:
# Cut every listed patch out of its slide, shrink it and save it as a JPEG in
# the slide's output folder. Prints progress and elapsed seconds after each slide.
start_time = time.time()
count = 0
width_height = (256, 256)    # size of the saved patch image
region_size = (1000, 1000)   # size of the region read from the slide at level 0
total_slides = len(image_path_coord_map)  # progress denominator (was hard-coded)

for key, value in image_path_coord_map.items():
    out_dir = path_change_map[key]
    if out_dir:
        # Saving fails if the target folder is missing, so create it up front.
        os.makedirs(out_dir, exist_ok=True)

    # Slide name without folder or extension; computed once per slide with the
    # same splitext rule that was used when the slides were indexed.
    image_name = os.path.splitext(os.path.basename(key))[0]

    op_slide_img = open_slide(key)
    try:
        for val in value:
            x = int(val[0])
            y = int(val[1])
            img = op_slide_img.read_region((x, y), 0, region_size)
            # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
            # which was removed in Pillow 10.
            img = img.resize(width_height, Image.LANCZOS)
            jpg_img = img.convert('RGB')
            jpg_img.save(out_dir + '/' + image_name + '_' + str(x) + '_' + str(y) + '.jpg')
    finally:
        # Release the slide handle even when a read/save fails or the run is
        # interrupted; otherwise one handle leaks per slide.
        op_slide_img.close()

    print(key + ' Saved!')
    count += 1
    print(count / total_slides)
    stop = time.time()
    print(stop - start_time)
stop = time.time()
print(stop - start_time)

../data/valid/Celiac/C07-37_06.svs Saved!
0.0022172949002217295
205.2351496219635


KeyboardInterrupt: 