In [1]:
# Input / output locations used by the feature-extraction cells below.
# CSV listing the slides to process; its 'slide_id' column is iterated over.
wsi_csv = '/media/yanis/LaCie/Final_execution_files/train/process_list_autogen.csv'
# Directory with the whole-slide images, opened as <wsi_path><slide_id>.tif
wsi_path = '/media/yanis/LaCie/final_training_val_set/'
# Directory with the per-slide patch coordinate files, read as <patches_path><slide_id>.h5
patches_path = '/media/yanis/LaCie/Final_execution_files/train/patches/'
# Directory receiving <slide_id>_features.h5 and <slide_id>_features.pt
output_path = '/media/yanis/LaCie/Final_execution_files/train/features2/'

In [2]:
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
from torchvision import transforms
import h5py

class WSI_Dataset(Dataset):
	"""Dataset over the slide identifiers listed in a CSV file.

	The CSV is loaded once into ``self.df``; items are the values of its
	``slide_id`` column.
	"""

	def __init__(self, csv_path):
		"""Read the CSV located at ``csv_path`` and keep it as ``self.df``."""
		slide_table = pd.read_csv(csv_path)
		self.df = slide_table

	def __len__(self):
		"""Return the number of rows (slides) in the table."""
		n_rows = self.df.shape[0]
		return n_rows

	def __getitem__(self, idx):
		"""Return the ``slide_id`` entry whose index label is ``idx``."""
		slide_ids = self.df['slide_id']
		return slide_ids[idx]
        
class Patches_Dataset(Dataset):
	"""Dataset of image patches read on the fly from one whole-slide image.

	Patch locations come from the ``coords`` dataset of an HDF5 file; the
	``patch_level`` and ``patch_size`` attributes of that dataset tell how each
	patch is read from ``wsi``.
	"""

	def __init__(self,
		file_path,
		wsi,
		pretrained=False,
		custom_transforms=None,
		custom_downsample=1,
		target_patch_size=-1
		):
		"""
		Args:
			file_path: path of the HDF5 file holding the ``coords`` dataset.
			wsi: slide object exposing ``read_region(location, level, size)``.
			pretrained: stored on the instance; it does not change the transforms.
			custom_transforms: accepted for interface compatibility but ignored;
				the fixed ToTensor + Normalize pipeline below is always used.
			custom_downsample: when > 1 (and ``target_patch_size`` is not > 0),
				patches are resized to ``patch_size // custom_downsample``.
			target_patch_size: when > 0, patches are resized to a square of this
				side; takes precedence over ``custom_downsample``.
		"""
		self.pretrained = pretrained
		self.wsi = wsi
		# Tensor conversion followed by per-channel normalisation
		# (these constants are the commonly used ImageNet statistics).
		self.roi_transforms = transforms.Compose([
			transforms.ToTensor(),
			transforms.Normalize(mean = (0.485, 0.456, 0.406),
				std = (0.229, 0.224, 0.225)),
		])
		self.file_path = file_path
		with h5py.File(self.file_path, "r") as f:
			dset = f['coords']
			self.patch_level = dset.attrs['patch_level']
			self.patch_size = dset.attrs['patch_size']
			self.length = len(dset)

		# Decide the optional resize target; None means "keep the native size".
		if target_patch_size > 0:
			self.target_patch_size = (target_patch_size, ) * 2
		elif custom_downsample > 1:
			self.target_patch_size = (self.patch_size // custom_downsample, ) * 2
		else:
			self.target_patch_size = None
		self.summary()

	def __len__(self):
		"""Return the number of patch coordinates found in the HDF5 file."""
		return self.length

	def summary(self):
		"""Print every attribute attached to the ``coords`` dataset."""
		# Context manager so the HDF5 handle is released instead of leaking
		# one open file per dataset instance.
		with h5py.File(self.file_path, "r") as hdf5_file:
			for name, value in hdf5_file['coords'].attrs.items():
				print(name, value)

	def __getitem__(self, idx):
		"""Return ``(image, coord)`` for patch ``idx``.

		``image`` is the transformed patch with a leading dimension of size 1
		added by ``unsqueeze(0)``; ``coord`` is the entry read from ``coords``.
		"""
		# The file is reopened per item, so no HDF5 handle is kept on the instance.
		with h5py.File(self.file_path,'r') as hdf5_file:
			coord = hdf5_file['coords'][idx]
		img = self.wsi.read_region(coord, self.patch_level, (self.patch_size, self.patch_size)).convert('RGB')

		if self.target_patch_size is not None:
			img = img.resize(self.target_patch_size)
		img = self.roi_transforms(img).unsqueeze(0)
		return img, coord
        
def collate_features(batch):
	"""Merge a list of ``(image, coord)`` samples into one batch.

	Images are concatenated along dimension 0 with ``torch.cat`` and the
	coordinates are stacked row-wise with ``np.vstack``.

	Returns:
		A two-element list ``[images, coords]``.
	"""
	image_tensors = []
	coord_rows = []
	for sample in batch:
		image_tensors.append(sample[0])
		coord_rows.append(sample[1])
	stacked_images = torch.cat(image_tensors, dim = 0)
	stacked_coords = np.vstack(coord_rows)
	return [stacked_images, stacked_coords]
    

In [3]:
def save_hdf5(output_path, asset_dict, attr_dict=None, mode='a'):
    """Create or extend datasets in an HDF5 file from a dict of arrays.

    For every ``key``/array pair in ``asset_dict``:
    * if ``key`` is not yet in the file, a dataset resizable along axis 0 is
      created (chunked one row at a time), filled with the array, and given
      the attributes found in ``attr_dict[key]`` when provided;
    * otherwise the existing dataset is grown along axis 0 and the array is
      written into the newly added rows.

    Args:
        output_path: path of the HDF5 file to write.
        asset_dict: mapping of dataset name to array (must expose ``shape``
            and ``dtype``).
        attr_dict: optional mapping of dataset name to a dict of attributes,
            applied only when the dataset is first created.
        mode: h5py file mode, e.g. ``'w'`` to start a fresh file or ``'a'``
            to append.

    Returns:
        ``output_path``, unchanged.
    """
    # The context manager closes the file even if a write fails part-way.
    with h5py.File(output_path, mode) as file:
        for key, val in asset_dict.items():
            data_shape = val.shape
            if key not in file:
                chunk_shape = (1, ) + data_shape[1:]
                maxshape = (None, ) + data_shape[1:]
                dset = file.create_dataset(key, shape=data_shape, maxshape=maxshape,
                                           chunks=chunk_shape, dtype=val.dtype)
                dset[:] = val
                if attr_dict is not None and key in attr_dict:
                    for attr_key, attr_val in attr_dict[key].items():
                        dset.attrs[attr_key] = attr_val
            else:
                n_new = data_shape[0]
                if n_new == 0:
                    # Nothing to append; also avoids dset[-0:], which would
                    # select the whole dataset rather than an empty tail.
                    continue
                dset = file[key]
                dset.resize(len(dset) + n_new, axis=0)
                dset[-n_new:] = val
    return output_path

The real elapsed time isn't shown in the output below; the full run took much longer (hours).

In [4]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from models.resnet_custom import resnet50_baseline
import openslide

# Extract one feature vector per patch for every slide listed in the CSV, and
# store them per slide as <slide_id>_features.h5 (features + coords) and
# <slide_id>_features.pt (features only).
wsi_bag = WSI_Dataset(wsi_csv)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = resnet50_baseline(pretrained=True)
model = model.to(device)
# Inference only: put layers such as BatchNorm/Dropout (if the model has any)
# into evaluation mode so features do not depend on batch composition.
model.eval()

# Loader options must be passed as keywords: the third positional parameter of
# DataLoader is `shuffle`, so a positional dict would silently enable shuffling
# and never apply num_workers / pin_memory.
loader_kwargs = {'num_workers': 4, 'pin_memory': True} if device.type == 'cuda' else {}

for item in wsi_bag.df['slide_id']:
    features_h5_path = output_path+item+'_features.h5'
    wsi = openslide.open_slide(wsi_path+item+'.tif')
    try:
        # Keyword arguments on purpose: passed positionally, (True, 1, 256)
        # binds to pretrained / custom_transforms / custom_downsample, which
        # resizes each 256-pixel patch to a single pixel.
        patches = Patches_Dataset(patches_path+item+'.h5', wsi,
                                  pretrained=True,
                                  custom_downsample=1,
                                  target_patch_size=256)
        patches_loader = DataLoader(patches, batch_size=256,
                                    collate_fn=collate_features, **loader_kwargs)
        print('Bag : {} - Processing {} batches in total'.format(item,len(patches_loader)))

        if len(patches) == 0:
            # No batch means no features file would be written; skip the slide
            # instead of failing when reading that file back.
            print('Bag : {} - no patches found, skipping'.format(item))
            continue

        # Start a fresh file for every slide, then append the following batches.
        mode = 'w'
        with torch.no_grad():
            for batch_images, batch_coords in tqdm(patches_loader):
                batch_images = batch_images.to(device, non_blocking=True)
                features = model(batch_images)
                features = features.cpu().numpy()
                outputs = {'features': features, 'coords': batch_coords}
                save_hdf5(features_h5_path, outputs, None, mode=mode)
                mode = 'a'
    finally:
        # Release the slide handle even if extraction fails for this slide.
        wsi.close()

    # Re-read the complete feature matrix and save it as a torch tensor.
    with h5py.File(features_h5_path, "r") as file:
        features = torch.from_numpy(file['features'][:])
    torch.save(features, output_path+item+'_features.pt')

print("The features' computation is done.")


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_001
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_001 - Processing 32 batches in total


100%|███████████████████████████████████████████| 32/32 [00:41<00:00,  1.30s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_002
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_002 - Processing 62 batches in total


100%|███████████████████████████████████████████| 62/62 [01:03<00:00,  1.03s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_003
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_003 - Processing 93 batches in total


100%|███████████████████████████████████████████| 93/93 [02:03<00:00,  1.32s/it]


downsample [1. 1.]
downsampled_level_dim [ 96256 218624]
level_dim [ 96256 218624]
name normal_004
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_004 - Processing 22 batches in total


100%|███████████████████████████████████████████| 22/22 [00:26<00:00,  1.21s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_005
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_005 - Processing 47 batches in total


100%|███████████████████████████████████████████| 47/47 [00:56<00:00,  1.20s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 217088]
level_dim [ 97792 217088]
name normal_006
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_006 - Processing 16 batches in total


100%|███████████████████████████████████████████| 16/16 [00:16<00:00,  1.01s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_007
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_007 - Processing 58 batches in total


100%|███████████████████████████████████████████| 58/58 [01:06<00:00,  1.14s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_008
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_008 - Processing 9 batches in total


100%|█████████████████████████████████████████████| 9/9 [00:09<00:00,  1.04s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 220672]
level_dim [ 97792 220672]
name normal_009
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_009 - Processing 98 batches in total


100%|███████████████████████████████████████████| 98/98 [01:55<00:00,  1.18s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_010
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_010 - Processing 38 batches in total


100%|███████████████████████████████████████████| 38/38 [00:47<00:00,  1.24s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_011
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_011 - Processing 172 batches in total


100%|█████████████████████████████████████████| 172/172 [03:13<00:00,  1.13s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_012
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_012 - Processing 47 batches in total


100%|███████████████████████████████████████████| 47/47 [00:52<00:00,  1.11s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_013
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_013 - Processing 25 batches in total


100%|███████████████████████████████████████████| 25/25 [00:28<00:00,  1.15s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name normal_014
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_014 - Processing 34 batches in total


100%|███████████████████████████████████████████| 34/34 [00:40<00:00,  1.19s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 219648]
level_dim [ 97792 219648]
name normal_015
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : normal_015 - Processing 75 batches in total


100%|███████████████████████████████████████████| 75/75 [01:33<00:00,  1.24s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_001
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_001 - Processing 144 batches in total


100%|█████████████████████████████████████████| 144/144 [03:13<00:00,  1.35s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 219648]
level_dim [ 97792 219648]
name tumor_002
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_002 - Processing 63 batches in total


100%|███████████████████████████████████████████| 63/63 [01:52<00:00,  1.78s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_003
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_003 - Processing 109 batches in total


100%|█████████████████████████████████████████| 109/109 [02:32<00:00,  1.40s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 220672]
level_dim [ 97792 220672]
name tumor_004
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_004 - Processing 187 batches in total


100%|█████████████████████████████████████████| 187/187 [04:00<00:00,  1.29s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 219648]
level_dim [ 97792 219648]
name tumor_005
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_005 - Processing 55 batches in total


100%|███████████████████████████████████████████| 55/55 [01:25<00:00,  1.55s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_006
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_006 - Processing 241 batches in total


100%|█████████████████████████████████████████| 241/241 [05:56<00:00,  1.48s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_007
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_007 - Processing 155 batches in total


100%|█████████████████████████████████████████| 155/155 [03:24<00:00,  1.32s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_008
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_008 - Processing 180 batches in total


100%|█████████████████████████████████████████| 180/180 [03:55<00:00,  1.31s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 217088]
level_dim [ 97792 217088]
name tumor_009
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_009 - Processing 193 batches in total


100%|█████████████████████████████████████████| 193/193 [04:36<00:00,  1.43s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_011
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_011 - Processing 107 batches in total


100%|█████████████████████████████████████████| 107/107 [02:28<00:00,  1.39s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 215552]
level_dim [ 97792 215552]
name tumor_012
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_012 - Processing 86 batches in total


100%|███████████████████████████████████████████| 86/86 [02:00<00:00,  1.40s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 217088]
level_dim [ 97792 217088]
name tumor_013
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_013 - Processing 100 batches in total


100%|█████████████████████████████████████████| 100/100 [02:32<00:00,  1.52s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 214016]
level_dim [ 97792 214016]
name tumor_014
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_014 - Processing 156 batches in total


100%|█████████████████████████████████████████| 156/156 [03:36<00:00,  1.39s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 221184]
level_dim [ 97792 221184]
name tumor_016
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_016 - Processing 68 batches in total


100%|███████████████████████████████████████████| 68/68 [01:46<00:00,  1.56s/it]


downsample [1. 1.]
downsampled_level_dim [ 97792 219648]
level_dim [ 97792 219648]
name tumor_017
patch_level 0
patch_size 256
save_path ./project/execution_files/train/patches
Bag : tumor_017 - Processing 141 batches in total


100%|█████████████████████████████████████████| 141/141 [03:13<00:00,  1.37s/it]


The features' computation is done.
