In [1]:
import os
import sys
import torch
#import accimage
from PIL import Image
from imageio import imread
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms, set_image_backend, get_image_backend

%reload_ext autoreload
%autoreload 2

In [None]:
# https://github.com/pytorch/accimage
# Switch torchvision's image-decoding backend to accimage, then echo the active
# backend as the cell output to confirm the switch took effect.
# NOTE(review): the top-level `import accimage` is commented out in the imports
# cell — confirm the accimage package is actually installed before relying on
# this backend.
set_image_backend('accimage')
get_image_backend()

## ImageFolder

In [None]:
# Pick one slide directory under the COAD image root and build the path to its
# tile folder: <root>/<slide dir>/<slide name>_files
i = 3  # index of the slide to inspect
TCGA_COAD_IMG_DIR = '/n/image_pngs/COAD/'

# os.listdir returns entries in arbitrary order; sort them so that index `i`
# refers to the same slide on every run and every machine.
dirs = sorted(os.listdir(TCGA_COAD_IMG_DIR))
# Strip the extension (presumably '.svs') to recover the bare slide name,
# without assuming the extension is exactly four characters long.
imgs = [os.path.splitext(d)[0] for d in dirs]
current_img = os.path.join(TCGA_COAD_IMG_DIR, dirs[i], imgs[i] + '_files')

In [None]:
# https://github.com/pytorch/examples/issues/236
# Display the selected slide's tile directory; it is used as the ImageFolder
# root in the next cell.
current_img

In [None]:
# https://github.com/pytorch/examples/blob/42e5b996718797e45c46a25c55b031e6768f8440/imagenet/main.py#L89-L101
# Training-style preprocessing modelled on the example linked above: random
# 256-px resized crop, random horizontal flip, tensor conversion, and
# per-channel normalization.
train_dir = current_img
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([
    transforms.RandomResizedCrop(size=256),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# ImageFolder treats every sub-directory of `train_dir` as one class.
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,
    pin_memory=True,
)

In [None]:
# Class index that ImageFolder assigned to the sub-directory named '20.0'.
# NOTE(review): presumably '20.0' is a magnification-level folder inside the
# slide's tile directory — confirm.
train_dataset.class_to_idx['20.0']

In [None]:
# Fetch a single batch for inspection. next(iter(...)) does this without a
# loop/break and without rebinding the notebook-level slide index `i`; it
# raises StopIteration if the loader is empty instead of leaving `img` unset.
img = next(iter(train_loader))

In [None]:
# Shape of the first element of the fetched batch (presumably the image
# tensor, since ImageFolder yields (image, class_index) pairs).
img[0].shape

## Inception v3

In [None]:
# inception_v3 expects tensors with a size of N x 3 x 299 x 299
# NOTE(review): the tiles prepared elsewhere in this notebook are 256 x 256, so
# they would presumably need resizing before being fed to this network — confirm.
# Load Inception v3 with pretrained weights and display its layer structure.
net = models.inception_v3(pretrained=True)
net

## New Dataset Class

In [2]:
# https://chsasank.github.io/vision/_modules/torchvision/datasets/folder.html
def pil_loader(path):
    """Load the image stored at `path` with PIL and return it converted to RGB."""
    # Give PIL an explicitly opened file object instead of the path so the
    # handle is closed deterministically and no ResourceWarning is emitted
    # (https://github.com/python-pillow/Pillow/issues/835).
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        rgb_image = picture.convert('RGB')
    return rgb_image

def accimage_loader(path):
    """Load the image at `path` with accimage, falling back to PIL on IOError.

    Args:
        path (string): location of the image file.

    Returns:
        An `accimage.Image`, or the result of `pil_loader(path)` when accimage
        raises IOError.

    Raises:
        ImportError: if the accimage package is not installed.
    """
    # Imported locally: this notebook has no module-level `import accimage`,
    # so without this the name would be undefined as soon as the accimage
    # backend is selected. A local import also keeps the dependency optional
    # for PIL-only use.
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)

def default_loader(path):
    """Load the image at `path` with the loader matching torchvision's active backend."""
    use_accimage = get_image_backend() == 'accimage'
    # Any backend other than 'accimage' is served by the PIL loader.
    chosen_loader = accimage_loader if use_accimage else pil_loader
    return chosen_loader(path)

In [3]:
# https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
class TCGADataset(Dataset):
    """TCGA slide dataset: each item is one slide, returned as a stack of its tiles.

    Tiles for a sample are read from
    ``<root_dir>/<sample>.svs/<sample>_files/<magnification>/``.
    """

    def __init__(self, sample_annotations, root_dir, transform=None, loader=default_loader,
                 magnification='5.0', tile_size=256):
        """
        Args:
            sample_annotations (dict): dictionary of sample names and their respective labels.
            root_dir (string): directory containing all of the samples and their respective images.
            transform (callable, optional): optional transform to be applied on the images of a sample.
                Its output must be a C x H x W tensor so that tiles can be size-checked and stacked;
                with ``transform=None`` the loader itself has to return such tensors.
            loader (callable, optional): function mapping a tile path to an image object.
            magnification (string, optional): name of the sub-folder of ``<sample>_files`` to read
                tiles from (presumably a magnification level).
            tile_size (int, optional): required tile height and width; tiles of any other size
                are skipped.
        """
        self.sample_names = list(sample_annotations.keys())
        self.sample_labels = list(sample_annotations.values())
        self.root_dir = root_dir
        self.transform = transform
        self.loader = loader
        self.magnification = magnification
        self.tile_size = tile_size

    def __len__(self):
        """Return the number of samples (slides) in the dataset."""
        return len(self.sample_names)

    def __getitem__(self, idx):
        """Load every full-size tile of sample `idx`.

        Returns:
            dict: ``{'slide': tensor of shape (n_tiles, C, tile_size, tile_size),
            'label': label of the sample}``.

        Raises:
            TypeError: if a tile is not a tensor after loading/transforming.
            RuntimeError: if the sample has no tile of the required size.
        """
        name = self.sample_names[idx]
        # os.path.join works whether or not root_dir ends with a separator.
        img_dir = os.path.join(self.root_dir, name + '.svs', name + '_files', self.magnification)

        slide_tiles = []
        # os.listdir order is arbitrary; sort so the tile order within the
        # stacked tensor is reproducible.
        for im in sorted(os.listdir(img_dir)):
            path = os.path.join(img_dir, im)
            image = self.loader(path)

            if self.transform is not None:
                image = self.transform(image)

            # The size check and torch.stack below both need tensors; fail
            # with a clear message instead of an AttributeError on `.shape`.
            if not isinstance(image, torch.Tensor):
                raise TypeError(
                    'expected a tensor for tile {!r} but got {}; pass a transform that '
                    'ends with ToTensor()'.format(path, type(image).__name__))

            # Keep only tiles of the expected size (other sizes are presumably
            # partial tiles from the slide border) so they can be stacked.
            if image.shape[1] == self.tile_size and image.shape[2] == self.tile_size:
                slide_tiles.append(image)

        if not slide_tiles:
            raise RuntimeError(
                'no {0}x{0} tiles found in {1!r}'.format(self.tile_size, img_dir))

        slide = torch.stack(slide_tiles)
        label = self.sample_labels[idx]
        sample = {'slide': slide, 'label': label}

        return sample

In [4]:
# https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py
# https://pillow.readthedocs.io/en/5.1.x/handbook/concepts.html#concept-modes
# Image root and the ten slides used in this experiment, each paired with its label.
root_dir = '/n/image_pngs/COAD/'
sample_annotations = dict([
    ('TCGA-T9-A92H-01Z-00-DX3.1DE7D5ED-60F7-4645-8243-AB0C027B3ED7', 0),
    ('TCGA-WS-AB45-01Z-00-DX1.1FD99E7A-830F-40DC-98CD-53C62C678AC6', 1),
    ('TCGA-NH-A8F8-01Z-00-DX1.0C13D583-0BCE-44F7-A4E6-5994FE97B99C', 0),
    ('TCGA-QG-A5YV-01Z-00-DX1.9B7FD3EA-D1AB-44B3-B728-820939EF56EA', 1),
    ('TCGA-QG-A5YW-01Z-00-DX1.3242285F-FA82-4A92-9D0E-951013A3C91A', 0),
    ('TCGA-QG-A5YX-01Z-00-DX1.28125B5A-B696-44AE-8A86-72E2CF7B9A6A', 1),
    ('TCGA-QG-A5Z1-01Z-00-DX2.2CE72B6A-557F-43BD-BA4C-B252E14E46EF', 0),
    ('TCGA-QG-A5Z2-01Z-00-DX2.F2352352-8F00-4BB3-8A62-8D1C1E374F95', 1),
    ('TCGA-QL-A97D-01Z-00-DX1.6B48E95D-BE3C-4448-A1AF-6988C00B7AF1', 0),
    ('TCGA-SS-A7HO-01Z-00-DX1.D20B9109-F984-40DE-A4F1-2DFC61002862', 1),
])

# No augmentation for whole-slide loading: tensor conversion followed by
# per-channel normalization only.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

In [5]:
# Wrap the annotated slides in the custom dataset, applying the
# tensor-conversion + normalization transform to every tile.
train_set = TCGADataset(
    sample_annotations=sample_annotations,
    root_dir=root_dir,
    transform=transform,
)

In [6]:
# Load the second slide (index 1) through the dataset's item access.
sample = train_set[1]

In [7]:
# Shape of the stacked tile tensor for this slide: (tiles, channels, height, width).
sample['slide'].shape

torch.Size([1010, 3, 256, 256])

In [9]:
# Label stored for this slide in `sample_annotations`.
sample['label']

1

In [8]:
# One slide per batch, in shuffled order. Slides contain different numbers of
# tiles, which is presumably why batch_size stays at 1 (tensors of unequal
# size cannot be stacked into a larger batch).
train_loader = DataLoader(
    dataset=train_set,
    batch_size=1,
    shuffle=True,
    pin_memory=True,
)

In [10]:
# Walk through the loader once, printing each slide's batched tile-tensor
# shape and its label as a sanity check that every slide loads.
for s in train_loader:
    print(s['slide'].shape, s['label'])

torch.Size([1, 867, 3, 256, 256]) tensor([1])
torch.Size([1, 1373, 3, 256, 256]) tensor([1])
torch.Size([1, 846, 3, 256, 256]) tensor([0])
torch.Size([1, 703, 3, 256, 256]) tensor([0])
torch.Size([1, 1155, 3, 256, 256]) tensor([1])
torch.Size([1, 1010, 3, 256, 256]) tensor([1])
torch.Size([1, 1097, 3, 256, 256]) tensor([1])
torch.Size([1, 966, 3, 256, 256]) tensor([0])
torch.Size([1, 912, 3, 256, 256]) tensor([0])
torch.Size([1, 955, 3, 256, 256]) tensor([0])
