In [8]:
from __future__ import absolute_import, division, print_function, unicode_literals
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
%matplotlib inline
import numpy as np
import pandas as pd
import os, shutil, glob, sys, math, cv2, re

import segmentation_models_pytorch as smp
import albumentations as albu
from torchsummary import summary

from tqdm import tqdm

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from collections import OrderedDict
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from torch import Tensor
from torch.jit.annotations import List
from torchvision import models

from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

from sklearn.model_selection import train_test_split

In [3]:
# Root data directory used by the cells below (patch folders are read from it
# and prediction files are written beneath it). The TCGA_DATA_FOLDER
# environment variable overrides the original hard-coded location so the
# notebook can run on another machine without editing this cell.
DATA_FOLDER = os.environ.get("TCGA_DATA_FOLDER", "/home/Tsung/pathology/data/tcga")

# model

In [4]:
# Rebuild the ResNet-18 architecture and swap the final layer for a 2-way
# classifier so the parameter shapes match the saved checkpoint.
model = models.resnet18()
model.fc = nn.Linear(in_features=512, out_features=2, bias=True)

# Despite the .h5 extension, the file is read with torch.load and applied as a
# state dict. map_location="cpu" deserializes the tensors on the CPU first, so
# loading does not depend on the GPU index the checkpoint was saved from; the
# model is moved to the GPU explicitly afterwards.
state_dict = torch.load(
    "2020_09_22_18_52_46_Resnet18-tumor-or-nonTumor_Dataset_Zenodo.h5",
    map_location="cpu",
)
model.load_state_dict(state_dict)
model.cuda()
# eval() switches BatchNorm layers to their running statistics for inference;
# it stays the last expression so the cell still displays the model summary.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# dataset

In [5]:
def get_augmentation(height=256, width=256):
    """Build the test-time transform that resizes every image to a fixed size.

    Args:
        height: target image height in pixels (default 256).
        width: target image width in pixels (default 256).

    Returns:
        An ``albu.Compose`` pipeline containing a single always-applied
        ``albu.Resize`` step.
    """
    test_transform = [
        albu.Resize(height=height, width=width, always_apply=True),
    ]
    return albu.Compose(test_transform)


def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose(2, 0, 1)
    return channels_first.astype('float32')

# https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py, to_tensor     
def to0_1(x, **kwargs):
    """Divide ``x`` by 255, mapping 0..255 intensities onto 0..1.

    Extra keyword arguments are accepted and ignored.
    """
    scaled = x / 255
    return scaled

def get_preprocessing():
    """Build the preprocessing pipeline applied after augmentation.

    Two steps, in order, each registered for both ``image`` and ``mask``:
    ``to_tensor`` followed by ``to0_1``.
    """
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    unit_range = albu.Lambda(image=to0_1, mask=to0_1)
    return albu.Compose([channels_first, unit_range])

In [6]:
from PIL import Image
class Dataset(BaseDataset):
    """Map-style dataset that loads images from a list of file paths.

    Args:
        image_array: sequence of image file paths.
        augmentation: optional callable invoked as ``augmentation(image=...)``
            that returns a dict with an ``'image'`` entry.
        preprocessing: optional callable with the same calling convention,
            applied after ``augmentation``.

    Each item is a ``(file_path, image)`` pair so predictions can be traced
    back to the file they came from.
    """

    def __init__(self, image_array, augmentation=None, preprocessing=None):
        self.image_array = image_array
        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        """Load, augment and preprocess the image at index ``i``."""
        fp = self.image_array[i]

        # The context manager closes the file handle deterministically.
        # convert("RGB") guarantees an H x W x 3 array even for grayscale,
        # RGBA or CMYK files, which would otherwise break the channel
        # transpose downstream; np.array makes a writable copy that stays
        # valid after the file is closed.
        with Image.open(fp) as handle:
            image = np.array(handle.convert("RGB"))

        # apply augmentations
        if self.augmentation is not None:
            image = self.augmentation(image=image)['image']

        # apply preprocessing
        if self.preprocessing is not None:
            image = self.preprocessing(image=image)['image']

        return fp, image

    def __len__(self):
        """Number of image paths in the dataset."""
        return len(self.image_array)

In [7]:
# Directory holding one sub-folder of patches per entry; list its entries in
# sorted order so the processing order is reproducible.
patches_folder = os.path.join(DATA_FOLDER, 'normalize_patches')
all_folders = os.listdir(patches_folder)
all_folders.sort()

In [10]:
# For every folder that is not skipped, classify all of its .jpg patches and
# save the paths of the patches predicted as class 1 (tumor) to
# <DATA_FOLDER>/normalized_tumor/<folder_name>.npy.
output_folder = os.path.join(DATA_FOLDER, "normalized_tumor")
os.makedirs(output_folder, exist_ok=True)  # np.save does not create missing directories

# The transform pipelines do not depend on the folder, so build them once.
augmentation = get_augmentation()
preprocessing = get_preprocessing()

for number, folder_name in enumerate(all_folders):
    # Folders whose name has '1' at index 13 are skipped.
    # NOTE(review): presumably the names are TCGA barcodes, where that digit
    # marks a normal-tissue sample -- confirm.
    # Slicing (instead of indexing) keeps a name shorter than 14 characters
    # from raising IndexError; such a folder is processed, not skipped.
    if folder_name[13:14] == '1':
        continue
    all_images = sorted(glob.glob(os.path.join(patches_folder, folder_name, "*.jpg")))

    test_dataset = Dataset(
        all_images,
        augmentation=augmentation,
        preprocessing=preprocessing,
    )
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

    # Collect paths in a plain list and build the array once at the end:
    # this avoids re-copying the array on every batch and avoids mixing a
    # float-typed empty array with string arrays.
    tumor_paths = []

    for image_names, images in test_dataloader:
        with torch.no_grad():
            logits = model(images.cuda())
        # argmax over the class dimension yields one label per image; softmax
        # is monotonic, so it is not needed to pick the winning class.
        pred = torch.argmax(logits, dim=1).cpu().numpy()
        tumor_paths.extend(
            name for name, label in zip(image_names, pred) if label == 1
        )

    # dtype=str keeps a string array even when no tumor patch was found.
    is_tumor_image_name = np.array(tumor_paths, dtype=str)
    print("{}, all_images: {}, tumor_images: {}".format(number, len(all_images), len(is_tumor_image_name)))
    np.save(os.path.join(output_folder, "{}.npy".format(folder_name)), is_tumor_image_name)

0, all_images: 4481, tumor_images: 2455
1, all_images: 2446, tumor_images: 1714
2, all_images: 1788, tumor_images: 1275
3, all_images: 3286, tumor_images: 2840
4, all_images: 1234, tumor_images: 676
5, all_images: 2424, tumor_images: 1733
6, all_images: 2583, tumor_images: 1514
7, all_images: 1242, tumor_images: 499
8, all_images: 842, tumor_images: 515
9, all_images: 752, tumor_images: 541
11, all_images: 763, tumor_images: 35
12, all_images: 313, tumor_images: 1
13, all_images: 812, tumor_images: 121
14, all_images: 370, tumor_images: 79
16, all_images: 1034, tumor_images: 685
17, all_images: 925, tumor_images: 154
19, all_images: 618, tumor_images: 433
20, all_images: 797, tumor_images: 561
21, all_images: 4004, tumor_images: 2270
22, all_images: 4084, tumor_images: 2492
24, all_images: 988, tumor_images: 463
25, all_images: 1170, tumor_images: 820
27, all_images: 1068, tumor_images: 166
28, all_images: 1526, tumor_images: 293
30, all_images: 1068, tumor_images: 866
31, all_images: 

259, all_images: 841, tumor_images: 143
260, all_images: 928, tumor_images: 135
261, all_images: 540, tumor_images: 317
262, all_images: 488, tumor_images: 239
263, all_images: 595, tumor_images: 2
264, all_images: 724, tumor_images: 539
265, all_images: 843, tumor_images: 598
266, all_images: 338, tumor_images: 242
267, all_images: 330, tumor_images: 210
268, all_images: 249, tumor_images: 81
269, all_images: 670, tumor_images: 188
270, all_images: 300, tumor_images: 57
271, all_images: 846, tumor_images: 129
272, all_images: 440, tumor_images: 33
273, all_images: 485, tumor_images: 27
274, all_images: 336, tumor_images: 32
275, all_images: 856, tumor_images: 323
276, all_images: 493, tumor_images: 267
277, all_images: 438, tumor_images: 88
278, all_images: 340, tumor_images: 124
279, all_images: 442, tumor_images: 209
280, all_images: 384, tumor_images: 80
281, all_images: 1002, tumor_images: 520
282, all_images: 764, tumor_images: 355
283, all_images: 958, tumor_images: 546
284, all

483, all_images: 7230, tumor_images: 4902
485, all_images: 12971, tumor_images: 4289
486, all_images: 14145, tumor_images: 6301
488, all_images: 7000, tumor_images: 5211
489, all_images: 8028, tumor_images: 5978
491, all_images: 953, tumor_images: 444
492, all_images: 999, tumor_images: 105
494, all_images: 11892, tumor_images: 3931
495, all_images: 13556, tumor_images: 7614
497, all_images: 4147, tumor_images: 123
498, all_images: 4556, tumor_images: 62
500, all_images: 2644, tumor_images: 1402
501, all_images: 2949, tumor_images: 1351
502, all_images: 2251, tumor_images: 49
503, all_images: 1932, tumor_images: 467
504, all_images: 1589, tumor_images: 677
505, all_images: 1436, tumor_images: 1194
506, all_images: 1649, tumor_images: 701
507, all_images: 1825, tumor_images: 614
508, all_images: 1688, tumor_images: 183
509, all_images: 1382, tumor_images: 529
510, all_images: 9285, tumor_images: 4609
511, all_images: 6183, tumor_images: 2700
512, all_images: 4528, tumor_images: 2832
513

688, all_images: 1804, tumor_images: 1402
689, all_images: 1207, tumor_images: 467
690, all_images: 3585, tumor_images: 1440
691, all_images: 1833, tumor_images: 243
692, all_images: 1493, tumor_images: 663
693, all_images: 2573, tumor_images: 1379
694, all_images: 15969, tumor_images: 1294
695, all_images: 10372, tumor_images: 418
696, all_images: 13544, tumor_images: 1444
697, all_images: 8038, tumor_images: 1058
698, all_images: 17550, tumor_images: 137
699, all_images: 11283, tumor_images: 70
700, all_images: 12507, tumor_images: 3751
701, all_images: 9737, tumor_images: 1545
702, all_images: 13405, tumor_images: 5316
703, all_images: 15156, tumor_images: 6156
704, all_images: 10294, tumor_images: 5635
705, all_images: 11284, tumor_images: 3961
706, all_images: 12523, tumor_images: 6652


KeyboardInterrupt: 

In [None]:
# Total number of entries across all saved .npy files.
# NOTE(review): this reads from /nfs/Shared/data/tcga/tumor, whereas the
# prediction loop saves to <DATA_FOLDER>/normalized_tumor -- confirm that the
# different location is intended.
npy = glob.glob("/nfs/Shared/data/tcga/tumor/*.npy")
c = sum(len(np.load(npy_path)) for npy_path in npy)