In [1]:
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torchvision.models import densenet121, vgg16, resnet50, inception_v3
import glob
from torch.autograd import Variable
from efficientnet_pytorch import EfficientNet
import copy
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
from skimage import io, transform
from PIL import Image
from sklearn.utils import class_weight
from torch.autograd import Variable
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.externals.joblib import load



In [2]:
BASE_DIR = '/srv/app/data'

DATA_DIR = BASE_DIR + '/data'

MODEL_DIR = BASE_DIR + '/models/'

TEST_DIR = DATA_DIR + '/numpy_array/stage_2_test_images_299_roi_interpolated/'
IMAGE_FORMAT = 'npy'

BATCH_SIZE = 168

LABEL_COLUMN = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

STACKING_MODEL = BASE_DIR + '/models/stackingRF.pkl'

targets = ['ID', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural', 'any']

files_list = os.listdir(TEST_DIR)

files_ids = [x.split('.')[0] for x in files_list]

CUDA_DEVICES = [1,2,3]

In [3]:
is_cuda=False
if torch.cuda.is_available():
    is_cuda = True
print(is_cuda)    

# Detect if we have a GPU available
cuda_list = ','.join(str(c) for c in CUDA_DEVICES)
device = torch.device("cuda:{}".format(cuda_list) if torch.cuda.is_available() else "cpu")

True


# Load Test Data

In [4]:
len(files_list)

78545

In [5]:
class CustomPredictDataset(Dataset):

    def __init__(self, X, img_folder, img_ext='png', transform=None, index=None):
        """
        Args:
            X (dataframe): Dataframe with images ID.
            img_folder (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.X = X
        self.img_folder = img_folder
        self.img_ext = img_ext
        self.transform = transform
        self.index = index

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_folder, self.X.iloc[idx, 0] + '.' + self.img_ext)
        #image = np.load(img_name).astype('uint8')
        image = np.load(img_name)
        
        if self.index:
            image = image[:,:,[int(self.index)]]
            image = np.repeat(image, 3, axis=2)
        if self.transform:
        
            image = self.transform(TF.to_pil_image(image))

        return image

In [6]:
X = pd.DataFrame(files_ids, columns =['ID']) 
X_stack = pd.DataFrame(files_ids, columns =['ID']) 

# Load model

In [7]:
def predictProbas(model, model_name, transform, layer=None):
    global X_stack
    dataset = CustomPredictDataset(
                            X=X, 
                            img_folder=TEST_DIR, 
                            img_ext=IMAGE_FORMAT,
                            transform=transform[1],
                            index=layer
    )
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    outputs = torch.zeros(1, 6).to(device)
    with torch.no_grad():
        for inputs in tqdm(loader):
            inputs = inputs.to(device)
            output = model(inputs)
            if type(output) == tuple:
                output = output[0]
            output = torch.sigmoid(output)
            outputs = torch.cat((outputs, output))
    outputs = outputs[1:,:]
    labels = [item+"_"+model_name+transform[0] for item in LABEL_COLUMN]
    Y_pred = pd.DataFrame(outputs.tolist(), columns = labels)
    Y_pred = Y_pred.reset_index(drop=True)
    X_stack = X_stack.merge(Y_pred, left_index = True, right_index = True)

In [8]:
def testTimeAugmentationPredict(model, transform_list, layer=None):
    loaded_model = torch.load(MODEL_DIR+model[1])
    loaded_model.eval()
    for transform in transform_list:
        print('Transform {}'.format(str(transform)))
        predict = predictProbas(loaded_model, model[0],transform, layer)

In [9]:
def stackModelsTestTimeAugmentation(models_list, transform_list, layer=None):
    for model in models_list:
        print('Model: {}'.format(model[1]))
        predict = testTimeAugmentationPredict(model, transform_list, layer)

In [10]:
test_transf = transforms.Compose([
    transforms.ToTensor()
])

test_transfA1 = transforms.Compose([
    transforms.RandomRotation((0,360)),
    transforms.ToTensor()
])

test_transfA2 = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])

test_transfA3 = transforms.Compose([
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor()
])

transforms_list = [('T1', test_transf), ('T2', test_transfA1), ('T3', test_transfA2), ('T4', test_transfA3)]

In [None]:
models = [('ResNet','FineTuningResNet50AttentionMultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.07118233637800009.pt'),
         ('DenseNet','FineTuningDensenet121MultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.06919282247931359.pt')]

stackModelsTestTimeAugmentation(models, transforms_list)

Model: FineTuningResNet50AttentionMultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.07666666343915433.pt


  0%|          | 3/78545 [00:00<52:41, 24.84it/s]

Transform ('T1', Compose(
    ToTensor()
))


100%|██████████| 78545/78545 [28:59<00:00, 45.15it/s]
  0%|          | 5/78545 [00:00<28:34, 45.81it/s]

Transform ('T2', Compose(
    RandomRotation(degrees=(0, 360), resample=False, expand=False)
    ToTensor()
))


 44%|████▎     | 34266/78545 [12:42<17:35, 41.96it/s]

# Load stacking model

In [13]:
stack = load(STACKING_MODEL)
X_stacked = X_stack[stack['X_train_empty'].columns]

Y_stack = stack['rf'].predict_proba(X_stacked)

In [14]:
Y_array = np.array(Y_stack)[:,:,1]

In [None]:
Y_array

In [15]:
data = {
    LABEL_COLUMN[0]: Y_array[0,:],
    LABEL_COLUMN[1]: Y_array[1,:],
    LABEL_COLUMN[2]: Y_array[2,:],
    LABEL_COLUMN[3]: Y_array[3,:],
    LABEL_COLUMN[4]: Y_array[4,:],
    LABEL_COLUMN[5]: Y_array[5,:]
}

# Save to DataFrame

In [16]:
Y = pd.DataFrame.from_dict(data)
Y = Y.reset_index(drop=True)

In [17]:
len(Y)

78545

In [18]:
pred = X.merge(Y, left_index = True, right_index = True)
pred = pred[targets]
pred = pd.melt(pred, id_vars=['ID'], value_vars=targets[1:])
pred['ID'] = pred['ID']+'_'+pred['variable']
pred = pred.drop('variable', axis =1)
pred.columns = ['ID', 'Label']

In [19]:
len(pred) #471270

471270

In [21]:
pred.head()

Unnamed: 0,ID,Label
0,ID_8e9de75bb_epidural,0.0
1,ID_d9a58cb3f_epidural,0.0
2,ID_b63a27c4b_epidural,0.0
3,ID_822c2893f_epidural,0.0
4,ID_6e0130955_epidural,0.0


# Save to CSV

In [20]:
pred.to_csv(DATA_DIR + '/predicts/stage_2_pred02.csv', index=False)