In [1]:
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torchvision.models import densenet121, vgg16, resnet50, inception_v3
import glob
from torch.autograd import Variable
from efficientnet_pytorch import EfficientNet
import copy
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
from skimage import io, transform
from PIL import Image
from sklearn.utils import class_weight
from torch.autograd import Variable
from torch.optim.lr_scheduler import ReduceLROnPlateau
from dask.distributed import Client
from dask import array as da
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
# --- Paths -------------------------------------------------------------------
BASE_DIR = '/srv/app/data'

DATA_DIR = BASE_DIR + '/data'

# Trailing slash is intentional: checkpoint file names are appended directly.
MODEL_DIR = BASE_DIR + '/models/'

# Folder holding one array file per image; the file stem is the image ID.
TRAIN_DIR = DATA_DIR + '/numpy_array/stage_2_train_images_299_roi_interpolated/'
IMAGE_FORMAT = 'npy'

# Number of images sent through the network per forward pass.
BATCH_SIZE = 168

# Output heads of the networks, in the order the models emit them.
LABEL_COLUMN = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

# NOTE(review): not referenced by any cell visible here; presumably the column
# layout of the label CSV - confirm before relying on it.
targets = ['ID', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural', 'any']

TRAIN_LABELS = DATA_DIR + '/stage_1_train_pivoted_z.csv'

# CUDA ordinals this notebook is allowed to use.
CUDA_DEVICES = [1,2,3]

# Image IDs that are excluded from prediction.
BLACK_LIST_ID = ['ID_6431af929', 'ID_8da38f2e4', 'ID_0e21abf7a', 'ID_470e639ae', 'ID_d91d52bdc', 
                 'ID_dfcb69305', 'ID_5005bcb25']

# --- Image inventory ---------------------------------------------------------
# os.listdir() returns entries in arbitrary order and includes anything that
# happens to live in the folder (checkpoint dirs, temp files, ...).  Keep only
# real image files and sort them so the row order of every downstream frame is
# reproducible across runs and machines.
_image_suffix = '.' + IMAGE_FORMAT
files_list = sorted(name for name in os.listdir(TRAIN_DIR) if name.endswith(_image_suffix))

# Strip only the final extension, so the ID rebuilds the exact file name later.
files_ids = [os.path.splitext(name)[0] for name in files_list]

In [3]:
# Report whether any CUDA device is visible to this kernel.
is_cuda = torch.cuda.is_available()
print(is_cuda)

# Kept for reference/logging: the configured ordinals as a comma-separated string.
cuda_list = ','.join(str(c) for c in CUDA_DEVICES)

# A torch.device names exactly ONE device ("cuda:<ordinal>"); a comma-separated
# list such as "cuda:1,2,3" is not a valid device string.  Use the first
# configured ordinal as the primary device and fall back to the CPU when CUDA is
# unavailable.  (Spreading work over several GPUs is done by wrapping the model,
# e.g. nn.DataParallel(model, device_ids=CUDA_DEVICES), not via the device string.)
primary_cuda = CUDA_DEVICES[0] if CUDA_DEVICES else 0
device = torch.device("cuda:{}".format(primary_cuda) if is_cuda else "cpu")

True


# Load Data for Prediction (training images)

In [4]:
class CustomPredictDataset(Dataset):
    """Label-free dataset that yields one image per row of ``X`` for inference.

    Each image is read with ``np.load`` from
    ``<img_folder>/<ID>.<img_ext>``, optionally reduced to a single channel
    (replicated three times), and optionally passed through ``transform``.
    """

    def __init__(self, X, img_folder, img_ext='png', transform=None, index=None):
        """
        Args:
            X (dataframe): Dataframe with images ID (column ``ID``).
            img_folder (string): Directory with all the images.
            img_ext (string): File extension (without the dot) appended to the
                ID. Files are read with ``np.load`` regardless of this value.
            transform (callable, optional): Optional transform to be applied
                on a sample. It receives a PIL image.
            index (int, optional): Channel to keep. When given (0 included),
                that channel is selected along the last axis and repeated
                three times; ``None`` keeps the array as stored.
        """
        self.X = X
        self.img_folder = img_folder
        self.img_ext = img_ext
        self.transform = transform
        self.index = index

    def __len__(self):
        """Return the number of images, i.e. the number of rows in ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Load the ``idx``-th image (positional) and apply channel pick / transform."""
        img_name = os.path.join(self.img_folder, self.X.iloc[idx].ID + '.' + self.img_ext)
        image = np.load(img_name)

        # Compare against None instead of relying on truthiness: channel 0 is a
        # valid selection and must not be mistaken for "no channel requested".
        if self.index is not None:
            # List-indexing keeps the channel axis, so repeat() yields (..., 3).
            image = image[:, :, [int(self.index)]]
            image = np.repeat(image, 3, axis=2)

        if self.transform:
            image = self.transform(TF.to_pil_image(image))

        return image

In [6]:
# One row per image to score, black-listed IDs removed.
# The index is reset so that row labels are 0..n-1 again: predictions are later
# attached by index, and gaps left by the filter would otherwise shift every
# prediction onto the wrong image (and silently drop the tail rows).
X = pd.DataFrame(files_ids, columns =['ID']) 
X = X.loc[~X.ID.isin(BLACK_LIST_ID)].reset_index(drop=True)

# Accumulator for the stacked features; starts as an independent copy of the
# ID frame and gains one block of columns per (model, transform) pair.
X_stack = X.copy()

In [7]:
# Number of image IDs left after black-list filtering (one row per image to score).
len(X)

752796

# Load model

In [8]:
def predictProbas(model, model_name, transform, layer=None):
    """Score every image in ``X`` with ``model`` and append the result to ``X_stack``.

    Args:
        model: Network already in eval mode. Its output (or the first element
            of a tuple output) is passed through a sigmoid.
        model_name (str): Short tag used in the generated column names.
        transform (tuple): ``(tag, callable)``; the callable is applied to each
            image and the tag is appended to the column names.
        layer (int, optional): Channel index forwarded to the dataset.

    Side effects:
        Rebinds the global ``X_stack`` with ``len(LABEL_COLUMN)`` new columns
        named ``<label>_<model_name><tag>``. Returns nothing.
    """
    global X_stack
    dataset = CustomPredictDataset(
                            X=X, 
                            img_folder=TRAIN_DIR, 
                            img_ext=IMAGE_FORMAT,
                            transform=transform[1],
                            index=layer
    )
    # shuffle=False: predictions must come back in the same order as the rows of X.
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # Collect per-batch results on the CPU and concatenate once at the end.
    # Growing a GPU tensor with torch.cat on every step re-copies everything
    # gathered so far (quadratic work) and keeps all predictions in GPU memory.
    batches = []
    with torch.no_grad():
        for inputs in tqdm(loader):
            inputs = inputs.to(device)
            output = model(inputs)
            # Some models return (logits, aux, ...); isinstance also covers
            # namedtuple outputs, which an exact type comparison would miss.
            if isinstance(output, tuple):
                output = output[0]
            batches.append(torch.sigmoid(output).cpu())

    if batches:
        outputs = torch.cat(batches)
    else:
        # Empty input frame: keep the (0, n_labels) shape the merge expects.
        outputs = torch.zeros(0, len(LABEL_COLUMN))

    labels = [item+"_"+model_name+transform[0] for item in LABEL_COLUMN]
    # Give the predictions the index of the frame they were computed from, so
    # the index-based merge pairs each prediction with its own image even when
    # that index has gaps.  A length mismatch raises here instead of
    # being silently truncated by the inner join.
    Y_pred = pd.DataFrame(outputs.tolist(), columns = labels, index = X.index)
    X_stack = X_stack.merge(Y_pred, left_index = True, right_index = True)

In [9]:
def testTimeAugmentationPredict(model, transform_list, layer=None):
    """Load one checkpoint and score the data once per transform.

    Args:
        model (tuple): ``(short_name, checkpoint_file)``; the file is looked up
            under ``MODEL_DIR``.
        transform_list (list): ``(tag, callable)`` pairs, each handed to
            ``predictProbas`` in turn.
        layer (int, optional): Channel index forwarded to ``predictProbas``.
    """
    # NOTE: torch.load unpickles the file, which can execute arbitrary code -
    # only load checkpoints from a trusted source.
    # map_location puts the weights on the same device the input batches are
    # moved to, and lets a checkpoint saved on a GPU load on a CPU-only machine.
    loaded_model = torch.load(MODEL_DIR+model[1], map_location=device)
    loaded_model.eval()
    for transform in transform_list:
        print('Transform {}'.format(str(transform)))
        predictProbas(loaded_model, model[0],transform, layer)

In [10]:
def stackModelsTestTimeAugmentation(models_list, transform_list, layer=None):
    """Run test-time-augmentation prediction for each model in turn.

    Args:
        models_list (list): ``(short_name, checkpoint_file)`` pairs.
        transform_list (list): ``(tag, callable)`` pairs applied for every model.
        layer (int, optional): Channel index passed through unchanged.
    """
    for spec in models_list:
        checkpoint_file = spec[1]
        print('Model: {}'.format(checkpoint_file))
        testTimeAugmentationPredict(spec, transform_list, layer)

In [11]:
def _then_to_tensor(*augmentations):
    """Compose the given augmentations, in order, followed by ``ToTensor()``."""
    return transforms.Compose([*augmentations, transforms.ToTensor()])


# NOTE: the augmentations below are random and no seed is set in this notebook,
# so the augmented predictions are not reproducible from run to run.

# T1 - no augmentation, tensor conversion only.
test_transf = _then_to_tensor()

# T2 - rotation by an angle drawn from [0, 360] degrees.
test_transfA1 = _then_to_tensor(transforms.RandomRotation((0,360)))

# T3 - horizontal flip applied with probability 0.5.
test_transfA2 = _then_to_tensor(transforms.RandomHorizontalFlip(p=0.5))

# T4 - vertical flip applied with probability 0.5.
test_transfA3 = _then_to_tensor(transforms.RandomVerticalFlip(p=0.5))

# (tag, pipeline) pairs; the tag becomes the suffix of the prediction columns.
transforms_list = list(zip(
    ('T1', 'T2', 'T3', 'T4'),
    (test_transf, test_transfA1, test_transfA2, test_transfA3),
))

In [None]:
# (short name, checkpoint file under MODEL_DIR) for every base model to stack.
models = [
    (
        'ResNet',
        'FineTuningResNet50AttentionMultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.07118233637800009.pt',
    ),
    (
        'DenseNet',
        'FineTuningDensenet121MultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.06919282247931359.pt',
    ),
]

# Long-running: one full pass over the data per (model, transform) pair.
stackModelsTestTimeAugmentation(models_list=models, transform_list=transforms_list)

Model: FineTuningResNet50AttentionMultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.07118233637800009.pt


  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T1', Compose(
    ToTensor()
))


100%|██████████| 4481/4481 [54:23<00:00,  1.37it/s]
  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T2', Compose(
    RandomRotation(degrees=(0, 360), resample=False, expand=False)
    ToTensor()
))


100%|██████████| 4481/4481 [56:49<00:00,  1.31it/s]
  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T3', Compose(
    RandomHorizontalFlip(p=0.5)
    ToTensor()
))


100%|██████████| 4481/4481 [54:38<00:00,  1.37it/s] 
  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T4', Compose(
    RandomVerticalFlip(p=0.5)
    ToTensor()
))


100%|██████████| 4481/4481 [43:31<00:00,  1.72it/s]


Model: FineTuningDensenet121MultiTaskV2_SGDMomentumV7_WeightedMultiLabelLogLoss_imgsize299_loss0.06919282247931359.pt


  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T1', Compose(
    ToTensor()
))


100%|██████████| 4481/4481 [44:54<00:00,  1.66it/s]
  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T2', Compose(
    RandomRotation(degrees=(0, 360), resample=False, expand=False)
    ToTensor()
))


100%|██████████| 4481/4481 [1:04:36<00:00,  1.16it/s]
  0%|          | 0/4481 [00:00<?, ?it/s]

Transform ('T3', Compose(
    RandomHorizontalFlip(p=0.5)
    ToTensor()
))


 91%|█████████ | 4058/4481 [1:13:55<47:31,  6.74s/it]

In [None]:
# Every image that was scored must still be present after the prediction
# columns were attached; a shorter frame means rows were lost in a merge.
assert len(X_stack) == len(X), (
    'X_stack has {} rows but {} images were scored'.format(len(X_stack), len(X))
)
len(X_stack) #752796

In [None]:
# Persist the stacked predictions.  Create the target folder first so that
# hours of inference are not lost to a missing directory.
predictions_path = DATA_DIR + '/predicts/stage_2_train_pred02.csv'
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
X_stack.to_csv(predictions_path, index=False)

In [None]:
# Ground-truth labels.  Loaded here explicitly so the cell works on a fresh
# kernel instead of relying on a `data` variable left over from elsewhere.
# NOTE(review): assumes the CSV has an 'ID' column plus one column per entry of
# LABEL_COLUMN - confirm against the file.
data = pd.read_csv(TRAIN_LABELS)
labels_by_id = data[['ID'] + LABEL_COLUMN].drop_duplicates(subset='ID')

# Join on the image ID, not on the row index: the two frames are built from
# different sources and their row order is unrelated.  Images without a label
# are dropped, and features/labels are both taken from the joined frame so they
# stay row-aligned.
stacked_with_labels = X_stack.merge(labels_by_id, on='ID', how='inner')
assert len(stacked_with_labels) > 0, 'no image ID matched the label file'

feature_columns = [col for col in X_stack.columns if col != 'ID']
X_stacked = stacked_with_labels[feature_columns]
Y = stacked_with_labels[LABEL_COLUMN]
Y_labels = Y.to_numpy()
Y_labels.shape

# Train Random Forest

In [None]:
# Meta-learner fitted on the stacked base-model predictions.
# random_state is fixed so the forest itself is reproducible.
rf = RandomForestClassifier(
    n_estimators=500,
    n_jobs=-1,
    random_state=1,
)

In [None]:
# Fit the forest: stacked prediction columns as features, label matrix as targets.
rf.fit(X=X_stacked, y=Y_labels)

# Save model

In [None]:
# `sklearn.externals.joblib` was removed from scikit-learn; the standalone
# `joblib` package is the replacement.  Fall back to the vendored copy only on
# old installations that still ship it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Zero-row slice: stores the column layout without any data.
# NOTE(review): this keeps the 'ID' column, whereas `rf` was fitted on
# X_stacked (no 'ID') - confirm that is what the consumer expects.
X_train_empty = X_stack[0:0]
joblib.dump({'rf':rf,'X_train_empty':X_train_empty}, MODEL_DIR + 'stackingRF.pkl')