To obtain a prediction of the class we:
1. create **several** versions of the test image using an augmentation policy
2. use the model's training resolution as a **sliding window** to obtain a scan of the probability over multiple patches in the test image variants
3. **aggregate** the probabilities to get a final score
4. **reduce** the probabilities to yes / no melanoma.



In [1]:
import numpy as np
import pandas as pd
import os
import sys
import random
from collections import defaultdict


import pydicom
import skimage
import albumentations
import cv2
from torchvision import transforms

import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torchvision.utils import *

from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import get_image_backend

from PIL import Image, ImageOps, ImageEnhance

import natsort


from tqdm import tqdm

import timm
import pretrainedmodels
from efficientnet_pytorch import EfficientNet
from torchvision import models as tv_models
torchvision_models = dir(tv_models)


In [2]:
# use patient information: if a patient has one image with melanoma it is unlikely that the others are melanoma as well

In [3]:
#root = '/home/bramiozo/DATA/ISIC2020'
# Root directory of the melanoma data on the author's machine.
root = '/media/bramiozo/DATA-FAST/kaggle/image_classification/MEDICAL/melanoma'
# Make `root` the working directory so that the relative paths used further
# down (e.g. "TRAINING_1024/..." and "../_models/...") resolve against it.
os.chdir(root)

In [4]:
# Read the test-set metadata table (path is relative to the working directory).
load_patient_mapping = pd.read_csv("TRAINING_1024/ISIC_2020_test.csv")
# Lookup table {image_name: patient_id} built from those two columns.
img_pat = load_patient_mapping[['image_name', 'patient_id']].set_index('image_name').to_dict()['patient_id']

In [5]:
# Number of GPU indices exported through CUDA_VISIBLE_DEVICES in the device
# set-up cell; also decides whether models are wrapped in nn.DataParallel.
num_gpu = 1
# When True the device set-up cell exports an empty CUDA_VISIBLE_DEVICES.
use_cpu = False
# When True create_model_dict re-loads each checkpoint through apex AMP.
use_amp = False
# When True create_loader puts the crop/resize step BEFORE the augmentations,
# otherwise it is appended AFTER normalisation.
begin_resize = False
# NOTE(review): this module-level stride is not referenced by the functions
# visible in this file; model_run reads the per-setting 'stride' below.
stride = 280
# Key into class_maps / class_weights selecting the active label scheme.
map_sel = 2 
num_classes=2
test_count = 10982 # number of test samples
# NOTE(review): a function named `smart_mean` is defined further down in this
# file; once that definition has run, this name refers to the function and
# no longer to this boolean flag.
smart_mean = True
get_all_probas = False # USE TO EXTRACT MORE LABELS
submit_to_kaggle = False
# When True model_run stops each TTA round after 50 images.
debug = False

# One settings dict per training type. create_loader reads 'resize',
# 'randomcrop' and 'centercrop'; model_run reads 'stride', 'scan_res', 'TTA',
# 'proba_aggregation' and 'use_inference_weights'.
run_settings = defaultdict(dict)
run_settings['resize'] = {'resize': (300, 300), 'scan_res': 290, 'randomcrop': None, 'centercrop': None, 
                          'stride': 280, 'TTA': 7, 'proba_aggregation': 'mean_softmax',
                          'use_inference_weights': False}
run_settings['centercrop'] = {'resize': None, 'scan_res': 150, 'randomcrop': (500,300), 'centercrop': None,
                              'stride': 140, 'TTA': 7, 'proba_aggregation': 'logmean',
                              'use_inference_weights': False}

# proba_aggregation: mean_softmax, softmax_mean_linear, logmean

# Nested mapping {architecture: {training type: path of the serialized model}}.
# Paths are relative to the working directory set via os.chdir(root).
# NOTE(review): the architecture keys used here ('efficient_net_b0',
# 'regnet_064') do not appear in any of the model-name lists consulted by
# load_base_model, which create_model_dict calls with these keys when
# use_amp is True -- confirm before enabling AMP.
modelloc_dict = defaultdict(dict)

#modelloc_dict['xception']['resize'] = '../_models/melanoma/xception_DepthALL_40epochs_lastALLlayers_resized_balancingWsampler_batchsize32_optimizerAdaTuneAdam_numClass2_weightedLoss_wAugmentation_res400x400_dat_numGPU1_cross_entropy.pyth'

modelloc_dict['efficient_net_b0']['resize'] = '../_models/melanoma/good/tf_efficientnet_b0_ns_DepthALL_60epochs_lastALLlayers_resized_balancingWsampler_batchsize32_optimizerAdaTuneAdam_numClass2_weightedLoss_wAugmentation_res300x300_datTRAINING_1024_BINOMIAL_numGPU1_cross_entropy_lr1e-4.pyth'
#modelloc_dict['efficient_net_b0']['centercrop'] = '../_models/melanoma/tf_efficientnet_b0_ns_DepthALL_20epochs_lastALLlayers_randomcropped_balancingWsampler_batchsize32_optimizerAdaTuneAdam_numClass2_weightedLoss_wAugmentation_res300x300_datTRAINING_1024_BINOMIAL_numGPU1_cross_entropy.pyth'
#modelloc_dict['efficient_net_b0']['segmentation'] =

modelloc_dict['regnet_064']['resize'] = '../_models/melanoma/good/regnety_064_DepthALL_60epochs_lastALLlayers_resized_balancingWsampler_batchsize32_optimizerAdaTuneAdam_numClass2_weightedLoss_wAugmentation_res300x300_datTRAINING_1024_BINOMIAL_numGPU1_cross_entropy_lr4e-5.pyth'
modelloc_dict['regnet_064']['centercrop'] = '../_models/melanoma/good/regnety_064_DepthALL_40epochs_lastALLlayers_randomcropped_balancingWsampler_batchsize32_optimizerAdaTuneAdam_numClass2_weightedLoss_wAugmentation_res300x300_datTRAINING_1024_BINOMIAL_numGPU1_cross_entropy.pyth'
#modelloc_dict['regnet_064']['segmentation'] =



# Directory (relative to the working directory) that TestDataSet lists for
# the test images.
image_loc = 'TRAINING_1024/TEST'

We have a model dictionary, that is indexed as follows:
```
model_name: 
    resize:
    centercrop:
    segmentation:
```

For the ```resize``` models we resize the test images, for the ```centercrop``` models we apply a scan on the test images and for the ```segmentation``` models we need to apply the segmentation model on the images before classification. I.e. three different test image treatments, for each model, for each TTA run.

If we assume the following imagenet pre-trained models:
* 3x efficientnet; say B0, B2, B6
* 3x regnet; say 006, 032 and 16
* Xception
* InceptionV4

We end up with ```(2+#-scans) x #-TTA x #-num-models``` inference runs. I.e. with a minimum of 4 scans and a TTA of 3 we have ```18 * #-num-models``` inference runs, or, with a TTA of 5, ```30 * #-num-models``` inference runs.
One inference run on an RTX2080 takes about **5** minutes, i.e. with 30 runs about **2.5** hours of inference **PER MODEL**.

In [6]:
# IDEA: put class_map inside the model..
# Label -> class-index tables, one per labelling scheme; `map_sel` selects
# which scheme is active.
class_maps = {1:{'ak': 0, 'anv': 1, 'bcc': 2, 'bkl': 3, 'df': 4, 'mel': 5, 'nv': 6, 'scc': 7,'vasc': 8},
              2:{'non-mel':0, 'mel': 1},
              3:{'ak': 0, 'anv': 1, 'bcc':2, 'bkl':3, 'df':4, 'mel':5, 'misc':6, 'nv':7, 'scc':8, 'vasc':9}
             }

# Label -> weight tables keyed by the same scheme numbers as class_maps.
# NOTE(review): the ratios look like inverse class frequencies -- confirm
# against the training notebook.
class_weights = {1:{'ak': 70000/1000, 'anv': 70000/500, 'bcc': 70000/4000, 'bkl': 70000/3000, 
                    'df': 70000/500, 'mel': 70000/6000, 'nv': 70000/20000, 'scc': 70000/1000,
                    'vasc': 70000/500},
                 2:{'non-mel': 33/6, 'mel': 33/27},
                 3:{'ak': 70000/1000, 'anv': 70000/500, 'bcc': 70000/4000, 'bkl': 70000/3000, 
                    'df': 70000/500, 'mel': 70000/6000, 'nv': 70000/20000, 'scc': 70000/1000,
                    'vasc': 70000/500, 'misc': 70000/27000}
                }

# Active scheme.
class_map = class_maps[map_sel]
class_weight = class_weights[map_sel]
# Inverse lookup class-index -> label; both names are bound to the SAME dict.
ind_class = inds_class = {v:k for k,v in class_map.items()}
# Weights re-keyed by class index instead of label.
ind_weights = {class_map[k]:v for k,v in class_weight.items()}

In [7]:
class WrappedModel(nn.Module):
    """Thin container that exposes a network under the attribute ``module``.

    The attribute name is what determines the ``module.`` prefix of the
    parameter names in this wrapper's ``state_dict``.
    """

    def __init__(self, module):
        super().__init__()
        # The network that actually does the work.
        self.module = module

    def forward(self, x):
        """Delegate the forward pass to the wrapped network."""
        wrapped = self.module
        return wrapped(x)
    
class TestDataSet(Dataset):
    """Unlabelled image-folder dataset.

    Every entry of ``main_dir`` is treated as an image file; entries are
    visited in natural sort order. Each item is the pair
    ``(transform(image), path)``.
    """
    # https://discuss.pytorch.org/t/how-does-concatdataset-work/60083

    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        # Natural sorting keeps e.g. "img_2" before "img_10".
        self.total_imgs = natsort.natsorted(os.listdir(main_dir))

    def __len__(self):
        """Number of directory entries found at construction time."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Return the transformed RGB image at ``idx`` together with its path."""
        file_name = self.total_imgs[idx]
        img_loc = os.path.join(self.main_dir, file_name)
        rgb_image = Image.open(img_loc).convert("RGB")
        return self.transform(rgb_image), img_loc
    
class Tinterpolate:
    """Resize a tensor image to ``size`` by round-tripping through PIL.

    Args:
        size: target size, forwarded unchanged to torchvision.
        random_resize_crop: when True a random resized crop of ``size`` is
            taken instead of a plain resize.
    """

    def __init__(self, size, random_resize_crop=False):
        self.size = size
        self.rnd_resize_crop = random_resize_crop

    def __call__(self, x):
        """Return the resized (or randomly cropped-and-resized) image tensor."""
        pil_img = TF.to_pil_image(x)
        if self.rnd_resize_crop:
            # RandomResizedCrop is a transform *class* in torchvision.transforms;
            # torchvision.transforms.functional has neither `RandomResizedCrop`
            # nor `to_pil_images`, so the previous call could never succeed.
            pil_img = transforms.RandomResizedCrop(size=self.size)(pil_img)
        else:
            pil_img = TF.resize(pil_img, size=self.size)
        return TF.to_tensor(pil_img)

    def __repr__(self):
        return (f'{self.__class__.__name__}(size={self.size}, '
                f'random_resize_crop={self.rnd_resize_crop})')
        

# credits: https://www.kaggle.com/c/siim-isic-melanoma-classification/discussion/159476
# credits: https://www.kaggle.com/allunia/don-t-turn-into-a-smoothie-after-the-shake-up
def random_microscope(img):
    """Black out everything outside a random circle centred on the image.

    The radius is drawn uniformly from ``[h//1.75 - 1, h//1.75 + 15)`` where
    ``h`` is the number of rows, i.e. the same range as before.

    Args:
        img: array of shape (rows, cols) or (rows, cols, channels).

    Returns:
        A new array of the same shape and dtype with pixels outside the
        circle set to 0.
    """
    img = np.asarray(img)
    height, width = img.shape[:2]

    # Integer bounds: randint is only defined for integers.
    base_radius = int(height // 1.75)
    radius = np.random.randint(base_radius - 1, base_radius + 15)

    # Centre the circle on (row, col) = (height//2, width//2). The earlier
    # cv2.circle call was given (rows//2, cols//2) where (x, y) is expected,
    # which put the circle off-centre on non-square images.
    rows, cols = np.ogrid[:height, :width]
    inside = (rows - height // 2) ** 2 + (cols - width // 2) ** 2 <= radius ** 2
    if img.ndim == 3:
        inside = inside[..., np.newaxis]  # broadcast over the channel axis

    # Explicit 0/1 mask in the image dtype instead of relying on unsigned
    # wrap-around arithmetic.
    return img * inside.astype(img.dtype)

class Microscope:
    """
    Randomly blacks out the region outside a centred circle, imitating a
    picture taken through a microscope.

    Args:
        p (float): probability of applying the augmentation
    """

    def __init__(self, p: float = 0.5):
        self.p = p

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to apply transformation to.

        Returns:
            PIL Image: Image with transformation (a uint8 copy of the input
            when the augmentation is not applied).
        """
        pixels = np.asarray(img)
        apply = np.random.random() < self.p
        if apply:
            pixels = random_microscope(pixels)
        return Image.fromarray(np.uint8(pixels))

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)

class AutoContrast:
    """Apply PIL's ``ImageOps.autocontrast`` with probability ``p``.

    Args:
        p: probability of applying the operation.
        cutoff: forwarded as the ``cutoff`` argument of ``autocontrast``.
    """

    def __init__(self, p: float= 0.5, cutoff: float = 0.2):
        self.p = p
        self.cutoff = cutoff

    def __call__(self, img):
        """Return the auto-contrasted image, or ``img`` itself when skipped."""
        apply = np.random.random() < self.p
        if not apply:
            return img
        return ImageOps.autocontrast(img, cutoff=self.cutoff)

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)
    
class Equalize:
    """Apply PIL's ``ImageOps.equalize`` with probability ``p``."""

    def __init__(self, p: float= 0.5):
        self.p = p

    def __call__(self, img):
        """Return the equalized image, or ``img`` itself when skipped."""
        apply = np.random.random() < self.p
        if not apply:
            return img
        return ImageOps.equalize(img)

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)
    
class Sharpness:
    """Randomly change image sharpness with probability ``p``.

    The enhancement factor is drawn uniformly between two neighbouring
    entries of an 11-step grid spanning 0.1 .. 1.9; ``magnitude`` is the
    index of the lower entry.
    """

    def __init__(self, p: float= 0.5, magnitude: int=3):
        self.p = p
        self.magnitude = magnitude
        self.magnitudes = np.linspace(0.1, 1.9, 11)

    def __call__(self, img):
        """Return the re-sharpened image, or ``img`` itself when skipped."""
        apply = np.random.random() < self.p
        if not apply:
            return img
        enhancer = ImageEnhance.Sharpness(img)
        lower = self.magnitudes[self.magnitude]
        upper = self.magnitudes[self.magnitude+1]
        return enhancer.enhance(np.random.uniform(lower, upper))

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)

class GaussianBlur:
    """Blur the image with albumentations' gaussian blur with probability ``p``.

    ``sigma`` is stored but not forwarded to the blur call. When the blur is
    applied the result is a NumPy array; otherwise the input is returned
    unchanged.
    """

    def __init__(self, p=0.5, ksize=3, sigma=1):
        self.p = p
        self.ksize = ksize
        self.sigma = sigma

    def __call__(self, img):
        """Return the blurred array, or ``img`` itself when skipped."""
        apply = np.random.random()<self.p
        if not apply:
            return img
        pixels = np.array(img)
        return albumentations.augmentations.transforms.F.gaussian_blur(pixels, self.ksize)

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)
# albumentations.augmentations.transforms.Cutout(num_holes=5, 
#max_h_size=32, max_w_size=32, fill_value=0, always_apply=False, p=0.2)
#from albumentations.augmentations.transforms import Cutout as AlbCutout

class CutOut:
    """Fill a random number of random rectangles with ``fill_value``.

    With probability ``p`` the number of holes is drawn from
    ``[min_holes, max_holes)`` and each hole gets a height from
    ``[min_height, max_height)`` and a width from ``[min_width, max_width)``.
    When applied the result is a NumPy array; otherwise the input is
    returned unchanged.
    """

    def __init__(self, p: float=0.5, min_holes: int=2, max_holes: int=6,
                 fill_value: int=0, max_height: int=32, max_width: int=32,
                 min_height: int=8, min_width: int=8):
        self.p = p
        self.min_holes, self.max_holes = min_holes, max_holes
        self.fill_value = fill_value
        self.max_height, self.max_width = max_height, max_width
        self.min_height, self.min_width = min_height, min_width

    def __call__(self, img):
        """Return the array with holes cut out, or ``img`` itself when skipped."""
        apply = np.random.random()<self.p
        if not apply:
            return img

        pixels = np.array(img)
        img_h, img_w = pixels.shape[:2]

        n_holes = np.random.randint(self.min_holes, self.max_holes)
        boxes = []
        for _ in range(n_holes):
            # Size first, then the top-left corner, so the hole fits the image.
            box_h = np.random.randint(self.min_height, self.max_height)
            box_w = np.random.randint(self.min_width, self.max_width)
            top = np.random.randint(0, img_h - box_h)
            left = np.random.randint(0, img_w - box_w)
            boxes.append((left, top, left + box_w, top + box_h))

        return albumentations.augmentations.transforms.F.cutout(pixels, boxes, self.fill_value)

    def __repr__(self):
        return '{}(p={})'.format(type(self).__name__, self.p)
    
class Hair:
    """
    Impose images of hairs onto the target image.

    Args:
        hairs (int): maximum number of hairs to impose
        p (float): probability of applying the augmentation
        scale (float): stored on the instance; not used by ``__call__``
        hairs_folder (str): path to the folder with hairs images
    """

    def __init__(self, hairs: int = 1, p: float = 0.1, scale: float = 0.1, hairs_folder: str = ""):
        self.p = p
        # Honour the argument (it used to be overwritten with a constant).
        self.scale = scale
        self.hairs = hairs
        self.hairs_folder = hairs_folder

    def __call__(self, img):
        """
        Args:
            img (PIL Image): RGB image to draw hairs on.

        Returns:
            ``img`` itself when the augmentation is skipped, otherwise an
            RGB NumPy array with the hairs drawn in.
        """
        if not (np.random.random() < self.p):
            return img
        if self.hairs < 1:
            return img

        # randint's upper bound is exclusive: +1 makes `hairs` the inclusive
        # maximum and keeps the default hairs=1 from raising.
        n_hairs = np.random.randint(1, self.hairs + 1)

        img = np.array(img)
        height, width, _ = img.shape  # target image height and width
        hair_images = [im for im in os.listdir(self.hairs_folder) if 'png' in im]

        for _ in range(n_hairs):
            hair = cv2.imread(os.path.join(self.hairs_folder, np.random.choice(hair_images)))
            # cv2.imread yields BGR; bring the overlay into the RGB order of
            # the (PIL-derived) target before compositing.
            hair = cv2.cvtColor(hair, cv2.COLOR_BGR2RGB)
            hair = cv2.flip(hair, int(np.random.choice([-1, 0, 1])))
            hair = cv2.rotate(hair, int(np.random.choice([0, 1, 2])))
            # The overlay is 50 px smaller than the target in both directions,
            # which leaves room for a random offset below.
            hair = cv2.resize(hair, (width - 50, height - 50))

            h_height, h_width, _ = hair.shape  # hair image height and width
            roi_ho = np.random.randint(0, height - h_height)
            roi_wo = np.random.randint(0, width - h_width)
            roi = img[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width]

            # Creating a mask and inverse mask
            hair_gray = cv2.cvtColor(hair, cv2.COLOR_RGB2GRAY)
            _, mask = cv2.threshold(hair_gray, 10, 255, cv2.THRESH_BINARY)
            mask_inv = cv2.bitwise_not(mask)
            # Now black-out the area of hair in ROI
            img_bg = cv2.bitwise_and(roi, roi, mask=mask_inv)
            # Take only region of hair from hair image.
            hair_fg = cv2.bitwise_and(hair, hair, mask=mask)
            # Put hair in ROI and modify the target image
            img[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width] = cv2.add(img_bg, hair_fg)

        # No channel swap on the way out: the target was RGB all along, and
        # swapping it here used to exchange its red and blue channels.
        return img

    def __repr__(self):
        return f'{self.__class__.__name__}(hairs={self.hairs}, hairs_folder="{self.hairs_folder}")'





In [8]:
# Expose GPUs 0 .. num_gpu-1 to CUDA, or none at all when use_cpu is set.
gpu_str = ",".join([str(i) for i in range(num_gpu)])
gpu_str = "" if use_cpu else gpu_str
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_str
GPU_COUNT = torch.cuda.device_count() 

# Pick the torch device used by the rest of the notebook.
if GPU_COUNT>1:
    device = torch.device("cuda" if (torch.cuda.is_available()) and (use_cpu==False) else "cpu")
else:
    device = torch.device("cuda:0" if (torch.cuda.is_available()) and (use_cpu==False) else "cpu")
    
# NOTE(review): with use_cpu=True the visible-device list is empty, so this
# check presumably raises whenever num_gpu >= 1 -- confirm that is intended.
if num_gpu>GPU_COUNT:
    raise Exception("We see only {} instead of {} GPU's".format(GPU_COUNT, num_gpu))
    
if torch.cuda.is_available():
    print("{} GPU's Detected".format(GPU_COUNT))
else:
    print("GPU is NOT detected!")

1 GPU's Detected


In [9]:
# Model names per source library; load_base_model checks these lists in the
# order torchvision -> efficientnet -> pretrainedmodels -> timm.
# This assignment replaces the earlier `torchvision_models = dir(tv_models)`.
torchvision_models = ['shufflenetv2', 'wide_resnet50_2', 'wide_resnet101_2', 'squeezenet1_1', 
                      'resnext50_32x4d', 'resnext101_32x8d', 'shufflenet_v2_x2_0',
                      'mnasnet1_3']

# 'xception' is also listed under timm_models below; because of the lookup
# order it is served from pretrainedmodels.
pretrainedmodels_models = ['inceptionv4', 'resnet152', 'nasnetamobile', 'xception']

efficientnet_models = ['efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 
                       'efficientnet-b3', 'efficientnet-b4', 'efficientnet-b5',
                       'efficientnet-b6', 'efficientnet-b7', 'efficientnet-b8']

timm_models = ['tf_efficientnet_b8', 'tf_efficientnet_b8_ap', 'mixnet_xl', 'gluon_xception65', 'xception',
               'tf_efficientnet_b7_ns', 'tf_efficientnet_b6_ns', 'tf_efficientnet_b5_ns', 
               'tf_efficientnet_b4_ns', 'tf_efficientnet_b3_ns', 'tf_efficientnet_b2_ns',
               'tf_efficientnet_b1_ns', 'tf_efficientnet_b0_ns',
               'tf_efficientnet_l2_ns_475',
               'seresnext101_32x4d', 'resnext101_64x4d', 'regnety_002', 'regnety_004',
               'regnety_006', 'regnety_008', 'regnety_016', 'regnety_032', 'regnety_040',
               'regnety_064', 'regnety_080', 'regnety_120', 'regnety_160', 'regnety_320',
               'tf_mobilenetv3_large_100', 'efficientnet_b1_pruned', 'efficientnet_b2_pruned',
               'efficientnet_b3_pruned', 'densenet264d_iabn', 'mobilenetv2_120d',
               'mobilenetv3_large_100', 'mixnet_m', 'mixnet_l']


In [10]:
def load_base_model(model_name):
    """Instantiate a base network by name from one of the supported model zoos.

    The name is looked up, in this order, in the module-level lists
    ``torchvision_models``, ``efficientnet_models`` (case-insensitive),
    ``pretrainedmodels_models`` and ``timm_models``.

    Args:
        model_name: name of the architecture.

    Returns:
        The model object created by the matching library.

    Raises:
        ValueError: if the name is in none of the lists.
    """
    print(model_name)
    if model_name in torchvision_models:
        print("Pulling model from torchvision model zoo")
        # Attribute lookup instead of eval() on a concatenated string.
        return getattr(tv_models, model_name)()
    if model_name.lower() in efficientnet_models:
        print("Pulling model from efficientnet model zoo")
        return EfficientNet.from_pretrained(model_name.lower(), advprop=False)
    if model_name in pretrainedmodels_models:
        print("Pulling model from pretrainedmodels model zoo")
        return getattr(pretrainedmodels, model_name)()
    if model_name in timm_models:
        print("Pulling model from timm model zoo")
        return timm.create_model(model_name, pretrained=True)
    raise ValueError("No model found under the name {}".format(model_name))

In [11]:
def create_model_dict(modelloc_dict):
    """Load every serialized model listed in ``modelloc_dict``.

    Args:
        modelloc_dict: nested mapping ``{architecture: {train_type: path}}``
            whose leaves are file paths handed to ``torch.load``.

    Returns:
        A nested ``defaultdict`` with the same two key levels whose leaves
        are the loaded models.

    Reads the module-level names ``device``, ``num_gpu`` and ``use_amp``.
    """
    model_dict = defaultdict(lambda: defaultdict(object))
    for _arch, m in modelloc_dict.items():
        for _traintype in m.keys():
            # First load: used to find out what kind of object was saved.
            model = torch.load(modelloc_dict[_arch][_traintype], 
                                   map_location=torch.device(str(device))) 
                
            #for _p in model.named_children():
            #    pass    
            
            # Saved object is a WrappedModel: unwrap it (see `model = model.module`
            # at the end of this branch).
            if 'WrappedModel' in str(type(model)):
                try:
                    print('load model..')
                    # Strategy 1: load the file again and re-wrap the inner network.
                    model = torch.load(modelloc_dict[_arch][_traintype], 
                                       map_location=torch.device(str(device)))    
                    model = WrappedModel(model.module)
                    print('load model...')
                    # Third load of the same file, only to read its state dict.
                    _mod = torch.load(modelloc_dict[_arch][_traintype], 
                                      map_location=torch.device(str(device)))
                    _mod.module.eval()
                    print('loading weights...')
                    state_dict = _mod.module.state_dict()
                    #state_dict.pop('_fc.weight')
                    #state_dict.pop('_fc.bias')

                    if num_gpu>1:
                        model= nn.DataParallel(model)        
                    model.to(device)

                    # NOTE(review): state_dict comes from the same file that
                    # `model` was just built from, so on a single GPU this
                    # looks like a no-op. With DataParallel, `model.module` is
                    # the WrappedModel, whose parameter names presumably carry
                    # a 'module.' prefix that state_dict lacks -- confirm.
                    model.module.load_state_dict(state_dict)
                    #num_classes = model.module._fc.out_features
                except Exception as e:
                    print("Failed strategy 1, {}".format(e))    
                    try:            
                        # Strategy 2: same re-wrap, then load weights straight
                        # from the file.
                        model = torch.load(modelloc_dict[_arch][_traintype], 
                                           map_location=torch.device(str(device)))
                        model = WrappedModel(model.module)

                        if num_gpu>1:
                            model= nn.DataParallel(model) 
                        model.to(device)           

                        # NOTE(review): as parenthesised, map_location is passed
                        # to load_state_dict, not to torch.load -- likely a
                        # misplaced parenthesis; verify.
                        model.module.load_state_dict(torch.load(modelloc_dict[_arch][_traintype]),
                                                     map_location=torch.device(str(device)))
                        #num_classes = model.module._fc.out_features
                    except Exception as e:                
                        # Failure is only reported; execution continues with
                        # whatever `model` currently holds.
                        print("Problem loading model!, {}".format(e))
                    
                # Keep the inner object rather than the outermost wrapper.
                model = model.module
                        
            # AMP path: discards the model loaded above and rebuilds it from a
            # checkpoint dict with 'model' and 'amp' entries.
            if use_amp:
                print("Import model assuming AMP APEX optimisation")
                from apex import amp
                checkpoint = torch.load(modelloc_dict[_arch][_traintype])
                model = load_base_model(_arch)
                model.to(device)
                model, _ = amp.initialize(model, None, opt_level='O1')
                model.load_state_dict(checkpoint['model'])
                amp.load_state_dict(checkpoint['amp'])     

            model_dict[_arch][_traintype]= model
    return model_dict

In [12]:
# Load all configured models once; indexed as model_dict[architecture][train_type].
model_dict = create_model_dict(modelloc_dict)

load model..
load model...
loading weights...


In [13]:
# dataloader stuffs
#
def create_loader(**kwargs):
    """Build the augmented test-image DataLoader for one run setting.

    Keyword Args (all three keys are required, other keys are ignored):
        centercrop: size for ``transforms.CenterCrop`` or None.
        randomcrop: ``(resize_size, crop_size)`` for a resize followed by a
            random crop, or None. Only used when ``centercrop`` is None.
        resize: target size for a plain resize; used when both crop settings
            are None.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``TestDataSet(image_loc)`` with
        the default batch size, no shuffling and 16 workers.

    Raises:
        ValueError: if ``centercrop``, ``randomcrop`` and ``resize`` are all None.

    Reads the module-level names ``begin_resize``, ``image_loc`` and ``device``.
    """
    centercrop = kwargs['centercrop']
    randomcrop = kwargs['randomcrop']
    resize = kwargs['resize']

    if centercrop is None and randomcrop is None and resize is None:
        raise ValueError("One of 'centercrop', 'randomcrop' or 'resize' must be set")

    # Augmentations operating on PIL images / arrays.
    # NOTE(review): recent torchvision releases renamed RandomAffine's
    # `resample` argument to `interpolation` -- check the installed version.
    img_transforms = [
        transforms.RandomGrayscale(p=0.1),
        Equalize(p=0.2),
        AutoContrast(p=0.4, cutoff=0.2),
        Sharpness(p=0.1, magnitude=3),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomAffine(degrees=60,
                                translate=(0.05, 0.05),
                                shear=15,
                                resample=Image.BILINEAR),
        transforms.ColorJitter(brightness=0.25, contrast=0.15,
                               saturation=0.15, hue=0.15),
        Microscope(p=0.25),
        GaussianBlur(p=0.25, ksize=5),
        CutOut(p=0.4, min_holes=4, max_holes=24,
               fill_value=0, max_height=32, max_width=32,
               min_height=16, min_width=16),
    ]

    # Tensor conversion, erasing and normalisation.
    tensor_transforms = [
        transforms.ToTensor(),
        transforms.RandomErasing(p=0.3, scale=(0.025, 0.05), ratio=(0.05, .1),
                                 value=0, inplace=False),
        transforms.RandomErasing(p=0.2, scale=(0.025, 0.05), ratio=(4, 10),
                                 value=0, inplace=False),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    transform_list = img_transforms + tensor_transforms

    if begin_resize:
        # Geometry first, on the PIL image, then augment.
        if centercrop is not None:
            opt = [transforms.CenterCrop(centercrop)]
        elif randomcrop is not None:
            opt = [transforms.Resize(randomcrop[0]),
                   transforms.RandomCrop(randomcrop[1],
                                         padding=0,
                                         pad_if_needed=True)]
        else:
            # The name `resizer` used here before is not defined anywhere in
            # the visible file; a plain resize to `resize` is assumed to be
            # what was meant.
            opt = [transforms.Resize(resize)]
        transform_list = opt + transform_list
    else:
        # Geometry last: the tensor is converted back to PIL, cropped or
        # resized, and converted to a tensor again.
        # NOTE(review): this round-trip happens AFTER Normalize; confirm the
        # training pipeline did the same before changing the order.
        if centercrop is not None:
            opt = [transforms.ToPILImage(),
                   transforms.CenterCrop(centercrop),
                   transforms.ToTensor()]
        elif randomcrop is not None:
            opt = [transforms.ToPILImage(),
                   transforms.Resize(randomcrop[0]),
                   transforms.RandomCrop(randomcrop[1],
                                         padding=0,
                                         pad_if_needed=True),
                   transforms.ToTensor()]
        else:
            opt = [Tinterpolate(resize, False)]
        transform_list = transform_list + opt

    transformer = transforms.Compose(transform_list)
    dataset = TestDataSet(image_loc, transform=transformer)

    # Comparing a torch.device with the string 'cuda:0' is never true, so
    # pinning was silently disabled; test the device type instead.
    use_pinned_memory = torch.device(device).type == 'cuda'

    return torch.utils.data.DataLoader(dataset,
                                       num_workers=16,
                                       shuffle=False,
                                       sampler=None,
                                       drop_last=False,
                                       pin_memory=use_pinned_memory)



In [14]:
# One DataLoader per entry of run_settings. The keys are the run_settings
# keys ('resize', 'centercrop'), i.e. training types, even though the loop
# variable is called `_arch`.
loader_dict = defaultdict(object)
for _arch, _kwargs in run_settings.items():
    loader_dict[_arch] = create_loader(**_kwargs)

In [15]:
def model_convolve(img, stride, scan_res, model, num_classes, padding=None):
    """Slide a square window over ``img`` and collect the model output per patch.

    Axes 2 and 3 of ``img`` are scanned; along each of them
    ``floor(size / stride)`` window positions are visited, starting at
    multiples of ``stride`` and spanning ``scan_res`` elements.

    Args:
        img: 4-D image tensor; axes 2 and 3 are the scanned ones.
        stride: step between neighbouring windows.
        scan_res: window side length.
        model: callable applied to each patch after moving it to ``device``.
        num_classes: number of output columns.
        padding: unused.

    Returns:
        NumPy array of shape ``(n_patches, num_classes)`` with one row of
        model output per patch. The first scanned axis varies fastest.
    """
    smallest_side = np.min(img.shape[2:4])
    assert scan_res < smallest_side, 'Scan resolution is higher than the smallest image dimension'

    steps_first = int(np.floor(img.shape[2]/stride))
    steps_second = int(np.floor(img.shape[3]/stride))
    output = np.zeros((steps_first * steps_second, num_classes))

    patch_idx = 0
    for step_second in range(steps_second):
        second_lo = stride*step_second
        second_hi = second_lo + scan_res
        for step_first in range(steps_first):
            first_lo = stride*step_first
            first_hi = first_lo + scan_res

            patch = img[:, :, first_lo:first_hi, second_lo:second_hi]
            output[patch_idx, :] = model(patch.to(device)).to('cpu')
            patch_idx += 1
    return output
    

```

# suggestion 1: count ranks, i.e. majority vote
cnts = np.zeros((num_classes,))
for i in range(results.shape[0]):
    cnts[np.argmax(results[i,:])] += 1
  
# suggestion 2: unweighted score summing
score_sum = np.zeros((num_classes,))
for i in range(results.shape[0]):
    score_sum[:] = score_sum[:] + results[i,:]

# suggestion 3: unweighted softmax summing
score_sum = np.zeros((num_classes,))
for i in range(results.shape[0]):
    score_sum[:] = score_sum[:] + _softmax(results[i,:])
    
# add higher weights to higher score entropy, or based on patch location?
..
..
..
```



In [16]:
def _softmax(x):
    # https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python
    """Compute softmax values for each set of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# Class indices of every label except 'mel'.
rnd_choice_list = [v for k,v in class_map.items() if k!='mel']
def _softmax_rows(scores):
    """Return the softmax of every row of a 2-D score array."""
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)


def _as_weight_vector(inference_weights, num_classes):
    """Turn per-class weights (sequence or {class_index: weight}) into an array."""
    if inference_weights is None:
        raise ValueError("weighted=True requires inference_weights")
    if isinstance(inference_weights, dict):
        return np.array([inference_weights[idx] for idx in range(num_classes)],
                        dtype=float)
    return np.asarray(inference_weights, dtype=float)


def agg_results(x, how='mean_softmax', thres=0.75, mel_ind=None,
                weighted=False, inference_weights=None, all_probas=False):
    """Aggregate per-patch model scores into one score per class.

    Args:
        x: 2-D array of raw scores, one row per patch, one column per class.
        how: aggregation rule.
            'mean_softmax'        - arithmetic mean of the per-patch softmax.
            'softmax_mean_linear' - softmax of the mean raw score.
            'logmean'             - geometric mean of the per-patch softmax
                                    (not re-normalised).
        thres: unused.
        mel_ind: column index of the melanoma class; must be an ``int``.
        weighted: when truthy the raw scores are multiplied by
            ``inference_weights`` before the softmax.
        inference_weights: per-class weights, either a sequence ordered by
            class index or a dict ``{class_index: weight}``.
        all_probas: return the full per-class vector instead of only the
            melanoma entry.

    Returns:
        The aggregated vector (``all_probas=True``) or its ``mel_ind`` entry.

    Raises:
        AssertionError: if ``mel_ind`` is not an int.
        ValueError: for an unknown ``how``, an input without rows, or
            ``weighted`` without weights.
    """
    assert isinstance(mel_ind, int), 'Index of melanoma class not given'

    scores = np.asarray(x, dtype=float)
    if scores.ndim != 2 or scores.shape[0] == 0:
        raise ValueError("x must be a non-empty 2-D array of shape (patches, classes)")

    weights = None
    if weighted:
        weights = _as_weight_vector(inference_weights, scores.shape[1])

    if how == 'mean_softmax':
        patch_scores = scores * weights if weighted else scores
        probs = _softmax_rows(patch_scores).mean(axis=0)
    elif how == 'softmax_mean_linear':
        mean_scores = scores.mean(axis=0)
        if weighted:
            mean_scores = mean_scores * weights
        probs = _softmax_rows(mean_scores[np.newaxis, :])[0]
    elif how == 'logmean':
        patch_scores = scores * weights if weighted else scores
        # Average in log space in BOTH the weighted and the unweighted case;
        # the weighted branch used to average plain probabilities and then
        # exponentiate them.
        probs = np.exp(np.log(_softmax_rows(patch_scores)).mean(axis=0))
    else:
        raise ValueError("Unknown aggregation method: {}".format(how))

    return probs if all_probas else probs[mel_ind]

def smart_mean(x):
    """Confidence-weighted mean of probabilities, computed per column.

    Every value is weighted by its distance from 0.5, so confident
    predictions count more than undecided ones. A column whose values are all
    exactly 0.5 has zero total weight and falls back to the plain mean.

    NOTE(review): defining this function rebinds the module-level name
    ``smart_mean``, which is also assigned a boolean flag near the top of
    the file.

    Args:
        x: pandas DataFrame (one result per column) or Series of
           probabilities.

    Returns:
        A Series indexed by column for a multi-column DataFrame, otherwise a
        single float.
    """
    values = np.asarray(x.values, dtype=float)
    if values.ndim == 1:
        values = values[:, np.newaxis]

    weights = np.abs(values - 0.5)
    # Normalise every column by ITS OWN total weight. Dividing by the weight
    # summed over all columns under-estimated each column, and the
    # single-column branch called sum() on a scalar, which raises.
    weight_totals = weights.sum(axis=0)
    weighted_sums = (values * weights).sum(axis=0)

    result = values.mean(axis=0)
    np.divide(weighted_sums, weight_totals, out=result, where=weight_totals > 0)

    if values.shape[1] > 1:
        return pd.Series(result, index=x.columns)
    return result[0]
    
def log_mean(x):
    """Return exp(mean(log(x))), i.e. the geometric mean of x."""
    # only for aggregation (i.e. columnwise)
    log_values = np.log(x)
    mean_log = np.mean(log_values)
    return np.exp(mean_log)
    
    
def _confidence_weighted_mean(res_df, value_cols):
    """Per-image mean of `value_cols`, weighting each row by |value - 0.5|.

    Returns a frame with an 'image_name' column followed by `value_cols`.
    """
    keys = res_df['image_name']
    values = res_df[value_cols]
    weights = (values - 0.5).abs()
    weighted_sum = (values * weights).groupby(keys).sum()
    weight_sum = weights.groupby(keys).sum()
    plain_mean = values.groupby(keys).mean()
    # A zero weight sum means every value is exactly 0.5; use the plain mean
    # there instead of the 0/0 result.
    averaged = (weighted_sum / weight_sum).where(weight_sum > 0, plain_mean)
    return averaged.reset_index()


def model_run(model, num_classes, data_loader, _archname, traintype, smart_mean, get_all_probas, **kwargs):   
    """Run sliding-window inference with test-time augmentation for one model.

    For each of `TTA` passes over `data_loader`, every image is scanned with
    `model_convolve`, the scan scores are reduced with `agg_results`, and the
    result is recorded per image. After each pass the plain per-image mean of
    all results collected so far is written to
    ``PREDICTIONS/<prefix>_<arch>_<traintype>_TTA<pass>_normmean.csv``
    (only when `get_all_probas` is false).

    Parameters
    ----------
    model : the network to evaluate; it is put in eval mode.
    num_classes : int, forwarded to `model_convolve`.
    data_loader : iterable yielding ``(image, path)`` items; the image name is
        taken from ``path[0]`` (assumes one image per item - TODO confirm).
    _archname, traintype : str, used in the output file name and 'arch' label.
    smart_mean : truthy to return the confidence-weighted per-image mean
        instead of the plain mean. Only its truthiness is used; the parameter
        shadows the module-level `smart_mean` function inside this body.
    get_all_probas : bool, record all class probabilities instead of only the
        melanoma probability.
    **kwargs : must contain 'stride', 'scan_res', 'TTA', 'proba_aggregation'
        and 'use_inference_weights'.

    Also reads the module-level names `class_map`, `ind_class`, `ind_weights`,
    `image_loc` and `debug`.

    Returns
    -------
    pandas.DataFrame
        One row per image with the aggregated probabilities and an 'arch'
        label. The plain-mean frame also carries the averaged 'tta_run'.

    Raises
    ------
    ValueError
        If no prediction was produced at all (e.g. ``TTA`` < 1).
    """
    stride = kwargs['stride']
    scan_res = kwargs['scan_res']
    TTA = kwargs['TTA']
    proba_aggregation = kwargs['proba_aggregation']
    use_inference_weights = kwargs['use_inference_weights']
    
    mel_ind = class_map['mel']
    res_list = []
    res_df = None
    res_df_norm = None
    proba_sum = 0
    n_seen = 0  # number of predictions actually accumulated in proba_sum
    model.eval()
    with torch.no_grad():
        for _m in range(TTA):
            print("Starting on TTA round {}".format(_m))
            for idx, data in tqdm(enumerate(data_loader, 0)):
                im_loc = data[1]    
                im_name = im_loc[0].split("/")[-1]
                # Drop the extension as a suffix; str.strip would remove any
                # of the characters '.', 'j', 'p', 'g' from both ends.
                if im_name.endswith(".jpg"):
                    im_name = im_name[:-len(".jpg")]
                try:
                    results = model_convolve(data[0], stride, scan_res, model, num_classes, padding=None)
                except Exception as e:
                    print("Convolving inference failed: {}. On image: {}".format(e, im_name))
                    # Skip this image: there are no fresh scores to aggregate,
                    # and reusing the previous image's scores would mislabel them.
                    continue
                    
                res = agg_results(results, how=proba_aggregation, 
                                  mel_ind=mel_ind, inference_weights=ind_weights,
                                  weighted=use_inference_weights, all_probas=get_all_probas)      

                if get_all_probas:
                    tdict = {ind_class[cls_idx]:_p for cls_idx, _p in enumerate(res)}
                    tdict['image_name'] = im_name
                    tdict['tta_run'] = _m
                    res_list.append(tdict)                                        
                    proba_sum += res[mel_ind]
                else:
                    res_list.append({'image_name': im_name,
                                     'target': res,
                                     'tta_run': _m})
                    proba_sum += res
                n_seen += 1
                if debug and idx % 50 == 49:
                    break                
                if idx % 2500 == 0:
                    print("Mean melanoma probability is: {}".format(100*proba_sum/n_seen))
                    
            if not res_list:
                # Nothing predicted so far, so there is nothing to aggregate or write.
                continue

            res_df = pd.DataFrame(res_list)

            fileString = "PREDICTIONS/"+image_loc.split("/")[0]+"_"+_archname+"_"+traintype+"_TTA"+str(_m)+"_normmean.csv"
            res_df_norm = res_df.groupby('image_name').mean().reset_index()
            if not get_all_probas:
                res_df_norm[['image_name', 'target']].to_csv(fileString, sep=",", index=False)
            res_df_norm['arch'] = _archname+"_"+traintype+"_TTA"+str(_m)    

    if res_df_norm is None:
        raise ValueError("No predictions were produced (TTA={}); nothing to aggregate".format(TTA))

    if smart_mean:
        value_cols = [col for col in res_df.columns if col not in ('image_name', 'tta_run')]
        res_df_sm = _confidence_weighted_mean(res_df, value_cols)
        res_df_sm['arch'] = _archname+"_"+traintype+"_TTA"+str(_m)
        return res_df_sm
    return res_df_norm        
            

In [17]:
# Run every (architecture, train type) model and stack the per-image results
# into `tot_res`; `cnt` counts the completed runs.
cnt = 0
for _archname, _models_by_traintype in model_dict.items():
    for _traintype, _model in _models_by_traintype.items():
        print("architecture: {}, traintype: {}".format(_archname, _traintype))
        _kwargs = run_settings[_traintype]
        _res = model_run(
            _model,
            num_classes,
            loader_dict[_traintype],
            _archname,
            _traintype,
            smart_mean,
            get_all_probas,
            **_kwargs
        )
        if cnt > 0:
            tot_res = pd.concat([tot_res, _res])
        else:
            # first run: nothing to append to yet
            tot_res = _res
        cnt += 1

architecture: efficient_net_b0, traintype: resize
Starting on TTA round 0


4it [00:00,  2.96it/s]

Mean melanoma probability is: 8.856242911556992e-05


2505it [00:54, 45.57it/s]

Mean melanoma probability is: 9.120822789947058


5008it [01:47, 43.62it/s]

Mean melanoma probability is: 9.332921707615363


7510it [02:39, 48.04it/s]

Mean melanoma probability is: 9.16391138130003


10010it [03:32, 46.68it/s]

Mean melanoma probability is: 9.170760453275706


10982it [03:53, 47.11it/s]


Starting on TTA round 1


5it [00:00,  3.47it/s]

Mean melanoma probability is: 9.185584270999806


2505it [00:53, 49.78it/s]

Mean melanoma probability is: 9.185112719117683


5005it [01:46, 45.57it/s]

Mean melanoma probability is: 9.1739723374001


7510it [02:39, 45.52it/s]

Mean melanoma probability is: 9.096594332056153


10006it [03:32, 45.73it/s]

Mean melanoma probability is: 9.085085999110804


10982it [03:53, 47.07it/s]


Starting on TTA round 2


4it [00:00,  3.98it/s]

Mean melanoma probability is: 9.08108951363426


2511it [00:53, 49.23it/s]

Mean melanoma probability is: 9.078929987691522


5006it [01:46, 47.66it/s]

Mean melanoma probability is: 9.132770503272027


7505it [02:39, 44.28it/s]

Mean melanoma probability is: 9.08971650824337


10007it [03:32, 45.03it/s]

Mean melanoma probability is: 9.133244211928577


10982it [03:52, 47.14it/s]


Starting on TTA round 3


5it [00:00,  3.44it/s]

Mean melanoma probability is: 9.118903545762253


2506it [00:53, 46.36it/s]

Mean melanoma probability is: 9.120672653968564


5007it [01:46, 43.29it/s]

Mean melanoma probability is: 9.15098083075978


7506it [02:39, 44.84it/s]

Mean melanoma probability is: 9.135763995539241


10009it [03:32, 46.10it/s]

Mean melanoma probability is: 9.138740478073705


10982it [03:53, 47.03it/s]


Starting on TTA round 4


4it [00:00,  3.95it/s]

Mean melanoma probability is: 9.12322884392314


2509it [00:54, 48.83it/s]

Mean melanoma probability is: 9.143689566394304


5007it [01:46, 46.54it/s]

Mean melanoma probability is: 9.164154454030834


7508it [02:39, 45.89it/s]

Mean melanoma probability is: 9.155958299452138


10006it [03:33, 45.90it/s]

Mean melanoma probability is: 9.164570607521487


10982it [03:53, 47.02it/s]


Starting on TTA round 5


5it [00:00,  3.37it/s]

Mean melanoma probability is: 9.175806614522273


2507it [00:53, 47.36it/s]

Mean melanoma probability is: 9.179868805882236


5006it [01:46, 47.70it/s]

Mean melanoma probability is: 9.179959512906784


7509it [02:39, 45.53it/s]

Mean melanoma probability is: 9.169173050226537


10009it [03:32, 45.23it/s]

Mean melanoma probability is: 9.154902957904397


10982it [03:52, 47.19it/s]


Starting on TTA round 6


5it [00:00,  3.52it/s]

Mean melanoma probability is: 9.161287050560867


2508it [00:53, 47.99it/s]

Mean melanoma probability is: 9.176446258532538


5007it [01:46, 46.23it/s]

Mean melanoma probability is: 9.191389477362835


7507it [02:39, 45.30it/s]

Mean melanoma probability is: 9.167757999601898


10009it [03:33, 47.80it/s]

Mean melanoma probability is: 9.173158584891844


10982it [03:53, 47.02it/s]


architecture: regnet_064, traintype: resize
Starting on TTA round 0


2it [00:00,  4.13it/s]

Mean melanoma probability is: 0.02587336026214255


2506it [01:08, 38.33it/s]

Mean melanoma probability is: 8.276872406838363


5005it [02:16, 37.42it/s]

Mean melanoma probability is: 8.293760510016655


7504it [03:23, 36.27it/s]

Mean melanoma probability is: 8.116651561905545


10007it [04:31, 35.75it/s]

Mean melanoma probability is: 8.069944856971507


10982it [04:57, 36.86it/s]


Starting on TTA round 1


3it [00:00,  4.21it/s]

Mean melanoma probability is: 8.085896725785458


2507it [01:08, 37.12it/s]

Mean melanoma probability is: 8.092514396525997


5008it [02:16, 36.42it/s]

Mean melanoma probability is: 8.131652605889593


7506it [03:24, 37.09it/s]

Mean melanoma probability is: 8.064943603851468


10008it [04:31, 34.31it/s]

Mean melanoma probability is: 8.071639500918302


10982it [04:58, 36.84it/s]


Starting on TTA round 2


4it [00:00,  3.32it/s]

Mean melanoma probability is: 8.099461079336882


2506it [01:08, 38.34it/s]

Mean melanoma probability is: 8.079021041282477


5004it [02:16, 36.37it/s]

Mean melanoma probability is: 8.082975596464227


7505it [03:24, 34.17it/s]

Mean melanoma probability is: 8.039726531789245


10007it [04:32, 37.11it/s]

Mean melanoma probability is: 8.027674675790475


10982it [04:58, 36.76it/s]


Starting on TTA round 3


4it [00:00,  3.35it/s]

Mean melanoma probability is: 8.046550693277705


2508it [01:08, 37.45it/s]

Mean melanoma probability is: 8.025807345808254


5007it [02:16, 33.54it/s]

Mean melanoma probability is: 8.040995731215219


7506it [03:24, 36.81it/s]

Mean melanoma probability is: 8.04107889108446


10005it [04:32, 37.05it/s]

Mean melanoma probability is: 8.036981393542696


10982it [04:58, 36.75it/s]


Starting on TTA round 4


4it [00:00,  3.52it/s]

Mean melanoma probability is: 8.052471508893616


2508it [01:08, 37.31it/s]

Mean melanoma probability is: 8.049803000039516


5007it [02:16, 37.01it/s]

Mean melanoma probability is: 8.060683880445621


7506it [03:24, 37.32it/s]

Mean melanoma probability is: 8.044515682468221


10005it [04:32, 37.39it/s]

Mean melanoma probability is: 8.036464981701924


10982it [04:59, 36.65it/s]


Starting on TTA round 5


3it [00:00,  3.21it/s]

Mean melanoma probability is: 8.04845113375143


2507it [01:08, 38.35it/s]

Mean melanoma probability is: 8.047036690353362


5008it [02:16, 35.31it/s]

Mean melanoma probability is: 8.050891887818638


7507it [03:23, 36.98it/s]

Mean melanoma probability is: 8.040173012840723


10008it [04:31, 35.88it/s]

Mean melanoma probability is: 8.032276323131933


10982it [04:58, 36.80it/s]


Starting on TTA round 6


4it [00:00,  3.26it/s]

Mean melanoma probability is: 8.03888119183775


2507it [01:08, 33.41it/s]

Mean melanoma probability is: 8.035373500581407


5006it [02:16, 32.49it/s]

Mean melanoma probability is: 8.037723440559859


7506it [03:24, 37.20it/s]

Mean melanoma probability is: 8.038168394904892


10005it [04:32, 36.78it/s]

Mean melanoma probability is: 8.042975126313939


10982it [04:59, 36.71it/s]


architecture: regnet_064, traintype: centercrop
Starting on TTA round 0


2it [00:00,  2.47it/s]

Mean melanoma probability is: 1.2162251031928049


2502it [03:42, 11.37it/s]

Mean melanoma probability is: 3.271253047686172


5002it [07:21, 11.34it/s]

Mean melanoma probability is: 3.3540365338356164


7503it [11:07, 10.32it/s]

Mean melanoma probability is: 3.296817852471426


10003it [14:59, 11.11it/s]

Mean melanoma probability is: 3.3069374169086125


10982it [16:31, 11.08it/s]


Starting on TTA round 1


2it [00:00,  2.34it/s]

Mean melanoma probability is: 3.3084755668808215


2502it [03:54, 10.45it/s]

Mean melanoma probability is: 3.3163114494234294


5002it [07:45, 10.69it/s]

Mean melanoma probability is: 3.3269484194670156


7502it [11:36, 10.61it/s]

Mean melanoma probability is: 3.2901141271549608


10003it [15:24, 11.34it/s]

Mean melanoma probability is: 3.3101019638488265


10982it [16:54, 10.83it/s]


Starting on TTA round 2


1it [00:00,  2.05it/s]

Mean melanoma probability is: 3.308221882253626


2501it [03:44, 10.68it/s]

Mean melanoma probability is: 3.3181102853064735


5002it [07:36, 10.64it/s]

Mean melanoma probability is: 3.328741403932561


7503it [11:28, 11.07it/s]

Mean melanoma probability is: 3.319034971304157


10003it [15:17, 11.26it/s]

Mean melanoma probability is: 3.3203381829849556


10982it [16:44, 10.94it/s]


Starting on TTA round 3


2it [00:00,  2.42it/s]

Mean melanoma probability is: 3.322226324032766


2502it [03:43, 11.49it/s]

Mean melanoma probability is: 3.3273839116082025


5003it [07:25, 11.17it/s]

Mean melanoma probability is: 3.3335130978266694


7502it [11:07, 10.53it/s]

Mean melanoma probability is: 3.330889940540361


10002it [14:55, 10.58it/s]

Mean melanoma probability is: 3.330232644913129


10982it [16:25, 11.14it/s]


Starting on TTA round 4


2it [00:00,  2.50it/s]

Mean melanoma probability is: 3.3341358131970136


2503it [03:49, 11.26it/s]

Mean melanoma probability is: 3.343403534605884


5003it [07:39, 11.11it/s]

Mean melanoma probability is: 3.352068079257254


7502it [11:28, 10.10it/s]

Mean melanoma probability is: 3.3463686503699988


10003it [15:17, 11.47it/s]

Mean melanoma probability is: 3.3422524244466936


10982it [16:48, 10.89it/s]


Starting on TTA round 5


2it [00:00,  2.21it/s]

Mean melanoma probability is: 3.3445393801664856


2502it [03:52, 10.76it/s]

Mean melanoma probability is: 3.3485135746663137


5002it [07:44, 10.45it/s]

Mean melanoma probability is: 3.3516259561042037


7502it [11:31, 11.17it/s]

Mean melanoma probability is: 3.34404802507075


10002it [15:10, 11.44it/s]

Mean melanoma probability is: 3.3405691292681423


10982it [16:36, 11.03it/s]


Starting on TTA round 6


2it [00:00,  2.36it/s]

Mean melanoma probability is: 3.3409748146040354


2503it [03:40, 11.48it/s]

Mean melanoma probability is: 3.341910319379344


5003it [07:18, 11.61it/s]

Mean melanoma probability is: 3.3468633899003395


7502it [10:57, 11.20it/s]

Mean melanoma probability is: 3.3445957832231996


10002it [14:37, 11.18it/s]

Mean melanoma probability is: 3.339804554924582


10982it [16:03, 11.40it/s]


Improvements:
* create multiple test images using augmentations: augment+scan+multiple models

In [18]:
# Prediction columns: everything in `tot_res` except the bookkeeping columns.
# Filtering in column order (rather than via a set difference) keeps the
# order identical from run to run.
tcols = [col for col in tot_res.columns
         if col not in ('tta_run', 'arch', 'image_name')]

In [19]:
# Collapse all model / TTA predictions to one row per image, three ways:
# confidence-weighted mean, plain mean and geometric (log) mean.
_per_image_cols = tcols+['image_name']
tw = (
    tot_res[_per_image_cols]
    .groupby('image_name')
    .apply(smart_mean)
    .reset_index()
)
tunw = (
    tot_res[_per_image_cols]
    .groupby('image_name')
    .mean()
    .reset_index()
)
tlog2 = (
    tot_res[_per_image_cols]
    .groupby('image_name')
    .apply(log_mean)
    .reset_index()
)

In [20]:
# Normalise the column names for submission. This only applies to the
# single-probability case: with get_all_probas the frames carry one column
# per class, so a two-name assignment would raise and would also discard the
# class column names that are looked up afterwards.
if not get_all_probas:
    tw.columns = ['image_name', 'target']
    tunw.columns = ['image_name', 'target']
    tlog2.columns = ['image_name', 'target']

In [21]:
if get_all_probas:
    threshold=0.9
    def get_max(x):    
        """Return the label of the largest value in row `x`.

        `x` is a Series indexed by class name. The label is returned only if
        the largest value exceeds `threshold`; otherwise np.nan is returned.
        Uses label-based `max` / `idxmax` rather than indexing the Series
        with an integer position.
        """
        best = x.max()
        if best > threshold:
            return x.idxmax()
        return np.nan
    tw['target'] = tw[tcols].apply(get_max, axis=1)
    tunw['target'] = tunw[tcols].apply(get_max, axis=1)
    tlog2['target'] = tlog2[tcols].apply(get_max, axis=1)

In [22]:
# Write the three aggregated prediction tables to the working directory.
for _frame, _csv_name in (
    (tw, 'weighted_total.csv'),
    (tunw, 'unweighted_total.csv'),
    (tlog2, 'log2weighted_total.csv'),
):
    _frame.to_csv(_csv_name, sep=',', index=False)

In [23]:
if submit_to_kaggle:    
    #!kaggle competitions submit -c siim-isic-melanoma-classification -f /home/bramiozo/DATA/ISIC2020/weighted_total.csv -m "Weighted total"
    #!kaggle competitions submit -c siim-isic-melanoma-classification -f /home/bramiozo/DATA/ISIC2020/unweighted_total.csv -m "Unweighted total"
    #!kaggle competitions submit -c siim-isic-melanoma-classification -f /home/bramiozo/DATA/ISIC2020/unweighted_total.csv -m "Log2 weighted total"
    !kaggle competitions submit -c siim-isic-melanoma-classification -f /media/bramiozo/DATA-FAST/kaggle/image_classification/MEDICAL/melanoma/weighted_total.csv -m "Weighted total"
    !kaggle competitions submit -c siim-isic-melanoma-classification -f /media/bramiozo/DATA-FAST/kaggle/image_classification/MEDICAL/melanoma/unweighted_total.csv -m "Unweighted total"
    !kaggle competitions submit -c siim-isic-melanoma-classification -f /media/bramiozo/DATA-FAST/kaggle/image_classification/MEDICAL/melanoma/unweighted_total.csv -m "Log2 weighted total"
    !poweroff