## RSNA 2023 Abdominal Trauma Detection

## 1. Setup

In [1]:
!pip install /kaggle/input/rsna-2023-abdominal-trauma-detection-dataset/packages/monai-1.2.0-202306081546-py3-none-any.whl --no-index --find-links /kaggle/input/rsna-2023-abdominal-trauma-detection-dataset/packages

Looking in links: /kaggle/input/rsna-2023-abdominal-trauma-detection-dataset/packages
Processing /kaggle/input/rsna-2023-abdominal-trauma-detection-dataset/packages/monai-1.2.0-202306081546-py3-none-any.whl
Installing collected packages: monai
Successfully installed monai-1.2.0


In [2]:
import os
import yaml
from pathlib import Path
from tqdm import tqdm
import numpy as np
import pandas as pd
import cv2
import pydicom
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import monai.transforms as T

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [3]:
# Competition data: the sample submission and the test DICOM images are read from here
competition_dataset = Path('/kaggle/input/rsna-2023-abdominal-trauma-detection')
# Attached dataset: holds the offline packages and the trained model directories
external_dataset = Path('/kaggle/input/rsna-2023-abdominal-trauma-detection-dataset')

In [4]:
# Read the sample submission shipped with the competition data and report its size
sample_submission_path = competition_dataset / 'sample_submission.csv'
df = pd.read_csv(sample_submission_path)
print(f'Dataset Shape: {df.shape}')

Dataset Shape: (3, 14)


## 2. DICOM Utilities

In [5]:
def shift_bits(image, dicom, bits_allocated=None, bits_stored=None):

    """
    Discard the bits above the stored bit depth by shifting left and back right

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset, only consulted for values requested with 'dataset'

    bits_allocated: int, str ('dataset') or None
        Number of bits allocated ('dataset' reads BitsAllocated from the DICOM dataset)

    bits_stored: int, str ('dataset') or None
        Number of bits stored ('dataset' reads BitsStored from the DICOM dataset)

    Returns
    -------
    image: numpy.ndarray of shape (height, width)
        Image array with shifted bits, or the input unchanged if either value is unknown
    """

    allocated = bits_allocated
    if allocated == 'dataset':
        # A missing attribute means the value is unknown
        allocated = getattr(dicom, 'BitsAllocated', None)

    stored = bits_stored
    if stored == 'dataset':
        stored = getattr(dicom, 'BitsStored', None)

    if allocated is None or stored is None:
        return image

    unused_bits = allocated - stored
    # Cast back to the input dtype before shifting right so the overflowed high bits stay dropped
    return (image << unused_bits).astype(image.dtype) >> unused_bits


def rescale_pixel_values(image, dicom, rescale_slope=None, rescale_intercept=None):

    """
    Apply the linear mapping image * slope + intercept to the pixel values

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset, only consulted for values requested with 'dataset'

    rescale_slope: int, str ('dataset') or None
        Rescale slope ('dataset' reads RescaleSlope from the DICOM dataset)

    rescale_intercept: int, str ('dataset') or None
        Rescale intercept ('dataset' reads RescaleIntercept from the DICOM dataset)

    Returns
    -------
    image: numpy.ndarray of shape (height, width)
        float32 image array with rescaled pixel values, or the input unchanged if either value is unknown
    """

    slope = rescale_slope
    if slope == 'dataset':
        # A missing attribute means the value is unknown
        slope = getattr(dicom, 'RescaleSlope', None)

    intercept = rescale_intercept
    if intercept == 'dataset':
        intercept = getattr(dicom, 'RescaleIntercept', None)

    if slope is None or intercept is None:
        return image

    # Cast first so the arithmetic is done in float32
    return image.astype(np.float32) * slope + intercept


def window_pixel_values(image, dicom, window_center=None, window_width=None):

    """
    Window pixel values using window center and width

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset, only consulted for values requested with 'dataset'

    window_center: int, sequence of int, str ('dataset') or None
        Window center ('dataset' reads WindowCenter from the DICOM dataset)

    window_width: int, sequence of int, str ('dataset') or None
        Window width ('dataset' reads WindowWidth from the DICOM dataset)

    Returns
    -------
    image: numpy.ndarray of shape (height, width)
        Image array clipped to [center - width // 2, center + width // 2],
        or the input unchanged if either value is unknown
    """

    from collections.abc import Sequence

    def first_value(value):
        # A DICOM header may list several windows; use the first one
        if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
            return value[0] if len(value) > 0 else None
        return value

    if window_center == 'dataset':
        try:
            window_center = dicom.WindowCenter
        except AttributeError:
            window_center = None

    if window_width == 'dataset':
        try:
            window_width = dicom.WindowWidth
        except AttributeError:
            window_width = None

    window_center = first_value(window_center)
    window_width = first_value(window_width)

    if window_center is not None and window_width is not None:
        half_width = window_width // 2
        # np.clip returns a new array, so the input is never modified
        image = np.clip(image, window_center - half_width, window_center + half_width)

    return image


def invert_pixel_values(image, dicom, photometric_interpretation=None, max_pixel_value=255):

    """
    Invert pixel values when the photometric interpretation is MONOCHROME1

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset, only consulted when the interpretation is requested with 'dataset'

    photometric_interpretation: str or None
        Interpretation of the pixel data ('dataset' reads PhotometricInterpretation from the DICOM dataset)

    max_pixel_value: int or None
        Value the image is subtracted from when it is inverted

    Returns
    -------
    image: numpy.ndarray of shape (height, width)
        Inverted image array for MONOCHROME1, otherwise the input unchanged
    """

    interpretation = photometric_interpretation
    if interpretation == 'dataset':
        # A missing attribute means the interpretation is unknown
        interpretation = getattr(dicom, 'PhotometricInterpretation', None)

    return max_pixel_value - image if interpretation == 'MONOCHROME1' else image


def adjust_pixel_values(
        image, dicom,
        bits_allocated=None, bits_stored=None,
        rescale_slope=None, rescale_intercept=None,
        window_centers=None, window_widths=None,
        photometric_interpretation=None, max_pixel_value=255
):

    """
    Adjust pixel values by shifting bits, rescaling, windowing, normalizing and inverting

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset

    bits_allocated: int, str ('dataset') or None
        Number of bits allocated

    bits_stored: int, str ('dataset') or None
        Number of bits stored

    rescale_slope: int, str ('dataset') or None
        Rescale slope for rescaling pixel values

    rescale_intercept: int, str ('dataset') or None
        Rescale intercept for rescaling pixel values

    window_centers: list of int, list of str ('dataset') or None
        One window center per output channel, None produces a single channel without windowing

    window_widths: list of int, list of str ('dataset') or None
        One window width per output channel, None produces a single channel without windowing

    photometric_interpretation: str or None
        Interpretation of the pixel data

    max_pixel_value: int or None
        Max pixel value used for inverting pixel values. Inversion runs after the
        image is normalized to the 0-1 range, so the callers in this file pass 1

    Returns
    -------
    image: numpy.ndarray of shape (height, width, n_windows)
        uint8 image array with one channel per window
    """

    if window_centers is None or window_widths is None:
        # Without window lists, fall back to one channel with no windowing instead of failing in zip
        window_centers, window_widths = [None], [None]

    image = shift_bits(image=image, dicom=dicom, bits_allocated=bits_allocated, bits_stored=bits_stored)
    image = rescale_pixel_values(image=image, dicom=dicom, rescale_slope=rescale_slope, rescale_intercept=rescale_intercept)

    # np.stack copies its inputs, so every channel owns its data
    image = np.stack([
        window_pixel_values(image=image, dicom=dicom, window_center=window_center, window_width=window_width)
        for window_center, window_width in zip(window_centers, window_widths)
    ], axis=-1)

    # Min-max normalize each channel separately, the epsilon guards against constant channels
    image_min = image.min(axis=(0, 1))
    image_max = image.max(axis=(0, 1))
    image = (image - image_min) / (image_max - image_min + 1e-6)
    image = invert_pixel_values(image=image, dicom=dicom, photometric_interpretation=photometric_interpretation, max_pixel_value=max_pixel_value)
    image = (image * 255.0).astype(np.uint8)

    return image


def adjust_pixel_spacing(image, dicom, current_pixel_spacing=None, new_pixel_spacing=(1.0, 1.0)):

    """
    Resize the image so that its pixel spacing matches the requested pixel spacing

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    dicom: pydicom.dataset.FileDataset
        DICOM dataset, only consulted when the spacing is requested with 'dataset'

    current_pixel_spacing: tuple, str ('dataset') or None
        Physical distance in the patient between the center of each pixel
        ('dataset' reads PixelSpacing from the DICOM dataset)

    new_pixel_spacing: tuple
        Desired pixel spacing after resize operation

    Returns
    -------
    image: numpy.ndarray of shape (height, width)
        Resized image array, or the input unchanged if the current spacing is unknown
    """

    spacing = current_pixel_spacing
    if spacing == 'dataset':
        # A missing attribute means the spacing is unknown
        spacing = getattr(dicom, 'PixelSpacing', None)

    if spacing is None:
        return image

    original_shape = image.shape[:2]
    # Round the target shape to whole pixels, then derive the exact scale factors from it
    target_shape = np.round(original_shape * (np.array(spacing) / np.array(new_pixel_spacing)))
    scale_y, scale_x = target_shape / original_shape

    return cv2.resize(image, dsize=None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_NEAREST)


def read_image(dicom_file_path, output_directory, pixel_spacing=None):

    """
    Read DICOM file and process the image

    Parameters
    ----------
    dicom_file_path: str
        Path of the DICOM file

    output_directory: pathlib.Path
        Path of the directory image will be written to (not used by this function)

    pixel_spacing: tuple or None
        Image pixel spacing after normalization, None skips the resize

    Returns
    -------
    image: numpy.ndarray of shape (height, width, 1)
        uint8 image array with adjusted pixel values and, if requested, adjusted pixel spacing
    """

    dicom = pydicom.dcmread(str(dicom_file_path))
    image = dicom.pixel_array
    image = adjust_pixel_values(
        image=image, dicom=dicom,
        bits_allocated='dataset', bits_stored='dataset',
        rescale_slope='dataset', rescale_intercept='dataset',
        window_centers=['dataset'], window_widths=['dataset'],
        photometric_interpretation='dataset', max_pixel_value=1
    )

    if pixel_spacing is not None:
        # The helper is defined in this notebook, there is no dicom_utilities module to go through
        image = adjust_pixel_spacing(
            image=image,
            dicom=dicom,
            current_pixel_spacing='dataset',
            new_pixel_spacing=pixel_spacing
        )

    return image


def get_largest_contour(image):

    """
    Get the bounding box of the largest contour in the image

    Parameters
    ----------
    image: numpy.ndarray of shape (height, width)
        Image array

    Returns
    -------
    bounding_box: list of shape (4)
        Bounding box with x1, y1, x2, y2 values. The box is padded by 1% of each
        coordinate, so x2 and y2 can lie beyond the image borders
    """

    # Pixels brighter than 20 count as foreground
    thresholded_image = cv2.threshold(image, 20, 255, cv2.THRESH_BINARY)[1]
    contours, _ = cv2.findContours(thresholded_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    if len(contours) == 0:
        # Nothing above the threshold, fall back to a box covering the whole image
        x1 = 0
        x2 = image.shape[1] + 1
        y1 = 0
        y2 = image.shape[0] + 1
    else:
        contour = max(contours, key=cv2.contourArea)

        # Contour points are stored as (x, y) pairs along the last axis
        y1, y2 = np.min(contour[:, :, 1]), np.max(contour[:, :, 1])
        x1, x2 = np.min(contour[:, :, 0]), np.max(contour[:, :, 0])

        x1 = int(0.99 * x1)
        x2 = int(1.01 * x2)
        y1 = int(0.99 * y1)
        y2 = int(1.01 * y2)

    bounding_box = [x1, y1, x2, y2]

    return bounding_box


## 3. Models

In [6]:
class ClassificationHead(nn.Module):

    """
    Five parallel linear heads applied to the same feature vector
    """

    # Head name prefix and number of outputs, in registration and return order
    _HEAD_OUTPUTS = (
        ('bowel', 1),
        ('extravasation', 1),
        ('kidney', 3),
        ('liver', 3),
        ('spleen', 3),
    )

    def __init__(self, input_dimensions):

        super().__init__()

        # Registers bowel_head, extravasation_head, kidney_head, liver_head and spleen_head
        for target, output_dimensions in self._HEAD_OUTPUTS:
            setattr(self, f'{target}_head', nn.Linear(input_dimensions, output_dimensions, bias=True))

    def forward(self, x):

        # Tuple of outputs ordered as bowel, extravasation, kidney, liver, spleen
        return tuple(getattr(self, f'{target}_head')(x) for target, _ in self._HEAD_OUTPUTS)


In [7]:
class GeM(nn.Module):

    """
    Generalized mean pooling over the last two dimensions with a learnable exponent p
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # The exponent is stored as a one-element parameter
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        # Clamp to eps before raising to the power p
        powered = x.clamp(min=eps).pow(p)
        # Average over the full spatial extent, then undo the power
        pooled = F.avg_pool2d(powered, (x.size(-2), x.size(-1)))
        return pooled.pow(1. / p)

    def __repr__(self):
        return f'{self.__class__.__name__}(p={self.p.data.tolist()[0]:.4f}, eps={self.eps})'


class Attention(nn.Module):

    """
    Attention pooling that reduces (batch, sequence, dimensions) inputs to (batch, dimensions)
    """

    def __init__(self, sequence_length, dimensions, bias=True):

        super().__init__()

        # Projection from a feature vector to a single score per sequence position
        self.weight = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(dimensions, 1)))
        self.bias = bias
        if bias:
            # One additive bias per sequence position
            self.b = nn.Parameter(torch.zeros(sequence_length))

    def forward(self, x):

        _, sequence_length, dimensions = x.shape

        # Score every position, then fold the scores back to (batch, sequence)
        flattened_input = x.contiguous().view(-1, dimensions)
        scores = torch.mm(flattened_input, self.weight).view(-1, sequence_length)

        if self.bias:
            scores = scores + self.b

        attention = torch.exp(torch.tanh(scores))
        # The epsilon is added after the division, exactly as in the original expression
        attention = attention / torch.sum(attention, 1, keepdim=True) + 1e-10

        return torch.sum(x * attention.unsqueeze(-1), 1)


In [8]:
class MILClassificationModel(nn.Module):

    """
    Slice-wise 2D backbone whose feature maps are pooled over the depth axis (MIL pooling)
    and over the spatial axes (feature pooling) before the classification head

    Parameters
    ----------
    model_name: str
        Name of the timm model used as backbone

    pretrained: bool
        Whether to load pretrained backbone weights

    backbone_args: dict
        Additional keyword arguments passed to timm.create_model

    mil_pooling_type: str
        Pooling over the depth axis ('avg', 'max' or 'concat')

    feature_pooling_type: str
        Pooling over the spatial axes ('avg', 'max', 'concat', 'gem' or 'attention')

    dropout_rate: float
        Dropout probability before the head, values <= 0 disable dropout

    freeze_parameters: bool
        Whether to freeze the backbone parameters
    """

    def __init__(self, model_name, pretrained, backbone_args, mil_pooling_type, feature_pooling_type, dropout_rate, freeze_parameters):

        super(MILClassificationModel, self).__init__()

        self.backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            **backbone_args
        )

        if freeze_parameters:
            for parameter in self.backbone.parameters():
                parameter.requires_grad = False

        self.mil_pooling_type = mil_pooling_type
        self.feature_pooling_type = feature_pooling_type
        input_features = self.backbone.get_classifier().in_features
        self.backbone.classifier = nn.Identity()

        if self.feature_pooling_type == 'gem':
            self.pooling = GeM()
        elif self.feature_pooling_type == 'attention':
            # sequence_length=49 fixes the number of spatial positions the attention bias supports
            self.pooling = nn.Sequential(
                nn.LayerNorm(normalized_shape=input_features),
                Attention(sequence_length=49, dimensions=input_features)
            )
        else:
            self.pooling = nn.Identity()

        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else nn.Identity()

        # 'concat' feature pooling joins the avg and max pooled vectors, so the head sees twice the features
        head_input_features = input_features * 2 if self.feature_pooling_type == 'concat' else input_features
        # NOTE(review): 'concat' MIL pooling multiplies the channels by the input depth, which is
        # unknown here, so the head is not sized for that mode
        self.head = ClassificationHead(input_dimensions=head_input_features)

    def forward(self, x):

        input_batch_size, input_channel, input_depth, input_height, input_width = x.shape
        # NOTE(review): this view folds depth into the batch without permuting, which only keeps
        # slices intact when input_channel == 1 - confirm for multi-channel inputs
        x = x.view(input_batch_size * input_depth, input_channel, input_height, input_width)
        x = self.backbone.forward_features(x)
        _, feature_channel, feature_height, feature_width = x.shape

        if self.mil_pooling_type in ('avg', 'max'):
            x = x.contiguous().view(input_batch_size, input_depth, feature_channel, feature_height, feature_width)
            if self.mil_pooling_type == 'avg':
                x = torch.mean(x, dim=1)
            else:
                x = torch.max(x, dim=1)[0]
        elif self.mil_pooling_type == 'concat':
            x = x.contiguous().view(input_batch_size, input_depth * feature_channel, feature_height, feature_width)
        else:
            raise ValueError(f'Invalid MIL pooling type {self.mil_pooling_type}')

        if self.feature_pooling_type == 'avg':
            x = F.adaptive_avg_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
        elif self.feature_pooling_type == 'max':
            x = F.adaptive_max_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
        elif self.feature_pooling_type == 'concat':
            x = torch.cat([
                F.adaptive_avg_pool2d(x, output_size=(1, 1)).view(x.size(0), -1),
                F.adaptive_max_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
            ], dim=-1)
        elif self.feature_pooling_type == 'gem':
            x = self.pooling(x).view(x.size(0), -1)
        elif self.feature_pooling_type == 'attention':
            # Flatten the spatial grid into a sequence of feature vectors
            input_batch_size, feature_channel = x.shape[:2]
            x = x.contiguous().view(input_batch_size, feature_channel, -1).permute(0, 2, 1)
            x = self.pooling(x)
        else:
            raise ValueError(f'Invalid feature pooling type {self.feature_pooling_type}')

        x = self.dropout(x)
        bowel_output, extravasation_output, kidney_output, liver_output, spleen_output = self.head(x)

        return bowel_output, extravasation_output, kidney_output, liver_output, spleen_output


In [9]:
class RNNClassificationModel(nn.Module):

    """
    Slice-wise 2D backbone whose pooled per-slice features are passed through a recurrent
    layer, max pooled over the depth axis and fed to the classification head

    Parameters
    ----------
    model_name: str
        Name of the timm model used as backbone

    pretrained: bool
        Whether to load pretrained backbone weights

    backbone_args: dict
        Additional keyword arguments passed to timm.create_model

    feature_pooling_type: str
        Pooling over the spatial axes ('avg', 'max', 'concat' or 'gem')

    rnn_class: str
        Name of the torch.nn recurrent layer class

    rnn_args: dict
        Keyword arguments of the recurrent layer, 'hidden_size' is required

    dropout_rate: float
        Dropout probability before the head, values <= 0 disable dropout

    freeze_parameters: bool
        Whether to freeze the backbone parameters
    """

    def __init__(self, model_name, pretrained, backbone_args, feature_pooling_type, rnn_class, rnn_args, dropout_rate, freeze_parameters):

        super(RNNClassificationModel, self).__init__()

        self.backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            **backbone_args
        )

        if freeze_parameters:
            for parameter in self.backbone.parameters():
                parameter.requires_grad = False

        self.feature_pooling_type = feature_pooling_type
        input_features = self.backbone.get_classifier().in_features
        self.backbone.classifier = nn.Identity()

        if self.feature_pooling_type == 'gem':
            self.pooling = GeM()
        else:
            self.pooling = nn.Identity()

        # 'concat' pooling joins the avg and max pooled vectors, so the RNN sees twice the features
        rnn_input_features = input_features * 2 if self.feature_pooling_type == 'concat' else input_features
        self.rnn = getattr(nn, rnn_class)(input_size=rnn_input_features, **rnn_args)

        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else nn.Identity()
        # A bidirectional RNN outputs both directions concatenated; a missing key means unidirectional
        input_dimensions = rnn_args['hidden_size'] * (int(rnn_args.get('bidirectional', False)) + 1)
        self.head = ClassificationHead(input_dimensions=input_dimensions)

    def forward(self, x):

        input_batch_size, input_channel, input_depth, input_height, input_width = x.shape
        # NOTE(review): this view folds depth into the batch without permuting, which only keeps
        # slices intact when input_channel == 1 - confirm for multi-channel inputs
        x = x.view(input_batch_size * input_depth, input_channel, input_height, input_width)
        x = self.backbone.forward_features(x)

        if self.feature_pooling_type == 'avg':
            x = F.adaptive_avg_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
        elif self.feature_pooling_type == 'max':
            x = F.adaptive_max_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
        elif self.feature_pooling_type == 'concat':
            x = torch.cat([
                F.adaptive_avg_pool2d(x, output_size=(1, 1)).view(x.size(0), -1),
                F.adaptive_max_pool2d(x, output_size=(1, 1)).view(x.size(0), -1)
            ], dim=-1)
        elif self.feature_pooling_type == 'gem':
            x = self.pooling(x).view(x.size(0), -1)
        else:
            raise ValueError(f'Invalid feature pooling type {self.feature_pooling_type}')

        # Restore the (batch, depth, features) layout; -1 also covers the doubled 'concat' features
        # NOTE(review): presumably rnn_args sets batch_first=True for this layout - confirm in the config
        x = x.contiguous().view(input_batch_size, input_depth, -1)
        x, _ = self.rnn(x)
        x = torch.max(x, dim=1)[0]
        x = self.dropout(x)
        bowel_output, extravasation_output, kidney_output, liver_output, spleen_output = self.head(x)

        return bowel_output, extravasation_output, kidney_output, liver_output, spleen_output


In [10]:
def load_model(model_directory, model_file_names, device):

    """
    Load models and their trained weights from the given model directory

    Parameters
    ----------
    model_directory: pathlib.Path
        Path of the model directory that contains config.yaml and the weights files

    model_file_names: list of str
        Names of the model weights files

    device: torch.device
        Location of the models

    Returns
    -------
    models: dict of {str: torch.nn.Module}
        Models in evaluation mode with weights loaded, keyed by weights file name

    config: dict
        Model configuration read from config.yaml, with pretrained set to False
    """

    with open(model_directory / 'config.yaml', 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    # The trained weights are loaded below, so pretrained backbone weights are not needed
    config['model']['model_args']['pretrained'] = False

    # NOTE(review): eval() resolves the class name stored in the config, so config.yaml must come
    # from a trusted source
    model_class = eval(config['model']['model_class'])

    models = {}

    for model_file_name in tqdm(model_file_names):
        model = model_class(**config['model']['model_args'])
        # Load on CPU first so checkpoints saved on another device can still be read
        state_dict = torch.load(model_directory / model_file_name, map_location='cpu')
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
        models[model_file_name] = model

    return models, config


In [11]:
# Every architecture ships the same five fold checkpoints and is loaded onto the same device
fold_model_file_names = [
    'model_fold1_best.pt',
    'model_fold2_best.pt',
    'model_fold3_best.pt',
    'model_fold4_best.pt',
    'model_fold5_best.pt',
]
cuda_device = torch.device('cuda')

mil_efficientnetb0_models, mil_efficientnetb0_config = load_model(
    model_directory=external_dataset / 'mil_efficientnetb0_3d_1w_contour_cropped_96x256x256',
    model_file_names=fold_model_file_names,
    device=cuda_device
)

mil_densenet121_models, mil_densenet121_config = load_model(
    model_directory=external_dataset / 'mil_densenet121_3d_1w_contour_cropped_96x256x256',
    model_file_names=fold_model_file_names,
    device=cuda_device
)

lstm_efficientnetb0_models, lstm_efficientnetb0_config = load_model(
    model_directory=external_dataset / 'lstm_efficientnetb0_3d_1w_contour_cropped_96x256x256',
    model_file_names=fold_model_file_names,
    device=cuda_device
)

lstm_efficientnetv2t_models, lstm_efficientnetv2t_config = load_model(
    model_directory=external_dataset / 'lstm_efficientnetv2t_3d_1w_contour_cropped_96x256x256',
    model_file_names=fold_model_file_names,
    device=cuda_device
)

100%|██████████| 5/5 [00:07<00:00,  1.44s/it]
100%|██████████| 5/5 [00:02<00:00,  1.84it/s]
100%|██████████| 5/5 [00:04<00:00,  1.13it/s]
100%|██████████| 5/5 [00:04<00:00,  1.07it/s]


## 4. Transforms

In [12]:
def get_3d_classification_transforms(**transform_parameters):

    """
    Build the MONAI transform pipelines of the 3D classification dataset

    Parameters
    ----------
    transform_parameters: dict
        Dictionary of transform parameters, only read by the training pipeline

    Returns
    -------
    transforms: dict
        Composed transforms under the keys 'training' and 'inference'
    """

    parameters = transform_parameters

    # Channel handling followed by random flips and rotations
    spatial_augmentations = [
        T.EnsureChannelFirst(channel_dim=0),
        T.RandFlip(spatial_axis=0, prob=parameters['random_z_flip_probability']),
        T.RandFlip(spatial_axis=1, prob=parameters['random_x_flip_probability']),
        T.RandFlip(spatial_axis=2, prob=parameters['random_y_flip_probability']),
        T.RandRotate90(spatial_axes=(1, 2), max_k=3, prob=parameters['random_axial_rotate_90_probability']),
        T.RandRotate(
            range_x=parameters['random_rotate_range_x'],
            range_y=parameters['random_rotate_range_y'],
            range_z=parameters['random_rotate_range_z'],
            prob=parameters['random_rotate_probability']
        ),
    ]

    # One of the two intensity augmentations is chosen with equal weight
    intensity_augmentation = T.OneOf([
        T.RandHistogramShift(
            num_control_points=parameters['random_histogram_shift_num_control_points'],
            prob=parameters['random_histogram_shift_probability']
        ),
        T.RandAdjustContrast(
            gamma=parameters['random_contrast_gamma'],
            prob=parameters['random_contrast_probability']
        )
    ], weights=(0.5, 0.5))

    # Random crop of fixed size followed by cutout
    crop_and_cutout = [
        T.RandSpatialCrop(roi_size=parameters['crop_roi_size'], max_roi_size=None, random_center=True, random_size=False),
        T.RandCoarseDropout(
            holes=parameters['cutout_holes'],
            spatial_size=parameters['cutout_spatial_size'],
            dropout_holes=True,
            fill_value=0,
            max_holes=parameters['cutout_max_holes'],
            max_spatial_size=parameters['max_spatial_size'],
            prob=parameters['cutout_probability']
        ),
    ]

    training_transforms = T.Compose([
        *spatial_augmentations,
        intensity_augmentation,
        *crop_and_cutout,
        T.ToTensor(dtype=torch.float32, track_meta=False)
    ])

    # Inference only resizes and center crops, with sizes fixed in code
    inference_transforms = T.Compose([
        T.Resize(spatial_size=(96, 256, 256)),
        T.CenterSpatialCrop(roi_size=(-1, 224, 224))
    ])

    return {'training': training_transforms, 'inference': inference_transforms}


## 5. Inference

In [13]:
# Device the inputs are moved to before the forward passes
device = torch.device('cuda')
# Whether the main forward pass runs under torch.cuda.amp.autocast with half precision inputs
amp = True
# Resize every scan to 96x256x256, then center crop the last two axes to 224x224
# (presumably the -1 keeps the first spatial axis at full size - confirm against the MONAI docs)
inference_transforms = T.Compose([
    T.Resize(spatial_size=(96, 256, 256)),
    T.CenterSpatialCrop(roi_size=(-1, 224, 224))
])
# Whether flipped copies of the inputs are predicted and averaged with the original prediction
tta = True
# Dimensions of the 5D input tensor passed to torch.flip, one flipped copy per entry
tta_flip_dimensions = [(2, 3, 4), (2, 3), (2, 4), (3, 4)]

In [14]:
# Patient directories are named with integers, so sort them numerically
dicom_dataset_directory = competition_dataset / 'test_images'
patient_ids = sorted(os.listdir(dicom_dataset_directory), key=int)

# Accumulators filled with one entry per processed scan
bowel_predictions, extravasation_predictions = [], []
kidney_predictions, liver_predictions, spleen_predictions = [], [], []
patient_ids_predictions, scan_ids_predictions = [], []

for patient_id in tqdm(patient_ids):

    patient_directory = dicom_dataset_directory / str(patient_id)
    patient_scans = sorted(os.listdir(patient_directory), key=lambda filename: int(filename))

    for scan_id in patient_scans:

        scan_directory = patient_directory / str(scan_id)
        file_names = sorted(os.listdir(scan_directory), key=lambda x: int(str(x).split('.')[0]))
                        
        if patient_id == '3124' and scan_id == '5842':
            # Remove corrupt DICOM file
            file_names.remove('514.dcm')
            
        z_positions = []
        patient_positions = []
        scan = []
        
        for file_idx, file_name in enumerate(file_names, start=1):
            
            dicom = pydicom.dcmread(str(scan_directory / file_name))

            try:
                patient_position = dicom.PatientPosition
            except AttributeError:
                patient_position = 'FFS'
            
            patient_positions.append(patient_position)
            
            try:
                z_position = float(dicom.ImagePositionPatient[-1])
            except AttributeError:
                z_position = file_idx * -1
                
            z_positions.append(z_position)
            
            image = dicom.pixel_array
            image = adjust_pixel_values(
                image=image, dicom=dicom,
                bits_allocated='dataset', bits_stored='dataset',
                rescale_slope='dataset', rescale_intercept='dataset',
                window_centers=['dataset'], window_widths=['dataset'],
                photometric_interpretation='dataset', max_pixel_value=1
            )
            image = np.squeeze(image, axis=-1)
            scan.append(image)
            
        scan = np.array(scan)
            
        # Sort CT scan slices by head to feet
        sorting_idx_z = np.argsort(z_positions)[::-1]
        scan = scan[sorting_idx_z]
        
        patient_position = pd.Series(patient_positions).value_counts().index[0]
        if patient_position is not None:
            if patient_position == 'HFS':
                # Flip x-axis if patient position is head first
                scan = np.flip(scan, axis=2)

        # Find partial slices by calculating sum of all zero vertical lines
        if scan.shape[0] != 1:
            scan_all_zero_vertical_line_transitions = np.diff(np.all(scan == 0, axis=1).sum(axis=1))
            # Heuristically select high and low transitions on z-axis and drop them
            slices_with_all_zero_vertical_lines = (scan_all_zero_vertical_line_transitions > 5) | (scan_all_zero_vertical_line_transitions < -5)
            slices_with_all_zero_vertical_lines = np.append(slices_with_all_zero_vertical_lines, slices_with_all_zero_vertical_lines[-1])
            scan = scan[~slices_with_all_zero_vertical_lines]
            del scan_all_zero_vertical_line_transitions, slices_with_all_zero_vertical_lines
        
        # Crop the largest contour
        largest_contour_bounding_boxes = np.array([get_largest_contour(image) for image in scan])
        largest_contour_bounding_box = [
            int(largest_contour_bounding_boxes[:, 0].min()),
            int(largest_contour_bounding_boxes[:, 1].min()),
            int(largest_contour_bounding_boxes[:, 2].max()),
            int(largest_contour_bounding_boxes[:, 3].max()),
        ]
        scan = scan[
            :,
            largest_contour_bounding_box[1]:largest_contour_bounding_box[3] + 1,
            largest_contour_bounding_box[0]:largest_contour_bounding_box[2] + 1,
        ]
        
        # Crop non-zero slices along xz, yz and xy planes
        mmin = np.array((scan > 0).nonzero()).min(axis=1)
        mmax = np.array((scan > 0).nonzero()).max(axis=1)
        scan = scan[
            mmin[0]:mmax[0] + 1,
            mmin[1]:mmax[1] + 1,
            mmin[2]:mmax[2] + 1,
        ]
        
        inputs = inference_transforms(torch.from_numpy(np.expand_dims(scan.copy(), axis=0)))
        inputs = torch.unsqueeze(inputs, dim=0)
        inputs /= 255.
        inputs = inputs.to(device)
        
        n_models = 4
        bowel_batch_predictions = torch.zeros(n_models, inputs.shape[0], 1)
        extravasation_batch_predictions = torch.zeros(n_models, inputs.shape[0], 1)
        kidney_batch_predictions = torch.zeros(n_models, inputs.shape[0], 3)
        liver_batch_predictions = torch.zeros(n_models, inputs.shape[0], 3)
        spleen_batch_predictions = torch.zeros(n_models, inputs.shape[0], 3)
        
        for model_idx, models in enumerate([mil_efficientnetb0_models, mil_densenet121_models, lstm_efficientnetb0_models, lstm_efficientnetv2t_models]):
            for model in models.values():
                with torch.no_grad():
                    if amp:
                        with torch.cuda.amp.autocast():
                            bowel_outputs, extravasation_outputs, kidney_outputs, liver_outputs, spleen_outputs = model(inputs.half())
                    else:
                        bowel_outputs, extravasation_outputs, kidney_outputs, liver_outputs, spleen_outputs = model(inputs)

                bowel_outputs = bowel_outputs.cpu()
                extravasation_outputs = extravasation_outputs.cpu()
                kidney_outputs = kidney_outputs.cpu()
                liver_outputs = liver_outputs.cpu()
                spleen_outputs = spleen_outputs.cpu()

                if tta:

                    tta_bowel_outputs = []
                    tta_extravasation_outputs = []
                    tta_kidney_outputs = []
                    tta_liver_outputs = []
                    tta_spleen_outputs = []

                    for dimensions in tta_flip_dimensions:

                        augmented_inputs = torch.flip(inputs, dims=dimensions).to(device)

                        with torch.no_grad():
                            augmented_bowel_outputs, augmented_extravasation_outputs, augmented_kidney_outputs, augmented_liver_outputs, augmented_spleen_outputs = model(augmented_inputs)

                        tta_bowel_outputs.append(augmented_bowel_outputs.cpu())
                        tta_extravasation_outputs.append(augmented_extravasation_outputs.cpu())
                        tta_kidney_outputs.append(augmented_kidney_outputs.cpu())
                        tta_liver_outputs.append(augmented_liver_outputs.cpu())
                        tta_spleen_outputs.append(augmented_spleen_outputs.cpu())

                    bowel_outputs = torch.stack(([bowel_outputs] + tta_bowel_outputs), dim=-1)
                    extravasation_outputs = torch.stack(([extravasation_outputs] + tta_extravasation_outputs), dim=-1)
                    kidney_outputs = torch.stack(([kidney_outputs] + tta_kidney_outputs), dim=-1)
                    liver_outputs = torch.stack(([liver_outputs] + tta_liver_outputs), dim=-1)
                    spleen_outputs = torch.stack(([spleen_outputs] + tta_spleen_outputs), dim=-1)

                    bowel_outputs = torch.mean(bowel_outputs, dim=-1)
                    extravasation_outputs = torch.mean(extravasation_outputs, dim=-1)
                    kidney_outputs = torch.mean(kidney_outputs, dim=-1)
                    liver_outputs = torch.mean(liver_outputs, dim=-1)
                    spleen_outputs = torch.mean(spleen_outputs, dim=-1)
                
                bowel_batch_predictions[model_idx] += bowel_outputs / len(models)
                extravasation_batch_predictions[model_idx] += extravasation_outputs / len(models)
                kidney_batch_predictions[model_idx] += kidney_outputs / len(models)
                liver_batch_predictions[model_idx] += liver_outputs / len(models)
                spleen_batch_predictions[model_idx] += spleen_outputs / len(models)
                
        bowel_predictions += [bowel_batch_predictions]
        extravasation_predictions += [extravasation_batch_predictions]
        kidney_predictions += [kidney_batch_predictions]
        liver_predictions += [liver_batch_predictions]
        spleen_predictions += [spleen_batch_predictions]
        
        patient_ids_predictions.append(patient_id)
        scan_ids_predictions.append(scan_id)

# Stack the per-scan tensors collected in the loop along a new leading axis and
# turn the accumulated model outputs into probabilities: sigmoid for the two
# single-output targets, softmax over the last (class) axis for the three
# 3-class targets. The results are NumPy arrays.
bowel_predictions = torch.stack(bowel_predictions).sigmoid().numpy()
extravasation_predictions = torch.stack(extravasation_predictions).sigmoid().numpy()
kidney_predictions = torch.stack(kidney_predictions).softmax(dim=-1).numpy()
liver_predictions = torch.stack(liver_predictions).softmax(dim=-1).numpy()
spleen_predictions = torch.stack(spleen_predictions).softmax(dim=-1).numpy()

# Identifiers become column vectors so they can be concatenated with the
# prediction columns downstream.
patient_ids_predictions = np.reshape(np.array(patient_ids_predictions), (-1, 1))
scan_ids_predictions = np.reshape(np.array(scan_ids_predictions), (-1, 1))

100%|██████████| 3/3 [00:32<00:00, 10.94s/it]


In [15]:
# Tabulate the predictions of the MIL EfficientNet-B0 ensemble, which sits at
# index 0 of the model axis (axis 1) of every prediction array. Identifiers and
# predictions are concatenated column-wise, then cast to their final dtypes.
id_columns = ['patient_id', 'scan_id']
target_columns = [
    'bowel_injury', 'extravasation_injury',
    'kidney_healthy', 'kidney_low', 'kidney_high',
    'liver_healthy', 'liver_low', 'liver_high',
    'spleen_healthy', 'spleen_low', 'spleen_high',
]

df_mil_efficientnetb0_predictions = pd.DataFrame(
    np.concatenate([
        patient_ids_predictions,
        scan_ids_predictions,
        bowel_predictions[:, 0].reshape(-1, 1),
        extravasation_predictions[:, 0].reshape(-1, 1),
        kidney_predictions[:, 0].reshape(-1, 3),
        liver_predictions[:, 0].reshape(-1, 3),
        spleen_predictions[:, 0].reshape(-1, 3),
    ], axis=1),
    columns=id_columns + target_columns,
).astype({
    # Identifiers are integers, every prediction column is single precision
    **dict.fromkeys(id_columns, np.int64),
    **dict.fromkeys(target_columns, np.float32),
})

df_mil_efficientnetb0_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,62825,0.164637,0.104537,0.819386,0.09168,0.088935,0.837702,0.159296,0.003002,0.537523,0.399659,0.062818
1,50046,24574,0.204688,0.317012,0.873833,0.072329,0.053838,0.928078,0.065482,0.00644,0.719728,0.181671,0.098601
2,63706,39279,0.069994,0.092992,0.746002,0.154351,0.099647,0.658108,0.335896,0.005996,0.785513,0.199809,0.014678


In [16]:
# Tabulate the predictions of the MIL DenseNet-121 ensemble, which sits at
# index 1 of the model axis (axis 1) of every prediction array. Identifiers and
# predictions are concatenated column-wise, then cast to their final dtypes.
id_columns = ['patient_id', 'scan_id']
target_columns = [
    'bowel_injury', 'extravasation_injury',
    'kidney_healthy', 'kidney_low', 'kidney_high',
    'liver_healthy', 'liver_low', 'liver_high',
    'spleen_healthy', 'spleen_low', 'spleen_high',
]

df_mil_densenet121_predictions = pd.DataFrame(
    np.concatenate([
        patient_ids_predictions,
        scan_ids_predictions,
        bowel_predictions[:, 1].reshape(-1, 1),
        extravasation_predictions[:, 1].reshape(-1, 1),
        kidney_predictions[:, 1].reshape(-1, 3),
        liver_predictions[:, 1].reshape(-1, 3),
        spleen_predictions[:, 1].reshape(-1, 3),
    ], axis=1),
    columns=id_columns + target_columns,
).astype({
    # Identifiers are integers, every prediction column is single precision
    **dict.fromkeys(id_columns, np.int64),
    **dict.fromkeys(target_columns, np.float32),
})

df_mil_densenet121_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,62825,0.120605,0.244441,0.736048,0.154959,0.108994,0.309662,0.421304,0.269034,0.515987,0.335879,0.148134
1,50046,24574,0.264846,0.4523,0.579204,0.258205,0.162591,0.706886,0.201292,0.091821,0.420232,0.302215,0.277553
2,63706,39279,0.274485,0.273311,0.563648,0.265263,0.171089,0.647463,0.323459,0.029078,0.557635,0.340355,0.10201


In [17]:
# Tabulate the predictions of the LSTM EfficientNet-B0 ensemble, which sits at
# index 2 of the model axis (axis 1) of every prediction array. Identifiers and
# predictions are concatenated column-wise, then cast to their final dtypes.
id_columns = ['patient_id', 'scan_id']
target_columns = [
    'bowel_injury', 'extravasation_injury',
    'kidney_healthy', 'kidney_low', 'kidney_high',
    'liver_healthy', 'liver_low', 'liver_high',
    'spleen_healthy', 'spleen_low', 'spleen_high',
]

df_lstm_efficientnetb0_predictions = pd.DataFrame(
    np.concatenate([
        patient_ids_predictions,
        scan_ids_predictions,
        bowel_predictions[:, 2].reshape(-1, 1),
        extravasation_predictions[:, 2].reshape(-1, 1),
        kidney_predictions[:, 2].reshape(-1, 3),
        liver_predictions[:, 2].reshape(-1, 3),
        spleen_predictions[:, 2].reshape(-1, 3),
    ], axis=1),
    columns=id_columns + target_columns,
).astype({
    # Identifiers are integers, every prediction column is single precision
    **dict.fromkeys(id_columns, np.int64),
    **dict.fromkeys(target_columns, np.float32),
})

df_lstm_efficientnetb0_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,62825,0.15375,0.146527,0.805675,0.124133,0.070192,0.850459,0.134133,0.015409,0.616913,0.278651,0.104436
1,50046,24574,0.247409,0.264111,0.799575,0.146055,0.05437,0.863379,0.113796,0.022825,0.677229,0.218331,0.10444
2,63706,39279,0.156947,0.196334,0.753282,0.181841,0.064877,0.816679,0.163509,0.019812,0.805249,0.162147,0.032604


In [18]:
# Tabulate the predictions of the LSTM EfficientNetV2-T ensemble, which sits at
# index 3 of the model axis (axis 1) of every prediction array. Identifiers and
# predictions are concatenated column-wise, then cast to their final dtypes.
id_columns = ['patient_id', 'scan_id']
target_columns = [
    'bowel_injury', 'extravasation_injury',
    'kidney_healthy', 'kidney_low', 'kidney_high',
    'liver_healthy', 'liver_low', 'liver_high',
    'spleen_healthy', 'spleen_low', 'spleen_high',
]

df_lstm_efficientnetv2t_predictions = pd.DataFrame(
    np.concatenate([
        patient_ids_predictions,
        scan_ids_predictions,
        bowel_predictions[:, 3].reshape(-1, 1),
        extravasation_predictions[:, 3].reshape(-1, 1),
        kidney_predictions[:, 3].reshape(-1, 3),
        liver_predictions[:, 3].reshape(-1, 3),
        spleen_predictions[:, 3].reshape(-1, 3),
    ], axis=1),
    columns=id_columns + target_columns,
).astype({
    # Identifiers are integers, every prediction column is single precision
    **dict.fromkeys(id_columns, np.int64),
    **dict.fromkeys(target_columns, np.float32),
})

df_lstm_efficientnetv2t_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,62825,0.298447,0.342589,0.716321,0.171425,0.112254,0.563836,0.334755,0.101409,0.642525,0.235669,0.121805
1,50046,24574,0.129184,0.192317,0.860961,0.095597,0.043441,0.870007,0.092623,0.03737,0.821583,0.122738,0.055679
2,63706,39279,0.34644,0.369482,0.713816,0.191068,0.095115,0.574562,0.323844,0.101594,0.590749,0.272673,0.136577


## 6. Post-processing

In [19]:
# Blend the four per-architecture prediction tables into a single table.
# Every target column is a weighted sum of the same column across the four
# tables; each target group (bowel, extravasation, kidney, liver, spleen) has
# its own set of model weights.
df_predictions = pd.DataFrame({
    'patient_id': patient_ids_predictions.reshape(-1).astype(int),
    'scan_id': scan_ids_predictions.reshape(-1).astype(int),
})

# Per-group model weights
mil_efficientnetb0_bowel_weight = 0.45
mil_densenet121_bowel_weight = 0.25
lstm_efficientnetb0_bowel_weight = 0.15
lstm_efficientnetv2t_bowel_weight = 0.15

mil_efficientnetb0_extravasation_weight = 0.3
mil_densenet121_extravasation_weight = 0.3
lstm_efficientnetb0_extravasation_weight = 0.3
lstm_efficientnetv2t_extravasation_weight = 0.1

mil_efficientnetb0_kidney_weight = 0.25
mil_densenet121_kidney_weight = 0.25
lstm_efficientnetb0_kidney_weight = 0.25
lstm_efficientnetv2t_kidney_weight = 0.25

mil_efficientnetb0_liver_weight = 0.25
mil_densenet121_liver_weight = 0.25
lstm_efficientnetb0_liver_weight = 0.25
lstm_efficientnetv2t_liver_weight = 0.25

mil_efficientnetb0_spleen_weight = 0.25
mil_densenet121_spleen_weight = 0.25
lstm_efficientnetb0_spleen_weight = 0.25
lstm_efficientnetv2t_spleen_weight = 0.25

# Tables in the same order as the weights inside each blend plan entry
model_prediction_tables = (
    df_mil_efficientnetb0_predictions,
    df_mil_densenet121_predictions,
    df_lstm_efficientnetb0_predictions,
    df_lstm_efficientnetv2t_predictions,
)

# (columns to blend, weights of the four tables) listed in output column order
blend_plan = [
    (
        ['bowel_injury'],
        (mil_efficientnetb0_bowel_weight, mil_densenet121_bowel_weight,
         lstm_efficientnetb0_bowel_weight, lstm_efficientnetv2t_bowel_weight),
    ),
    (
        ['extravasation_injury'],
        (mil_efficientnetb0_extravasation_weight, mil_densenet121_extravasation_weight,
         lstm_efficientnetb0_extravasation_weight, lstm_efficientnetv2t_extravasation_weight),
    ),
    (
        ['kidney_healthy', 'kidney_low', 'kidney_high'],
        (mil_efficientnetb0_kidney_weight, mil_densenet121_kidney_weight,
         lstm_efficientnetb0_kidney_weight, lstm_efficientnetv2t_kidney_weight),
    ),
    (
        ['liver_healthy', 'liver_low', 'liver_high'],
        (mil_efficientnetb0_liver_weight, mil_densenet121_liver_weight,
         lstm_efficientnetb0_liver_weight, lstm_efficientnetv2t_liver_weight),
    ),
    (
        ['spleen_healthy', 'spleen_low', 'spleen_high'],
        (mil_efficientnetb0_spleen_weight, mil_densenet121_spleen_weight,
         lstm_efficientnetb0_spleen_weight, lstm_efficientnetv2t_spleen_weight),
    ),
]

for blend_columns, model_weights in blend_plan:
    for blend_column in blend_columns:
        # Accumulate the weighted terms strictly left to right so the
        # floating point result matches a chained a + b + c + d expression
        blended_column = None
        for df_model_predictions, model_weight in zip(model_prediction_tables, model_weights):
            weighted_column = df_model_predictions[blend_column] * model_weight
            blended_column = weighted_column if blended_column is None else blended_column + weighted_column
        df_predictions[blend_column] = blended_column

df_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,62825,0.172067,0.182911,0.769357,0.135549,0.095093,0.640415,0.262372,0.097213,0.578237,0.312464,0.109298
1,50046,24574,0.21481,0.329259,0.778393,0.143047,0.07856,0.842088,0.118298,0.039614,0.659693,0.206239,0.134068
2,63706,39279,0.175627,0.205739,0.694187,0.198131,0.107682,0.674203,0.286677,0.03912,0.684787,0.243746,0.071467


In [20]:
# NOTE: this cell updates df_predictions in place, so running it a second time
# without re-running the blending cell recomputes the healthy columns from
# already-scaled values and applies the multipliers again.

# Healthy probabilities of the two binary targets are the complements of the
# blended injury probabilities, so they are derived before any scaling.
for binary_target in ('bowel', 'extravasation'):
    df_predictions[f'{binary_target}_healthy'] = 1 - df_predictions[f'{binary_target}_injury']

# Multipliers applied to the injury-side columns; healthy columns are left as is
injury_scale_factors = {
    'bowel_injury': 1.,
    'extravasation_injury': 1.4,
    'kidney_low': 1.1,
    'kidney_high': 1.1,
    'liver_low': 1.3,
    'liver_high': 1.3,
    'spleen_low': 1.75,
    'spleen_high': 1.75,
}
for scaled_column, scale_factor in injury_scale_factors.items():
    df_predictions[scaled_column] = df_predictions[scaled_column] * scale_factor

df_predictions

Unnamed: 0,patient_id,scan_id,bowel_injury,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,bowel_healthy,extravasation_healthy
0,48843,62825,0.172067,0.256075,0.769357,0.149104,0.104603,0.640415,0.341083,0.126377,0.578237,0.546813,0.191272,0.827933,0.817089
1,50046,24574,0.21481,0.460962,0.778393,0.157351,0.086416,0.842088,0.153788,0.051499,0.659693,0.360918,0.23462,0.78519,0.670741
2,63706,39279,0.175627,0.288035,0.694187,0.217944,0.11845,0.674203,0.37268,0.050856,0.684787,0.426556,0.125068,0.824373,0.794261


In [21]:
# Submission columns: healthy/injury for the binary targets followed by
# healthy/low/high for the three graded targets.
prediction_columns = [
    f'{target}_{label}'
    for target in ('bowel', 'extravasation')
    for label in ('healthy', 'injury')
] + [
    f'{target}_{label}'
    for target in ('kidney', 'liver', 'spleen')
    for label in ('healthy', 'low', 'high')
]

# Collapse the scan-level rows to one row per patient by taking the
# column-wise maximum over that patient's rows, then restore patient_id as a
# regular column.
patient_level_maximums = df_predictions.groupby('patient_id')[prediction_columns].max()
df_predictions = patient_level_maximums.reset_index()

df_predictions

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,0.827933,0.172067,0.817089,0.256075,0.769357,0.149104,0.104603,0.640415,0.341083,0.126377,0.578237,0.546813,0.191272
1,50046,0.78519,0.21481,0.670741,0.460962,0.778393,0.157351,0.086416,0.842088,0.153788,0.051499,0.659693,0.360918,0.23462
2,63706,0.824373,0.175627,0.794261,0.288035,0.694187,0.217944,0.11845,0.674203,0.37268,0.050856,0.684787,0.426556,0.125068


## 7. Submission

In [22]:
# Left-join the patient-level predictions onto the patient_id column of the
# frame loaded from sample_submission.csv; a patient_id without a match in
# df_predictions ends up with NaN prediction values.
df = pd.merge(df[['patient_id']], df_predictions, how='left', on='patient_id')
df

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,0.827933,0.172067,0.817089,0.256075,0.769357,0.149104,0.104603,0.640415,0.341083,0.126377,0.578237,0.546813,0.191272
1,50046,0.78519,0.21481,0.670741,0.460962,0.778393,0.157351,0.086416,0.842088,0.153788,0.051499,0.659693,0.360918,0.23462
2,63706,0.824373,0.175627,0.794261,0.288035,0.694187,0.217944,0.11845,0.674203,0.37268,0.050856,0.684787,0.426556,0.125068


In [23]:
# Write the submission table to submission.csv (relative path) without the index column
df.to_csv(path_or_buf='submission.csv', index=False)