In [4]:
"""
[V1]
* resnest50_fast_2s2x40d
* Add Max./Min. Channels

[V2]
* resnest50_fast_2s2x40d
* final_drop = 0.2
* dropblock_prob = 0.0

[TODO]
* Separate gene expression, cell viability and other features
* PCGrad (Project Conflicting Gradients)
* Tuning resolution and image size

ResNeSt:
https://github.com/zhanghang1989/ResNeSt
"""

kernel_mode = False
training_mode = True

import sys
if kernel_mode:
    sys.path.insert(0, "../input/iterative-stratification")
    sys.path.insert(0, "../input/pytorch-lightning")
    sys.path.insert(0, "../input/resnest")
    sys.path.insert(0, "../input/pytorch-optimizer")
    sys.path.insert(0, "../input/pytorch-ranger")

import os
import numpy as np
import pandas as pd
import time
import random
import math
import pickle
from pickle import dump, load
import glob

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import get_cmap
from matplotlib import rcParams

import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler, \
    RobustScaler, QuantileTransformer, PowerTransformer
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
from torch.autograd import Function
import torch.optim as optim
from torch.nn import Linear, BatchNorm1d, ReLU
from torchvision import transforms

import torch_optimizer

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.metrics.functional import classification

import resnest
from resnest.torch import resnest50, resnest101, resnest200, resnest269, \
    resnest50_fast_2s2x40d, resnest50_fast_1s2x40d, resnest50_fast_1s1x64d

import cv2
import imgaug as ia
from imgaug.augmenters.size import CropToFixedSize

import warnings
warnings.filterwarnings('ignore')

pd.options.display.max_columns = None
sns.set(style="darkgrid")

import gc
gc.enable()

rand_seed = 1120

print(f"PyTorch Version: {torch.__version__}")
print(f"PyTorch Lightning Version: {pl.__version__}")

PyTorch Version: 1.6.0+cu101
PyTorch Lightning Version: 1.0.4


In [None]:
# if kernel_mode:
#     !mkdir -p /root/.cache/torch/hub/checkpoints/
#     !cp ../input/deepinsight-resnest-v2-resnest50-output/*.pth /root/.cache/torch/hub/checkpoints/
#     !ls -la /root/.cache/torch/hub/checkpoints/

In [5]:
# Identity of the backbone and of this experiment run.
model_type = "resnest50"
pretrained_model = "resnest50_fast_2s2x40d"
experiment_name = f"deepinsight_ResNeSt_v2_{model_type}"

# Resolve the dataset location and the folder holding per-fold artifacts.
# When training, artifacts are written to a fresh experiment folder; otherwise
# they are read from a previously completed run.
if kernel_mode:
    dataset_folder = "../input/lish-moa"
    if training_mode:
        model_output_folder = f"./{experiment_name}"
    else:
        model_output_folder = f"../input/deepinsight-resnest-v2-resnest50-output/{experiment_name}"
else:
    dataset_folder = "/workspace/Kaggle/MoA"
    if training_mode:
        model_output_folder = f"{dataset_folder}/{experiment_name}"
    else:
        model_output_folder = f"/workspace/Kaggle/MoA/completed/deepinsight_ResNeSt_v2_resnest50/{experiment_name}"

if training_mode:
    os.makedirs(model_output_folder, exist_ok=True)

    # Experiment-wide TensorBoard logger; it is only created in training mode.
    exp_logger = TensorBoardLogger(model_output_folder,
                                   name="overall_logs",
                                   default_hp_metric=False)

# debug_mode = True
debug_mode = False

# Number of DataLoader worker processes
num_workers = 2 if kernel_mode else 6
# gpus = [0, 1]
gpus = [0]
# gpus = [1]

epochs = 200
patience = 16

# learning_rate = 1e-3
learning_rate = 0.000352  # Suggested Learning Rate from LR finder (V7)
# Scale the base learning rate linearly with the number of GPUs in use
learning_rate *= len(gpus)
weight_decay = 1e-6
# weight_decay = 0

# T_max = 10  # epochs
T_max = 5  # epochs
T_0 = 5  # epochs

accumulate_grad_batches = 1
gradient_clip_val = 10.0

# Per-backbone batch sizes and image geometry. Note that the first branch is a
# substring match while the others compare for equality.
if "resnest50" in model_type:
    batch_size = 128
    infer_batch_size = 256
    image_size = 224
    resolution = 224
elif model_type == "resnest101":
    batch_size = 48
    infer_batch_size = 96
    image_size = 256
    resolution = 256
elif model_type == "resnest200":
    batch_size = 12
    infer_batch_size = 24
    image_size = 320
    resolution = 320
elif model_type == "resnest269":
    batch_size = 4
    infer_batch_size = 8
    image_size = 416
    resolution = 416
else:
    # Fail here instead of with a NameError on batch_size/image_size much later
    raise ValueError(f"Unsupported model_type: {model_type!r}")

# Prediction Clipping Thresholds
prob_min = 0.001
prob_max = 0.999

# Swap Noise
swap_prob = 0.1
swap_portion = 0.15

label_smoothing = 0.001

# DeepInsight Transform
perplexity = 5

# Width of the hidden layer in the classification head
fc_size = 512

final_drop = 0.2
dropblock_prob = 0.0

In [6]:
# Read the five competition tables from dataset_folder in one pass.
# The generator is consumed left to right, so the files are read in the same
# order as the names are listed.
(train_features,
 train_labels,
 train_extra_labels,
 test_features,
 sample_submission) = (
     pd.read_csv(f"{dataset_folder}/{table_name}.csv", engine='c')
     for table_name in ("train_features",
                        "train_targets_scored",
                        "train_targets_nonscored",
                        "test_features",
                        "sample_submission"))

In [4]:
# Sort by sig_id to ensure that all row orders match
# Sort every table by sig_id so that row i refers to the same sample in all
# of them.
train_features = train_features.sort_values(
    by="sig_id").reset_index(drop=True)
train_labels = train_labels.sort_values(by="sig_id").reset_index(drop=True)
train_extra_labels = train_extra_labels.sort_values(
    by="sig_id").reset_index(drop=True)

# test_features was previously left unsorted while sample_submission was
# sorted. Sort both so their rows stay aligned.
# NOTE(review): presumably predictions are written into sample_submission by
# position further down -- confirm.
test_features = test_features.sort_values(
    by="sig_id").reset_index(drop=True)
sample_submission = sample_submission.sort_values(
    by="sig_id").reset_index(drop=True)

In [5]:
train_features.shape, train_labels.shape, train_extra_labels.shape

((23814, 876), (23814, 207), (23814, 403))

In [6]:
test_features.shape

(3982, 876)

In [7]:
# Categorical treatment metadata columns.
category_features = ["cp_type", "cp_dose"]

# Every remaining column except the sample id is treated as numeric
# (this includes cp_time).
numeric_features = [
    col for col in train_features.columns
    if col not in ["sig_id", *category_features]
]
all_features = category_features + numeric_features

# Column-name prefixes identify the two measurement families.
# (The spelling of `gene_experssion_features` is kept because the name may be
# referenced elsewhere in the notebook.)
gene_experssion_features = list(
    filter(lambda col: col.startswith("g-"), numeric_features))
cell_viability_features = list(
    filter(lambda col: col.startswith("c-"), numeric_features))
len(numeric_features), len(gene_experssion_features), len(cell_viability_features)

(873, 772, 100)

In [8]:
# Target column names: every column of each label table except the sample id.
train_classes, train_extra_classes = (
    [name for name in label_table.columns if name != "sig_id"]
    for label_table in (train_labels, train_extra_labels))
len(train_classes), len(train_extra_classes)

(206, 402)

## Label Encoding

In [9]:
# Raw value -> numeric code for each categorical / ordinal column.
feature_encodings = {
    'cp_type': {'ctl_vehicle': 0, 'trt_cp': 1},
    'cp_dose': {'D1': 0, 'D2': 1},
    'cp_time': {24: 0, 48: 0.5, 72: 1},
}

for df in [train_features, test_features]:
    for column, mapping in feature_encodings.items():
        # Encode only while the column still contains raw values. Without
        # this guard, re-running the cell would map the already-encoded
        # values to NaN, because they are not keys of the mapping.
        if df[column].isin(list(mapping)).any():
            df[column] = df[column].map(mapping)

In [10]:
train_features["cp_type"].value_counts()

1    21948
0     1866
Name: cp_type, dtype: int64

In [11]:
train_features["cp_dose"].value_counts()

0    12147
1    11667
Name: cp_dose, dtype: int64

In [12]:
train_features["cp_time"].value_counts()

0.5    8250
1.0    7792
0.0    7772
Name: cp_time, dtype: int64

## DeepInsight Transform (t-SNE)
Based on https://github.com/alok-ai-lab/DeepInsight, but with some minor corrections

### Implementation

In [13]:
# Modified from DeepInsight Transform
# https://github.com/alok-ai-lab/DeepInsight/blob/master/pyDeepInsight/image_transformer.py

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE
from scipy.spatial import ConvexHull
from matplotlib import pyplot as plt
import inspect


class DeepInsightTransformer:
    """Transform features to an image matrix using dimensionality reduction

    The features (not the samples) are embedded in 2-D, the embedding is
    rotated so that its bounding rectangle is axis aligned, and every feature
    is assigned to one pixel of the image grid. Samples are then rendered by
    writing their feature values, clipped to [0, 1], into those pixels.

    This class takes in data normalized between 0 and 1 and converts it to a
    CNN compatible 'image' matrix

    """
    def __init__(self,
                 feature_extractor='tsne',
                 perplexity=30,
                 pixels=100,
                 random_state=None,
                 n_jobs=None):
        """Generate a DeepInsightTransformer instance

        Args:
            feature_extractor: string of value ('tsne', 'tsne_exact', 'pca',
                'kpca'; case-insensitive) or a class instance with method
                `fit_transform` that returns a 2-dimensional array of
                extracted features.
            perplexity: perplexity passed to t-SNE; only used by the 'tsne'
                and 'tsne_exact' extractors.
            pixels: int (square matrix) or tuple of ints (height, width) that
                defines the size of the image matrix.
            random_state: int or RandomState. Determines the random number
                generator, if present, of a string defined feature_extractor.
            n_jobs: The number of parallel jobs to run for a string defined
                feature_extractor.

        Raises:
            ValueError: if `feature_extractor` is an unknown string.
            TypeError: if `feature_extractor` is neither a string nor an
                object with a bound `fit_transform` method.
        """
        self.random_state = random_state
        self.n_jobs = n_jobs

        if isinstance(feature_extractor, str):
            fe = feature_extractor.casefold()
            if fe in ('tsne', 'tsne_exact'):
                # Both t-SNE flavours share every setting except the method
                # used to compute the gradient.
                fe = TSNE(n_components=2,
                          metric='cosine',
                          perplexity=perplexity,
                          n_iter=1000,
                          method='exact' if fe == 'tsne_exact' else 'barnes_hut',
                          random_state=self.random_state,
                          n_jobs=self.n_jobs)
            elif fe == 'pca':
                fe = PCA(n_components=2, random_state=self.random_state)
            elif fe == 'kpca':
                fe = KernelPCA(n_components=2,
                               kernel='rbf',
                               random_state=self.random_state,
                               n_jobs=self.n_jobs)
            else:
                raise ValueError(("Feature extraction method '{}' not accepted"
                                  ).format(feature_extractor))
            self._fe = fe
        elif hasattr(feature_extractor, 'fit_transform') and \
                inspect.ismethod(feature_extractor.fit_transform):
            self._fe = feature_extractor
        else:
            raise TypeError('Parameter feature_extractor is not a '
                            'string nor has method "fit_transform"')

        if isinstance(pixels, int):
            pixels = (pixels, pixels)

        # The resolution of transformed image
        self._pixels = pixels
        # Rotated 2-D embedding of the features; populated by fit()
        self._xrot = None

    def fit(self, X, y=None, plot=False):
        """Train the image transformer from the training set (X)

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            y: Ignored. Present for continuity with scikit-learn
            plot: boolean of whether to produce a scatter plot showing the
                feature reduction, hull points, and minimum bounding rectangle

        Returns:
            self: object
        """
        # Transpose to get (n_features, n_samples)
        X = X.T

        # Perform dimensionality reduction
        x_new = self._fe.fit_transform(X)

        # Get the convex hull for the points
        chvertices = ConvexHull(x_new).vertices
        hull_points = x_new[chvertices]

        # Determine the minimum bounding rectangle
        mbr, mbr_rot = self._minimum_bounding_rectangle(hull_points)

        # Rotate the matrix
        # Save the rotated matrix in case user wants to change the pixel size
        self._xrot = np.dot(mbr_rot, x_new.T).T

        # Determine feature coordinates based on pixel dimension
        self._calculate_coords()

        # plot rotation diagram if requested
        if plot is True:
            # Create subplots
            fig, ax = plt.subplots(1, 1, figsize=(10, 7), squeeze=False)
            # No `cmap` here: without a `c` argument matplotlib ignores it
            # and only emits a warning.
            ax[0, 0].scatter(x_new[:, 0],
                             x_new[:, 1],
                             marker="x",
                             alpha=1.0)
            ax[0, 0].fill(x_new[chvertices, 0],
                          x_new[chvertices, 1],
                          edgecolor='r',
                          fill=False)
            ax[0, 0].fill(mbr[:, 0], mbr[:, 1], edgecolor='g', fill=False)
            plt.gca().set_aspect('equal', adjustable='box')
            plt.show()
        return self

    @property
    def pixels(self):
        """The image matrix dimensions

        Returns:
            tuple: the image matrix dimensions (height, width)

        """
        return self._pixels

    @pixels.setter
    def pixels(self, pixels):
        """Set the image matrix dimension

        Args:
            pixels: int or tuple with the dimensions (height, width)
            of the image matrix

        """
        if isinstance(pixels, int):
            pixels = (pixels, pixels)
        self._pixels = pixels
        # recalculate coordinates if already fit
        # (`_coords` only exists once fit() has run)
        if hasattr(self, '_coords'):
            self._calculate_coords()

    @staticmethod
    def _bin_axis(values, n_bins):
        """Assign each value a 0-based bin index along one image axis.

        The bin edges are `n_bins` evenly spaced points between the minimum
        and maximum of `values`.
        """
        edges = np.linspace(values.min(), values.max(), n_bins)
        return np.digitize(values, bins=edges) - 1

    def _calculate_coords(self):
        """Calculate the matrix coordinates of each feature based on the
        pixel dimensions.

        Stores an array of shape (2, n_features) in `self._coords`: row 0
        holds the index along image axis 0, row 1 the index along axis 1.
        """
        self._coords = np.stack((
            self._bin_axis(self._xrot[:, 0], self._pixels[0]),
            self._bin_axis(self._xrot[:, 1], self._pixels[1]),
        ))

    def transform(self, X, empty_value=0):
        """Transform the input matrix into image matrices

        Features that fall on the same pixel are averaged.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                where n_features matches the training set.
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """

        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        # Columns 0 and 1 hold the pixel coordinates, columns 2.. one sample
        # each; values are clipped to [0, 1] before aggregation.
        img_coords = pd.DataFrame(np.vstack(
            (self._coords, X.clip(0, 1))).T).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).mean()

        img_matrices = []
        blank_mat = np.zeros(self._pixels)
        if empty_value != 0:
            blank_mat[:] = empty_value
        for z in range(2, img_coords.shape[1]):
            img_matrix = blank_mat.copy()
            img_matrix[img_coords[0].astype(int),
                       img_coords[1].astype(int)] = img_coords[z]
            img_matrices.append(img_matrix)

        return img_matrices

    def transform_3d(self, X, empty_value=0):
        """Transform the input matrix into 3-channel image matrices

        For every pixel, the features mapped to it are aggregated three ways:
        channel 0 holds their mean, channel 1 their minimum and channel 2
        their maximum.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                where n_features matches the training set.
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy arrays of shape (3, height, width),
            where (height, width) is set by the pixel parameter
        """

        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        img_coords = pd.DataFrame(np.vstack(
            (self._coords, X.clip(0, 1))).T).groupby(
                [0, 1],  # (x1, y1)
                as_index=False)
        avg_img_coords = img_coords.mean()
        min_img_coords = img_coords.min()
        max_img_coords = img_coords.max()

        img_matrices = []
        blank_mat = np.zeros((3, self._pixels[0], self._pixels[1]))
        if empty_value != 0:
            blank_mat[:, :, :] = empty_value
        for z in range(2, avg_img_coords.shape[1]):
            img_matrix = blank_mat.copy()
            img_matrix[0, avg_img_coords[0].astype(int),
                       avg_img_coords[1].astype(int)] = avg_img_coords[z]
            img_matrix[1, min_img_coords[0].astype(int),
                       min_img_coords[1].astype(int)] = min_img_coords[z]
            img_matrix[2, max_img_coords[0].astype(int),
                       max_img_coords[1].astype(int)] = max_img_coords[z]
            img_matrices.append(img_matrix)

        return img_matrices

    def fit_transform(self, X, empty_value=0):
        """Train the image transformer from the training set (X) and return
        the transformed data.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """
        self.fit(X)
        return self.transform(X, empty_value=empty_value)

    def fit_transform_3d(self, X, empty_value=0):
        """Train the image transformer from the training set (X) and return
        the 3-channel (mean / min / max) transformed data.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy arrays of shape (3, height, width),
            where (height, width) is set by the pixel parameter
        """
        self.fit(X)
        return self.transform_3d(X, empty_value=empty_value)

    def feature_density_matrix(self):
        """Generate image matrix with feature counts per pixel

        Returns:
            img_matrix (ndarray): matrix with feature counts per pixel
        """
        fdmat = np.zeros(self._pixels)
        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        coord_cnt = (
            pd.DataFrame(self._coords.T).assign(count=1).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).count())
        fdmat[coord_cnt[0].astype(int),
              coord_cnt[1].astype(int)] = coord_cnt['count']
        return fdmat

    @staticmethod
    def _minimum_bounding_rectangle(hull_points):
        """Find the smallest bounding rectangle for a set of points.

        Modified from JesseBuesking at https://stackoverflow.com/a/33619018
        Returns a set of points representing the corners of the bounding box.

        A minimum-area bounding rectangle has one side collinear with an
        edge of the convex hull, so every hull edge direction is tried,
        including the closing edge from the last vertex back to the first.

        Args:
            hull_points : an nx2 matrix of hull coordinates

        Returns:
            (tuple): tuple containing
                coords (ndarray): coordinates of the corners of the rectangle
                rotmat (ndarray): rotation matrix to align edges of rectangle
                    to x and y
        """

        pi2 = np.pi / 2.

        # Calculate edge angles
        # Close the polygon first: without the last->first edge one candidate
        # orientation is never evaluated and the result may not be minimal.
        closed_hull = np.vstack((hull_points, hull_points[:1]))
        edges = closed_hull[1:] - closed_hull[:-1]
        angles = np.arctan2(edges[:, 1], edges[:, 0])
        # Orientations that differ by 90 degrees give the same rectangle
        angles = np.abs(np.mod(angles, pi2))
        angles = np.unique(angles)

        # Find rotation matrices
        # Each row becomes [[cos a, sin a], [-sin a, cos a]] after the reshape
        rotations = np.vstack([
            np.cos(angles),
            np.cos(angles - pi2),
            np.cos(angles + pi2),
            np.cos(angles)
        ]).T
        rotations = rotations.reshape((-1, 2, 2))

        # Apply rotations to the hull
        rot_points = np.dot(rotations, hull_points.T)

        # Find the bounding points
        min_x = np.nanmin(rot_points[:, 0], axis=1)
        max_x = np.nanmax(rot_points[:, 0], axis=1)
        min_y = np.nanmin(rot_points[:, 1], axis=1)
        max_y = np.nanmax(rot_points[:, 1], axis=1)

        # Find the box with the best area
        areas = (max_x - min_x) * (max_y - min_y)
        best_idx = np.argmin(areas)

        # Return the best box
        x1 = max_x[best_idx]
        x2 = min_x[best_idx]
        y1 = max_y[best_idx]
        y2 = min_y[best_idx]
        rotmat = rotations[best_idx]

        # Generate coordinates
        # Rotate the axis-aligned corners back into the original frame
        coords = np.zeros((4, 2))
        coords[0] = np.dot([x1, y2], rotmat)
        coords[1] = np.dot([x2, y2], rotmat)
        coords[2] = np.dot([x2, y1], rotmat)
        coords[3] = np.dot([x1, y1], rotmat)

        return coords, rotmat

In [14]:
class LogScaler:
    """Log normalize and scale data

    Log normalization and scaling procedure as described as norm-2 in the
    DeepInsight paper supplementary information.

    Note: The dimensions of input matrix is (N samples, d features)
    """
    def __init__(self):
        # Per-feature minimum of the training set (set by fit / fit_transform)
        self._min0 = None
        # Global maximum of the log-normalized training set
        self._max = None

    def _log_normalize(self, X):
        """Return log(X + |_min0| + 1), floored at 0.

        `_min0` has one entry per feature and is broadcast over the rows.
        """
        return np.log(X + np.abs(self._min0) + 1).clip(min=0, max=None)

    def fit(self, X, y=None):
        """Learn the scaling statistics from the training set.

        Use this as a preprocessing step in inference mode.

        Args:
            X: array of shape (n_samples, n_features)
            y: Ignored.

        Returns:
            self: object
        """
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        # Global max. of the log-normalized training set
        self._max = self._log_normalize(X).max()
        return self

    def fit_transform(self, X, y=None):
        """Learn the scaling statistics and return the scaled training set.

        For training set only.

        Args:
            X: array of shape (n_samples, n_features)
            y: Ignored.

        Returns:
            Array of the same shape as X with values in [0, 1].
        """
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        X_norm = self._log_normalize(X)

        # Global max. of training set from X_norm
        self._max = X_norm.max()

        # Normalized again by global max. of training set
        return (X_norm / self._max).clip(0, 1)

    def transform(self, X, y=None):
        """Scale new data with the statistics learned from the training set.

        For validation and test set only. The input array is not modified.

        Args:
            X: array of shape (n_samples, n_features)
            y: Ignored.

        Returns:
            Array of the same shape as X with values in [0, 1].
        """
        # Raise every feature to at least its training minimum. `clip`
        # returns a new array, so the caller's data is left untouched
        # (the previous per-column assignment overwrote it in place).
        X = X.clip(min=self._min0, max=None)

        # Normalized again by global max. of training set
        return (self._log_normalize(X) / self._max).clip(0, 1)

## Dataset

In [15]:
class MoAImageSwapDataset(torch.utils.data.Dataset):
    """Training dataset that renders feature rows as images with swap noise.

    With probability `swap_prob`, a random subset of a sample's features is
    replaced by the values of another randomly chosen sample before the row
    is converted into a 3-channel image by `transformer.transform_3d`.

    Note: the constructor defaults (swap_prob=0.15, swap_portion=0.1) differ
    from the notebook-level settings; pass both explicitly.
    """
    def __init__(self,
                 features,
                 labels,
                 transformer,
                 swap_prob=0.15,
                 swap_portion=0.1):
        """
        Args:
            features: 2-D array indexed as features[row, column]
            labels: 2-D array indexed as labels[row, column]
            transformer: object providing `transform_3d(X, empty_value=...)`
            swap_prob: probability that a sample receives swap noise
            swap_portion: fraction of the columns that is swapped
        """
        self.features = features
        self.labels = labels
        self.transformer = transformer
        self.swap_prob = swap_prob
        self.swap_portion = swap_portion

        # Not used by __getitem__; kept so the instance attributes are unchanged
        self.crop = CropToFixedSize(width=image_size, height=image_size)

    def __getitem__(self, index):
        """Return {"x": image of shape (3, image_size, image_size), "y": labels row}."""
        normalized = self.features[index, :]

        # Swap row features randomly
        normalized = self.add_swap_noise(index, normalized)
        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=0
        image = self.transformer.transform_3d(normalized, empty_value=0)[0]

        # Resize to target size
        # cv2.resize expects channels last, so move them around the call
        image = cv2.resize(image.transpose((1, 2, 0)),
                           (image_size, image_size),
                           interpolation=cv2.INTER_CUBIC)
        image = image.transpose((2, 0, 1))

        return {"x": image, "y": self.labels[index, :]}

    def add_swap_noise(self, index, X):
        """Return `X` with a random subset of entries taken from another row.

        Args:
            index: row index of the sample (unused)
            X: 1-D feature row; it is never modified

        Returns:
            `X` itself when no noise is applied, otherwise a noisy copy.
        """
        if np.random.rand() < self.swap_prob:
            n_features = self.features.shape[1]
            swap_index = np.random.randint(self.features.shape[0], size=1)[0]
            # Select only gene expression and cell viability features
            # (assumes the first three columns are treatment metadata --
            # TODO confirm against the column order used by the caller)
            swap_features = np.random.choice(
                np.arange(3, n_features),
                size=int(n_features * self.swap_portion),
                replace=False)
            # Work on a copy: `X` is normally a view into self.features, and
            # writing into it would permanently alter the training matrix.
            X = X.copy()
            X[swap_features] = self.features[swap_index, swap_features]

        return X

    def __len__(self):
        return self.features.shape[0]

In [16]:
class MoAImageDataset(torch.utils.data.Dataset):
    """Dataset that renders feature rows as images, without augmentation."""
    def __init__(self, features, labels, transformer):
        self.features, self.labels = features, labels
        self.transformer = transformer

    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return {"x": image of shape (3, image_size, image_size), "y": labels row}."""
        # The transformer expects a batch, so wrap the single row
        sample = np.expand_dims(self.features[index, :], axis=0)

        # Note: we are setting empty_value=0
        channels_first = self.transformer.transform_3d(sample, empty_value=0)[0]

        # Resize to target size; cv2.resize works on channels-last arrays
        channels_last = channels_first.transpose((1, 2, 0))
        resized = cv2.resize(channels_last, (image_size, image_size),
                             interpolation=cv2.INTER_CUBIC)

        return {"x": resized.transpose((2, 0, 1)), "y": self.labels[index, :]}


class TestDataset(torch.utils.data.Dataset):
    """Dataset for unlabeled rows; every item carries the dummy target -1."""
    def __init__(self, features, labels, transformer):
        self.features, self.labels = features, labels
        self.transformer = transformer

    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return {"x": image of shape (3, image_size, image_size), "y": -1}."""
        # The transformer expects a batch, so wrap the single row
        sample = np.expand_dims(self.features[index, :], axis=0)

        # Note: we are setting empty_value=0
        channels_first = self.transformer.transform_3d(sample, empty_value=0)[0]

        # Resize to target size; cv2.resize works on channels-last arrays
        channels_last = channels_first.transpose((1, 2, 0))
        resized = cv2.resize(channels_last, (image_size, image_size),
                             interpolation=cv2.INTER_CUBIC)

        return {"x": resized.transpose((2, 0, 1)), "y": -1}

## Model Definition

In [17]:
from torch.nn.modules.loss import _WeightedLoss


# https://www.kaggle.com/vbmokin/moa-pytorch-rankgauss-pca-nn-upgrade-3d-visual#4.7-Smoothing
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5: t -> t * (1 - smoothing) + 0.5 * smoothing.

    Args:
        weight: optional rescaling weight forwarded to
            `F.binary_cross_entropy_with_logits`
        reduction: 'mean', 'sum' or 'none'
        smoothing: smoothing factor in [0, 1)
    """
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor stores both `weight` and `reduction`
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return the smoothed targets. `n_labels` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss between logits and targets."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
                                           self.smoothing)
        # Pass the configured reduction through. Previously the call always
        # used the default mean, so 'sum' and 'none' silently returned the
        # mean as well.
        return F.binary_cross_entropy_with_logits(inputs,
                                                  targets,
                                                  weight=self.weight,
                                                  reduction=self.reduction)

In [18]:
def initialize_weights(layer):
    """Re-initialize, in place, the Conv2d, BatchNorm2d and Linear sub-modules of `layer`.

    * Conv2d: weights ~ N(0, sqrt(2 / (kernel_h * kernel_w * out_channels)));
      the bias is left untouched.
    * BatchNorm2d: weight = 1, bias = 0.
    * Linear: weights ~ U(-r, r) with r = 1 / sqrt(out_features); bias = 0.

    Parameters that are absent (`bias=False`, `affine=False`) are skipped.

    Args:
        layer: an `nn.Module`; all of its sub-modules are visited.
    """
    for m in layer.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            # weight and bias are None when affine=False
            if m.weight is not None:
                m.weight.data.fill_(1.0)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            # Only the fan-out enters the range (the fan-in term was
            # hard-coded to 0 before).
            fan_out = m.weight.size(0)
            init_range = 1.0 / math.sqrt(fan_out)
            m.weight.data.uniform_(-init_range, init_range)
            # bias is None when the layer was built with bias=False
            if m.bias is not None:
                m.bias.data.zero_()

In [19]:
class MoAResNeSt(pl.LightningModule):
    """ResNeSt backbone with a two-layer head for multi-label prediction.

    The module owns its data: `setup` builds the train / validation / test
    datasets from the arrays given to the constructor, and the
    `*_dataloader` hooks wrap them.

    Besides its constructor arguments, the module reads these notebook-level
    settings: `label_smoothing`, `swap_prob`, `swap_portion`, `batch_size`,
    `infer_batch_size`, `num_workers`, `weight_decay` and `T_max`.
    """
    def __init__(
            self,
            pretrained_model_name,
            training_set=(None, None),  # tuple
            valid_set=(None, None),  # tuple
            test_set=None,
            transformer=None,
            num_classes=206,
            final_drop=0.0,
            dropblock_prob=0,
            fc_size=512,
            learning_rate=1e-3):
        """
        Args:
            pretrained_model_name: name of a model constructor in
                `resnest.torch` (e.g. "resnest50_fast_2s2x40d")
            training_set: tuple (features, labels) for training
            valid_set: tuple (features, labels) for validation
            test_set: features for testing
            transformer: object providing `transform_3d`, handed to the datasets
            num_classes: number of output logits
            final_drop: dropout rate passed to the backbone
            dropblock_prob: recorded as a hyperparameter only; it is NOT
                passed to the backbone
            fc_size: width of the hidden layer in the head
            learning_rate: initial learning rate for the optimizer
        """
        super(MoAResNeSt, self).__init__()

        self.train_data, self.train_labels = training_set
        self.valid_data, self.valid_labels = valid_set
        self.test_data = test_set
        self.transformer = transformer

        # Build the backbone from the constructor argument. The global
        # `pretrained_model` was used here before, which silently ignored
        # `pretrained_model_name`.
        self.backbone = getattr(resnest.torch, pretrained_model_name)(
            pretrained=True,
            final_drop=final_drop)

        # Replace the classifier with: Linear -> ELU -> Linear
        self.backbone.fc = nn.Sequential(
            nn.Linear(self.backbone.fc.in_features, fc_size, bias=True),
            nn.ELU(), nn.Linear(fc_size, num_classes, bias=True))

        # NOTE(review): a freshly constructed module is in training mode, so
        # this presumably always runs, also when restoring from a checkpoint
        # -- confirm that is intended.
        if self.training:
            initialize_weights(self.backbone.fc)

        # Save passed hyperparameters
        self.save_hyperparameters("pretrained_model_name", "num_classes",
                                  "final_drop", "dropblock_prob", "fc_size",
                                  "learning_rate")

    def forward(self, x):
        """Return the logits produced by the backbone for the batch `x`."""
        return self.backbone(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the label-smoothed BCE loss for one training batch."""
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        # loss = F.binary_cross_entropy_with_logits(logits, y, reduction="mean")

        # Label smoothing
        loss = SmoothBCEwLogits(smoothing=label_smoothing)(logits, y)

        self.log('train_loss',
                 loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the plain (unsmoothed) BCE loss for one validation batch."""
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        val_loss = F.binary_cross_entropy_with_logits(logits,
                                                      y,
                                                      reduction="mean")

        self.log('val_loss',
                 val_loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return val_loss

    def test_step(self, batch, batch_idx):
        """Return the logits for one test batch; the dummy target is not read."""
        x = batch["x"].float()
        logits = self(x)
        return {"pred_logits": logits}

    def test_epoch_end(self, output_results):
        """Concatenate the per-batch logits and convert them to probabilities."""
        all_outputs = torch.cat([out["pred_logits"] for out in output_results],
                                dim=0)
        print("Logits:", all_outputs)
        # torch.sigmoid replaces the deprecated F.sigmoid
        pred_probs = torch.sigmoid(all_outputs).detach().cpu().numpy()
        print("Predictions: ", pred_probs)
        return {"pred_probs": pred_probs}

    def setup(self, stage=None):
        """Create the train (with swap noise), validation and test datasets."""
        #         self.train_dataset = MoAImageDataset(self.train_data,
        #                                              self.train_labels,
        #                                              self.transformer)
        self.train_dataset = MoAImageSwapDataset(self.train_data,
                                                 self.train_labels,
                                                 self.transformer,
                                                 swap_prob=swap_prob,
                                                 swap_portion=swap_portion)

        self.val_dataset = MoAImageDataset(self.valid_data, self.valid_labels,
                                           self.transformer)

        self.test_dataset = TestDataset(self.test_data, None, self.transformer)

    def train_dataloader(self):
        """Return the shuffled training DataLoader."""
        train_dataloader = DataLoader(self.train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      pin_memory=True,
                                      drop_last=False)
        print(f"Train iterations: {len(train_dataloader)}")
        return train_dataloader

    def val_dataloader(self):
        """Return the validation DataLoader (no shuffling)."""
        val_dataloader = DataLoader(self.val_dataset,
                                    batch_size=infer_batch_size,
                                    shuffle=False,
                                    num_workers=num_workers,
                                    pin_memory=True,
                                    drop_last=False)
        print(f"Validate iterations: {len(val_dataloader)}")
        return val_dataloader

    def test_dataloader(self):
        """Return the test DataLoader (no shuffling)."""
        test_dataloader = DataLoader(self.test_dataset,
                                     batch_size=infer_batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     pin_memory=True,
                                     drop_last=False)
        print(f"Test iterations: {len(test_dataloader)}")
        return test_dataloader

    def configure_optimizers(self):
        """Return the RAdam optimizer and a cosine-annealing LR scheduler."""
        print(f"Initial Learning Rate: {self.hparams.learning_rate:.6f}")
        #         optimizer = optim.Adam(self.parameters(),
        #                                lr=self.hparams.learning_rate,
        #                                weight_decay=weight_decay)
        #         optimizer = torch.optim.SGD(self.parameters(),
        #                                     lr=self.hparams.learning_rate,
        #                                     momentum=0.9,
        #                                     dampening=0,
        #                                     weight_decay=weight_decay,
        #                                     nesterov=False)

        optimizer = torch_optimizer.RAdam(
            self.parameters(),
            lr=self.hparams.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=weight_decay,
        )

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=T_max,
                                                         eta_min=0,
                                                         last_epoch=-1)

        #         scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        #             optimizer,
        #             T_0=T_0,
        #             T_mult=1,
        #             eta_min=0,
        #             last_epoch=-1)

        #         scheduler = optim.lr_scheduler.OneCycleLR(
        #             optimizer=optimizer,
        #             pct_start=0.1,
        #             div_factor=1e3,
        #             max_lr=1e-1,
        #             # max_lr=1e-2,
        #             epochs=epochs,
        #             steps_per_epoch=len(self.train_images) // batch_size)

        return [optimizer], [scheduler]

In [20]:
# model = MoAResNeSt(
#     pretrained_model,
#     training_set=(None, None),  # tuple
#     valid_set=(None, None),  # tuple
#     test_set=None,
#     transformer=None,
#     num_classes=206,
#     final_drop=0.0,
#     dropblock_prob=0,
#     fc_size=fc_size,
#     learning_rate=learning_rate)
# print(model)

## Training/Inference

In [21]:
# Stratified K-fold splitter for the multi-label targets.
kfolds = 10
skf = MultilabelStratifiedKFold(
    n_splits=kfolds, shuffle=True, random_state=rand_seed)

# Sum of every target column (sig_id excluded), and the matching label names.
label_counts = train_labels.drop("sig_id", axis=1).sum(axis=0)
y_labels = list(label_counts.index)

In [22]:
def get_model(training_set, valid_set, test_set, transformer, model_path=None):
    """Build a fresh ``MoAResNeSt`` or restore one from a checkpoint.

    The global ``training_mode`` flag selects the behavior:

    * ``True``  -- construct a new model using the global hyper-parameters
      (``final_drop``, ``dropblock_prob``, ``fc_size``, ``learning_rate``).
    * ``False`` -- load the model from ``model_path`` via
      ``load_from_checkpoint``, then ``freeze()`` it and switch it to
      ``eval()`` mode.

    Args:
        training_set: Tuple passed through as the model's ``training_set``.
        valid_set: Tuple passed through as the model's ``valid_set``.
        test_set: Passed through as the model's ``test_set``.
        transformer: Passed through as the model's ``transformer``.
        model_path: Checkpoint path; only read when ``training_mode`` is False.

    Returns:
        The constructed or restored ``MoAResNeSt`` instance.
    """
    # Keyword arguments that both code paths hand to the model.
    shared_kwargs = dict(
        pretrained_model_name=pretrained_model,
        training_set=training_set,  # tuple
        valid_set=valid_set,  # tuple
        test_set=test_set,
        transformer=transformer,
        num_classes=len(train_classes),
        fc_size=fc_size,
    )

    if not training_mode:
        restored = MoAResNeSt.load_from_checkpoint(model_path, **shared_kwargs)
        restored.freeze()
        restored.eval()
        return restored

    return MoAResNeSt(final_drop=final_drop,
                      dropblock_prob=dropblock_prob,
                      learning_rate=learning_rate,
                      **shared_kwargs)


def save_pickle(obj, model_output_folder, fold_i, name):
    """Pickle ``obj`` to ``{model_output_folder}/fold{fold_i}_{name}.pkl``.

    Args:
        obj: Any picklable object (e.g. a fitted scaler or transformer).
        model_output_folder: Existing directory that receives the file.
        fold_i: Fold identifier embedded in the file name.
        name: Artifact name embedded in the file name.
    """
    # The context manager guarantees the handle is flushed and closed even if
    # pickling raises; the previous bare open() left that to the GC.
    with open(f"{model_output_folder}/fold{fold_i}_{name}.pkl", 'wb') as f:
        dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_pickle(model_output_folder, fold_i, name):
    """Load the object stored at ``{model_output_folder}/fold{fold_i}_{name}.pkl``.

    Args:
        model_output_folder: Directory that contains the pickle file.
        fold_i: Fold identifier embedded in the file name.
        name: Artifact name embedded in the file name.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only load artifacts that were
        produced by a trusted run of this notebook.
    """
    # Close the handle deterministically instead of leaking it to the GC.
    with open(f"{model_output_folder}/fold{fold_i}_{name}.pkl", 'rb') as f:
        return load(f)

In [23]:
def norm2_normalization(train, valid, test):
    """Fit a ``LogScaler`` on ``train`` and apply it to all three splits.

    Args:
        train: Training features; the scaler is fitted on these.
        valid: Validation features, transformed with the fitted scaler.
        test: Test features, transformed with the fitted scaler.

    Returns:
        ``(train, valid, test, scaler)`` -- the transformed splits followed
        by the fitted scaler.
    """
    scaler = LogScaler()
    train_scaled = scaler.fit_transform(train)
    # Validation first, then test -- both reuse the statistics fitted on train.
    valid_scaled, test_scaled = (scaler.transform(split)
                                 for split in (valid, test))
    return train_scaled, valid_scaled, test_scaled, scaler


def quantile_transform(train, valid, test):
    """Quantile-transform the splits to a normal shape, then rescale to [0, 1].

    Both scalers are fitted on ``train`` only and then applied to ``valid``
    and ``test``.

    Args:
        train: Training features used to fit both scalers.
        valid: Validation features.
        test: Test features.

    Returns:
        ``(train, valid, test, q_scaler, min_max_scaler)`` -- the transformed
        splits followed by the two fitted scalers.
    """
    q_scaler = QuantileTransformer(output_distribution='normal',
                                   n_quantiles=1000,
                                   subsample=100000,
                                   ignore_implicit_zeros=False,
                                   random_state=rand_seed)
    # Transform to [0, 1]
    min_max_scaler = MinMaxScaler(feature_range=(0, 1))

    # Fit both stages on the training split ...
    train = min_max_scaler.fit_transform(q_scaler.fit_transform(train))
    # ... then push the held-out splits through the fitted stages.
    valid = min_max_scaler.transform(q_scaler.transform(valid))
    test = min_max_scaler.transform(q_scaler.transform(test))

    return train, valid, test, q_scaler, min_max_scaler


def extract_feature_map(train,
                        feature_extractor='tsne_exact',
                        resolution=100,
                        perplexity=30):
    """Fit a ``DeepInsightTransformer`` on ``train`` and return it.

    Args:
        train: Feature matrix the transformer is fitted on.
        feature_extractor: Forwarded as ``feature_extractor``.
        resolution: Forwarded as the transformer's ``pixels`` argument.
        perplexity: Forwarded as ``perplexity``.

    Returns:
        The fitted ``DeepInsightTransformer``.
    """
    deepinsight_kwargs = {
        "feature_extractor": feature_extractor,
        "pixels": resolution,
        "perplexity": perplexity,
        "random_state": rand_seed,
        "n_jobs": -1,
    }
    fitted = DeepInsightTransformer(**deepinsight_kwargs)
    fitted.fit(train)
    return fitted

In [24]:
def mean_logloss(y_pred, y_true):
    """Mean binary cross-entropy over every element of the inputs.

    Args:
        y_pred: Array-like of predicted probabilities.
        y_true: Array-like of 0/1 targets, broadcastable against ``y_pred``.

    Returns:
        The mean of the element-wise negative log-likelihood.
    """
    # Work in float64: with half-precision predictions the 1e-15 epsilon
    # underflows to 0, so a prediction of exactly 0 or 1 produced
    # ``0 * log(0) = nan`` and poisoned the whole mean.
    y_pred = np.asarray(y_pred, dtype=np.float64)
    y_true = np.asarray(y_true, dtype=np.float64)

    eps = 1e-15  # keeps log() finite at the 0/1 boundaries
    logloss = ((1 - y_true) * np.log(1 - y_pred + eps) +
               y_true * np.log(y_pred + eps))
    return np.mean(-logloss)

In [25]:
# K-fold driver: for every fold either train a model and collect out-of-fold
# plus test predictions (training_mode) or load the fold's saved artifacts and
# predict on the test set only (inference). Test predictions are averaged
# across folds into `kfold_submit_preds`.

# Ensure Reproducibility
seed_everything(rand_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

best_model = None
# Accumulators: one row per sample, one column per target class.
oof_predictions = np.zeros((train_features.shape[0], len(train_classes)))
kfold_submit_preds = np.zeros((test_features.shape[0], len(train_classes)))
for i, (train_index, val_index) in enumerate(
        skf.split(train_features, train_labels[y_labels])):
    if training_mode:
        print(f"Training on Fold {i} ......")
        print(train_index.shape, val_index.shape)

        logger = TensorBoardLogger(model_output_folder,
                                   name=f"fold{i}/logs",
                                   default_hp_metric=False)

        # NOTE(review): the split indices are used with label-based `.loc`
        # here and positionally on `oof_predictions` below; this presumably
        # relies on a default 0..n-1 index on both frames -- confirm.
        train = train_features.loc[train_index, all_features].copy().values
        fold_train_labels = train_labels.loc[train_index,
                                             train_classes].copy().values
        valid = train_features.loc[val_index, all_features].copy().values
        fold_valid_labels = train_labels.loc[val_index,
                                             train_classes].copy().values
        test = test_features[all_features].copy().values

        # LogScaler (Norm-2 Normalization)
        print("Running norm-2 normalization ......")
        train, valid, test, scaler = norm2_normalization(train, valid, test)
        # Persisted so the inference branch can reuse the fitted scaler.
        save_pickle(scaler, model_output_folder, i, "log-scaler")

        # Extract DeepInsight Feature Map
        print("Extracting feature map ......")
        transformer = extract_feature_map(train,
                                          feature_extractor='tsne_exact',
                                          resolution=resolution,
                                          perplexity=perplexity)
        # Persisted so the inference branch can reuse the fitted transformer.
        save_pickle(transformer, model_output_folder, i,
                    "deepinsight-transform")

        model = get_model(training_set=(train, fold_train_labels),
                          valid_set=(valid, fold_valid_labels),
                          test_set=test,
                          transformer=transformer)

        callbacks = [
            EarlyStopping(monitor='val_loss_epoch',
                          min_delta=1e-6,
                          patience=patience,
                          verbose=True,
                          mode='min',
                          strict=True),
            LearningRateMonitor(logging_interval='step')
        ]
        # https://pytorch-lightning.readthedocs.io/en/latest/generated/pytorch_lightning.callbacks.ModelCheckpoint.html#pytorch_lightning.callbacks.ModelCheckpoint
        # Keeps only the single best checkpoint (lowest val_loss_epoch); the
        # file name records the losses and the main image hyper-parameters.
        checkpoint_callback = ModelCheckpoint(
            filepath=f"{model_output_folder}/fold{i}" +
            "/{epoch}-{train_loss_epoch:.6f}-{val_loss_epoch:.6f}" +
            f"-image_size={image_size}-resolution={resolution}-perplexity={perplexity}-fc={fc_size}",
            save_top_k=1,
            save_weights_only=False,
            save_last=False,
            verbose=True,
            monitor='val_loss_epoch',
            mode='min',
            prefix='')

        if debug_mode:
            # Find best LR
            # https://pytorch-lightning.readthedocs.io/en/latest/lr_finder.html
            trainer = Trainer(
                gpus=[gpus[0]],
                distributed_backend="dp",  # multiple-gpus, 1 machine
                auto_lr_find=True,
                benchmark=False,
                deterministic=True,
                logger=logger,
                accumulate_grad_batches=accumulate_grad_batches,
                gradient_clip_val=gradient_clip_val,
                precision=16,
                max_epochs=1)

            # Run learning rate finder
            lr_finder = trainer.tuner.lr_find(
                model,
                min_lr=1e-7,
                max_lr=1e2,
                num_training=100,
                mode='exponential',
                early_stop_threshold=100.0,
            )
            fig = lr_finder.plot(suggest=True)
            fig.show()

            # Pick point based on plot, or get suggestion
            suggested_lr = lr_finder.suggestion()

            # Update hparams of the model
            model.hparams.learning_rate = suggested_lr
            print(
                f"Suggested Learning Rate: {model.hparams.learning_rate:.6f}")

        else:
            trainer = Trainer(
                gpus=gpus,
                distributed_backend="dp",  # multiple-gpus, 1 machine
                max_epochs=epochs,
                benchmark=False,
                deterministic=True,
                # fast_dev_run=True,
                checkpoint_callback=checkpoint_callback,
                callbacks=callbacks,
                accumulate_grad_batches=accumulate_grad_batches,
                gradient_clip_val=gradient_clip_val,
                precision=16,
                logger=logger)
            trainer.fit(model)

            # Load best model
            seed_everything(rand_seed)
            # Fixed keyword: this was spelled `valid_Set`, so the validation
            # set never reached the model under the name it expects (compare
            # `get_model`, which passes `valid_set`).
            best_model = MoAResNeSt.load_from_checkpoint(
                checkpoint_callback.best_model_path,
                pretrained_model_name=pretrained_model,
                training_set=(train, fold_train_labels),  # tuple
                valid_set=(valid, fold_valid_labels),  # tuple
                test_set=test,
                transformer=transformer,
                fc_size=fc_size)
            best_model.freeze()

            print("Predicting on validation set ......")
            # The validation loader is fed through the test loop so that the
            # best checkpoint produces the out-of-fold predictions.
            output = trainer.test(ckpt_path="best",
                                  test_dataloaders=model.val_dataloader(),
                                  verbose=False)[0]
            fold_preds = output["pred_probs"]
            oof_predictions[val_index, :] = fold_preds

            print(fold_preds[:5, :])
            fold_valid_loss = mean_logloss(fold_preds, fold_valid_labels)
            print(f"Fold {i} Validation Loss: {fold_valid_loss:.6f}")

            # Generate submission predictions
            print("Predicting on test set ......")
            best_model.setup()
            output = trainer.test(best_model, verbose=False)[0]
            submit_preds = output["pred_probs"]
            print(test_features.shape, submit_preds.shape)

            # Equal-weight average across folds.
            kfold_submit_preds += submit_preds / kfolds

        del model, trainer, train, valid, test, scaler, transformer
    else:
        print(f"Inferencing on Fold {i} ......")
        print(train_index.shape, val_index.shape)

        # First checkpoint found for this fold (training keeps only one).
        model_path = glob.glob(f'{model_output_folder}/fold{i}/epoch*.ckpt')[0]

        test = test_features[all_features].copy().values

        # Load LogScaler (Norm-2 Normalization)
        scaler = load_pickle(f'{model_output_folder}', i, "log-scaler")
        test = scaler.transform(test)

        # Load DeepInsight Feature Map
        transformer = load_pickle(f'{model_output_folder}', i,
                                  "deepinsight-transform")

        print(f"Loading model from {model_path}")
        model = get_model(training_set=(None, None),
                          valid_set=(None, None),
                          test_set=test,
                          transformer=transformer,
                          model_path=model_path)

        trainer = Trainer(
            logger=False,
            gpus=gpus,
            distributed_backend="dp",  # multiple-gpus, 1 machine
            precision=16,
            benchmark=False,
            deterministic=True)
        output = trainer.test(model, verbose=False)[0]
        submit_preds = output["pred_probs"]
        # Equal-weight average across folds.
        kfold_submit_preds += submit_preds / kfolds

        del model, trainer, scaler, transformer, test

    # Release cached GPU memory and collect garbage between folds.
    torch.cuda.empty_cache()
    gc.collect()

    # Debug runs stop after the first fold.
    if debug_mode:
        break

Inferencing on Fold 0 ......
(21432,) (2382,)
Loading model from /workspace/Kaggle/MoA/completed/deepinsight_ResNeSt_v2_resnest50/deepinsight_ResNeSt_v2_resnest50/fold0/epoch=25-train_loss_epoch=0.016863-val_loss_epoch=0.014446-image_size=224-resolution=224-perplexity=5-fc=512.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.1992, -8.0703, -6.8320,  ..., -7.0938, -6.2148, -5.9062],
        [-9.7656, -9.1875, -7.5547,  ..., -7.7031, -9.1328, -5.5781],
        [-7.8945, -7.7383, -7.6133,  ..., -7.5938, -7.8750, -7.5664],
        ...,
        [-5.6328, -6.8633, -7.2227,  ..., -6.8398, -7.9336, -6.5352],
        [-7.1055, -7.6250, -7.3008,  ..., -6.4844, -7.0312, -5.6172],
        [-6.2266, -6.8242, -6.8398,  ..., -6.6523, -7.4258, -6.6133]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[7.4673e-04 3.1257e-04 1.0777e-03 ... 8.2970e-04 1.9951e-03 2.7142e-03]
 [5.7399e-05 1.0228e-04 5.2357e-04 ... 4.5133e-04 1.0806e-04 3.7651e-03]
 [3.7265e-04 4.3559e-04 4.9353e-04 ... 5.0354e-04 3.8004e-04 5.1737e-04]
 ...
 [3.5648e-03 1.0443e-03 7.2956e-04 ... 1.0691e-03 3.5834e-04 1.4496e-03]
 [8.2016e-04 4.8780e-04 6.7472e-04 ... 1.5249e-03 8.8310e-04 3.6221e-03]
 [1.9722e-03 1.0862e-03 1.0691e-03 ... 1.2894e-03 5.9557e-04 1.3409e-03]]

Inferencing on Fold 1 ......
(21432,) (2382,)
Loading 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.6172, -7.1875, -6.1602,  ..., -6.3945, -7.7422, -6.9023],
        [-7.2188, -7.5273, -8.4922,  ..., -7.6484, -8.6016, -5.4727],
        [-7.7266, -7.9688, -7.5938,  ..., -7.7344, -7.8945, -7.4609],
        ...,
        [-5.1680, -5.4336, -6.4062,  ..., -6.8125, -7.7227, -6.8359],
        [-6.8945, -7.6484, -7.9375,  ..., -6.4414, -6.6953, -5.7422],
        [-6.5469, -8.0859, -6.7852,  ..., -7.6758, -7.4922, -7.3555]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[0.0004916 0.0007553 0.002108  ... 0.001668  0.000434  0.001004 ]
 [0.000732  0.000538  0.000205  ... 0.0004766 0.0001838 0.00418  ]
 [0.0004408 0.000346  0.0005035 ... 0.0004373 0.0003726 0.0005746]
 ...
 [0.005665  0.00435   0.001649  ... 0.001099  0.0004425 0.001073 ]
 [0.001012  0.0004766 0.000357  ... 0.001592  0.001235  0.003197 ]
 [0.001432  0.0003078 0.001129  ... 0.0004637 0.000557  0.0006385]]

Inferencing on Fold 2 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.1875, -6.4922, -6.4883,  ..., -6.7227, -6.5898, -7.9453],
        [-8.6406, -8.7344, -7.5000,  ..., -7.9453, -8.6641, -6.6719],
        [-8.2422, -8.1562, -7.3008,  ..., -7.5469, -7.3711, -7.6836],
        ...,
        [-6.2344, -6.5820, -6.6953,  ..., -6.3906, -8.2031, -6.8438],
        [-6.7734, -7.5820, -7.4922,  ..., -6.8164, -7.7461, -6.6445],
        [-6.5000, -7.1289, -6.4141,  ..., -6.6328, -8.5547, -7.1016]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[0.0007553 0.001513  0.001519  ... 0.001202  0.001372  0.0003543]
 [0.0001768 0.0001609 0.0005527 ... 0.0003543 0.0001726 0.001265 ]
 [0.0002632 0.0002868 0.0006747 ... 0.0005274 0.000629  0.0004601]
 ...
 [0.001957  0.001383  0.001235  ... 0.001675  0.0002737 0.001065 ]
 [0.0011425 0.0005093 0.000557  ... 0.001095  0.0004323 0.0013   ]
 [0.001501  0.000801  0.001636  ... 0.001315  0.0001926 0.000823 ]]

Inferencing on Fold 3 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-6.5898, -6.5391, -7.5352,  ..., -5.7148, -5.7578, -6.8125],
        [-9.1719, -9.7500, -7.5273,  ..., -8.6484, -8.9219, -7.7617],
        [-7.3242, -7.6289, -7.3008,  ..., -7.5820, -7.6797, -7.6875],
        ...,
        [-5.7148, -6.0000, -7.7891,  ..., -6.9492, -7.2617, -6.7422],
        [-7.3672, -7.5859, -6.0234,  ..., -5.3711, -8.0078, -6.0742],
        [-6.8281, -7.5312, -6.2305,  ..., -6.3008, -7.8203, -6.6484]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[1.372e-03 1.444e-03 5.336e-04 ... 3.286e-03 3.147e-03 1.099e-03]
 [1.039e-04 5.829e-05 5.379e-04 ... 1.754e-04 1.334e-04 4.256e-04]
 [6.590e-04 4.859e-04 6.747e-04 ... 5.093e-04 4.618e-04 4.582e-04]
 ...
 [3.286e-03 2.472e-03 4.141e-04 ... 9.584e-04 7.014e-04 1.179e-03]
 [6.313e-04 5.074e-04 2.415e-03 ... 4.627e-03 3.328e-04 2.296e-03]
 [1.081e-03 5.360e-04 1.965e-03 ... 1.831e-03 4.013e-04 1.294e-03]]

Inferencing on Fold 4 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-6.3984, -6.3203, -6.1797,  ..., -6.1484, -7.2578, -6.6016],
        [-8.5703, -7.7227, -7.4180,  ..., -7.4688, -8.1094, -5.9062],
        [-7.3945, -7.5625, -7.1094,  ..., -7.4180, -7.7344, -7.0547],
        ...,
        [-6.3086, -5.9062, -6.3438,  ..., -6.3125, -7.6719, -6.0000],
        [-6.4688, -6.5469, -6.5156,  ..., -6.1094, -7.7461, -6.3359],
        [-6.5352, -7.2656, -6.5898,  ..., -6.7344, -7.2461, -6.2500]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[0.001661  0.001796  0.002068  ... 0.002132  0.0007043 0.001356 ]
 [0.0001897 0.0004425 0.0006    ... 0.0005703 0.0003006 0.002714 ]
 [0.000614  0.0005193 0.000817  ... 0.0006    0.0004373 0.0008626]
 ...
 [0.001818  0.002714  0.001755  ... 0.00181   0.0004656 0.002472 ]
 [0.001549  0.001432  0.001478  ... 0.002216  0.0004323 0.001768 ]
 [0.00145   0.0006986 0.001372  ... 0.001188  0.0007124 0.001926 ]]

Inferencing on Fold 5 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.8906, -6.8594, -6.5547,  ..., -6.1133, -6.4922, -7.8086],
        [-7.5938, -9.4609, -8.2734,  ..., -9.6328, -8.7656, -5.8516],
        [-7.7578, -7.2031, -7.3828,  ..., -7.3828, -8.3281, -7.6523],
        ...,
        [-4.7812, -5.9258, -7.1133,  ..., -7.6250, -8.4766, -7.5469],
        [-7.6328, -7.8984, -6.6562,  ..., -6.3945, -7.6836, -6.1602],
        [-7.1133, -6.7734, -5.8594,  ..., -6.0078, -7.9805, -7.0156]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[3.7408e-04 1.0481e-03 1.4210e-03 ... 2.2087e-03 1.5125e-03 4.0603e-04]
 [5.0354e-04 7.7844e-05 2.5511e-04 ... 6.5565e-05 1.5593e-04 2.8667e-03]
 [4.2725e-04 7.4387e-04 6.2132e-04 ... 6.2132e-04 2.4152e-04 4.7469e-04]
 ...
 [8.3160e-03 2.6627e-03 8.1348e-04 ... 4.8780e-04 2.0826e-04 5.2738e-04]
 [4.8399e-04 3.7122e-04 1.2846e-03 ... 1.6680e-03 4.6015e-04 2.1076e-03]
 [8.1348e-04 1.1425e-03 2.8458e-03 ... 2.4529e-03 3.4189e-04 8.9693e-04]]

Inferencing on Fold 6 ......
(21433,) (2381,)
Loading 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.9414, -7.6016, -6.3906,  ..., -6.6133, -4.0391, -7.3008],
        [-9.5625, -9.4922, -6.1719,  ..., -8.0312, -7.8477, -6.4961],
        [-7.5938, -7.4297, -7.1289,  ..., -7.4531, -7.5352, -7.5977],
        ...,
        [-5.4102, -6.4961, -6.6406,  ..., -6.8711, -7.5547, -7.2305],
        [-6.8438, -7.1055, -7.7188,  ..., -6.8320, -7.0312, -6.2422],
        [-7.3789, -6.8438, -6.2344,  ..., -6.1641, -7.9336, -7.1406]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[3.555e-04 4.992e-04 1.675e-03 ... 1.341e-03 1.730e-02 6.747e-04]
 [7.033e-05 7.546e-05 2.083e-03 ... 3.250e-04 3.905e-04 1.507e-03]
 [5.035e-04 5.932e-04 8.011e-04 ... 5.794e-04 5.336e-04 5.012e-04]
 ...
 [4.452e-03 1.507e-03 1.305e-03 ... 1.037e-03 5.236e-04 7.238e-04]
 [1.065e-03 8.202e-04 4.442e-04 ... 1.078e-03 8.831e-04 1.942e-03]
 [6.237e-04 1.065e-03 1.957e-03 ... 2.100e-03 3.583e-04 7.915e-04]]

Inferencing on Fold 7 ......
(21432,) (2382,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.3516, -7.5703, -6.6016,  ..., -6.0195, -8.0469, -7.8750],
        [-8.4219, -8.1250, -8.0000,  ..., -8.5547, -9.0547, -5.4375],
        [-7.9961, -7.7422, -7.4102,  ..., -7.3828, -8.0703, -7.5781],
        ...,
        [-5.8789, -6.7695, -7.7773,  ..., -7.0039, -7.8438, -6.6953],
        [-7.5898, -7.9492, -6.6680,  ..., -6.3164, -8.2344, -6.4492],
        [-7.3477, -8.1719, -6.9102,  ..., -6.7109, -8.5312, -7.0234]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[0.0006413 0.0005155 0.001356  ... 0.002424  0.00032   0.00038  ]
 [0.00022   0.0002959 0.0003355 ... 0.0001926 0.0001168 0.00433  ]
 [0.0003366 0.000434  0.0006046 ... 0.0006213 0.0003126 0.000511 ]
 ...
 [0.00279   0.001147  0.000419  ... 0.0009074 0.000392  0.001235 ]
 [0.0005054 0.0003529 0.001269  ... 0.001803  0.0002654 0.001579 ]
 [0.0006437 0.0002825 0.000997  ... 0.001216  0.0001972 0.00089  ]]

Inferencing on Fold 8 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/com

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[ -6.6641,  -6.1797,  -5.9805,  ...,  -6.3398,  -7.5195,  -6.3984],
        [-10.2344,  -9.3828,  -6.0742,  ...,  -7.9531,  -9.8125,  -6.5547],
        [ -7.7539,  -7.6133,  -7.2617,  ...,  -7.7812,  -8.0000,  -7.7617],
        ...,
        [ -6.9258,  -6.4062,  -6.7109,  ...,  -6.3906,  -8.4219,  -5.7266],
        [ -6.9688,  -6.8945,  -6.2266,  ...,  -6.4219,  -6.2773,  -5.0391],
        [ -7.2031,  -8.0234,  -7.2148,  ...,  -5.8984,  -9.2109,  -8.0000]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[1.274e-03 2.068e-03 2.522e-03 ... 1.761e-03 5.422e-04 1.661e-03]
 [3.594e-05 8.416e-05 2.296e-03 ... 3.514e-04 5.478e-05 1.421e-03]
 [4.289e-04 4.935e-04 7.014e-04 ... 4.172e-04 3.355e-04 4.256e-04]
 ...
 [9.813e-04 1.649e-03 1.216e-03 ... 1.675e-03 2.199e-04 3.248e-03]
 [9.398e-04 1.012e-03 1.972e-03 ... 1.623e-03 1.875e-03 6.439e-03]
 [7.439e-04 3.276e-04 7.353e-04 ... 2.737e-03 9.996e-05 3.355e-04]]

Inferencing on Fold 9 ......
(21432,) (2382,)
Loading 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.


Test iterations: 16


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-7.9219, -6.7930, -6.6914,  ..., -7.2734, -8.3047, -8.8906],
        [-9.5234, -8.7969, -7.2188,  ..., -8.3828, -9.5547, -8.2812],
        [-7.5586, -7.4297, -7.3281,  ..., -7.5156, -7.6484, -7.2852],
        ...,
        [-5.9375, -6.2383, -7.2227,  ..., -6.8984, -8.4453, -6.8633],
        [-6.3867, -7.2695, -6.4180,  ..., -6.6797, -8.3516, -6.4297],
        [-7.1875, -7.6133, -7.1875,  ..., -7.0625, -8.6641, -7.5703]],
       device='cuda:1', dtype=torch.float16)
Predictions:  [[3.626e-04 1.121e-03 1.240e-03 ... 6.933e-04 2.472e-04 1.377e-04]
 [7.313e-05 1.512e-04 7.319e-04 ... 2.288e-04 7.087e-05 2.532e-04]
 [5.212e-04 5.932e-04 6.566e-04 ... 5.441e-04 4.766e-04 6.852e-04]
 ...
 [2.632e-03 1.949e-03 7.296e-04 ... 1.008e-03 2.148e-04 1.044e-03]
 [1.681e-03 6.962e-04 1.629e-03 ... 1.255e-03 2.359e-04 1.611e-03]
 [7.553e-04 4.935e-04 7.553e-04 ... 8.559e-04 1.726e-04 5.155e-04]]



In [26]:
# Out-of-fold evaluation. After training, the OOF matrix filled by the k-fold
# loop is already in memory; otherwise the first saved `oof_*.npy` file one
# level above the model output folder is loaded instead.
if not training_mode:
    oof_predictions = np.load(
        glob.glob(f'{model_output_folder}/../oof_*.npy')[0])
else:
    print(oof_predictions.shape)

# Score the OOF predictions against the full set of training targets.
oof_loss = mean_logloss(oof_predictions, train_labels[train_classes].values)
print(f"OOF Validation Loss: {oof_loss:.6f}")

OOF Validation Loss: 0.014560


In [27]:
# oof_filename = "_".join(
#     [f"{k}={v}" for k, v in dict(model.hparams).items()])
# with open(f'oof_{experiment_name}_{oof_loss}.npy', 'wb') as f:
#     np.save(f, oof_predictions)

# with open(f'oof_{experiment_name}_{oof_loss}.npy', 'rb') as f:
#     tmp = np.load(f)
#     print(tmp.shape)

In [28]:
# [ResNeSt]
# OOF Validation Loss: 0.014620
# "dropblock_prob":        0.0
# "fc_size":               512
# "final_drop":            0.0
# "learning_rate":         0.000352
# "num_classes":           206
# "pretrained_model_name": resnest50_fast_2s2x40d

# OOF Validation Loss: 0.014560
# "dropblock_prob":        0.0
# "fc_size":               512
# "final_drop":            0.2
# "learning_rate":         0.000352
# "num_classes":           206
# "pretrained_model_name": resnest50_fast_2s2x40d

In [29]:
# Post-training bookkeeping: log hyper-parameters with the OOF loss, dump the
# OOF predictions to disk, and rename checkpoints for upload.
if training_mode and best_model is not None:
    print(best_model.hparams)
    extra_params = {
        "gpus": len(gpus),
        # "pos_weight": True
    }
    exp_logger.experiment.add_hparams(hparam_dict={
        **dict(best_model.hparams),
        **extra_params
    },
                                      metric_dict={"oof_loss": oof_loss})

    # NOTE(review): `oof_filename` is built but not used within this cell;
    # kept in case a later cell reads it.
    oof_filename = "_".join(
        [f"{k}={v}" for k, v in dict(best_model.hparams).items()])
    with open(f'oof_{experiment_name}_{oof_loss}.npy', 'wb') as f:
        np.save(f, oof_predictions)

    # Read the file back as a sanity check that the dump is loadable.
    with open(f'oof_{experiment_name}_{oof_loss}.npy', 'rb') as f:
        tmp = np.load(f)
        print(tmp.shape)

    # Rename model filename to remove `=` for Kaggle Dataset rule
    model_files = glob.glob(f'{model_output_folder}/fold*/epoch*.ckpt')
    for f in model_files:
        # Strip `=` from the file name only: replacing across the whole path
        # would also rewrite directory names and point the rename at a
        # location that does not exist.
        folder, basename = os.path.split(f)
        new_filename = os.path.join(folder, basename.replace("=", ""))
        os.rename(f, new_filename)
        print(new_filename)

    # Rebind instead of `del` so the guard at the top of this cell still
    # evaluates (to False) when the cell is re-run.
    best_model = None
    torch.cuda.empty_cache()
    gc.collect()

## Submission

In [30]:
print(kfold_submit_preds.shape)

# Assemble the submission: one probability column per target class, with the
# sample identifier placed in front as the first column.
submission = pd.DataFrame(kfold_submit_preds, columns=train_classes)
submission.insert(0, "sig_id", test_features["sig_id"].values)
# Set control type to 0 as control perturbations have no MoAs
submission.loc[test_features['cp_type'] == 0, train_classes] = 0
# submission.to_csv('submission.csv', index=False)
submission.to_csv('submission_resnest_v2.csv', index=False)

(3982, 206)


In [31]:
# Render the assembled submission frame as this cell's output for a visual check.
submission

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_
antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gt
pase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.000804,0.001107,0.001552,0.012778,0.027563,0.004221,0.002629,0.002348,0.000242,0.010979,0.016184,0.000525,0.000290,0.000565,0.002206,0.002196,0.001404,0.005212,0.007125,0.001231,0.006157,0.002454,0.000383,0.001867,0.000498,0.000347,0.001066,0.001036,0.005490,0.002314,0.001287,0.003297,0.003332,0.000286,0.000420,0.000452,0.002812,0.000208,0.000269,0.000753,0.005327,0.011861,0.001741,0.011480,0.008336,0.011977,0.000353,0.003873,0.000540,0.001218,0.001852,0.001621,0.000335,0.001050,0.017963,0.000730,0.002243,0.001256,0.001328,0.001112,0.002170,0.006627,0.001558,0.000832,0.003404,0.000290,0.005302,0.002511,0.004380,0.000646,0.000676,0.039417,0.005271,0.001348,0.001345,0.000546,0.004117,0.016118,0.008389,0.020471,0.000656,0.000860,0.000475,0.016148,0.000868,0.002394,0.000279,0.000885,0.000340,0.000278,0.000209,0.001585,0.000535,0.024740,0.010955,0.000620,0.001000,0.001426,0.008225,0.037561,0.002485,0.000631,0.003174,0.000960,0.005191,0.017476,0.000220,0.000510,0.002525,0.001765,0.000742,0.000646,0.001008,0.002798,0.002246,0.001754,0.001179,0.002642,0.001632,0.000492,0.000557,0.000954,0.002884,0.001556,0.002021,0.000659,0.000272,0.000436,0.006742,0.002239,0.001190,0.005102,0.000290,0.001324,0.005559,0.000931,0.004504,0.000972,0.000967,0.000648,0.001571,0.000454,0.001113,0.002337,0.004671,0.001482,0.000365,0.000677,0.000561,0.000346,0.000740,0.016724,0.002532,0.001410,0.000988,0.003393,0.006565,0.001740,0.002463,0.009314,0.000864,0.004303,0.007720,0.000300,0.003334,0.000378,0.005113,0.000906,0.003468,0.000565,0.000521,0.000437,0.000656,0.000497,0.001046,0.004637,0.017574,0.011928,0.002010,0.004528,0.000960,0.002169,0.012823,0.004890,0.000860,0.000974,0.000516,0.001946,0.000207,0.000418,0.001092,0.001815,0.000786,0.001763,0.000674,0.000956,0.001470,0.000418,0.002208,0.001864,0.000958,0.000638,0.000670,0.001755,0.002758,0.000979
1,id_001897cda,0.000216,0.000199,0.000812,0.000222,0.000884,0.001523,0.003238,0.004895,0.094656,0.042344,0.004660,0.001159,0.000084,0.014063,0.000160,0.000209,0.000577,0.001075,0.002302,0.002993,0.000283,0.000561,0.000176,0.000220,0.000265,0.000996,0.000503,0.000155,0.001769,0.000863,0.000180,0.000226,0.000346,0.000644,0.000317,0.000485,0.001228,0.008904,0.006638,0.000366,0.000903,0.000424,0.000283,0.001100,0.000706,0.000428,0.000152,0.000575,0.000332,0.004717,0.000702,0.005311,0.011216,0.000098,0.009096,0.001966,0.002292,0.001225,0.001355,0.000512,0.000197,0.002049,0.000871,0.001990,0.000587,0.000139,0.000254,0.008152,0.000387,0.000480,0.000211,0.000933,0.001126,0.000643,0.000878,0.000305,0.000564,0.000584,0.000597,0.001438,0.000202,0.000448,0.000482,0.000912,0.000355,0.001022,0.000112,0.001637,0.001401,0.000933,0.000645,0.000379,0.000280,0.000780,0.001472,0.002694,0.000162,0.000759,0.001240,0.002954,0.000373,0.001444,0.012573,0.000221,0.000151,0.001725,0.004133,0.004272,0.000772,0.000173,0.000644,0.002015,0.000400,0.000723,0.000436,0.000685,0.002498,0.003980,0.001885,0.000383,0.000276,0.000311,0.003627,0.000499,0.001112,0.000191,0.000223,0.000129,0.000798,0.000403,0.000198,0.001236,0.000953,0.001851,0.000207,0.000863,0.000676,0.000409,0.000344,0.000149,0.000153,0.000402,0.000145,0.000312,0.002038,0.000615,0.001490,0.001550,0.005183,0.001085,0.001027,0.056598,0.000134,0.004380,0.000834,0.001120,0.000923,0.001564,0.001870,0.000145,0.000894,0.000313,0.000584,0.000175,0.000854,0.000321,0.000301,0.001264,0.000386,0.000442,0.000270,0.001660,0.000195,0.167084,0.000245,0.000876,0.051338,0.001505,0.000763,0.000711,0.001013,0.003767,0.004358,0.000075,0.103723,0.000096,0.001210,0.000811,0.001363,0.000247,0.000183,0.000221,0.000181,0.002601,0.000129,0.000675,0.000307,0.000142,0.001385,0.001087,0.001686,0.000111,0.005895,0.000319,0.000169,0.002273
2,id_002429b5b,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,id_00276f245,0.000406,0.000342,0.001809,0.019944,0.016188,0.003674,0.004747,0.003792,0.000416,0.006956,0.026644,0.000803,0.000463,0.001530,0.001523,0.002350,0.001517,0.002825,0.002842,0.002633,0.002262,0.002457,0.000453,0.003317,0.001261,0.000571,0.002749,0.001686,0.005939,0.001304,0.001121,0.002876,0.002217,0.001043,0.001045,0.000506,0.004553,0.000378,0.001292,0.002494,0.001601,0.002456,0.001669,0.022744,0.002584,0.003642,0.000658,0.000881,0.000689,0.003060,0.002672,0.000831,0.001435,0.002065,0.030823,0.000626,0.003148,0.001044,0.000781,0.000788,0.000962,0.014030,0.000994,0.001004,0.003469,0.000515,0.003697,0.002644,0.002653,0.000453,0.000728,0.023565,0.004060,0.001202,0.000708,0.000604,0.001110,0.008322,0.006427,0.023314,0.015707,0.000481,0.001018,0.002826,0.002366,0.003670,0.000687,0.000772,0.006028,0.000700,0.000503,0.000790,0.000881,0.004523,0.003371,0.000737,0.001133,0.000633,0.002011,0.014185,0.002502,0.000872,0.003380,0.001260,0.003488,0.036866,0.001704,0.003076,0.002141,0.012254,0.000484,0.002562,0.000277,0.002049,0.003738,0.001888,0.000970,0.002302,0.001025,0.000329,0.000537,0.001208,0.001391,0.001829,0.001103,0.004075,0.000593,0.004360,0.005538,0.011145,0.000561,0.005146,0.000567,0.001076,0.003721,0.001442,0.002147,0.000443,0.002267,0.001844,0.000797,0.000442,0.000649,0.004586,0.003254,0.003114,0.000705,0.000606,0.000591,0.000472,0.000452,0.007174,0.005331,0.004520,0.002408,0.005485,0.007542,0.000996,0.002492,0.001594,0.001108,0.002889,0.002548,0.000437,0.007801,0.000304,0.006022,0.001472,0.001625,0.000270,0.001100,0.000280,0.001309,0.000966,0.002024,0.001297,0.020443,0.028215,0.004444,0.002027,0.002681,0.001761,0.003448,0.003011,0.000873,0.001462,0.000421,0.004121,0.000410,0.001121,0.001033,0.002138,0.000936,0.000773,0.000833,0.001007,0.000877,0.001652,0.001582,0.018524,0.043101,0.000489,0.003454,0.002697,0.002451,0.002619
4,id_0027f1083,0.003126,0.002142,0.001250,0.014584,0.023363,0.006292,0.005174,0.001502,0.000418,0.012243,0.020616,0.000866,0.000440,0.000802,0.001782,0.001807,0.001988,0.006296,0.003873,0.001591,0.008162,0.006484,0.000795,0.002455,0.001006,0.000365,0.000635,0.001060,0.009362,0.003649,0.002450,0.001761,0.002654,0.000507,0.000520,0.000483,0.002588,0.000302,0.000180,0.000811,0.005312,0.005567,0.004221,0.011342,0.020814,0.008728,0.000499,0.004075,0.000437,0.002150,0.002016,0.001391,0.000387,0.000861,0.007170,0.003475,0.001236,0.002368,0.003463,0.001504,0.001939,0.003916,0.001867,0.001096,0.004525,0.000322,0.004169,0.001767,0.003436,0.000878,0.000952,0.017922,0.006860,0.002457,0.001512,0.000931,0.004036,0.036373,0.006430,0.008718,0.000810,0.001096,0.000667,0.010122,0.000596,0.000990,0.000384,0.002626,0.000462,0.000317,0.000319,0.001637,0.000354,0.012724,0.010719,0.000320,0.001755,0.001218,0.006160,0.024165,0.002873,0.000525,0.002477,0.001391,0.002697,0.018805,0.000276,0.000645,0.002431,0.000949,0.001147,0.000234,0.000736,0.002164,0.002229,0.003334,0.001696,0.003417,0.000256,0.000794,0.000745,0.000871,0.005263,0.000944,0.002672,0.000503,0.000302,0.000280,0.013951,0.000845,0.000931,0.005377,0.000338,0.001084,0.003646,0.001527,0.003425,0.000925,0.003129,0.000737,0.003368,0.000687,0.000951,0.002628,0.005610,0.005280,0.000746,0.000762,0.001540,0.000342,0.001289,0.011176,0.001534,0.000778,0.002164,0.004500,0.003904,0.004489,0.001289,0.005545,0.000622,0.002669,0.003282,0.000478,0.002196,0.000703,0.006067,0.001427,0.009717,0.000548,0.001243,0.001383,0.000678,0.001219,0.002407,0.002227,0.008630,0.014813,0.002045,0.002820,0.001129,0.000999,0.013145,0.002797,0.000519,0.001803,0.000469,0.003767,0.000381,0.001439,0.004180,0.002265,0.001172,0.001772,0.001429,0.000751,0.001215,0.000780,0.002286,0.001355,0.001175,0.000853,0.000805,0.001314,0.000476,0.001909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,0.000316,0.000447,0.000539,0.002161,0.008044,0.000694,0.000622,0.000738,0.000170,0.001570,0.002695,0.000798,0.000650,0.001936,0.000420,0.000469,0.000733,0.001071,0.000997,0.000833,0.000620,0.000234,0.002061,0.000823,0.000190,0.003543,0.000839,0.000664,0.002030,0.000362,0.000457,0.002626,0.001542,0.000621,0.000439,0.000106,0.006735,0.002218,0.157218,0.000663,0.000807,0.000262,0.000644,0.000926,0.000820,0.005397,0.000783,0.000470,0.058630,0.002481,0.000842,0.000586,0.001715,0.000350,0.016042,0.000673,0.000165,0.000565,0.000640,0.000179,0.000825,0.001156,0.000545,0.000447,0.000954,0.002901,0.000763,0.000500,0.000436,0.001072,0.001689,0.002628,0.000726,0.000257,0.001066,0.000529,0.039458,0.043610,0.037702,0.004902,0.001245,0.000385,0.000337,0.002045,0.000180,0.000714,0.002631,0.000520,0.000289,0.258514,0.000411,0.000451,0.000674,0.002740,0.001623,0.003646,0.000517,0.003816,0.001208,0.004917,0.000349,0.000104,0.008690,0.000301,0.000834,0.001211,0.000489,0.006738,0.000289,0.000400,0.002690,0.004522,0.004142,0.001032,0.002016,0.000234,0.000467,0.000661,0.055602,0.000862,0.000365,0.000653,0.002865,0.000523,0.002541,0.000214,0.000251,0.001084,0.000567,0.002358,0.000525,0.000715,0.000726,0.000805,0.001622,0.000941,0.002699,0.000652,0.000168,0.000481,0.000673,0.000340,0.001013,0.004874,0.000562,0.000671,0.000397,0.000264,0.002293,0.001816,0.000861,0.002908,0.000345,0.007659,0.000258,0.000351,0.001293,0.002197,0.000149,0.001221,0.000614,0.000230,0.001011,0.000348,0.000980,0.000311,0.001196,0.000915,0.000446,0.000171,0.000321,0.000083,0.000246,0.000523,0.000352,0.000781,0.016550,0.001820,0.001042,0.001182,0.000879,0.000386,0.005733,0.000307,0.010416,0.000129,0.000279,0.000852,0.000149,0.000344,0.000207,0.001781,0.000124,0.000985,0.000313,0.000513,0.000788,0.003557,0.002756,0.310272,0.003709,0.001189,0.006384,0.000460,0.000433,0.000413
3978,id_ff925dd0d,0.002801,0.003624,0.001073,0.009997,0.017406,0.005986,0.005878,0.003193,0.000239,0.010948,0.031299,0.000739,0.000707,0.000340,0.000497,0.000770,0.005622,0.005969,0.004484,0.002312,0.003288,0.004844,0.000946,0.002014,0.001184,0.000693,0.000510,0.000718,0.003423,0.004115,0.003509,0.001998,0.003181,0.000290,0.000542,0.000500,0.002792,0.000230,0.000302,0.000325,0.004455,0.003804,0.002587,0.013557,0.008016,0.013115,0.000404,0.001176,0.001920,0.003709,0.001062,0.000945,0.000589,0.000703,0.009278,0.004361,0.003054,0.002456,0.001899,0.001291,0.001537,0.005391,0.003754,0.001393,0.002932,0.000264,0.003083,0.001628,0.002447,0.000813,0.001471,0.016444,0.007398,0.001516,0.003585,0.002110,0.001046,0.013999,0.006953,0.013498,0.000510,0.000806,0.000481,0.016369,0.002465,0.000486,0.000250,0.001429,0.000596,0.000329,0.000340,0.003761,0.001218,0.004587,0.011004,0.000609,0.001565,0.000993,0.008681,0.026024,0.001859,0.000842,0.002412,0.001293,0.006116,0.011154,0.000505,0.000905,0.004999,0.000238,0.001685,0.000266,0.000774,0.001701,0.000969,0.001453,0.000718,0.003755,0.000295,0.001314,0.001140,0.001083,0.002305,0.000668,0.002236,0.000369,0.000386,0.000528,0.004170,0.000493,0.001346,0.005663,0.000950,0.001635,0.003986,0.001444,0.005406,0.001158,0.002247,0.000386,0.002053,0.001318,0.000913,0.004750,0.006411,0.004601,0.000642,0.002118,0.003136,0.001291,0.002029,0.014763,0.001309,0.000960,0.001312,0.003698,0.003021,0.002550,0.000798,0.006198,0.000443,0.002370,0.006111,0.000387,0.001064,0.000665,0.002161,0.002018,0.004339,0.000490,0.001059,0.000859,0.000868,0.000810,0.001614,0.001295,0.010742,0.016646,0.002892,0.001920,0.001186,0.001012,0.025152,0.000391,0.001350,0.000473,0.001264,0.002325,0.000233,0.001401,0.001351,0.003280,0.000589,0.002187,0.001571,0.001840,0.000547,0.000667,0.003309,0.001734,0.001168,0.001049,0.002244,0.002268,0.000337,0.000828
3979,id_ffb710450,0.003546,0.002088,0.001026,0.007327,0.031978,0.010311,0.002705,0.001655,0.000223,0.007468,0.017680,0.000916,0.000519,0.000470,0.000776,0.000837,0.003819,0.005969,0.003790,0.001268,0.003334,0.008837,0.000777,0.001824,0.001616,0.000499,0.000448,0.000685,0.004023,0.003108,0.002968,0.003252,0.003577,0.000349,0.000529,0.000385,0.002331,0.000208,0.000140,0.000402,0.004427,0.004015,0.004002,0.010184,0.015766,0.006532,0.000594,0.001072,0.000472,0.002050,0.001345,0.001064,0.000336,0.000745,0.005701,0.004456,0.001688,0.003175,0.001553,0.002116,0.001480,0.002991,0.003589,0.001203,0.002901,0.000314,0.003778,0.001884,0.002513,0.000967,0.001014,0.028746,0.006329,0.001788,0.002888,0.001470,0.001183,0.017496,0.005091,0.014491,0.001100,0.000939,0.000447,0.010293,0.001257,0.000434,0.000276,0.001194,0.000401,0.000193,0.000365,0.001900,0.000368,0.004455,0.010769,0.000326,0.001743,0.000533,0.005282,0.020011,0.001519,0.000557,0.001237,0.001026,0.005583,0.008883,0.000417,0.000579,0.003961,0.000488,0.000844,0.000294,0.000615,0.001863,0.001001,0.001665,0.000920,0.002427,0.000154,0.000421,0.000813,0.001004,0.003263,0.000675,0.001390,0.000391,0.000263,0.000339,0.006540,0.000533,0.001241,0.005200,0.000422,0.001352,0.005203,0.002000,0.002784,0.001090,0.003875,0.000518,0.004858,0.001212,0.001315,0.003446,0.008796,0.005620,0.001102,0.001096,0.001347,0.000282,0.001238,0.009069,0.001349,0.000669,0.001524,0.003551,0.003603,0.003563,0.001130,0.004771,0.000430,0.002556,0.004302,0.000546,0.001187,0.000608,0.003693,0.001720,0.006995,0.000298,0.001005,0.003721,0.000564,0.001580,0.002300,0.001064,0.006967,0.025555,0.001889,0.001778,0.001383,0.000668,0.022095,0.001082,0.000499,0.001047,0.000476,0.002612,0.000349,0.002415,0.003111,0.002938,0.000759,0.001405,0.003265,0.001330,0.000636,0.000853,0.002785,0.001060,0.001870,0.000727,0.000656,0.001172,0.000380,0.001402
3980,id_ffbb869f2,0.000983,0.000667,0.001208,0.022553,0.023339,0.003471,0.009129,0.002413,0.000569,0.020582,0.038303,0.001100,0.000341,0.001273,0.001287,0.002114,0.002246,0.006058,0.002607,0.002416,0.004723,0.004993,0.000549,0.002733,0.001065,0.000563,0.000942,0.000839,0.009034,0.003443,0.001568,0.001574,0.002824,0.000467,0.000542,0.000461,0.001774,0.000446,0.000176,0.001434,0.003442,0.003079,0.003409,0.013794,0.007205,0.009421,0.000510,0.001130,0.000544,0.004619,0.002684,0.001267,0.000803,0.000851,0.010757,0.001375,0.002989,0.001599,0.002476,0.001328,0.001038,0.004080,0.001561,0.001803,0.003318,0.000394,0.002608,0.001643,0.003366,0.000582,0.000950,0.013470,0.003378,0.001802,0.000955,0.000632,0.002038,0.017729,0.008616,0.019443,0.000682,0.000888,0.000546,0.003367,0.001197,0.001746,0.000328,0.002523,0.000663,0.000280,0.000380,0.001331,0.000429,0.007446,0.007532,0.000401,0.001969,0.000797,0.005870,0.020015,0.003247,0.000584,0.003483,0.001463,0.001972,0.023331,0.000638,0.000841,0.003988,0.002035,0.000694,0.000578,0.000589,0.001760,0.002511,0.003635,0.004757,0.005413,0.000321,0.000529,0.000580,0.000681,0.005528,0.001248,0.001732,0.000514,0.000457,0.000366,0.008281,0.001732,0.000497,0.004721,0.000435,0.002168,0.004122,0.002276,0.002363,0.000629,0.004066,0.001225,0.001369,0.000562,0.000626,0.005690,0.003802,0.002267,0.000474,0.001298,0.000755,0.000338,0.000843,0.017355,0.001621,0.002505,0.002646,0.009847,0.006062,0.003715,0.001563,0.003406,0.000812,0.002177,0.003061,0.000259,0.002632,0.000530,0.003300,0.001624,0.006160,0.000390,0.000702,0.000379,0.000683,0.001049,0.003205,0.001328,0.013278,0.018281,0.003389,0.001829,0.001521,0.002172,0.012715,0.001954,0.000652,0.000941,0.000494,0.006241,0.000332,0.001985,0.002054,0.001958,0.001307,0.002296,0.000978,0.000757,0.001164,0.000624,0.001806,0.001202,0.001308,0.000482,0.000661,0.001848,0.000704,0.002586


In [32]:
# Release memory at the end of the run.
# Order matters: run the Python garbage collector first so that tensors kept
# alive only by reference cycles are actually freed; only then can
# torch.cuda.empty_cache() return their (now unoccupied) cached blocks to the
# GPU driver. Calling empty_cache() first would leave those blocks cached.
unreachable_count = gc.collect()
torch.cuda.empty_cache()

# Keep the collector's return value as the cell output (number of unreachable
# objects found), matching what the cell displayed before.
unreachable_count

20

## EOF