In [1]:
# Environment switch: when True the notebook reads data/models from the
# "../input/..." layout; when False it uses the local /workspace layout.
kernel_mode = False

import os
import numpy as np
import pandas as pd
import time
import random
import math
import glob

import optuna

import warnings
warnings.filterwarnings('ignore')

import gc
gc.enable()

# --- Blend / weight-search configuration ---------------------------------

# False: blend the model submissions with the fixed weights in model_list.
# True:  first search the blend weights with Optuna on the OOF predictions.
optuna_mode = False

# Name of the Optuna study; also used to build the SQLite storage file name.
study_name = "moa_blend_effnetv7b3_resnestv1v2"

# Number of Optuna trials to run (alternative previously tried: 3000).
n_trials = 500

# Seed passed to the Optuna sampler.
rand_seed = 1120

In [2]:
# Resolve the data and model locations for the current environment.
if kernel_mode:
    dataset_folder = "../input/lish-moa"
    model_path = "../input"
else:
    dataset_folder = "/workspace/Kaggle/MoA/"
    model_path = f"{dataset_folder}/completed"

# Add your model inference script here
# Tuple Format: (script, oof_filename, output_filename, weight)
#   script          - inference script executed with `python`
#   oof_filename    - .npy file with the model's out-of-fold predictions
#   output_filename - CSV written by the script, read back for blending
#   weight          - fixed blend weight used when optuna_mode is False
model_list = [
    (
        "deepinsight_efficientnet_lightning_v7_b3_infer.py",
        "oof_deepinsight_efficientnet_v7_b3_0.014802440208660929.npy",
        "submission_effnet_v7_b3.csv",
        0.3287684605023437,
    ),
    (
        "deepinsight_resnest_lightning_v1_infer.py",
        "oof_deepinsight_ResNeSt_v1_resnest50_0.014619621213185928.npy",
        "submission_resnest_v1.csv",
        0.2763485706536088,
    ),
    (
        "deepinsight_resnest_lightning_v2_infer.py",
        "oof_deepinsight_ResNeSt_v2_resnest50_0.01455961217985703.npy",
        "submission_resnest_v2.csv",
        0.3859487453003219,
    ),
]

In [3]:
# Scored training targets; every column except the "sig_id" key is a class.
train_labels = pd.read_csv(
    f'{dataset_folder}/train_targets_scored.csv',
    engine='c',
)
label_columns = train_labels.columns
train_classes = label_columns[label_columns != "sig_id"].tolist()

# The sample submission is used as the template for the blended output:
# keep the first column and zero every other column so that weighted
# model predictions can be accumulated into it.
submission = pd.read_csv(f'{dataset_folder}/sample_submission.csv')
submission.iloc[:, 1:] = 0

In [4]:
def mean_logloss(y_pred, y_true):
    """Return the mean binary cross-entropy over all elements.

    Args:
        y_pred: Predicted probabilities (scalar or array-like supporting
            NumPy arithmetic).
        y_true: Ground-truth labels, broadcastable against ``y_pred``.

    Returns:
        The mean of the element-wise negative log-likelihood. A constant
        1e-15 is added inside each logarithm so that exact 0/1 predictions
        do not evaluate ``log(0)``.
    """
    eps = 1e-15
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    positive_term = y_true * np.log(y_pred + eps)
    return np.mean(-(negative_term + positive_term))

In [5]:
total_start = time.time()
if not optuna_mode:
    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Generating submission file from {script} ......")
        infer_start = time.time()
        !python {script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)
        submission.iloc[:, 1:] += weight * model_submit.iloc[:, 1:]
else:
    ## Search Best Blend Weights by Optuna ##
    model_oofs = []

    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")
        oof_loss = mean_logloss(oof, train_labels[train_classes].values)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}")
        model_oofs.append(oof)

    def objective(trial):
        weights = []
        for i in range(len(model_list)):
            weights.append(trial.suggest_float(f"w{i}", 0, 1.0))

        blend = np.zeros(model_oofs[0].shape)
        for i in range(len(model_list)):
            blend += weights[i] * model_oofs[i]
        blend = np.clip(blend, 0, 1.0)

        loss = mean_logloss(blend, train_labels[train_classes].values)
        return loss

    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=0,
        interval_steps=1,
    )
    sampler = optuna.samplers.TPESampler(seed=rand_seed)
    study = optuna.create_study(direction="minimize",
                                pruner=pruner,
                                sampler=sampler,
                                study_name=study_name,
                                storage=f'sqlite:///{study_name}.db',
                                load_if_exists=True)

    study.optimize(objective,
                   n_trials=n_trials,
                   timeout=None,
                   gc_after_trial=True,
                   n_jobs=-1)

    trial = study.best_trial

    for i, (script, oof_filename, output_filename, _) in enumerate(model_list):
        optimal_weight = trial.params[f"w{i}"]
        print(f"Generating submission file from {script} ...... (Weight: {optimal_weight})")
        infer_start = time.time()
        !python {script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)
        submission.iloc[:, 1:] += optimal_weight * model_submit.iloc[:, 1:]

    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

Generating submission file from deepinsight_efficientnet_lightning_v7_b3_infer.py ......
PyTorch Version: 1.6.0+cu101
PyTorch Lightning Version: 1.0.4
Inferencing on Fold 0 ......
(21432,) (2382,)
Loading model from /workspace/Kaggle/MoA/completed/deepinsight_efficientnet_v7_b3/deepinsight_efficientnet_v7_b3/fold0/epoch24-train_loss_epoch0.016526-val_loss_epoch0.014525-image_size300-resolution300-perplexity5-fc512.ckpt
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.
Test iterations: 42
Testing: 100%|██████████████████████████████████| 42/42 [00:10<00:00,  5.33it/s]Logits: tensor([[ -5.9648,  -5.7891,  -6.5156,  ...,  -6.2695,  -8.2188,  -6.5938],
        [-10.2031,  -8.5469,  -8.2656,  ...,  -8.4375,  -8.7656,  -6.9492],
        [ -7.4727,  -7.3242,  -7.0898,  ...,  -7.5469,  -7.7734,  -6.9453],
        ...,
        [ -5.5820,  -6.8477,  -7.1406,  ...,  -6.8594,  -8.3125,  -6.9766],
     

Test iterations: 42
Testing: 100%|██████████████████████████████████| 42/42 [00:09<00:00,  5.26it/s]Logits: tensor([[ -7.5195,  -7.1797,  -5.7695,  ...,  -5.7656,  -5.8906,  -6.6289],
        [ -9.2266,  -8.9688,  -7.6719,  ...,  -7.7109, -12.4219,  -6.6680],
        [ -8.0469,  -8.3359,  -7.4805,  ...,  -7.5156,  -8.4531,  -7.3125],
        ...,
        [ -5.4414,  -7.2773,  -8.2266,  ...,  -7.5508,  -9.5469,  -6.4297],
        [ -7.6445,  -7.4336,  -6.3711,  ...,  -6.5156,  -9.1406,  -5.6562],
        [ -7.6172,  -6.6367,  -6.3750,  ...,  -5.7422,  -7.8867,  -6.3633]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[5.422e-04 7.615e-04 3.111e-03 ... 3.124e-03 2.758e-03 1.320e-03]
 [9.841e-05 1.273e-04 4.656e-04 ... 4.478e-04 4.053e-06 1.269e-03]
 [3.200e-04 2.397e-04 5.636e-04 ... 5.441e-04 2.131e-04 6.666e-04]
 ...
 [4.314e-03 6.905e-04 2.673e-04 ... 5.255e-04 7.141e-05 1.611e-03]
 [4.785e-04 5.908e-04 1.707e-03 ... 1.478e-03 1.072e-04 3.483e-03]
 [4.916e-04 1.309e-03 1

Generating submission file from deepinsight_resnest_lightning_v1_infer.py ......
PyTorch Version: 1.6.0+cu101
PyTorch Lightning Version: 1.0.4
Inferencing on Fold 0 ......
(21432,) (2382,)
Loading model from /workspace/Kaggle/MoA/completed/deepinsight_ResNeSt_v1_resnest50/deepinsight_ResNeSt_v1_resnest50/fold0/epoch25-train_loss_epoch0.016572-val_loss_epoch0.014525-image_size224-resolution224-perplexity5-fc512.ckpt
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.
Test iterations: 16
Testing: 100%|██████████████████████████████████| 16/16 [00:14<00:00,  1.37it/s]Logits: tensor([[ -7.0039,  -7.3164,  -6.5586,  ...,  -6.7266,  -6.0352,  -5.7812],
        [-10.5312,  -8.0078,  -8.2500,  ...,  -7.3086, -10.0625,  -5.7656],
        [ -7.7461,  -7.5820,  -7.5547,  ...,  -7.6875,  -8.0078,  -7.4961],
        ...,
        [ -5.9141,  -6.7227,  -7.0273,  ...,  -6.9570,  -7.1562,  -6.5312],
        [

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.
Test iterations: 16
Testing: 100%|██████████████████████████████████| 16/16 [00:13<00:00,  1.40it/s]Logits: tensor([[-7.5156, -6.6094, -6.1016,  ..., -6.6055, -5.6953, -6.1602],
        [-8.2031, -9.0078, -8.7109,  ..., -8.6094, -9.7188, -7.5508],
        [-7.5117, -7.5625, -7.2070,  ..., -7.6992, -7.6680, -7.4688],
        ...,
        [-5.1680, -5.4453, -7.6836,  ..., -7.3086, -8.1719, -6.2969],
        [-6.2930, -6.7188, -7.0078,  ..., -6.3281, -6.9883, -6.4062],
        [-6.2734, -7.4570, -6.4258,  ..., -6.9883, -8.2344, -6.8125]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[5.4407e-04 1.3456e-03 2.2335e-03 ... 1.3514e-03 3.3512e-03 2.1076e-03]
 [2.7370e-04 1.2243e-04 1.6475e-04 ... 1.8239e-04 6.0141e-05 5.2547e-04]
 [5.4646e-04 5.1928e-04 7.4100e-04 ... 4.5300e-04 4.6730e-04 5.7030e-04]
 ...
 [5.6648e-03 4.2992e-03 4.60

PyTorch Version: 1.6.0+cu101
PyTorch Lightning Version: 1.0.4
Inferencing on Fold 0 ......
(21432,) (2382,)
Loading model from /workspace/Kaggle/MoA/completed/deepinsight_ResNeSt_v2_resnest50/deepinsight_ResNeSt_v2_resnest50/fold0/epoch25-train_loss_epoch0.016863-val_loss_epoch0.014446-image_size224-resolution224-perplexity5-fc512.ckpt
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.
Test iterations: 16
Testing: 100%|██████████████████████████████████| 16/16 [00:14<00:00,  1.42it/s]Logits: tensor([[-7.1992, -8.0703, -6.8320,  ..., -7.0938, -6.2148, -5.9062],
        [-9.7656, -9.1875, -7.5547,  ..., -7.7031, -9.1328, -5.5781],
        [-7.8945, -7.7383, -7.6133,  ..., -7.5938, -7.8750, -7.5664],
        ...,
        [-5.6328, -6.8633, -7.2227,  ..., -6.8398, -7.9336, -6.5352],
        [-7.1055, -7.6250, -7.3008,  ..., -6.4844, -7.0312, -5.6172],
        [-6.2266, -6.8242, -6.8398,  ..., -6

Inferencing on Fold 6 ......
(21433,) (2381,)
Loading model from /workspace/Kaggle/MoA/completed/deepinsight_ResNeSt_v2_resnest50/deepinsight_ResNeSt_v2_resnest50/fold6/epoch35-train_loss_epoch0.015504-val_loss_epoch0.014640-image_size224-resolution224-perplexity5-fc512.ckpt
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Using native 16bit precision.
Test iterations: 16
Testing: 100%|██████████████████████████████████| 16/16 [00:13<00:00,  1.43it/s]Logits: tensor([[-7.9414, -7.6016, -6.3906,  ..., -6.6133, -4.0391, -7.3008],
        [-9.5625, -9.4922, -6.1719,  ..., -8.0312, -7.8477, -6.4961],
        [-7.5938, -7.4297, -7.1289,  ..., -7.4531, -7.5352, -7.5977],
        ...,
        [-5.4102, -6.4961, -6.6406,  ..., -6.8711, -7.5547, -7.2305],
        [-6.8438, -7.1055, -7.7188,  ..., -6.8320, -7.0312, -6.2422],
        [-7.3789, -6.8438, -6.2344,  ..., -6.1641, -7.9336, -7.1406]],
       device='cuda:0', dtype=torch

In [6]:
# Wall-clock time since total_start was taken at the top of the blending
# cell; when cells are run by hand this also includes any idle time between
# the two cells.
total_elapsed = time.time() - total_start
print(f"Total time spent: {total_elapsed/60:.2f} minutes.")

Total time spent: 7.57 minutes.


In [7]:
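# Recorded Optuna result for the three-model blend; these w0/w1/w2 values are
# the weights hard-coded in model_list.
# Number of finished trials: 500
# Best trial:
#   Value: 0.014158536219669974
#   Params: 
#     w0: 0.3287684605023437
#     w1: 0.2763485706536088
#     w2: 0.3859487453003219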

In [8]:
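# Recorded Optuna result from a search over two weights only (w0, w1) -
# presumably an earlier two-model blend; kept for comparison.
# Number of finished trials: 3000
# Best trial:
#   Value: 0.014287989662394677
#   Params: 
#     w0: 0.4397214034464735
#     w1: 0.5466210401214696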

In [9]:
# Sanity check of the blended result: print its shape, then show the frame
# itself as the cell's rich output.
print(submission.shape)
submission

(3982, 207)


Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.001142,0.001169,0.001979,0.016599,0.028272,0.004113,0.003349,0.003262,0.000252,...,0.001502,0.000646,0.002619,0.001827,0.000812,0.000746,0.000759,0.002110,0.002084,0.001311
1,id_001897cda,0.000193,0.000270,0.000733,0.000333,0.000743,0.001449,0.002928,0.006153,0.057622,...,0.000324,0.000144,0.001727,0.000766,0.005054,0.000174,0.004172,0.000434,0.000189,0.002181
2,id_002429b5b,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,id_00276f245,0.000511,0.000412,0.001808,0.019777,0.016180,0.004175,0.003904,0.003917,0.000429,...,0.000816,0.001290,0.001753,0.011384,0.030801,0.000447,0.004316,0.002943,0.002088,0.003025
4,id_0027f1083,0.003644,0.001856,0.001491,0.015456,0.023494,0.004936,0.005170,0.001394,0.000332,...,0.001316,0.000740,0.002615,0.001142,0.001231,0.000870,0.001134,0.001477,0.000478,0.001660
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,0.000260,0.000488,0.000591,0.002085,0.004219,0.000715,0.000625,0.001194,0.000384,...,0.000657,0.009988,0.002164,0.349718,0.007415,0.001033,0.004636,0.000503,0.000811,0.000353
3978,id_ff925dd0d,0.004736,0.003260,0.001049,0.010592,0.020994,0.006347,0.005953,0.002867,0.000272,...,0.000629,0.000636,0.003026,0.001602,0.001148,0.001073,0.002233,0.001827,0.000365,0.000893
3979,id_ffb710450,0.004128,0.002012,0.000940,0.009298,0.030897,0.009727,0.003315,0.001772,0.000248,...,0.000615,0.000695,0.002347,0.000969,0.001489,0.000763,0.000839,0.001159,0.000322,0.001656
3980,id_ffbb869f2,0.001578,0.000899,0.001131,0.027585,0.025055,0.003754,0.009296,0.002481,0.000656,...,0.000970,0.000553,0.002376,0.001234,0.001722,0.000495,0.000786,0.002352,0.000560,0.002996


In [10]:
# Write the blended predictions to submission.csv; index=False keeps the
# DataFrame index out of the file.
submission.to_csv('submission.csv', index=False)