In [1]:
# True selects the "../input/..." data folder and runs the inference scripts from the
# working directory; False switches both to the local workspace path (see the cell
# that defines dataset_folder / model_path).
kernel_mode = True

import os
import numpy as np
import pandas as pd
import time
import random
import math
import glob

import optuna

import warnings
warnings.filterwarnings('ignore')

import gc
gc.enable()

# --- Blend-weight search settings -------------------------------------------
# False: blend with the fixed weights stored in `model_list`.
# True:  search the weights with Optuna on the out-of-fold predictions first.
optuna_mode = False

# Name of the Optuna study; the same name is reused for its SQLite storage file.
study_name = "moa_blend_effnetv7b3_resnestv1v2"

# Number of trials handed to `study.optimize`.
n_trials = 500
# n_trials = 3000

# Seed given to the Optuna sampler.
rand_seed = 1120

In [2]:
# Create the torch hub checkpoint directory and copy the .pth files from the attached
# input datasets into it, then list the directory to confirm what was staged.
# NOTE(review): presumably this lets the inference scripts find pretrained weights
# locally instead of downloading them — confirm against the scripts.
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp ../input/gen-efficientnet-pretrained/tf_efficientnet_*.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/deepinsight-resnest-v1-resnest50/*.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/deepinsight-resnest-v2-resnest50-output/*.pth /root/.cache/torch/hub/checkpoints/
!ls -la /root/.cache/torch/hub/checkpoints/

total 866948
drwxr-xr-x 2 root root      4096 Nov 22 13:30 .
drwxr-xr-x 3 root root      4096 Nov 22 13:30 ..
-rw-r--r-- 1 root root 108143566 Nov 22 13:30 resnest50_fast_2s2x40d-9d126481.pth
-rw-r--r-- 1 root root  21386709 Nov 22 13:30 tf_efficientnet_b0_ns-c0e6a31c.pth
-rw-r--r-- 1 root root  31516408 Nov 22 13:30 tf_efficientnet_b1_ns-99dd0c41.pth
-rw-r--r-- 1 root root  36801803 Nov 22 13:30 tf_efficientnet_b2_ns-00306e48.pth
-rw-r--r-- 1 root root  49385734 Nov 22 13:30 tf_efficientnet_b3_ns-9d44bf68.pth
-rw-r--r-- 1 root root  77995057 Nov 22 13:30 tf_efficientnet_b4_ns-d6313a46.pth
-rw-r--r-- 1 root root 122404944 Nov 22 13:30 tf_efficientnet_b5_ns-6f26d0cf.pth
-rw-r--r-- 1 root root 173239537 Nov 22 13:30 tf_efficientnet_b6_ns-51548356.pth
-rw-r--r-- 1 root root 266853140 Nov 22 13:30 tf_efficientnet_b7_ns-1dbc32de.pth


In [3]:
# Copy the team's scripts into the working directory (the blend cell launches them
# from model_path, which is "." in kernel mode), then list the directory contents.
!cp ../input/kaggle-moa-team/scripts/* .
!ls -la

total 176
drwxr-xr-x 2 root root  4096 Nov 22 13:30 .
drwxr-xr-x 6 root root  4096 Nov 22 13:30 ..
---------- 1 root root 13169 Nov 22 13:30 __notebook__.ipynb
-rw-r--r-- 1 root root 56594 Nov 22 13:30 deepinsight_efficientnet_lightning_v7_b3_infer.py
-rw-r--r-- 1 root root 48198 Nov 22 13:30 deepinsight_resnest_lightning_v1_infer.py
-rw-r--r-- 1 root root 48812 Nov 22 13:30 deepinsight_resnest_lightning_v2_infer.py


In [4]:
# Root folder of the competition data: the Kaggle input mount in kernel mode,
# otherwise the local workspace copy.
if kernel_mode:
    dataset_folder = "../input/lish-moa"
else:
    dataset_folder = "/workspace/Kaggle/MoA/"

# Add your model inference script here
# Tuple Format: (script, oof_filename, output_filename, weight)
#   script          - inference script to launch
#   oof_filename    - .npy file with the model's out-of-fold predictions
#   output_filename - CSV the script is expected to write
#   weight          - blend weight used when optuna_mode is off
model_list = [
    (
        "deepinsight_efficientnet_lightning_v7_b3_infer.py",
        "oof_deepinsight_efficientnet_v7_b3_0.014802440208660929.npy",
        "submission_effnet_v7_b3.csv",
        0.3287684605023437,
    ),
    (
        "deepinsight_resnest_lightning_v1_infer.py",
        "oof_deepinsight_ResNeSt_v1_resnest50_0.014619621213185928.npy",
        "submission_resnest_v1.csv",
        0.2763485706536088,
    ),
    (
        "deepinsight_resnest_lightning_v2_infer.py",
        "oof_deepinsight_ResNeSt_v2_resnest50_0.01455961217985703.npy",
        "submission_resnest_v2.csv",
        0.3859487453003219,
    ),
]

# Directory the inference scripts are launched from.
if kernel_mode:
    model_path = "."
else:
    model_path = dataset_folder

In [5]:
# Scored training targets; every column except the "sig_id" key is a target class.
train_labels = pd.read_csv(
    f'{dataset_folder}/train_targets_scored.csv',
    engine='c',
)
train_classes = list(filter(lambda col: col != "sig_id", train_labels.columns))

# The blended submission starts from the sample file with every column after the
# first zeroed, so the weighted model outputs can be accumulated into it.
submission = pd.read_csv(f'{dataset_folder}/sample_submission.csv')
submission.iloc[:, 1:] = 0

In [6]:
def mean_logloss(y_pred, y_true):
    """Return the mean binary cross-entropy over all elements.

    Args:
        y_pred: array-like of predicted probabilities.
        y_true: array-like of 0/1 labels, broadcastable against ``y_pred``.

    Returns:
        The mean of the element-wise negative log-likelihood. A constant of
        1e-15 is added inside each logarithm so that predictions of exactly
        0 or 1 do not evaluate ``log(0)``.
    """
    eps = 1e-15
    # Contribution of the negative class (label 0) and the positive class (label 1).
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    positive_term = y_true * np.log(y_pred + eps)
    log_likelihood = negative_term + positive_term
    return np.mean(-log_likelihood)

In [7]:
total_start = time.time()
if not optuna_mode:
    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Generating submission file from {script} ......")
        infer_start = time.time()
        !python {model_path}/{script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)
        submission.iloc[:, 1:] += weight * model_submit.iloc[:, 1:]
else:
    ## Search Best Blend Weights by Optuna ##
    model_oofs = []

    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")
        oof_loss = mean_logloss(oof, train_labels[train_classes].values)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}")
        model_oofs.append(oof)

    def objective(trial):
        weights = []
        for i in range(len(model_list)):
            weights.append(trial.suggest_float(f"w{i}", 0, 1.0))

        blend = np.zeros(model_oofs[0].shape)
        for i in range(len(model_list)):
            blend += weights[i] * model_oofs[i]
        blend = np.clip(blend, 0, 1.0)

        loss = mean_logloss(blend, train_labels[train_classes].values)
        return loss

    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=0,
        interval_steps=1,
    )
    sampler = optuna.samplers.TPESampler(seed=rand_seed)
    study = optuna.create_study(direction="minimize",
                                pruner=pruner,
                                sampler=sampler,
                                study_name=study_name,
                                storage=f'sqlite:///{study_name}.db',
                                load_if_exists=True)

    study.optimize(objective,
                   n_trials=n_trials,
                   timeout=None,
                   gc_after_trial=True,
                   n_jobs=-1)

    trial = study.best_trial

    for i, (script, oof_filename, output_filename, _) in enumerate(model_list):
        optimal_weight = trial.params[f"w{i}"]
        print(f"Generating submission file from {script} ...... (Weight: {optimal_weight})")
        infer_start = time.time()
        !python {model_path}/{script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)
        submission.iloc[:, 1:] += optimal_weight * model_submit.iloc[:, 1:]

    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

Generating submission file from deepinsight_efficientnet_lightning_v7_b3_infer.py ......
PyTorch Version: 1.6.0
PyTorch Lightning Version: 1.0.4
Inferencing on Fold 0 ......
(21432,) (2382,)
Loading model from ../input/deepinsight-efficientnet-v7-b3/deepinsight_efficientnet_v7_b3/fold0/epoch24-train_loss_epoch0.016526-val_loss_epoch0.014525-image_size300-resolution300-perplexity5-fc512.ckpt
GPU available: True, used: True
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Using native 16bit precision.
Test iterations: 16
Testing: 100%|██████████████████████████████████| 16/16 [00:50<00:00,  1.26s/it]Logits: tensor([[ -5.9648,  -5.7852,  -6.5195,  ...,  -6.2734,  -8.2266,  -6.5938],
        [-10.2109,  -8.5547,  -8.2656,  ...,  -8.4375,  -8.7578,  -6.9492],
        [ -7.4766,  -7.3281,  -7.0898,  

FileNotFoundError: [Errno 2] No such file or directory: 'submission_resnest_v2.csv'

In [8]:
# total_start is taken at the top of the blending cell, so this covers everything
# that cell did (inference, blending and, when enabled, the weight search).
total_elapsed = time.time() - total_start
print(f"Total time spent: {total_elapsed/60:.2f} minutes.")

Total time spent: 17.10 minutes.


In [9]:
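# Recorded Optuna result for the three-model blend; these best-trial weights are
# the ones hard-coded in model_list.
# Number of finished trials: 500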
# Best trial:
#   Value: 0.014158536219669974
#   Params: 
#     w0: 0.3287684605023437
#     w1: 0.2763485706536088
#     w2: 0.3859487453003219

In [10]:
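# Recorded Optuna result from a run that blended only two models (w0, w1);
# these weights are not used by the current model_list.
# Number of finished trials: 3000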
# Best trial:
#   Value: 0.014287989662394677
#   Params: 
#     w0: 0.4397214034464735
#     w1: 0.5466210401214696

In [11]:
# Sanity check: print the shape of the blended submission and display a preview.
print(submission.shape)
submission

(3982, 207)


Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.000832,0.000743,0.001382,0.011709,0.017629,0.002488,0.002338,0.002365,0.000158,...,0.000937,0.000485,0.001765,0.001104,0.000445,0.000499,0.000500,0.001435,0.001015,0.000934
1,id_001897cda,0.000110,0.000193,0.000419,0.000248,0.000401,0.000861,0.001707,0.004245,0.021504,...,0.000209,0.000088,0.001194,0.000349,0.004353,0.000132,0.001885,0.000312,0.000123,0.001306
2,id_002429b5b,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,id_00276f245,0.000355,0.000280,0.001107,0.012041,0.009963,0.002757,0.002065,0.002451,0.000268,...,0.000479,0.000636,0.001140,0.003832,0.014282,0.000257,0.002981,0.001909,0.001132,0.002031
4,id_0027f1083,0.002427,0.001028,0.001010,0.009859,0.014466,0.002492,0.003168,0.000819,0.000171,...,0.000847,0.000438,0.001736,0.000618,0.000776,0.000541,0.000822,0.000973,0.000294,0.000925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,0.000139,0.000317,0.000386,0.001252,0.001120,0.000448,0.000385,0.000913,0.000319,...,0.000354,0.008652,0.001103,0.229512,0.005964,0.000575,0.002160,0.000326,0.000639,0.000194
3978,id_ff925dd0d,0.003649,0.001856,0.000636,0.006748,0.014266,0.004038,0.003687,0.001636,0.000180,...,0.000418,0.000379,0.001752,0.000931,0.000701,0.000668,0.001373,0.000955,0.000233,0.000573
3979,id_ffb710450,0.002756,0.001206,0.000544,0.006468,0.018521,0.005750,0.002268,0.001135,0.000163,...,0.000370,0.000366,0.001271,0.000562,0.000770,0.000484,0.000585,0.000705,0.000176,0.001119
3980,id_ffbb869f2,0.001199,0.000641,0.000667,0.018918,0.016052,0.002414,0.005760,0.001559,0.000437,...,0.000523,0.000313,0.001677,0.000772,0.001217,0.000308,0.000531,0.001643,0.000287,0.002001


In [12]:
# Write the blended predictions to submission.csv without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Remove every .py file from the working directory (the inference scripts were
# copied here by an earlier cell), then list what remains.
!rm ./*.py
!ls -la