In [1]:
"""
[V6]
Blend 4 Models:
* kibuna-nn-hs-1024-last-train (aka. 2stage-NN, LB: 0.01822)
* simpleNN-oldcv (LB: 0.01836)
* deepinsight-efficientnet-v7-b3-infer (LB: 0.01850)
* deepinsight_resnest_lightning_v2_infer (LB: 0.01854)

Removed for now due to low weights:
* 503-203-tabnet-with-nonscored-features (LB: 0.01836)
* fork-of-2heads-looper-super-puper-markpeng-infer (LB: 0.01836)

"""

# Environment switch. True selects the Kaggle kernel layout (data under
# ../input/lish-moa, inference scripts in the working directory); False selects
# the local layout rooted at /workspace/Kaggle/MoA/.
kernel_mode = False

import os
import numpy as np
import pandas as pd
import time
import random
import math
import datetime
import pickle
from pickle import dump, load
import glob

from numba import njit
from scipy.optimize import minimize, fsolve

import optuna

import warnings
warnings.filterwarnings('ignore')

import gc
gc.enable()

# Seed passed to the Optuna TPE sampler.
rand_seed = 1120

# search_mode=True  -> search blend weights on the OOF predictions.
# search_mode=False -> reuse existing weights (the per-target pickle when
#                      method == "scipy_per_target", else the weights stored in
#                      model_list).
# run_submit_script -> run every model's inference script and blend the
#                      resulting submission files.
search_mode = False
run_submit_script = True

# Weight-search strategy; keep exactly one uncommented.
#   "CV"               score the weights already stored in model_list
#   "scipy_per_target" SLSQP, one weight vector per target column
#   "scipy"            SLSQP, one global weight vector
#   "optuna"           TPE search, one global weight vector
# method = "CV"
method = "scipy_per_target"
# method = "scipy"
# method = "optuna"
# Used as the Optuna study / SQLite file name and as the filename prefix of the
# pickled per-target weights.
study_name = "blend_search_optuna_v6_per_target"
# Number of Optuna trials.
# n_trials = 500
n_trials = 3000
# n_trials = 5000

In [2]:
# !mkdir -p /root/.cache/torch/hub/checkpoints/
# !cp ../input/gen-efficientnet-pretrained/tf_efficientnet_*.pth /root/.cache/torch/hub/checkpoints/
# !cp ../input/deepinsight-resnest-v1-resnest50/*.pth /root/.cache/torch/hub/checkpoints/
# !cp ../input/deepinsight-resnest-v2-resnest50-output/*.pth /root/.cache/torch/hub/checkpoints/
# !ls -la /root/.cache/torch/hub/checkpoints/

In [3]:
# !cp ../input/kaggle-moa-team/scripts/* .
# !cp ../input/kaggle-moa-team/blends/*.pkl .
# !ls -la

In [4]:
# Root folder holding the competition CSV files.
dataset_folder = "../input/lish-moa" if kernel_mode else "/workspace/Kaggle/MoA/"

# Add your model inference script here
# Tuple Format: (script, oof_filename, output_filename, weight)
#   script          - inference script, executed as `python {model_path}/{script}`
#   oof_filename    - .npy file of out-of-fold predictions, resolved relative to
#                     dataset_folder
#   output_filename - submission CSV read back after the script has run
#   weight          - global blend weight, used when no searched weights are
#                     supplied (these match the Optuna best trial recorded in
#                     the V6 result notes further down)
model_list = [
    ("2stageNN_with_ns_oldcv.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py",
     "../../Github/kaggle_moa_team/oof/oof_2stageNN_ns_oldcv.npy",
     "submission_2stageNN_with_ns_oldcv_0.01822.csv",
     0.323528084383917),
    
    ("script_simpleNN_oldcv.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/script_simpleNN_oldcv.py",
     "../../Github/kaggle_moa_team/oof/oof_script_simpleNN_oldcv.npy",
     "submission_script_simpleNN_oldcv_0.01836.csv",
     0.08786476491118465),
    
    # Disabled: removed from the blend because its searched weight was very low.
    #     ("fork-of-2heads-looper-super-puper-markpeng-infer.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/fork-of-2heads-looper-super-puper-markpeng-infer.py",
    #      "../../Github/kaggle_moa_team/oof/oof_fork-of-2heads-looper-super-puper-markpeng.npy",
    #      "submission_2heads-looper-super-puper_0.01836.csv",
    #      0.018966959973949222),
    
    ("deepinsight_efficientnet_lightning_v7_b3_infer.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py",
     "../../Github/kaggle_moa_team/oof/oof_deepinsight_efficientnet_lightning_v7_b3_0.01850.npy",
     "submission_effnet_v7_b3_0.01850.csv",
     0.21849845883367852),
    
    # Disabled: removed from the blend because its searched weight was very low.
    #     ("script_tabnet_ns_oldcv.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/script_tabnet_ns_oldcv.py",
    #      "../../Github/kaggle_moa_team/oof/oof_tabnet_ns_oldcv.npy",
    #      "submission_tabnet_ns_oldcv_0.01836.csv",
    #      0.0013224625996093413),
    
    ("deepinsight_resnest_lightning_v2_infer.py" if kernel_mode else "../../Github/kaggle_moa_team/scripts/deepinsight_resnest_lightning_v2_infer.py",
     "../../Github/kaggle_moa_team/oof/oof_deepinsight_ResNeSt_v2_resnest50_0.01854.npy",
     "submission_resnest_v2_0.01854.csv",
     0.3704230222796271),
]

# Folder prefix for the inference scripts and for the pickled per-target weights.
model_path = "." if kernel_mode else dataset_folder

In [5]:
# Competition tables: features (needed only for cp_type) and the scored targets.
train_features = pd.read_csv(f"{dataset_folder}/train_features.csv", engine='c')
train_labels = pd.read_csv(f'{dataset_folder}/train_targets_scored.csv', engine='c')

# Every label column except the row identifier is a target.
train_classes = list(train_labels.columns[train_labels.columns != "sig_id"])

# Boolean mask of treated ("trt_cp") rows, and the labels restricted to those
# rows with a fresh 0..n-1 index.
non_control_group_rows = train_features["cp_type"] == "trt_cp"
treated_labels = train_labels.loc[non_control_group_rows, :].copy()
non_control_group_train_labels = treated_labels.reset_index(drop=True)

# Blend accumulator: the sample submission with every prediction column zeroed.
submission = pd.read_csv(f'{dataset_folder}/sample_submission.csv')
submission.iloc[:, 1:] = 0

In [6]:
def mean_logloss(y_pred, y_true):
    """Return the mean binary cross-entropy between predictions and labels.

    A small epsilon (1e-15) is added inside each logarithm instead of clipping
    the predictions. NaN entries in the element-wise loss are skipped by the
    final ``np.nanmean``.
    """
    eps = 1e-15
    negative_part = (1 - y_true) * np.log(1 - y_pred + eps)
    positive_part = y_true * np.log(y_pred + eps)
    return np.nanmean(-(negative_part + positive_part))


def save_pickle(obj, folder, name):
    """Pickle ``obj`` to ``{folder}/{name}.pkl`` using the highest protocol.

    The file is opened in a ``with`` block so the handle is flushed and closed
    even if pickling raises; the previous inline ``open(...)`` left closing to
    the garbage collector.
    """
    with open(f"{folder}/{name}.pkl", 'wb') as fh:
        dump(obj, fh, pickle.HIGHEST_PROTOCOL)


def load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in a ``with`` block so the handle is always closed.

    NOTE(review): unpickling can execute arbitrary code; only load files that
    were produced by this project.
    """
    with open(path, 'rb') as fh:
        return load(fh)

In [7]:
# Reference: https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
# CPMP's logloss from https://www.kaggle.com/c/lish-moa/discussion/183010
def log_loss_numpy(y_pred, y_true):
    """Return the mean binary log loss over all flattened elements.

    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithm. An
    element counts as positive only when its label equals 1 exactly.
    """
    labels = np.asarray(y_true).ravel()
    probs = np.clip(np.asarray(y_pred).ravel(), 1e-15, 1 - 1e-15)
    # Probability the model assigned to the label that was actually observed.
    prob_of_truth = np.where(labels == 1, probs, 1 - probs)
    return (-np.log(prob_of_truth)).mean()

def func_numpy_metric(weights):
    """Objective for the global weight search: log loss of the weighted blend.

    Contracts ``weights`` with the first axis of the module-level ``all_oof``
    array and scores the result against the module-level ``y_true``.
    """
    blended = np.tensordot(weights, all_oof, axes=(0, 0))
    return log_loss_numpy(blended, y_true)

@njit
def grad_func_jit(weights):
    """Analytic gradient of the blended mean log loss w.r.t. each model weight.

    Reads the module-level ``all_oof`` (indexed as model, row, target) and
    ``y_true``. For model ``i`` the blend is split as ``p = weights[i] * b + c``,
    where ``b`` is that model's clipped predictions and ``c`` is the weighted
    sum of all other models. The expression below is algebraically
    ``-mean(b * (y - p) / (p * (1 - p)))``.

    Returns a 1-D array with one gradient entry per model.

    NOTE(review): Numba treats referenced globals as compile-time constants, so
    this presumably needs recompiling if ``all_oof`` / ``y_true`` are rebound
    after the first call -- confirm before enabling it as ``jac=``.
    """
    n_models = all_oof.shape[0]
    oof_clip = np.minimum(1 - 1e-15, np.maximum(all_oof, 1e-15))
    gradients = np.zeros(n_models)
    for i in range(n_models):
        a, b, c = y_true, oof_clip[i], np.zeros(
            (all_oof.shape[1], all_oof.shape[2]))
        # Iterate over the MODELS axis of all_oof. The previous code used
        # `oof.shape[0]`, the row count of a leaked per-model array, which
        # indexes `weights` far out of range.
        for j in range(n_models):
            if j != i:
                c += weights[j] * oof_clip[j]
        gradients[i] = -np.mean(
            (-a * b + (b**2) * weights[i] + b * c) /
            ((b**2) *
             (weights[i]**2) + 2 * b * c * weights[i] - b * weights[i] +
             (c**2) - c))
    return gradients

## Bayesian Optimization and Sequential Least Squares Programming (SLSQP)
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.samplers.TPESampler.html#optuna.samplers.TPESampler

https://docs.scipy.org/doc/scipy/reference/optimize.minimize-slsqp.html

In [8]:
def run_inference_scripts(submission, weights=None, target_weights=None):
    for i, (script, oof_filename, output_filename, weight) in enumerate(model_list):
        print(f"Generating submission file from {script} ......")
        infer_start = time.time()
        !python {model_path}/{script}
        infer_elapsed = time.time() - infer_start
        print(f"Time spent on inference: {infer_elapsed/60:.2f} minutes.")

        model_submit = pd.read_csv(output_filename, engine='c')
        print(model_submit.head(5))
        print(model_submit.shape)

        if target_weights is not None:
            for j, target in enumerate(train_classes):
                print(f"Blending {script} for {target} with weight: {optimized_target_weights[j][i]} ......")
                submission.iloc[:, j+1] += model_submit.iloc[:, j+1] * optimized_target_weights[j][i]
        elif weights is None:
            print(f"Blending {script} with weight: {weight} ......")
            submission.iloc[:, 1:] += weight * model_submit.iloc[:, 1:]
        else:
            print(f"Blending {script} with weight: {weights[i]} ......")
            submission.iloc[:, 1:] += weights[i] * model_submit.iloc[:, 1:]

    return submission

In [9]:
total_start = time.time()


def _load_model_oofs(targets):
    """Load, row-align and stack the OOF predictions of every listed model.

    Each ``oof_filename`` in ``model_list`` is read from ``dataset_folder``.
    Files that contain one row per training sample (control rows included) are
    filtered with ``non_control_group_rows`` so they line up with ``targets``.
    The OOF log loss of each model is printed as it is loaded.

    Returns an array shaped ``(len(model_list),) + targets.shape``.
    """
    stacked = np.zeros((len(model_list), ) + targets.shape)
    print(stacked.shape)
    for i, (script, oof_filename, _, _) in enumerate(model_list):
        print(f"Loading OOF from {oof_filename} ......")
        oof = np.load(f"{dataset_folder}/{oof_filename}")

        # Full-length file: drop the control-group rows.
        if oof.shape[0] == len(train_features):
            oof = oof[non_control_group_rows.values, :]

        stacked[i, :, :] = oof

        oof_loss = mean_logloss(oof, targets)
        print(f"OOF Validation Loss of {script}: {oof_loss:.6f}\n")
    return stacked


if not search_mode and run_submit_script:
    if method == "scipy_per_target":
        # glob returns matches in arbitrary order, so sort to make the choice
        # deterministic. The filename ends with the blend's OOF loss, so the
        # first sorted name is normally the lowest-loss file (assumes plain
        # decimal formatting of the loss).
        weight_files = sorted(glob.glob(f'{model_path}/{study_name}_*.pkl'))
        if not weight_files:
            raise FileNotFoundError(
                f"No target-wise weight file matches "
                f"{model_path}/{study_name}_*.pkl; run once with "
                f"search_mode = True and method = 'scipy_per_target'.")
        weights_path = weight_files[0]
        print(f"Loading target-wise optimized weights from {weights_path} ......")
        optimized_target_weights = load_pickle(weights_path)

        # One weight vector per target column
        submission = run_inference_scripts(
            submission, target_weights=optimized_target_weights)
    else:
        # Fall back to the global weights stored in model_list
        submission = run_inference_scripts(submission)

elif search_mode and method == "CV":
    # Score the weights currently stored in model_list; no search is done.
    y_true = non_control_group_train_labels[train_classes].values
    all_oof = _load_model_oofs(y_true)

    blend_oof = np.zeros(y_true.shape)
    for i, (_, _, _, weight) in enumerate(model_list):
        blend_oof += all_oof[i] * weight

    blend_oof_loss = mean_logloss(blend_oof, y_true)
    print(f"Blend OOF Validation Loss: {blend_oof_loss:.6f}\n")

elif search_mode and method == "optuna":
    print("[Optuna]")
    ## Search Best Blend Weights by Optuna ##
    y_true = non_control_group_train_labels[train_classes].values
    all_oof = _load_model_oofs(y_true)
    model_oofs = list(all_oof)

    def objective(trial):
        """Log loss of the blend for one sampled weight vector.

        Each weight is sampled independently in [0, 1]; the weights are not
        constrained to sum to 1, so the blend is clipped to [0, 1].
        """
        weights = [
            trial.suggest_float(f"w{i}", 0, 1.0)
            for i in range(len(model_list))
        ]

        blend = np.zeros(model_oofs[0].shape)
        for i in range(len(model_list)):
            blend += weights[i] * model_oofs[i]
        blend = np.clip(blend, 0, 1.0)

        return mean_logloss(blend, y_true)

    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=0,
        interval_steps=1,
    )
    sampler = optuna.samplers.TPESampler(seed=rand_seed)
    study = optuna.create_study(direction="minimize",
                                pruner=pruner,
                                sampler=sampler,
                                study_name=study_name,
                                storage=f'sqlite:///{study_name}.db',
                                load_if_exists=True)

    # NOTE(review): with n_jobs=-1 trials run concurrently, so the seeded
    # sampler presumably does not give run-to-run identical results -- confirm
    # against the Optuna reproducibility FAQ if exact repeatability matters.
    study.optimize(objective,
                   n_trials=n_trials,
                   timeout=None,
                   gc_after_trial=True,
                   n_jobs=-1)

    trial = study.best_trial

    if run_submit_script:
        optimal_weights = [
            trial.params[f"w{i}"] for i in range(len(model_list))
        ]
        submission = run_inference_scripts(submission, weights=optimal_weights)

    print("\n[Optuna]")
    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

elif search_mode and method == "scipy":
    print("[Scipy SLSQP]")
    # Optimise Blending Weights with Bonus
    # https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
    y_true = non_control_group_train_labels[train_classes].values
    all_oof = _load_model_oofs(y_true)
    model_oofs = list(all_oof)

    # Start from equal weights; each weight is bounded to [0, 1] and the
    # weights are constrained to sum to 1.
    tol = 1e-10
    init_guess = [1 / all_oof.shape[0]] * all_oof.shape[0]
    bnds = [(0, 1) for _ in range(all_oof.shape[0])]
    cons = {
        'type': 'eq',
        'fun': lambda x: np.sum(x) - 1,
        'jac': lambda x: [1] * len(x)
    }

    print('Inital Blend OOF:', func_numpy_metric(init_guess))

    start_time = time.time()
    res_scipy = minimize(
        fun=func_numpy_metric,
        x0=init_guess,
        method='SLSQP',
        # jac=grad_func_jit,  # grad_func
        bounds=bnds,
        constraints=cons,
        tol=tol)
    print("\n[Scipy SLSQP]")
    print(
        f'[{str(datetime.timedelta(seconds = time.time() - start_time))[2:7]}] Optimised Blend OOF:',
        res_scipy.fun)
    print(f'Optimised Weights: {res_scipy.x}\n')

    if run_submit_script:
        submission = run_inference_scripts(submission, weights=res_scipy.x)

# Target-wise Weight Optimization #

elif search_mode and method == "scipy_per_target":
    print("[Scipy SLSQP]")
    # Optimise Blending Weights with Bonus
    # https://www.kaggle.com/gogo827jz/optimise-blending-weights-with-bonus-0/notebook
    y_true = non_control_group_train_labels[train_classes].values
    all_oof = _load_model_oofs(y_true)
    model_oofs = list(all_oof)

    print("\n[Scipy SLSQP Per Target]")
    n_models = all_oof.shape[0]
    # Same setup as the global search, solved once per target column.
    tol = 1e-10
    bnds = [(0, 1) for _ in range(n_models)]
    cons = {
        'type': 'eq',
        'fun': lambda x: np.sum(x) - 1,
        'jac': lambda x: [1] * len(x)
    }

    optimized_target_weights = []
    for i, target in enumerate(train_classes):
        init_guess = [1 / n_models] * n_models

        # The target index is bound through a default argument so the
        # objective does not depend on the loop variable at call time.
        def func_numpy_metric_targes(weights, target_idx=i):
            oof_blend = np.tensordot(weights,
                                     all_oof[:, :, target_idx],
                                     axes=((0), (0)))
            return log_loss_numpy(oof_blend, y_true[:, target_idx])

        start_time = time.time()
        res_scipy = minimize(
            fun=func_numpy_metric_targes,
            x0=init_guess,
            method='SLSQP',
            # jac=grad_func_jit,  # grad_func
            bounds=bnds,
            constraints=cons,
            tol=tol)

        print(
            f'[{str(datetime.timedelta(seconds = time.time() - start_time))[2:7]}] ' + \
            f'Optimised Blend OOF for {target}:', res_scipy.fun)
        print(f'Optimised Weights for {target}: {res_scipy.x}\n')
        optimized_target_weights.append(res_scipy.x)

    # Blend the OOF predictions with the per-target weights, reusing the
    # arrays already in memory instead of re-reading every file from disk.
    # target_weight_matrix[j, i] is the weight of model i for target j.
    target_weight_matrix = np.asarray(optimized_target_weights)
    blend_targets_oof = np.zeros(y_true.shape)
    for i in range(n_models):
        blend_targets_oof += all_oof[i] * target_weight_matrix[:, i]

    blend_targets_oof_loss = mean_logloss(blend_targets_oof, y_true)
    print(
        f"Blend Target-Wise OOF Validation Loss: {blend_targets_oof_loss:.6f}\n"
    )

    # Save optimized weights per target
    save_pickle(optimized_target_weights, model_path,
                f"{study_name}_{blend_targets_oof_loss}")

    if run_submit_script:
        # One weight vector per target column
        submission = run_inference_scripts(
            submission, target_weights=optimized_target_weights)

Loading target-wise optimized weights from /workspace/Kaggle/MoA/blend_search_optuna_v6_per_target_0.015043750643197006.pkl ......
[array([7.31185043e-02, 0.00000000e+00, 9.26881496e-01, 9.32413868e-18]), array([3.40439482e-17, 3.31189054e-16, 3.77387396e-01, 6.22612604e-01]), array([0.00000000e+00, 2.20784731e-17, 3.68673280e-01, 6.31326720e-01]), array([0.28153496, 0.22105365, 0.10254067, 0.39487072]), array([0.24035698, 0.19081849, 0.097873  , 0.47095153]), array([4.98732999e-17, 6.14668784e-01, 0.00000000e+00, 3.85331216e-01]), array([0.21497833, 0.        , 0.41854842, 0.36647325]), array([0.39113975, 0.        , 0.23852339, 0.37033685]), array([0.23497636, 0.12310886, 0.        , 0.64191478]), array([0.75510405, 0.19338802, 0.03291862, 0.01858931]), array([0.40646662, 0.16906572, 0.22189454, 0.20257312]), array([0.46420062, 0.12281461, 0.06797296, 0.34501181]), array([1.00000000e+00, 0.00000000e+00, 1.38777878e-17, 5.55111512e-17]), array([0.        , 0.00332533, 0.42133996, 0.57

Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for histamine_receptor_agonist with weight: 4.016969049058794e-17 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for histamine_receptor_antagonist with weight: 0.22381218928690005 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for histone_lysine_demethylase_inhibitor with weight: 0.8852564981832582 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for histone_lysine_methyltransferase_inhibitor with weight: 0.07960369528444401 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for hiv_inhibitor with weight: 0.420774321778737 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for hmgcr_inhibitor with weight: 0.1487136052176428 ......
Blending ../../Github/kaggle_moa_team/scripts/2stageNN_with_ns_oldcv.py for hsp_inhibitor with weight: 0.4368396574101606 ......
Blending .

         sig_id  5-alpha_reductase_inhibitor  11-beta-hsd1_inhibitor  \
0  id_0004d9e33                          0.0                     0.0   
1  id_001897cda                          0.0                     0.0   
2  id_002429b5b                          0.0                     0.0   
3  id_00276f245                          0.0                     0.0   
4  id_0027f1083                          0.0                     0.0   

   acat_inhibitor  acetylcholine_receptor_agonist  \
0             0.0                             0.0   
1             0.0                             0.0   
2             0.0                             0.0   
3             0.0                             0.0   
4             0.0                             0.0   

   acetylcholine_receptor_antagonist  acetylcholinesterase_inhibitor  \
0                                0.0                             0.0   
1                                0.0                             0.0   
2                               

Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for monoacylglycerol_lipase_inhibitor with weight: 0.2960045910628982 ......
Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for monoamine_oxidase_inhibitor with weight: 0.18876721793867998 ......
Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for monopolar_spindle_1_kinase_inhibitor with weight: 0.5857484659359645 ......
Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for mtor_inhibitor with weight: 0.33801950002168935 ......
Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for mucolytic_agent with weight: 0.05393889002225008 ......
Blending ../../Github/kaggle_moa_team/scripts/deepinsight_efficientnet_lightning_v7_b3_infer.py for neuropeptide_receptor_antagonist with weight: 0.09699003292405743 ......

In [10]:
# Wall-clock time since `total_start` was recorded, reported in minutes.
total_elapsed = time.time() - total_start
elapsed_minutes = total_elapsed / 60
print(f"Total time spent: {elapsed_minutes:.2f} minutes.")

Total time spent: 0.01 minutes.


In [11]:
# [V6 - without TabNet, 2heads]
# Total time spent: 0.68 minutes.
# Blend Target-Wise OOF Validation Loss: 0.015044

In [12]:
# [V6 - without TabNet, 2heads]
# [Optuna]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015171999561900233
#   Params: 
#     w0: 0.323528084383917
#     w1: 0.08786476491118465
#     w2: 0.21849845883367852
#     w3: 0.3704230222796271

# [Scipy SLSQP]
# [00:22] Optimised Blend OOF: 0.015172004593585666
# Optimised Weights: [0.32020133 0.09043987 0.22122948 0.36812932]

In [13]:
# [V6 - without TabNet]
# [Optuna]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015172424601530761
#   Params: 
#     w0: 0.3138176484100186
#     w1: 0.07850519440561339
#     w2: 0.0007183363099561991
#     w3: 0.23849563017967007
#     w4: 0.3694870328388392

# [Scipy SLSQP]
# [00:21] Optimised Blend OOF: 0.015172004898867827
# Optimised Weights: [0.32045559 0.09026525 0.         0.22069638 0.36858278]

In [14]:
# [V6]
# [Optuna]
# Number of finished trials: 5000
# Best trial:
#   Value: 0.015173437622007157
#   Params: 
#     w0: 0.30923325055652684
#     w1: 0.09831493504786226
#     w2: 0.018966959973949222
#     w3: 0.19863369862866234
#     w4: 0.0013224625996093413
#     w5: 0.3728865483320761

# [Scipy SLSQP]
# [00:36] Optimised Blend OOF: 0.015172005464591968
# Optimised Weights: [3.20472642e-01 9.01191588e-02 1.78893358e-18 2.20448482e-01
#  3.27971157e-18 3.68959717e-01]

In [15]:
# [V5]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015344701181290615
#   Params: 
#     w0: 0.5141433844379889
#     w1: 0.11747776562133813
#     w2: 0.3668324643717302

# [00:14] Optimised Blend OOF: 0.015344695215068541
# Optimised Weights: [0.51922623 0.11292509 0.36784869]

In [16]:
# [V4]
# [Optuna]
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015331901615194453
#   Params: 
#     w0: 0.4505928450756189
#     w1: 0.13010257032841785
#     w2: 0.06308933354044946
#     w3: 0.35639153615958885
#
# [Scipy]
# [00:23] Optimised Blend OOF: 0.015331777381591449
# Optimised Weights: [0.44090106 0.14508641 0.05945655 0.35455598]

In [17]:
# [V3]
# improving-mark-s-2-heads-model-infer
# Number of finished trials: 3000
# Best trial:
#   Value: 0.01515466145873492
#   Params: 
#     w0: 0.0002980690037490555
#     w1: 0.29771381784976886
#     w2: 0.1569191862042946
#     w3: 0.18156875605872544
#     w4: 0.36371774630338105

In [18]:
# [V3]
# fork-of-2heads-looper-super-puper-markpeng-infer
# Number of finished trials: 3000
# Best trial:
#   Value: 0.015170138066049686
#   Params: 
#     w0: 0.00019903389488299251
#     w1: 0.3853752127955825
#     w2: 0.015968332256452233
#     w3: 0.22945916769823432
#     w4: 0.3711290150522236

In [19]:
if search_mode and method == "scipy_per_target":
    # Log loss of the target-wise blend for each target column, then a table
    # ranked from the worst (highest loss) target to the best.
    target_oof_losses = []
    for col, target in enumerate(train_classes):
        print(target)

        loss_for_target = mean_logloss(blend_targets_oof[:, col],
                                       y_true[:, col])
        target_oof_losses += [loss_for_target]
        print(f"Blend OOF Validation Loss of {target}: {loss_for_target:.6f}\n")

    per_target = pd.DataFrame(
        data={
            "target": train_classes,
            "oof_logloss": target_oof_losses
        },
        columns=["target", "oof_logloss"])
    ranked = per_target.sort_values(by="oof_logloss", ascending=False)
    target_loss_df = ranked.reset_index(drop=True)
    print(target_loss_df)

In [20]:
# Show the blended predictions and write them to submission.csv in the working
# directory; skipped when no inference/blending was requested.
if run_submit_script:
    print(submission.shape)
    print(submission)
    # index=False: the row index is not written as a column.
    submission.to_csv('submission.csv', index=False)

(3982, 207)
            sig_id  5-alpha_reductase_inhibitor  11-beta-hsd1_inhibitor  \
0     id_0004d9e33                          0.0                     0.0   
1     id_001897cda                          0.0                     0.0   
2     id_002429b5b                          0.0                     0.0   
3     id_00276f245                          0.0                     0.0   
4     id_0027f1083                          0.0                     0.0   
...            ...                          ...                     ...   
3977  id_ff7004b87                          0.0                     0.0   
3978  id_ff925dd0d                          0.0                     0.0   
3979  id_ffb710450                          0.0                     0.0   
3980  id_ffbb869f2                          0.0                     0.0   
3981  id_ffd5800b6                          0.0                     0.0   

      acat_inhibitor  acetylcholine_receptor_agonist  \
0                0.0           

## EOF

In [21]:
# Kaggle-only cleanup: delete every .py file in the working directory
# (presumably the inference scripts copied in earlier -- confirm) and list what
# is left.
if kernel_mode:
    !rm ./*.py
    !ls -la