# Description

### This notebook shows a way to search for the best decision threshold. You can use this method to find a better threshold instead of using 0.5 as the final threshold.

### You can find the training code at <br/> https://www.kaggle.com/a763337092/neural-network-starter-pytorch-version/comments and <br/> https://www.kaggle.com/a763337092/pytorch-resnet-starter-training

## Please upvote if it helps🔥🔥🔥

# Load models

In [1]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import tensorflow_addons as tfa

import numpy as np
import pandas as pd
import datatable as dt
from tqdm import tqdm
from random import choices
import gc

# Single seed value: applied to NumPy's global RNG here and passed to
# tf.random.set_seed() right before the model is built.
SEED = 1111
np.random.seed(SEED)

In [2]:
# fit
def create_mlp(num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate):
    """Build and compile the multi-label MLP classifier.

    Architecture: BatchNorm -> Dropout on the input, then for every entry of
    ``hidden_units`` a Dense -> BatchNorm -> swish -> Dropout stack, followed
    by a Dense(num_labels) layer with a sigmoid activation.

    Args:
        num_columns: Number of input features.
        num_labels: Number of sigmoid outputs.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: Dropout rates; element 0 is applied to the input and
            element ``i + 1`` after hidden layer ``i``, so at least
            ``len(hidden_units) + 1`` values are required.
        label_smoothing: Label smoothing passed to BinaryCrossentropy.
        learning_rate: Learning rate for the RectifiedAdam optimizer.

    Returns:
        The compiled ``tf.keras`` model (loss: binary cross-entropy,
        metric: AUC).

    Raises:
        ValueError: If ``dropout_rates`` has too few entries for the
            requested number of hidden layers.
    """
    # Fail fast with a clear message instead of an IndexError half-way
    # through building the graph.
    required_rates = len(hidden_units) + 1
    if len(dropout_rates) < required_rates:
        raise ValueError(
            f"dropout_rates needs at least {required_rates} values "
            f"(1 for the input + 1 per hidden layer), got {len(dropout_rates)}"
        )

    inp = tf.keras.layers.Input(shape=(num_columns,))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(dropout_rates[0])(x)
    # Layer creation order is unchanged so saved weights still line up.
    for units, rate in zip(hidden_units, dropout_rates[1:]):
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(rate)(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation("sigmoid")(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tfa.optimizers.RectifiedAdam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        metrics=[tf.keras.metrics.AUC(name="AUC")],
    )

    return model

In [3]:
# Input columns and architecture settings for the network whose weights are
# loaded in the next cell.
features = ['feature_' + str(idx) for idx in range(130)]
hidden_units = [160] * 3
dropout_rates = [0.2] * 4
label_smoothing = 1e-2
learning_rate = 1e-3

# Reset Keras global state and fix the TensorFlow seed before building.
tf.keras.backend.clear_session()
tf.random.set_seed(SEED)
clf = create_mlp(
    num_columns=len(features),
    num_labels=5,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)

In [4]:
# Load previously saved weights from the local file 'tf_model.h5' into the
# freshly built network.
clf.load_weights('tf_model.h5')

In [15]:
# Models whose predictions are averaged in the cells below; currently this
# holds only the single MLP built above.
model_list = [clf]

# Get offline predictions

I use the last 50 dates (450–499) as my offline validation data, so I use the average prediction of the models in `model_list` on those dates to search for the best threshold.

In [13]:
# Read the full training table with datatable and convert it to pandas.
train = dt.fread('/kaggle/working/input/train.csv').to_pandas()

# Hold out dates 450..499 as the offline validation window.
valid = train[train.date.ge(450) & train.date.lt(500)].reset_index(drop=True)

# Impute missing values with the column means of the whole training table.
valid = valid.fillna(train.mean())

target_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

# The full table is not used after this point; drop the reference.
del train

In [14]:
# Feature matrix for the validation window.
X_valid = valid[features]
# Binary labels, one column per entry of target_cols: 1 where the response
# is strictly positive, else 0.
y_valid = (valid[target_cols] > 0).astype('int').to_numpy()
gc.collect()

2158

In [16]:
# Accumulate the equally weighted average of every model's output on the
# validation features.
n_models = len(model_list)
valid_pred = np.zeros(shape=(valid.shape[0], len(target_cols)))
for model in model_list:
    valid_pred += model(X_valid.to_numpy()).numpy() / n_models

In [17]:
# Collapse the per-target predictions to one score per row by taking the
# median across targets.
# The ndim guard keeps this cell idempotent: valid_pred is rebound from 2-D
# to 1-D here, so an unguarded second run would fail with an axis error.
if valid_pred.ndim == 2:
    valid_pred = np.median(valid_pred, axis=1)
valid_pred.shape

(278315,)

# Searching for best threshold

In [18]:
def utility_score_bincount(date, weight, resp, action):
    """Compute the utility score of a set of trade decisions.

    For every date the per-row products ``weight * resp * action`` are summed
    into a daily value ``Pi``. With ``t = sum(Pi) / sqrt(sum(Pi**2)) *
    sqrt(250 / n_dates)``, the score is ``clip(t, 0, 6) * sum(Pi)``.

    Args:
        date: 1-D array of non-negative integer date ids (used as bincount
            bins).
        weight: 1-D array of row weights.
        resp: 1-D array of row responses.
        action: 1-D array of decisions (0 or 1 in this notebook).

    Returns:
        The utility score as a float. Returns 0.0 when every daily value is
        zero (for example when no action is taken), instead of the NaN that
        the 0/0 division would otherwise produce.
    """
    count_i = len(np.unique(date))
    Pi = np.bincount(date, weight * resp * action)
    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    # All-zero daily values: t is undefined (0/0); the utility is zero.
    if norm == 0:
        return 0.0
    t = total / norm * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * total
    return u

In [19]:
# Grid search over thresholds 0.4500 .. 0.5499 in steps of 1e-4, keeping the
# threshold with the highest validation utility score.
best_threshold, best_u_score = 0.5, 0

# Column arrays do not change between iterations, so extract them once.
val_dates = valid.date.values
val_weights = valid.weight.values
val_resp = valid.resp.values

for step in range(4500, 5500):
    thres = step / 10000
    # 1 where the score reaches the threshold, else 0.
    actions = (valid_pred >= thres).astype(int)
    valid_u_score = utility_score_bincount(
        date=val_dates, weight=val_weights, resp=val_resp, action=actions
    )
    print(f'thresold={thres:.4f}, valid_u_score={valid_u_score:.4f}')

    # ">=" resolves ties in favour of the larger threshold.
    if valid_u_score >= best_u_score:
        best_threshold, best_u_score = thres, valid_u_score

thresold=0.4500, valid_u_score=3680.7511
thresold=0.4501, valid_u_score=3680.1077
thresold=0.4502, valid_u_score=3684.8009
thresold=0.4503, valid_u_score=3682.1079
thresold=0.4504, valid_u_score=3688.3201
thresold=0.4505, valid_u_score=3700.9308
thresold=0.4506, valid_u_score=3701.5615
thresold=0.4507, valid_u_score=3704.8463
thresold=0.4508, valid_u_score=3717.4528
thresold=0.4509, valid_u_score=3723.0871
thresold=0.4510, valid_u_score=3720.2174
thresold=0.4511, valid_u_score=3720.4855
thresold=0.4512, valid_u_score=3723.2988
thresold=0.4513, valid_u_score=3724.9619
thresold=0.4514, valid_u_score=3722.9869
thresold=0.4515, valid_u_score=3729.3038
thresold=0.4516, valid_u_score=3729.6909
thresold=0.4517, valid_u_score=3725.1121
thresold=0.4518, valid_u_score=3729.2407
thresold=0.4519, valid_u_score=3728.0242
thresold=0.4520, valid_u_score=3736.4681
thresold=0.4521, valid_u_score=3741.4285
thresold=0.4522, valid_u_score=3747.3317
thresold=0.4523, valid_u_score=3748.4450
thresold=0.4524,

In [20]:
# Report the threshold selected by the grid search and its validation score.
print(f'Best thresold={best_threshold:.4f}, best valid u score={best_u_score:.4f}')

Best thresold=0.4949, best valid u score=5032.3293


# Predict with best threshold

In [12]:
# Submission loop for the Jane Street time-series API.
#
# This cell previously still contained the PyTorch inference code (torch
# tensors, `device`, `NFOLDS`, `feat_cols`, `f_mean`, and two extra cross
# features), none of which exist in this notebook and which do not match the
# 130-input Keras model loaded above. It now mirrors the validation pipeline:
# same feature columns, same model averaging, same median-then-threshold rule.

# `janestreet` is only available inside the Kaggle competition environment,
# so it is imported here rather than in the top import cell.
import janestreet

# Fill values for missing test features.
# NOTE(review): validation rows were imputed with the training-set column
# means, but `train` has been deleted by this point. The means of the
# (already imputed) validation features are used as a stand-in; swap in the
# training means if they are kept around.
f_mean = X_valid.mean(axis=0).values.astype(np.float32)

env = janestreet.make_env()
env_iter = env.iter_test()

for (test_df, pred_df) in tqdm(env_iter):
    # Rows with zero weight do not contribute to the score; skip the model.
    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, features].values.astype(np.float32)
        nan_mask = np.isnan(x_tt)
        if nan_mask.any():
            x_tt = np.where(nan_mask, f_mean, x_tt)

        # Average the models' outputs (the network already ends in a
        # sigmoid), exactly as done for the validation predictions.
        pred = np.zeros((1, len(target_cols)))
        for model in model_list:
            pred += model(x_tt, training=False).numpy() / len(model_list)

        # Median across targets, then apply the tuned threshold.
        pred = np.median(pred)
        pred_df.action = np.where(pred >= best_threshold, 1, 0).astype(int)
    else:
        pred_df.action = 0
    env.predict(pred_df)

15219it [03:44, 67.64it/s] 
