In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from scipy.optimize import minimize
from sklearn.model_selection import KFold

In [2]:
# Fixed-seed legacy generator so every random draw made from `rand` is reproducible.
rand = np.random.RandomState(71)
# True positive-class probabilities: 100000 evenly spaced values from 0 to 1 inclusive.
train_y_prob = np.linspace(start=0, stop=1, num=100000)

In [3]:
# Binary labels: sample i is positive when a uniform [0, 1) draw falls below train_y_prob[i].
train_y = pd.Series(
    rand.uniform(low=0, high=1, size=train_y_prob.size) < train_y_prob
)
# Simulated model output: the true probability scaled by exp(0.3 * standard-normal noise),
# then clipped back into [0, 1].
train_pred_prob = (
    train_y_prob * np.exp(0.3 * rand.standard_normal(size=train_y_prob.shape))
).clip(0, 1)

In [4]:
# Per-fold accumulators: fitted threshold, F1 on the training part, F1 on the validation part.
thresholds, scores_tr, scores_va = [], [], []

In [5]:
# Cross-validated search for the F1-maximising decision threshold.
# In each fold the threshold is fitted on the training part only, then scored on
# both the training part and the held-out validation part.

# Reset the per-fold accumulators in this cell so that re-running it on its own
# does not append a second set of fold results (which would skew any later mean).
thresholds = []
scores_tr = []
scores_va = []

kf = KFold(n_splits=4, random_state=71, shuffle=True)
for i, (tr_idx, va_idx) in enumerate(kf.split(train_pred_prob)):
    tr_pred_prob, va_pred_prob = train_pred_prob[tr_idx], train_pred_prob[va_idx]
    tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

    def f1_opt(x):
        """Return the negated F1 score of the current training fold at threshold ``x``.

        ``minimize`` passes ``x`` as a one-element array; the comparison broadcasts
        it against every prediction. The sign is flipped because ``minimize``
        searches for a minimum while the goal is the maximum F1.
        """
        return -f1_score(tr_y, tr_pred_prob >= x)

    # F1 is piecewise constant in the threshold, so a derivative-free method is used.
    result = minimize(f1_opt, x0=np.array([0.5]), method="Nelder-Mead")
    threshold = result['x'].item()  # unwrap the one-element solution array to a scalar
    score_tr = f1_score(tr_y, tr_pred_prob >= threshold)
    # Validation score uses the threshold fitted on the training part only.
    score_va = f1_score(va_y, va_pred_prob >= threshold)

    thresholds.append(threshold)
    scores_tr.append(score_tr)
    scores_va.append(score_va)

In [6]:
# Threshold to apply to test-set predictions: the mean of the per-fold optimal thresholds.
threshold_test = np.mean(thresholds)
# Alias under the original (misspelled) name so any code that refers to it still works.
thresold_test = threshold_test
print(threshold_test)

0.32277832031249987
