In [1]:
# Notebook setup: imports plus display and warning configuration.
import warnings

from IPython.core.interactiveshell import InteractiveShell
# `display` belongs to the public `IPython.display` API; importing it from
# `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# Echo the value of every bare expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

# Let the notebook's `.container` element use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Silence all warnings from here on (set before numpy/pandas are imported, as in the original).
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd

- Source article (threshold tuning explained with code): https://danilzherebtsov.medium.com/tune-threshold-explained-with-code-b8098049c9ce

In [2]:
# Toy imbalanced problem: ground-truth labels and the matching predicted
# probabilities (one entry per sample; only the last two samples are positive).
labels = [0,0,0,0,0,0,0,0,1,1]
probas = np.array([0.11, 0.05, 0.12, 0.02, 0.07, 0.14, 0.16, 0.11, 0.21, 0.24])

# Predicted classes under the default 0.5 threshold. Derived from `probas`
# instead of being typed out by hand, so the two can never drift apart;
# every probability is below 0.5, hence the result is all zeros.
preds = (probas > 0.5).astype(int)

In [3]:
from sklearn.metrics import classification_report, confusion_matrix

# Ground truth / prediction aliases and class display names used by the evaluation below.
y_true, y_pred = labels, preds
target_names = ['class 0', 'class 1']

# Bare expression: the confusion matrix is echoed as cell output.
confusion_matrix(y_true, y_pred)
print(
    classification_report(y_true, y_pred, target_names=target_names)
)

array([[8, 0],
       [2, 0]], dtype=int64)

              precision    recall  f1-score   support

     class 0       0.80      1.00      0.89         8
     class 1       0.00      0.00      0.00         2

    accuracy                           0.80        10
   macro avg       0.40      0.50      0.44        10
weighted avg       0.64      0.80      0.71        10



In [4]:
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score

# Boolean predictions under the default 0.5 cut-off, computed once and
# shared by all three metrics below.
default_preds = probas > 0.5
rule = '-'*50

print('Scoring based on default threshold')
print(rule)
print(f'Accuracy score:          {accuracy_score(labels, default_preds)}')
print(f'Balanced accuracy score: {balanced_accuracy_score(labels, default_preds)}')
print(f'F1 score:                {f1_score(labels, default_preds)}')
print(rule)

Scoring based on default threshold
--------------------------------------------------
Accuracy score:          0.8
Balanced accuracy score: 0.5
F1 score:                0.0
--------------------------------------------------


In [5]:
# Show the raw probabilities, then the boolean mask produced by the 0.5 threshold.
print('Our predicted probabilities are:', probas, sep='\n')
print('\n')
print('Find positive class with 0.5 threshold:', probas > 0.5, sep='\n')

Our predicted probabilities are:
[0.11 0.05 0.12 0.02 0.07 0.14 0.16 0.11 0.21 0.24]


Find positive class with 0.5 threshold:
[False False False False False False False False False False]


In [6]:
threshold = 0.17

# Apply the tuned cut-off once; the three metrics below all score the same
# boolean predictions.
tuned_preds = probas > threshold
rule = '-'*50

print('Scoring based on tuned threshold')
print(rule)
print(f'Accuracy score:          {accuracy_score(labels, tuned_preds)}')
print(f'Balanced accuracy score: {balanced_accuracy_score(labels, tuned_preds)}')
print(f'F1 score:                {f1_score(labels, tuned_preds)}')
print(rule)

Scoring based on tuned threshold
--------------------------------------------------
Accuracy score:          1.0
Balanced accuracy score: 1.0
F1 score:                1.0
--------------------------------------------------


In [7]:
from collections import Counter

correct_threshold = 0.17
incorrect_threshold = 0.12

# Percentage of positive samples in the ground truth; it is the same in both
# scenarios, so it is computed once.
labels_pct_of_1 = Counter(labels)[1] / len(labels)*100

# Percentage of samples that each threshold flags as positive.
n_samples = len(probas)
correct_pct_of_1 = Counter(probas > correct_threshold)[1] / n_samples*100
incorrect_pct_of_1 = Counter(probas > incorrect_threshold)[1] / n_samples*100

print('Correct threshold application')
print(f'Labels fraction_of_1:      {labels_pct_of_1}%')
print(f'Predictions fraction_of_1: {correct_pct_of_1}%')
print('-'*30)
print('Incorrect threshold application')
print(f'Labels fraction_of_1:      {labels_pct_of_1}%')
print(f'Predictions fraction_of_1: {incorrect_pct_of_1}% <-')

Correct threshold application
Labels fraction_of_1:      20.0%
Predictions fraction_of_1: 20.0%
------------------------------
Incorrect threshold application
Labels fraction_of_1:      20.0%
Predictions fraction_of_1: 40.0% <-


In [14]:
# Integer (0/1) class predictions for the 0.12 threshold; the bare name echoes them.
preds_012 = (probas > 0.12).astype(int)
preds_012

array([0, 0, 0, 0, 0, 1, 1, 0, 1, 1])

In [8]:
# Evaluate the 0.17 threshold: 0/1 predictions, confusion matrix (echoed), per-class report.
preds_017 = (probas > 0.17).astype(int)
confusion_matrix(y_true, preds_017)
print(
    classification_report(y_true, preds_017, target_names=target_names)
)

array([[8, 0],
       [0, 2]], dtype=int64)

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00         8
     class 1       1.00      1.00      1.00         2

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [9]:
from verstack import ThreshTuner
# Bare expression: echoes the ground-truth labels as cell output.
labels
# Configure the tuner with n_thresholds = 20
# (presumably the number of candidate thresholds to evaluate; confirm in the verstack docs).
thresh = ThreshTuner(n_thresholds = 20)
# Fit on the true labels and the predicted probabilities; the recorded run of
# this cell printed a "Best threshold(s)" table.
thresh.fit(labels, probas)

[0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

ThreshTuner(n_threshols = 20,            
            min_threshold = None,                
            max_threshold = None,                    
            labels_fraction_of_1 = None,                        
            loss_func = None

                   Best threshold(s)
-------------------------------------------------------
 threshold  balanced_accuracy_score  fraction_of_1
  0.160000                      1.0            0.2
  0.202105                      1.0            0.2
-------------------------------------------------------


In [11]:
# Evaluate the 0.16 threshold: 0/1 predictions, confusion matrix (echoed), per-class report.
preds_016 = (probas > 0.160000).astype(int)
confusion_matrix(y_true, preds_016)
print(
    classification_report(y_true, preds_016, target_names=target_names)
)

array([[8, 0],
       [0, 2]], dtype=int64)

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00         8
     class 1       1.00      1.00      1.00         2

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [12]:
# Evaluate the 0.202105 threshold: 0/1 predictions, confusion matrix (echoed),
# per-class report.
# The predictions get their own name so they do not overwrite `preds_017`,
# which holds the predictions for the 0.17 threshold.
preds_0202 = (probas > 0.202105)*1
confusion_matrix(y_true, preds_0202)
print(classification_report(y_true, preds_0202, target_names=target_names))

array([[8, 0],
       [0, 2]], dtype=int64)

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00         8
     class 1       1.00      1.00      1.00         2

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [13]:
# Both metrics used in this cell are imported here so the cell is self-contained.
from sklearn.metrics import accuracy_score, balanced_accuracy_score

# Stand-alone example comparing balanced accuracy with plain accuracy.
# Dedicated names are used so the notebook-wide `y_true` / `y_pred`
# (bound to `labels` / `preds` earlier) are not overwritten by this
# unrelated 6-sample example.
y_true_example = [0, 1, 0, 0, 1, 0]
y_pred_example = [0, 1, 0, 0, 0, 1]
balanced_accuracy_score(y_true_example, y_pred_example)
accuracy_score(y_true_example, y_pred_example)

0.625

0.6666666666666666

### End