In [13]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, make_scorer, classification_report
from sklearn.metrics import precision_recall_curve
import numpy as np
import pandas as pd
import catboost as cb

In [8]:
X, y = make_classification(
    n_samples=5000, # 1000 observations 
    n_features=500, # 5 total features
    n_informative=100, # 3 'useful' features
    n_classes=2, # binary target/label 
    random_state=999 # if you want the same results as mine
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [9]:
y_train = np.where(y_train == 1, "yes", "no")
y_test = np.where(y_test == 1, "yes", "no")

In [11]:
clf = cb.CatBoostClassifier(verbose=False)
clf.fit(X_train, y_train)
y_pred = clf2.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          no       0.92      0.91      0.91       639
         yes       0.91      0.91      0.91       611

    accuracy                           0.91      1250
   macro avg       0.91      0.91      0.91      1250
weighted avg       0.91      0.91      0.91      1250



In [23]:
clf.classes_

array(['no', 'yes'], dtype=object)

In [28]:
# Precision and recall stats
precision, recall, thresholds = precision_recall_curve(
    y_test, y_pred_proba[:, 1], pos_label="yes")

# Print summary of precision and recall table
# TODO: Add mean return stats to pr_summary_df output
pr_summary = np.column_stack([precision, recall, np.append(thresholds, [1])])
pr_summary_df = pd.DataFrame(pr_summary, columns=["precision", "recall", "thresholds"])
bins = np.linspace(start=0, stop=1, num=11)
pr_agg_df = pr_summary_df.groupby(pd.cut(pr_summary_df["thresholds"], bins=bins)).mean()


In [31]:
precision

array([0.4888    , 0.48919135, 0.48958333, ..., 1.        , 1.        ,
       1.        ])

In [29]:
pr_agg_df

Unnamed: 0_level_0,precision,recall,thresholds
thresholds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0.0, 0.1]",0.52681,0.997254,0.062224
"(0.1, 0.2]",0.62328,0.995495,0.14675
"(0.2, 0.3]",0.724658,0.98717,0.241413
"(0.3, 0.4]",0.802897,0.971078,0.347143
"(0.4, 0.5]",0.872264,0.939253,0.450934
"(0.5, 0.6]",0.926979,0.8834,0.545436
"(0.6, 0.7]",0.957841,0.804828,0.648933
"(0.7, 0.8]",0.978567,0.665988,0.750584
"(0.8, 0.9]",0.984675,0.432516,0.852074
"(0.9, 1.0]",0.985875,0.147112,0.941111


In [20]:
clf.classes_[0]

'no'

In [39]:
df = pd.DataFrame(X_test)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,-0.355573,-0.458297,-1.035269,-0.177795,-0.749322,-1.181323,-1.355402,1.126380,-1.378679,-4.305476,...,0.827518,6.571883,1.243823,1.226176,0.684980,0.265731,1.701003,-1.343898,-1.095624,0.333656
1,0.485297,-0.515670,1.600254,0.839812,-0.006350,-0.594706,-0.166324,-8.029558,-1.026229,1.988234,...,-0.670187,-3.312554,-1.139846,17.818009,1.605023,1.230285,-0.346698,-0.868879,0.436182,-0.674635
2,-0.511837,-1.868593,0.289801,-0.833455,-0.456358,0.971671,1.797679,1.562242,-0.725514,0.150018,...,0.258333,-4.651060,1.134019,5.157332,-1.068673,-1.833821,-0.639997,1.378108,-2.044283,-0.320976
3,-0.282293,-1.229541,-0.259932,0.226602,-0.491301,0.194417,1.526994,-2.483125,0.203651,8.850202,...,-1.237824,5.734414,-0.330963,-3.528918,-0.457661,-1.817994,0.388188,-1.332696,-0.461341,-0.951173
4,0.346085,0.110633,0.634873,-0.081138,1.814812,0.598574,-0.825893,3.156038,-0.127343,0.788724,...,0.636850,-3.597928,-2.597331,17.123569,-1.758326,-0.042311,1.223688,-1.090028,0.482071,-0.777810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1245,1.349706,0.370926,-0.684842,1.123948,1.335429,1.380114,0.443519,-1.661382,0.033419,4.018084,...,1.451757,5.006709,-1.364938,3.730562,1.091960,0.396796,-0.486305,0.225228,0.455587,-0.218366
1246,1.362625,-0.184110,0.629713,0.163028,-0.506726,0.579085,0.438312,-13.578304,0.781468,4.915427,...,-1.210319,-7.963894,0.928427,-5.962564,-0.539264,1.484899,-0.657201,-1.698135,0.518028,-2.752908
1247,-1.059802,0.688868,0.354718,-1.287415,0.396342,0.073631,0.596438,-10.823136,-1.120208,-6.164666,...,0.581850,-8.739441,-2.103106,3.599570,0.204673,0.950039,0.670582,-0.792710,0.003410,0.183034
1248,1.000496,-0.255267,-0.645738,0.781451,-0.309673,-1.213606,0.691961,-16.782067,-0.999900,7.661814,...,-0.479165,-8.638740,0.222073,7.195101,0.677879,1.444321,-2.487345,-0.065866,0.082317,-0.581106


In [51]:
r = np.random.random(len(df))
df["rand"] = r
df["rand"]

0       0.912750
1       0.494447
2       0.170063
3       0.246552
4       0.809981
          ...   
1245    0.940804
1246    0.724119
1247    0.431086
1248    0.787743
1249    0.311875
Name: rand, Length: 1250, dtype: float64

In [54]:
bins = np.linspace(start=0, stop=1, num=11)
ret_summary_df = df["rand"].groupby(pd.cut(df["rand"], bins=bins)).mean()
ret_summary_df

rand
(0.0, 0.1]    0.056422
(0.1, 0.2]    0.152643
(0.2, 0.3]    0.248772
(0.3, 0.4]    0.348482
(0.4, 0.5]    0.450150
(0.5, 0.6]    0.545912
(0.6, 0.7]    0.650791
(0.7, 0.8]    0.748016
(0.8, 0.9]    0.850684
(0.9, 1.0]    0.948776
Name: rand, dtype: float64

In [60]:
pd.concat([pr_agg_df, ret_summary_df], axis=1)

Unnamed: 0,precision,recall,thresholds,rand
"(0.0, 0.1]",0.52681,0.997254,0.062224,0.056422
"(0.1, 0.2]",0.62328,0.995495,0.14675,0.152643
"(0.2, 0.3]",0.724658,0.98717,0.241413,0.248772
"(0.3, 0.4]",0.802897,0.971078,0.347143,0.348482
"(0.4, 0.5]",0.872264,0.939253,0.450934,0.45015
"(0.5, 0.6]",0.926979,0.8834,0.545436,0.545912
"(0.6, 0.7]",0.957841,0.804828,0.648933,0.650791
"(0.7, 0.8]",0.978567,0.665988,0.750584,0.748016
"(0.8, 0.9]",0.984675,0.432516,0.852074,0.850684
"(0.9, 1.0]",0.985875,0.147112,0.941111,0.948776


In [64]:
ret_summary_df.shift(-2)

rand
(0.0, 0.1]    0.248772
(0.1, 0.2]    0.348482
(0.2, 0.3]    0.450150
(0.3, 0.4]    0.545912
(0.4, 0.5]    0.650791
(0.5, 0.6]    0.748016
(0.6, 0.7]    0.850684
(0.7, 0.8]    0.948776
(0.8, 0.9]         NaN
(0.9, 1.0]         NaN
Name: rand, dtype: float64

In [73]:
df["rand"].groupby(pd.cut(df["rand"], bins=bins)).agg([np.mean, np.max, np.min])

Unnamed: 0_level_0,mean,amax,amin
rand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0.0, 0.1]",0.056422,0.099448,0.000955
"(0.1, 0.2]",0.152643,0.199962,0.100184
"(0.2, 0.3]",0.248772,0.298443,0.201612
"(0.3, 0.4]",0.348482,0.399292,0.300568
"(0.4, 0.5]",0.45015,0.49987,0.40014
"(0.5, 0.6]",0.545912,0.599738,0.500394
"(0.6, 0.7]",0.650791,0.699664,0.600191
"(0.7, 0.8]",0.748016,0.799127,0.700548
"(0.8, 0.9]",0.850684,0.898963,0.8007
"(0.9, 1.0]",0.948776,0.999265,0.901006


In [79]:
df["rand"].groupby(pd.cut(df["rand"], bins=bins)).agg('describe')

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
rand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(0.0, 0.1]",130.0,0.056422,0.030243,0.000955,0.027834,0.058589,0.083148,0.099448
"(0.1, 0.2]",122.0,0.152643,0.02909,0.100184,0.128436,0.152995,0.1789,0.199962
"(0.2, 0.3]",124.0,0.248772,0.02835,0.201612,0.223167,0.251444,0.272897,0.298443
"(0.3, 0.4]",118.0,0.348482,0.030123,0.300568,0.323042,0.347178,0.378242,0.399292
"(0.4, 0.5]",123.0,0.45015,0.02981,0.40014,0.424799,0.448601,0.47515,0.49987
"(0.5, 0.6]",132.0,0.545912,0.029763,0.500394,0.518849,0.543099,0.57301,0.599738
"(0.6, 0.7]",119.0,0.650791,0.02921,0.600191,0.626372,0.646629,0.676389,0.699664
"(0.7, 0.8]",143.0,0.748016,0.02962,0.700548,0.720174,0.746834,0.774514,0.799127
"(0.8, 0.9]",122.0,0.850684,0.029719,0.8007,0.824123,0.849693,0.880161,0.898963
"(0.9, 1.0]",117.0,0.948776,0.029021,0.901006,0.921977,0.952539,0.974923,0.999265


In [76]:
import sklearn
sklearn.metrics.get_scorer_names()

['accuracy',
 'adjusted_mutual_info_score',
 'adjusted_rand_score',
 'average_precision',
 'balanced_accuracy',
 'completeness_score',
 'explained_variance',
 'f1',
 'f1_macro',
 'f1_micro',
 'f1_samples',
 'f1_weighted',
 'fowlkes_mallows_score',
 'homogeneity_score',
 'jaccard',
 'jaccard_macro',
 'jaccard_micro',
 'jaccard_samples',
 'jaccard_weighted',
 'matthews_corrcoef',
 'max_error',
 'mutual_info_score',
 'neg_brier_score',
 'neg_log_loss',
 'neg_mean_absolute_error',
 'neg_mean_absolute_percentage_error',
 'neg_mean_gamma_deviance',
 'neg_mean_poisson_deviance',
 'neg_mean_squared_error',
 'neg_mean_squared_log_error',
 'neg_median_absolute_error',
 'neg_root_mean_squared_error',
 'normalized_mutual_info_score',
 'precision',
 'precision_macro',
 'precision_micro',
 'precision_samples',
 'precision_weighted',
 'r2',
 'rand_score',
 'recall',
 'recall_macro',
 'recall_micro',
 'recall_samples',
 'recall_weighted',
 'roc_auc',
 'roc_auc_ovo',
 'roc_auc_ovo_weighted',
 'roc_auc_