$$ 0 \leq BS = \frac{1}{N} \sum_{t=1}^{N} ( f_t - o_t)^2 \leq 1 $$

其中 $N$ 是样本数量,$f_t$ 是模型对事件 $t$ 预测的发生概率,$o_t$ 是事件 $t$ 的实际结果(必须为二分类:发生记为 1,未发生记为 0)。Brier 分数越接近 0,说明预测概率越准确。

In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB



In [2]:
# Synthetic binary-classification data: 20 features in total, of which 2 are
# informative and 2 are redundant. A fixed random_state makes both the
# generated data and the train/test split reproducible on a fresh kernel.
X, y = datasets.make_classification(n_samples=10000, n_features=20, n_classes=2,
                                    n_informative=2, n_redundant=2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [3]:
# Train logistic regression on the training split (fit returns the estimator
# itself), then collect the per-class probabilities for the test split.
lg = LogisticRegression(solver='lbfgs', max_iter=10000).fit(X_train, y_train)
proba = lg.predict_proba(X_test)
proba.shape

(2500, 2)

In [4]:
# Arguments: true labels, probabilities of the positive class, positive label.
# IMPORTANT: the probability column must match pos_label. Column 0 of `proba`
# is passed here, so class 0 is declared the positive class (pos_label
# defaults to 1).
brier_score_loss(y_test, proba[:, 0], pos_label=0)

0.058065419331728416

In [5]:
# probability=True is required for svc.predict_proba, which a later cell calls.
# Fitting the probability model shuffles the data internally, so random_state
# is fixed to keep those probability estimates reproducible between runs.
svc = SVC(probability=True, random_state=0)
svc.fit(X_train, y_train)

In [6]:
# Min-max scale the SVM decision values into [0, 1] so they can be treated
# like probabilities. decision_function is evaluated once and reused rather
# than recomputed for every term of the formula.
decision_scores = svc.decision_function(X_test)
positive_probal = (decision_scores - np.min(decision_scores)) / (np.max(decision_scores) - np.min(decision_scores))
positive_probal

array([0.40583391, 0.7085993 , 0.55049014, ..., 0.34781325, 0.11373602,
       0.77653234])

In [7]:
# Brier score of the min-max-scaled SVM decision values (scaled scores, not
# predict_proba output) against the true labels, with class 1 as positive.
brier_score_loss(y_test, positive_probal, pos_label=1)

0.10448421405340484

In [8]:
# Class-probability estimates from the SVC for the test split. Despite the
# name, this holds one column per class; column 1 is the one used with
# pos_label=1 in the following Brier-score cell.
positive_probal1 = svc.predict_proba(X_test)
positive_probal1

array([[9.63776221e-01, 3.62237792e-02],
       [5.11131110e-02, 9.48886889e-01],
       [5.82535894e-01, 4.17464106e-01],
       ...,
       [9.88965936e-01, 1.10340637e-02],
       [9.99910767e-01, 8.92328974e-05],
       [1.31455797e-02, 9.86854420e-01]])

In [9]:
# Brier score of the SVC's predict_proba output: column 1 is paired with
# pos_label=1 so the probabilities and the positive label refer to the same class.
brier_score_loss(y_test, positive_probal1[:, 1], pos_label=1)

0.03521240170583413

In [10]:
# Fit a Gaussian naive Bayes classifier on the same training split.
# NOTE(review): `navie` looks like a typo for `naive`; the name is kept
# because a later cell references it.
navie = GaussianNB()
navie.fit(X_train, y_train)

In [11]:
# Brier score of Gaussian naive Bayes on the test split, using column 1 of
# predict_proba together with pos_label=1.
brier_score_loss(y_test, navie.predict_proba(X_test)[:, 1], pos_label=1)

0.07225923393430231