# <span style="font-family: Arial, sans-serif; color:#97f788">Fisher Scoring</span>
## <span style="font-family: Arial, sans-serif; color:navy">Example with Focal Loss Logistic Regression</span>

<span style="font-family: Arial, sans-serif; color:navy">Repo: <a href="https://github.com/xRiskLab/fisher-scoring" title="GitHub link">https://github.com/xRiskLab/fisher-scoring</a></span>

In [1]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from fisher_scoring import FisherScoringFocalRegression
from fisher_scoring import FisherScoringLogisticRegression

# Target share of the positive class in the synthetic, imbalanced dataset
prevalence = 0.01

# Generate the raw feature matrix and label vector; class weights are
# ordered [negative, positive].
features_raw, labels_raw = make_classification(
    n_samples=7_000,
    n_features=10,
    n_informative=6,
    n_redundant=0,
    n_classes=2,
    weights=[1 - prevalence, prevalence],
    random_state=42,
)

# Wrap the arrays in pandas objects, naming the columns feature_0 ... feature_N
X = pd.DataFrame(
    features_raw,
    columns=[f'feature_{i}' for i in range(features_raw.shape[1])],
)
y = pd.Series(labels_raw)

# Split the row index (not the data) 70/30, so train and test rows are
# selected later with .loc on both X and y.
ix_train, ix_test = train_test_split(X.index, test_size=0.3, random_state=42)

# Baseline: logistic regression fitted by Fisher scoring with the observed
# information matrix, capped at 100 iterations.
# NOTE(review): the recorded output for this fit reads "Maximum iterations
# reached without convergence.", so the Gini below comes from a fit that had
# not converged -- consider revisiting max_iter.
model_logistic = FisherScoringLogisticRegression(
    information='observed',
    max_iter=100,
)
model_logistic.fit(X.loc[ix_train], y.loc[ix_train])

# Positive-class probabilities on the hold-out rows
probas_logistic = model_logistic.predict_proba(X.loc[ix_test])[:, 1]

# Gini coefficient derived from ROC AUC: Gini = 2 * AUC - 1
auc_logistic = roc_auc_score(y.loc[ix_test], probas_logistic)
gini_logistic = 2 * auc_logistic - 1
print(f'Logistic regression Gini: {gini_logistic:.4f}')

# Focal-loss regression (gamma = 2) fitted with the expected information
# matrix, capped at 100 iterations.
model_focal = FisherScoringFocalRegression(
    gamma=2.0,
    information='expected',
    max_iter=100,
)
model_focal.fit(X.loc[ix_train], y.loc[ix_train])

# Positive-class probabilities on the hold-out rows
probas_focal = model_focal.predict_proba(X.loc[ix_test])[:, 1]

# Gini coefficient derived from ROC AUC: Gini = 2 * AUC - 1
auc_focal = roc_auc_score(y.loc[ix_test], probas_focal)
gini_focal = 2 * auc_focal - 1
print(f'Focal regression Gini: {gini_focal:.4f}')

Maximum iterations reached without convergence.
Logistic regression Gini: 0.5935
Convergence reached after 30 iterations.
Focal regression Gini: 0.6208


In [2]:
# Same focal-loss model as `model_focal`, but fitted with the observed
# information matrix and a higher iteration cap (300).
model_focal_obs = FisherScoringFocalRegression(
    gamma=2.0, max_iter=300, information='observed'
)
model_focal_obs.fit(X.loc[ix_train], y.loc[ix_train])

# Results are stored under `_obs`-suffixed names so that `probas_focal` and
# `gini_focal` from the expected-information fit are not overwritten;
# re-running cells in any order then leaves both sets of results intact.
probas_focal_obs = model_focal_obs.predict_proba(X.loc[ix_test])[:, 1]

# Gini coefficient derived from ROC AUC: Gini = 2 * AUC - 1
gini_focal_obs = 2 * roc_auc_score(y.loc[ix_test], probas_focal_obs) - 1
print(f'Focal regression Gini: {gini_focal_obs:.4f}')

Convergence reached after 47 iterations.
Focal regression Gini: 0.6208


In [3]:
import pandas as pd

# Side-by-side comparison of the fitted coefficient vectors from the two
# focal-regression fits; each model's `beta` is flattened to 1-D first.
# NOTE(review): the recorded table has 11 rows for 10 features -- presumably
# row 0 is the intercept; confirm against the estimator.
coefficients_by_information = {
    'Expected Fisher': model_focal.beta.ravel(),
    'Observed Fisher': model_focal_obs.beta.ravel(),
}
betas = pd.DataFrame(coefficients_by_information)
print(betas)

    Expected Fisher  Observed Fisher
0         -3.945837        -3.945837
1         -0.123839        -0.123839
2          0.041933         0.041933
3         -0.427300        -0.427300
4         -0.769091        -0.769091
5          0.655187         0.655187
6          0.401045         0.401045
7         -0.247576        -0.247576
8         -0.079818        -0.079818
9          0.190831         0.190831
10         0.875582         0.875582
