# <span style="font-family: Arial, sans-serif; color:#97f788">Fisher Scoring</span>
## <span style="font-family: Arial, sans-serif; color:navy">Example with Multinomial Logistic Regression</span>

<span style="font-family: Arial, sans-serif; color:navy">Repo: <a href="https://github.com/xRiskLab/fisher-scoring" title="GitHub link">https://github.com/xRiskLab/fisher-scoring</a></span>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_multilabel_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from fisher_scoring import FisherScoringMultinomialRegression

from faker import Faker
# Seed Faker so the generated column names are the same on every run
Faker.seed(0)
# One shared generator instead of constructing a new Faker per column
fake = Faker()

# Generate a synthetic multilabel dataset: 1000 samples, 5 features, 4 label columns
features, label_indicators = make_multilabel_classification(
    n_samples=1000, n_features=5, n_classes=4, n_labels=2, random_state=42
)
X = pd.DataFrame(features)
# `.unique` rejects repeated words, so no two feature columns can share a name
X.columns = [fake.unique.word() for _ in range(X.shape[1])]

# Collapse the label indicator matrix into one class label per sample:
# idxmax returns the first column that holds the row maximum.
# NOTE(review): a row with no active label (all zeros) would therefore be
# assigned class 0 -- confirm this is the intended encoding.
y = pd.DataFrame(label_indicators).idxmax(axis=1)

# Split the row positions into train and test sets (80/20, fixed seed)
ix_train, ix_test = train_test_split(range(X.shape[0]), test_size=0.2, random_state=42)

In [3]:
from sklearn.metrics import classification_report

# Estimator settings kept in one place so they are easy to tweak.
# NOTE(review): epsilon is presumably the convergence tolerance and
# information="observed" presumably selects the observed information
# matrix -- confirm against the fisher_scoring documentation.
fisher_params = dict(
    use_bias=True,
    verbose=0,
    epsilon=1e-3,
    max_iter=100,
    information="observed",
)
model = FisherScoringMultinomialRegression(**fisher_params)

# Fit on the training rows
model.fit(X.loc[ix_train], y.loc[ix_train])

# Score the held-out rows: class probabilities and hard class predictions
probas = model.predict_proba(X.loc[ix_test])
preds = model.predict(X.loc[ix_test])

# Per-class precision / recall / F1 on the test rows
fisher_report = classification_report(y.loc[ix_test], preds)
print(fisher_report)

              precision    recall  f1-score   support

           0       0.77      0.76      0.77        85
           1       0.78      0.84      0.81        77
           2       0.85      0.74      0.79        23
           3       0.92      0.80      0.86        15

    accuracy                           0.80       200
   macro avg       0.83      0.79      0.81       200
weighted avg       0.80      0.80      0.79       200



In [4]:
from sklearn.linear_model import LogisticRegression

# Baseline for comparison: scikit-learn logistic regression with the
# newton-cg solver, trained on the same training rows as the model above.
sklearn_model = LogisticRegression(solver='newton-cg', max_iter=1000).fit(
    X.loc[ix_train], y.loc[ix_train]
)

# Predict the held-out rows and print the same per-class report
sklearn_preds = sklearn_model.predict(X.loc[ix_test])
sklearn_report = classification_report(y.loc[ix_test], sklearn_preds)
print(sklearn_report)

              precision    recall  f1-score   support

           0       0.76      0.76      0.76        85
           1       0.78      0.84      0.81        77
           2       0.85      0.74      0.79        23
           3       0.92      0.73      0.81        15

    accuracy                           0.79       200
   macro avg       0.83      0.77      0.80       200
weighted avg       0.79      0.79      0.79       200

