In [1]:
import json
from datetime import datetime
from pprint import pprint

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import make_pipeline
from tqdm.notebook import tqdm

from lazy_fca_sklearn import MyBinarizedBinaryClassifier

In [2]:
from heart import *
from hotel import *
from airline import *

data_and_transformers = [
    ('heart', load_heart(), transformers_heart()),
    ('airline', load_airline(), transformers_airline()),
    ('hotel', load_hotel(), transformers_hotel())
]

In [3]:
model = MyBinarizedBinaryClassifier(method='standard', alpha=0.)

In [4]:
results = []

for data_name, data, transformers in tqdm(data_and_transformers):
    pipeline = make_pipeline(*transformers, model)

    X_train, X_test, y_train, y_test = data
    pipeline.fit(X=X_train, y=y_train)
    y_pred = pipeline.predict(X_test)
    display(y_pred)

    res = {
        'model': 'BinarizedBinaryClassifier',
        'dataset': data_name,
        'f1': f1_score(y_test, y_pred),
        'accuracy': accuracy_score(y_test, y_pred)
    }
    display(pd.DataFrame([res]))
    results.append(res)

  0%|          | 0/3 [00:00<?, ?it/s]

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

Unnamed: 0,model,dataset,f1,accuracy
0,BinarizedBinaryClassifier,heart,0.0,0.855


array([False, False, False,  True, False, False,  True,  True,  True,
       False, False, False,  True, False,  True, False,  True, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False,  True,  True,
        True, False, False, False,  True,  True, False, False,  True,
        True,  True, False, False, False, False,  True, False,  True,
        True, False, False,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True, False,  True, False,  True, False,
       False, False, False, False, False, False, False,  True, False,
        True,  True, False,  True, False, False,  True, False,  True,
        True, False,  True,  True, False, False, False, False, False,
       False, False,  True, False, False,  True,  True, False,  True,
        True,  True,  True, False, False, False,  True,  True, False,
        True,  True, False, False,  True,  True,  True,  True,  True,
        True,  True,

Unnamed: 0,model,dataset,f1,accuracy
0,BinarizedBinaryClassifier,airline,0.619565,0.65


array([False,  True,  True,  True, False,  True,  True, False, False,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True, False,  True,  True,  True,  True, False,  True,  True,
       False,  True,  True,  True,  True, False,  True, False,  True,
       False,  True,  True,  True, False,  True,  True,  True, False,
       False,  True,  True,  True,  True, False, False, False,  True,
        True, False,  True,  True, False,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
       False,  True, False,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True, False,
       False, False,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,

Unnamed: 0,model,dataset,f1,accuracy
0,BinarizedBinaryClassifier,hotel,0.533937,0.485


In [5]:
df_results = pd.DataFrame(results).pivot(
    index='model',
    columns='dataset',
    values=['f1', 'accuracy'],
).swaplevel(axis=1).sort_index(axis=1)
df_results.style.highlight_max().format(precision=3)

dataset,airline,airline,heart,heart,hotel,hotel
Unnamed: 0_level_1,accuracy,f1,accuracy,f1,accuracy,f1
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
BinarizedBinaryClassifier,0.65,0.62,0.855,0.0,0.485,0.534
