In [1]:
import numpy as np
import pandas as pd

In [57]:
df = pd.read_csv("./data/diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [46]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import KBinsDiscretizer

features = ['Pregnancies', 'Glucose', 'BloodPressure', 'Insulin', "BMI", 'Age']
y_feat = 'remainder__Outcome'
preprocessing_pipeline = make_column_transformer(
    (KBinsDiscretizer(encode="onehot", n_bins=3), features),
    remainder="passthrough",
)



In [52]:
from sklearn.model_selection import KFold
from tqdm.auto import tqdm

from neural_fca import NeuralFCA

kf = KFold(n_splits=5, random_state=42)
cv_f1_scores = []

for train, test in tqdm(kf.split(df)):
    df_train = df.iloc[train, :]
    df_test= df.iloc[test, :]

    preprocessing_pipeline.fit(df_train)
    df_train_bin = pd.DataFrame(
        preprocessing_pipeline.transform(df_train), 
        columns=preprocessing_pipeline.get_feature_names_out()
    ).astype("bool")
    df_test_bin = pd.DataFrame(
        preprocessing_pipeline.transform(df_test), 
        columns=preprocessing_pipeline.get_feature_names_out()
    ).astype("bool")

    X_train, y_train = df_train_bin.drop(y_feat, axis=1), df_train_bin[y_feat]
    X_test, y_test = df_test_bin.drop(y_feat, axis=1), df_test_bin[y_feat]

    X_train.index = X_train.index.map(str)
    X_test.index = X_test.index.map(str)

    fca = NeuralFCA().fit(X_train, y_train)
    cv_f1_scores.append(fca.score(X_test, y_test))

0it [00:00, ?it/s]



In [53]:
cv_f1_scores

[0.0, 0.0, 0.18181818181818182, 0.0, 0.0]

In [54]:
np.mean(cv_f1_scores)

0.03636363636363636