In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from model.utils.metrics import *
from model.utils.dataloader import dataloader
from model.LFR import LFR
import matplotlib.pyplot as plt
import numpy as np
import random

# Single source of truth for every seed used below, so Python's RNG, NumPy's
# RNG, the train/test split and LFR all stay in lock-step.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)


if __name__=='__main__':
    # ---- Data -------------------------------------------------------------
    sensitive_feature = 'Gender'
    # Dataset name understood by the project's dataloader. Earlier revisions
    # of this cell mention 'german' and 'adult' as alternatives — TODO confirm
    # against model/utils/dataloader.
    DF ='STUDENT'
    data = dataloader(DF, sensitive_feature =sensitive_feature)
    # dataloader returns a 4-tuple: feature frame, target frame, and the lists
    # of numeric / categorical column names consumed by the ColumnTransformer.
    dataset, target, numvars, categorical = data

    # Hold out 10% for testing. `target` is passed whole as the stratification
    # key, so the split is stratified on every column it contains.
    x_train, x_test, y_train, y_test = train_test_split(dataset,
                                                        target,
                                                        test_size=0.1,
                                                        random_state=SEED,
                                                        stratify=target)

    # The column to predict is the first target column that is not the
    # sensitive attribute. `remove` raises ValueError if the sensitive
    # attribute is missing from `target`.
    label_columns = target.columns.to_list()
    label_columns.remove(sensitive_feature)
    classification = label_columns[0]

    # ---- Preprocessing ----------------------------------------------------
    # Numeric columns are standardised; categorical columns are one-hot
    # encoded into a dense array.
    numeric_transformer = Pipeline(
        steps=[('scaler', StandardScaler())])

    # scikit-learn 1.2 renamed OneHotEncoder's `sparse` flag to
    # `sparse_output` and 1.4 removed the old spelling. Try the current name
    # first and fall back for older installations.
    try:
        onehot = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        onehot = OneHotEncoder(handle_unknown='ignore', sparse=False)
    categorical_transformer = Pipeline(steps=[('onehot', onehot)])

    transformations = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numvars),
            ('cat', categorical_transformer, categorical)])

    pipeline = Pipeline(steps=[('preprocessor', transformations)])
    # Not used in this cell; presumably filled by a later grid-search cell —
    # TODO confirm or delete.
    dict_all_result_in_grid = {}
    # Fit the preprocessing on the training split only; the test split is
    # transformed further down with the already-fitted pipeline.
    x_train = pipeline.fit_transform(x_train)

    # ---- LFR representation -----------------------------------------------
    # Hyper-parameters forwarded verbatim to the project's LFR class; their
    # meaning is defined in model/LFR, not here.
    parameters = {'k': 10, 'Ax': 0.001, 'Ay': 0.1, 'Az': 10.0, 'max_iter': 150000, 'max_fun': 150000}

    lfr = LFR(sensitive_feature=sensitive_feature, privileged_class=1, unprivileged_class=0, seed=SEED,
              output_feature=classification, parameter=parameters)
    lfr.fit(X=x_train, y=y_train)
    Z_train, y_trainLFR = lfr.transform(X=x_train, y=y_train)

    # Containers and threshold grid that nothing in this cell reads; presumably
    # consumed by later fairness-evaluation cells — TODO confirm or delete.
    bal_acc_arr_transf = []
    deo_arr_transf = []
    dao_arr_transf = []
    FairDEO = []
    FairDAO = []
    thresholds = np.linspace(0.01, 0.99, 100)

    # ---- Downstream classifier ---------------------------------------------
    # The SVC is trained on the LFR representation but against the ORIGINAL
    # training labels, not the labels returned by lfr.transform.
    svc = SVC(kernel='linear')
    svc.fit(Z_train, y_train[classification])

    x_test = pipeline.transform(x_test)
    Z_test, y_testLFR = lfr.transform(X=x_test, y=y_test)

    y_pred = svc.predict(Z_test)
    # accuracy_score is documented as (y_true, y_pred); keep that order so the
    # call stays correct if it is ever swapped for an asymmetric metric.
    ACC = accuracy_score(y_test[classification], y_pred)
    


In [12]:
# Sanity check, part 1: one shape per line for the preprocessed test features,
# their LFR representation, the original targets and the LFR-produced targets.
print(x_test.shape, Z_test.shape, y_test.shape, y_testLFR.shape, sep='\n')
# Part 2: the values themselves, one object after another — original targets,
# LFR-produced targets, then the SVC predictions.
print(y_test, y_testLFR, y_pred, sep='\n')


(100, 16)
(100, 10)
(100, 2)
(100, 2)
     Exam_Score  Gender
743           0       1
401           0       1
488           0       0
76            0       0
35            0       1
..          ...     ...
866           0       0
128           0       1
504           0       0
265           0       0
382           0       0

[100 rows x 2 columns]
     Exam_Score  Gender
743    0.000021       1
401    0.007417       1
488    0.020696       0
76     0.000002       0
35     0.999395       1
..          ...     ...
866    0.009157       0
128    0.000239       1
504    0.058524       0
265    0.985625       0
382    0.000437       0

[100 rows x 2 columns]
[0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 1 0 1 1 1 0 1
 1 1 1 1 1 0 1 1 0 1 0 1 0 0 1 1 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0
 1 0 1 1 1 0 1 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0]
