In [1]:
from utils import get_dataset
from metrics import calculate_metrics

In [2]:
from sklearn.ensemble import AdaBoostClassifier


def _print_metric_lines(metrics):
    """Print one ``name: value`` line per entry of ``metrics``, values to 3 decimals."""
    for metric_name, metric_value in metrics.items():
        print(f'{metric_name}: {metric_value:.3f}')


# Dataset identifiers handed to get_dataset, evaluated one after another.
dataset_list = ['adult', 'bank', 'compass', 'kdd']

for dataset in dataset_list:
    print(f'Dataset {dataset}')

    # get_dataset yields six objects: features, labels and a per-row
    # indicator for each of the train and test splits.
    # NOTE(review): is_protected_* is presumably a 0/1 protected-group
    # mask -- confirm against utils.get_dataset.
    (
        X_train, y_train, is_protected_train,
        X_test, y_test, is_protected_test,
    ) = get_dataset(dataset)

    # Summary of the split sizes and of the label / indicator means.
    print(f'Train size: {X_train.shape}, Test size: {X_test.shape}')
    print(
        f'Positive ratio (train): {y_train.mean():.3f}, '
        f'Positive ratio (test): {y_test.mean():.3f}'
    )
    print(
        f'Protected ratio (train): {is_protected_train.mean():.3f}, '
        f'Protected ratio (test): {is_protected_test.mean():.3f}'
    )

    # Fit an AdaBoost classifier with library defaults on the train split.
    model = AdaBoostClassifier()
    model.fit(X_train, y_train)

    # Predict on both splits before any metric is computed.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Header is printed first, then the metrics are computed and listed.
    print('Metrics on train dataset:')
    train_metrics = calculate_metrics(y_train, y_train_pred, is_protected_train)
    _print_metric_lines(train_metrics)

    print('Metrics on test dataset:')
    test_metrics = calculate_metrics(y_test, y_test_pred, is_protected_test)
    _print_metric_lines(test_metrics)

    # Blank line separating consecutive dataset reports.
    print()

Dataset adult
Train size: (32561, 108), Test size: (16281, 108)
Positive ratio (train): 0.241, Positive ratio (test): 0.236
Protected ratio (train): 0.331, Protected ratio (test): 0.333
Metrics on train dataset:
accuracy: 0.861
balanced_accuracy: 0.778
eq_odds: 0.201
tpr_protected: 0.505
tpr_non_protected: 0.637
tnr_protected: 0.981
tnr_non_protected: 0.912
Metrics on test dataset:
accuracy: 0.860
balanced_accuracy: 0.774
eq_odds: 0.190
tpr_protected: 0.510
tpr_non_protected: 0.629
tnr_protected: 0.981
tnr_non_protected: 0.910

Dataset bank
Train size: (33908, 51), Test size: (11303, 51)
Positive ratio (train): 0.117, Positive ratio (test): 0.117
Protected ratio (train): 0.602, Protected ratio (test): 0.602
Metrics on train dataset:
accuracy: 0.903
balanced_accuracy: 0.687
eq_odds: 0.110
tpr_protected: 0.362
tpr_non_protected: 0.454
tnr_protected: 0.975
tnr_non_protected: 0.957
Metrics on test dataset:
accuracy: 0.896
balanced_accuracy: 0.667
eq_odds: 0.098
tpr_protected: 0.331
tpr_non