In [5]:
import pandas as pd
from sklearn.datasets import make_moons
from sklearn.metrics import classification_report
from AdaFair import AdaFair
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Toy example

In [6]:
# Build a noisy two-moons toy dataset of 1000 points; the generator is seeded.
X, y = make_moons(
    n_samples=1000,
    noise=0.3,
    random_state=0,
)
# Hold out 33% of the samples for evaluation, using a seeded split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

In [7]:
# Configure AdaFair with a depth-2 decision tree as base classifier and
# n_ests=100, then fit it on the toy training split.
n_ests = 100
base_clf = DecisionTreeClassifier(max_depth=2)
Adafair = AdaFair(base_clf=base_clf, n_ests=n_ests)
Adafair.fit(X_train, y_train)

# Score the held-out samples.
predicitons = Adafair.predict(X_test)
bal_acc = balanced_accuracy_score(y_test, predicitons)
f1 = f1_score(y_test, predicitons)

# end='\n\n' emits the same blank line a separate bare print() would.
print('balanced_accuracy_score: ', bal_acc, 'f1_score: ', f1, end='\n\n')
print(classification_report(y_test, predicitons))

balanced_accuracy_score:  0.6225340729583777 f1_score:  0.411214953271028

              precision    recall  f1-score   support

           0       0.57      0.98      0.72       163
           1       0.94      0.26      0.41       167

    accuracy                           0.62       330
   macro avg       0.75      0.62      0.56       330
weighted avg       0.75      0.62      0.56       330



# Our Dataset with Imbalanced classes

In [17]:
import os
import sys

# Locate the data directory by swapping 'AdaFair' for 'data' in the first
# sys.path entry.
# NOTE(review): str.replace rewrites every occurrence of 'AdaFair' in that
# path, and this presumes sys.path[0] is the notebook's own folder -- confirm
# for your environment.
path = sys.path[0].replace('AdaFair', 'data')

# Use os.path.join instead of a hard-coded '\\' so the resulting path is also
# valid on non-Windows systems.
# NOTE(review): os.listdir returns entries in arbitrary order, so taking [0]
# is only well-defined when the directory holds a single file.
data_name = os.path.join(path, os.listdir(path)[0])

In [20]:
# Load the dataset; 'Class-label' is the target, every other column a feature.
df = pd.read_csv(data_name)
y = df['Class-label']
# Drop the target and one-hot encode the remaining feature columns.
X = pd.get_dummies(df.drop(columns='Class-label'))

# Seeded split that holds out 33% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)

In [22]:
# Repeat the AdaFair experiment for several base-tree depths, printing the
# same set of metrics for each depth.
n_ests = 100  # identical for every depth, so set once before the loop
for depth in (3, 5, 7, 20):
    base_clf = DecisionTreeClassifier(max_depth=depth)
    Adafair = AdaFair(base_clf=base_clf, n_ests=n_ests)
    Adafair.fit(X_train, y_train)
    predicitons = Adafair.predict(X_test)

    bal_acc = balanced_accuracy_score(y_test, predicitons)
    f1 = f1_score(y_test, predicitons)

    print("Base estimator's depth: ", depth)
    # end='\n\n' stands in for the blank print() that followed each line.
    print('balanced_accuracy_score: ', bal_acc, 'f1_score: ', f1, end='\n\n')
    print(classification_report(y_test, predicitons), end='\n\n')

Base estimator's depth:  3
balanced_accuracy_score:  0.6242158862164228 f1_score:  0.3986105080330004

              precision    recall  f1-score   support

           0       0.80      1.00      0.89     11243
           1       0.98      0.25      0.40      3665

    accuracy                           0.81     14908
   macro avg       0.89      0.62      0.64     14908
weighted avg       0.85      0.81      0.77     14908


Base estimator's depth:  5
balanced_accuracy_score:  0.7157651770348177 f1_score:  0.5939032936229852

              precision    recall  f1-score   support

           0       0.85      0.97      0.90     11243
           1       0.83      0.46      0.59      3665

    accuracy                           0.84     14908
   macro avg       0.84      0.72      0.75     14908
weighted avg       0.84      0.84      0.83     14908


Base estimator's depth:  7
balanced_accuracy_score:  0.7581169741633387 f1_score:  0.6576207005364468

              precision    recall  