# Post-processing techniques

## Data preparation

In [1]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
df = pd.read_csv('../../data/final_features_df.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Income,faves_pca0,faves_pca1,unfaves_pca0,unfaves_pca1,accessories,alcohol,animamted,...,Drama.2,Entertainment (Variety Shows),Factual,Learning,Music,News,Religion &amp; Ethics,Sport.1,Weather,Rating_bin
0,0,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
1,1,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
2,2,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
3,3,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0
4,4,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,0


In [3]:
df = df.fillna(0)
y = df['Rating_bin']
X = df.drop(columns=['Unnamed: 0', 'Rating_bin', 'Gender_F'])

X.head()

Unnamed: 0,Age,Income,faves_pca0,faves_pca1,unfaves_pca0,unfaves_pca1,accessories,alcohol,animamted,animated,...,Comedy.1,Drama.2,Entertainment (Variety Shows),Factual,Learning,Music,News,Religion &amp; Ethics,Sport.1,Weather
0,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,0.0,...,1,1,0,0,0,0,0,0,0,0
1,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,0.0,...,1,1,0,0,0,0,0,0,0,0
2,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,0.0,...,1,1,0,0,0,0,0,0,0,0
3,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,0.0,...,1,1,0,0,0,0,0,0,0,0
4,62,1,-0.321485,0.0786,-0.19967,-0.200645,0.0,0.0,0.0,0.0,...,1,1,0,0,0,0,0,0,0,0


In [4]:
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42, train_size=0.85)

## Baseline model: DecisionTree

In [5]:
clf = DecisionTreeClassifier(class_weight='balanced')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_val)
print(classification_report(y_val, y_pred))
confusion_matrix(y_val, y_pred)

              precision    recall  f1-score   support

           0       0.94      0.63      0.76      4635
           1       0.26      0.77      0.39       783

    accuracy                           0.65      5418
   macro avg       0.60      0.70      0.57      5418
weighted avg       0.84      0.65      0.70      5418



array([[2927, 1708],
       [ 183,  600]])

## Métrica de fairness: Statistical Parity

In [6]:
import numpy as np

def statistical_parity(y, y_, Z, priv=None):
  if priv is None:
    values = np.unique(Z)
    counts = [np.mean(y[Z==z]) for z in values]
    priv = values[np.argmax(counts)]
    unpriv = [z for z in values if z != priv]
  else:
    unpriv = [z for z in values if z != priv]
  
  return np.array([np.mean([y_i for y_i, zi in zip(y_, Z) if zi == unp]) - np.mean([y_i for y_i, zi in zip(y_, Z) if zi == priv])
                   for unp in unpriv])

In [7]:
Z_train = X_train['Gender_M']==1
Z_val = X_val['Gender_M']==1

## Desempenho do baseline model

In [8]:
y_val_ = clf.predict(X_val)

print('F1-score:', f1_score(y_val, y_val_))
print('Statistical parity', statistical_parity(y_val, y_val_, Z_val)[0])

F1-score: 0.38822387576835976
Statistical parity -0.030887433610065462


## Estratégia 1: Threshold Optimizer

In [9]:
from fairlearn.postprocessing import ThresholdOptimizer

postprocess_est = ThresholdOptimizer(
                   estimator=clf,
                   constraints="demographic_parity",
                   objective="accuracy_score",
                   prefit=True,
                   predict_method='predict_proba')
postprocess_est.fit(X_train, y_train, sensitive_features=Z_train)

y_val_ = postprocess_est.predict(X_val, sensitive_features=Z_val)

print('F1-score:', f1_score(y_val, y_val_))
print('Statistical parity', statistical_parity(y_val, y_val_, Z_val)[0])

F1-score: 0.07556080283353012
Statistical parity 0.00028360748723766337


### Avaliando hiperparâmetros

O ThresholdOptimizer possui dois hiperparâmetros que geram impacto direto nas métricas, são eles: `constraints` e `objective`. Avaliando os resultados:

In [10]:
possible_constraints = [
    'demographic_parity',
    'selection_rate_parity',
    'equalized_odds',
    'true_positive_rate_parity',
    'true_negative_rate_parity',
    'false_positive_rate_parity',
    'false_negative_rate_parity',
]
possible_objectives = [
    'accuracy_score',
    'balanced_accuracy_score',
    'selection_rate',
    'true_positive_rate',
    'true_negative_rate',
]

impossible_pairs = [
  ['equalized_odds', 'selection_rate'],
  ['equalized_odds', 'true_positive_rate'],
  ['equalized_odds', 'true_negative_rate'],
]

results = dict()

y_val_ = clf.predict(X_val)
baseline_f1 = f1_score(y_val, y_val_)
baseline_sp = statistical_parity(y_val, y_val_, Z_val)[0]
print(f'Baseline F1: {baseline_f1}')
print(f'Baseline statistical parity: {baseline_sp}')

results['baseline']= [baseline_f1, baseline_sp]

for constraint in possible_constraints:
    for objective in possible_objectives:
        if [constraint, objective] not in impossible_pairs:
            print('---')
            print(f'Optimizing using objective "{objective}" and constraint "{constraint}"')
            postprocess_est = ThresholdOptimizer(
                   estimator=clf,
                   constraints=constraint,
                   objective=objective,
                   prefit=True,
                   predict_method='predict_proba')
            postprocess_est.fit(X_train, y_train, sensitive_features=Z_train)
            
            y_val_ = postprocess_est.predict(X_val, sensitive_features=Z_val)
            alt_f1 = f1_score(y_val, y_val_)
            alt_sp = statistical_parity(y_val, y_val_, Z_val)[0]
            print(f'F1-score: {alt_f1}')
            print(f'Statistical parity: {alt_sp}')
            results[f'{objective}__{constraint}']= [alt_f1, alt_sp]

Baseline F1: 0.38822387576835976
Baseline statistical parity: -0.030887433610065462
---
Optimizing using objective "accuracy_score" and constraint "demographic_parity"
F1-score: 0.06658739595719383
Statistical parity: 0.0018305574176249156
---
Optimizing using objective "balanced_accuracy_score" and constraint "demographic_parity"
F1-score: 0.3863709418193507
Statistical parity: 0.008894962099726678
---
Optimizing using objective "selection_rate" and constraint "demographic_parity"
F1-score: 0.2525399129172714
Statistical parity: 0.0
---
Optimizing using objective "true_positive_rate" and constraint "demographic_parity"
F1-score: 0.2525399129172714
Statistical parity: 0.0
---
Optimizing using objective "true_negative_rate" and constraint "demographic_parity"
F1-score: 0.0
Statistical parity: 0.0
---
Optimizing using objective "accuracy_score" and constraint "selection_rate_parity"
F1-score: 0.06183115338882283
Statistical parity: 0.0026427061310782245
---
Optimizing using objective "ba

In [11]:
results

{'baseline': [0.38822387576835976, -0.030887433610065462],
 'accuracy_score__demographic_parity': [0.06658739595719383,
  0.0018305574176249156],
 'balanced_accuracy_score__demographic_parity': [0.3863709418193507,
  0.008894962099726678],
 'selection_rate__demographic_parity': [0.2525399129172714, 0.0],
 'true_positive_rate__demographic_parity': [0.2525399129172714, 0.0],
 'true_negative_rate__demographic_parity': [0.0, 0.0],
 'accuracy_score__selection_rate_parity': [0.06183115338882283,
  0.0026427061310782245],
 'balanced_accuracy_score__selection_rate_parity': [0.3875321336760925,
  0.009178569586964336],
 'selection_rate__selection_rate_parity': [0.2525399129172714, 0.0],
 'true_positive_rate__selection_rate_parity': [0.2525399129172714, 0.0],
 'true_negative_rate__selection_rate_parity': [0.0, 0.0],
 'accuracy_score__equalized_odds': [0.0, 0.0],
 'balanced_accuracy_score__equalized_odds': [0.3862382343395001,
  0.0003867374825967662],
 'accuracy_score__true_positive_rate_parity'

In [12]:
best_combination = 'baseline'
best_combination_f1 = results[best_combination][0]
best_combination_sp = results[best_combination][1]

for combination, (alt_f1, alt_sp) in results.items():
    # In some cases, the statistical-parity or f1-score is 0, we consider those as an error
    # After this, we're checking if the candidate statistic parity is closer to 0 than the current best candidate
    if alt_f1 != 0 and alt_sp != 0 and min((abs(x), x) for x in [alt_sp, best_combination_sp])[1] == alt_sp:
        best_combination = combination
        best_combination_f1 = alt_f1
        best_combination_sp = alt_sp

objective, constraint = best_combination.split('__')
print(f'Best objetive: {objective}')
print(f'Best constraint: {constraint}')
print(f'F1-score: {best_combination_f1}')
print(f'Statistical parity: {best_combination_sp}')

Best objetive: balanced_accuracy_score
Best constraint: equalized_odds
F1-score: 0.3862382343395001
Statistical parity: 0.0003867374825967662


## Estratégia 2: Leaf Relabelling

Seguindo o que foi apresentado por [Kamiran et al.](https://www.win.tue.nl/~mpechen/publications/pubs/KamiranICDM2010.pdf)

In [13]:
import numpy as np

class Leaf:
    def __init__(self, path, node_id, u, v, w, x, transactions=None):
        self.path = path
        self.node_id = node_id
        self.acc = None
        self.disc = None
        self.ratio = None
        self.u = u
        self.v = v
        self.w = w
        self.x = x
        self.transactions = transactions

    def compute_gain(self, cnt_p, cnt_n, portion_zero, portion_one):
        n = self.u + self.w
        p = self.v + self.x
        
        if cnt_p > cnt_n:
            self.acc = n - p
            self.disc = (self.u + self.v) / portion_one - (self.w + self.x) / portion_zero

        else:
            self.acc = p - n
            self.disc = -(self.u + self.v) / portion_one + (self.w + self.x) / portion_zero

        if self.acc == 0:
            self.ratio = self.disc / -0.00000000000000000000000000000000000001
        else:
            self.ratio = self.disc / self.acc

In [14]:
def relabel_leaves(clf, x, y, y_pred, sensitive, threshold):
    for leaf in get_leaves_to_relabel(clf, x, y, y_pred, sensitive, threshold):
        if clf.tree_.value[leaf.node_id][0][0] == clf.tree_.value[leaf.node_id][0][1]:
            clf.tree_.value[leaf.node_id][0][1] += 1
        else:
            clf.tree_.value[leaf.node_id][0][0], clf.tree_.value[leaf.node_id][0][1] = \
                clf.tree_.value[leaf.node_id][0][1], clf.tree_.value[leaf.node_id][0][0]

def get_leaves_to_relabel(clf, x, y, y_pred, sensitive, threshold):
    disc_tree = discrimination(y, y_pred, sensitive)
    cnt = np.unique(sensitive, return_counts=True)[1]

    i = list()
    get_leaves_candidates(clf, x, y, sensitive, cnt, len(y), i)
    leaves = set()
    
    while rem_disc(disc_tree, leaves, threshold) > threshold and i:
        best_l = i[0]
        for leaf in i:
            if leaf.ratio > best_l.ratio:
                best_l = leaf
        leaves.add(best_l)
        i.remove(best_l)

    return leaves

def discrimination(y, y_pred, sensitive):
    w2, x2, u2, v2, b, b_not = 0, 0, 0, 0, 0, 0
    y_length = len(y)
    for index in range(0, y_length):
        if y_pred[index] == 1:
            if sensitive[index] == 0:
                if y[index] == 0:
                    w2 += 1
                elif y[index] == 1:
                    x2 += 1
            elif sensitive[index] == 1:
                if y[index] == 0:
                    u2 += 1
                elif y[index] == 1:
                    v2 += 1
        if sensitive[index] == 1:
            b += 1
        elif sensitive[index] == 0:
            b_not += 1

    w2 = w2 / y_length
    x2 = x2 / y_length
    u2 = u2 / y_length
    v2 = v2 / y_length

    b = b / y_length
    b_not = b_not / y_length

    return ((w2 + x2) / b_not) - ((u2 + v2) / b)


def get_leaves_candidates(clf, x, y, sensitive, cnt, length, leaves, node_id=0, path=tuple()):
    feature = clf.tree_.feature[node_id]
    if feature >= 0:
        tmp_path = path + ((node_id, feature, 'left'),)
        get_leaves_candidates(clf, x, y, sensitive, cnt, length, leaves,
                              clf.tree_.children_left[node_id], tmp_path)
        tmp_path = path + ((node_id, feature, 'right'),)
        get_leaves_candidates(clf, x, y, sensitive, cnt, length, leaves,
                              clf.tree_.children_right[node_id], tmp_path)
    else:
        transactions = get_transactions_by_leaf(clf, path, x)
        tmp_path = path + ((node_id, feature, 'leaf'),)

        u, v, w, x = 0, 0, 0, 0
        for transaction in transactions:
            if sensitive[transaction] == 1:
                if y[transaction] == 0:
                    u += 1
                elif y[transaction] == 1:
                    v += 1
            elif sensitive[transaction] == 0:
                if y[transaction] == 0:
                    w += 1
                elif y[transaction] == 1:
                    x += 1
        leaf = Leaf(tmp_path, node_id, u / length, v / length,
                    w / length, x / length, transactions)
        leaf.compute_gain(v + x, u + w, cnt[0] / length, cnt[1] / length)
        if leaf.disc < 0:
            leaves.append(leaf)
            
def get_transactions_by_leaf(clf, path, x):
    filtered = pd.DataFrame(x)
    for tupl in path:
        node_id = tupl[0]
        feature = tupl[1]
        if tupl[2] == 'left':
            filtered = filtered.loc[filtered[feature] <= clf.tree_.threshold[node_id]]
        elif tupl[2] == 'right':
            filtered = filtered.loc[filtered[feature] > clf.tree_.threshold[node_id]]
        else:
            raise Exception("Should not reach here")
    return list(filtered.index)

def rem_disc(disc_tree, leaves, threshold):
    s = 0
    for leaf in leaves:
        if leaf.disc < threshold:
            s += leaf.disc
    return disc_tree + s

In [18]:
df_lr = pd.read_csv('../../data/final_features_df.csv')
df_lr.head()
df_lr = df_lr.fillna(0)
y_lr = df_lr['Rating_bin']
X_lr = df.drop(columns=['Unnamed: 0', 'Rating_bin', 'Gender_M'])
X_lr_train, X_lr_val, y_lr_train, y_lr_val = train_test_split(X_lr, y_lr, random_state=42, train_size=0.85)

sensitive = X_lr_val['Gender_F']

y_lr_np = y_lr_val.to_numpy()
X_lr_np = X_lr_val.to_numpy()
sensitive_np = sensitive.to_numpy()

In [19]:
clf = DecisionTreeClassifier(class_weight='balanced')
clf.fit(X_lr_train, y_lr_train)

y_lr_original_pred = clf.predict(X_lr_val)
threshold = -0.0001

relabel_leaves(clf, X_lr_np, y_lr_np, y_lr_original_pred, sensitive_np, threshold)
y_lr_pred_relabeled = clf.predict(X_lr_val)

In [20]:
print('F1-score:', f1_score(y_lr_val, y_lr_pred_relabeled))
print('Statistical parity', statistical_parity(y_lr_val, y_lr_pred_relabeled, Z_val)[0])

F1-score: 0.36596893990546925
Statistical parity -0.04310833806012476
