# Testing aspects of Minimax Group Fairness
URL: https://arxiv.org/abs/2011.03108

Try simulate "Two-Player Game Formulation" proposed by the article to see how i can fit it under the package Temis.

GENERAL DESCRIPTION:

Regulator: Tries to identify which group has great loss and increase it's weight through exponential weights.

Learner: Minimize current model and seek for optimal solution.

In [17]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
from sklearn.datasets import make_classification
from sklearn.base import clone

In [None]:
class MinimaxFairness:
    def __init__(self, model_class, iterations=100, lr=0.5, verbose=False):
        self.model_class = model_class
        self.T = iterations
        self.lr = lr
        self.verbose = verbose

        # Initialize storage for models, lambdas, and group losses history
        self.models = []
        self.lambdas_history = []
        self.group_losses_history = []
    def fit(self, X, y, groups):
        if self.verbose == True:
            print(f"Debugging fit information...")
        n_samples = len(y)

        unique_groups = np.unique(groups)
        n_groups = len(unique_groups)
        if self.verbose == True:
            print(f"Number identified groups: {n_groups}")
            print(f"Identified Groups: {unique_groups}")

        group_counts = {g: np.sum(groups == g) for g in unique_groups}

        if self.verbose == True:
            print(f"Group Counts: {group_counts}")

        self.lambdas = {g: group_counts[g] / n_samples for g in unique_groups}

        if self.verbose == True:
            print(f"Initial Lambdas: {self.lambdas}")

        if self.verbose == True:
            print(f"Initializing game with {self.T} rounds...")
            
        sample_weights = np.zeros(n_samples)
        for g in unique_groups:
            mask = (groups == g)
            sample_weights = self.lambdas[g]

        for t in range(1, self.T + 1):
            if self.verbose == True:
                print(f"Initialing round: {t}")

            #h_t = self.model_class(solver='lbfgs', max_iter=100)
            h_t = clone(self.model_class)
            h_t.fit(X, y, sample_weight=sample_weights)
            self.models.append(h_t)

            group_losses = {}
            probs = h_t.predict_proba(X)

            for g in unique_groups:
                mask = (groups == g)
                loss_k = log_loss(y[mask], probs[mask])
                group_losses[g] = loss_k

            self.group_losses_history.append(group_losses)
            self.lambdas_history.append(self.lambdas.copy())

            for g in unique_groups:
                self.lambdas[g] *= np.exp(self.lr * group_losses[g])
    def predict_proba(self, X):
        if self.verbose == True:
            print(f"debugging predict_proba information....")
        preds = np.array([h.predict_proba(X) for h in self.models])
        mean_preds = np.mean(preds, axis=0)
        return mean_preds

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

In [None]:
random_state = 42

np.random.seed(random_state)
n_samples = 1000
X, y = make_classification(n_samples=n_samples, n_features=20, n_informative=10, n_redundant=10, random_state=random_state)
groups = np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3])

# Add noise to some group to make it harder to guess.
noise_idxs = np.where(groups == 1)[0]
y[noise_idxs] = np.random.choice([0, 1], size=len(noise_idxs))

# Add some baseline model and test error rates.
baseline_model = LogisticRegression(solver='lbfgs', max_iter=1000)
baseline_model.fit(X, y)
baseline_preds = baseline_model.predict(X)
#print(baseline_model.coef_)

baseline_loss_class_0 = log_loss(y[groups == 0], baseline_model.predict_proba(X)[groups == 0])
baseline_loss_class_1 = log_loss(y[groups == 1], baseline_model.predict_proba(X)[groups == 1])

print("--- Baseline (Standard Logistic Regression) ---")
print(f"Log Loss Grupo 0: {baseline_loss_class_0:.4f}")
print(f"Log Loss Grupo 1: {baseline_loss_class_1:.4f}")
print(f"Diferença de Erro: {abs(baseline_loss_class_0 - baseline_loss_class_1):.4f}")

# Add Minimax Fairness model and test error rates.
n_iter = 10
mm_model_class = LogisticRegression(solver='lbfgs', max_iter=100)
mm_model = MinimaxFairness(mm_model_class, iterations=n_iter, lr=0.5, verbose=False)
mm_model.fit(X, y, groups)

mm_pred_probs = mm_model.predict_proba(X)
mm_pred_y0 = mm_pred_probs[:, 0]
mm_pred_y1 = mm_pred_probs[:, 1]
mm_loss_class_0 = log_loss(y[groups == 0], mm_pred_y1[groups == 0])
mm_loss_class_1 = log_loss(y[groups == 1], mm_pred_y1[groups == 1])

print(f"\n--- Minimax Fair Model (Após {n_iter} iterações) ---")
print(f"Log Loss Grupo 0: {mm_loss_class_0:.4f}")
print(f"Log Loss Grupo 1: {mm_loss_class_1:.4f}")
print(f"Diferença de Erro: {abs(mm_loss_class_0 - mm_loss_class_1):.4f}")



--- Baseline (Standard Logistic Regression) ---
Log Loss Grupo 0: 0.4144
Log Loss Grupo 1: 0.8712
Diferença de Erro: 0.4568

--- Minimax Fair Model (Após 10 iterações) ---
Log Loss Grupo 0: 0.4148
Log Loss Grupo 1: 0.8703
Diferença de Erro: 0.4555
