# Testing aspects of Minimax Group Fairness
URL: https://arxiv.org/abs/2011.03108

Try simulate "Two-Player Game Formulation" proposed by the article to see how i can fit it under the package Temis.

GENERAL DESCRIPTION:

Regulator: Tries to identify which group has great loss and increase it's weight through exponential weights.

Learner: Minimize current model and seek for optimal solution.

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
from sklearn.datasets import make_classification

In [None]:
class MinimaxFairness:
    def __init__(self, model_class, iterations=100, lr=0.5, verbose=False):
        self.model_class = model_class
        self.T = iterations
        self.lr = lr
        self.verbose = verbose

        # Initialize storage for models, lambdas, and group losses history
        self.models = []
        self.lambdas_history = []
        self.group_losses_history = []
    def fit(self, X, y, groups, verbose=False):
        n_samples = len(y)

        # I don't think this way will work for intersection of groups.
        unique_groups = np.unique(groups)
        n_groups = len(unique_groups)

        group_counts = {g: np.sum(groups == g) for g in unique_groups}
        self.lambdas = {g: group_counts[g] / n_samples for g in unique_groups}

        if verbose == True:
            print(f"Iniciando Jogo Minimax com {self.T} rodadas...")

        for t in range(1, self.T + 1):
            sample_weights = np.ones(n_samples)

            h_t = self.model_class(solver='lbfgs', max_iter=100)
            h_t.fit(X, y, sample_weight=sample_weights)
            self.models.append(h_t)

            group_losses = {}
            probs = h_t.predict_proba(X)

            for g in unique_groups:
                mask = (groups == g)
                loss_k = log_loss(y[mask], probs[mask])
                group_losses[g] = loss_k

            self.group_losses_history.append(group_losses)
            self.lambdas_history.append(self.lambdas.copy())

            for g in unique_groups:
                self.lambdas[g] *= np.exp(self.lr * group_losses[g])
    def predict_proba(self, X):
        all_probs = np.array([model.predict_proba(X)] for model in self.models)
        return np.mean(all_probs, axis=0)

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

    def debug_step(self, t):
        idx = t - 1
        if idx >= len(self.lambdas_history):
            print('Debug: Histórico ainda não disponível para essa iteração.')
            return

        current_lambdas = self.lambdas_history[idx]
        current_group_losses = self.group_losses_history[idx]
        groups = sorted(current_lambdas.keys())

        print(f"\n----- [DEBUG] Rodada {t} -----")
        print("1. Jogada do Regulador (Pesos definidos):")


    

In [37]:
random_state = 42

np.random.seed(random_state)
n_samples = 1000
X, y = make_classification(n_samples=n_samples, n_features=20, n_informative=10, n_redundant=10, random_state=random_state)
groups = np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3])

# Add noise to some group to make it harder to guess.
noise_idxs = np.where(groups == 1)[0]
y[noise_idxs] = np.random.choice([0, 1], size=len(noise_idxs))

# Add some baseline model and test error rates.
baseline_model = LogisticRegression(solver='lbfgs', max_iter=1000)
baseline_model.fit(X, y)
baseline_preds = baseline_model.predict(X)
print(baseline_model.coef_)

baseline_loss_class_0 = log_loss(y[groups == 0], baseline_model.predict_proba(X)[groups == 0])
baseline_loss_class_1 = log_loss(y[groups == 1], baseline_model.predict_proba(X)[groups == 1])

print("--- Baseline (Standard Logistic Regression) ---")
print(f"Log Loss Grupo 0 (Fácil): {baseline_loss_class_0:.4f}")
print(f"Log Loss Grupo 1 (Difícil): {baseline_loss_class_1:.4f}")
print(f"Diferença de Erro: {abs(baseline_loss_class_0 - baseline_loss_class_1):.4f}")

# Add Minimax Fairness model and test error rates.
mm_model = MinimaxFairness(LogisticRegression, iterations=10, lr=0.5)
mm_model.fit(X, y, groups)

mm_pred_probs = mm_model.predict_proba(X)
mm_loss_class_0 = log_loss(y[groups == 0], mm_pred_probs[groups == 0])
mm_loss_class_1 = log_loss(y[groups == 1], mm_pred_probs[groups == 1])

print("\n--- Minimax Fair Model (Após 100 iterações) ---")
print(f"Log Loss Grupo 0 (Fácil): {mm_loss_class_0:.4f}")
print(f"Log Loss Grupo 1 (Difícil): {mm_loss_class_1:.4f}")
print(f"Diferença de Erro: {abs(mm_loss_class_0 - mm_loss_class_1):.4f}")



[[-0.10394707 -0.02122631  0.02497732  0.032889    0.19497393  0.06218199
  -0.00995388 -0.22468309  0.00296418 -0.11127511  0.12740759 -0.086235
  -0.01550448 -0.17103389 -0.07877878 -0.01402433 -0.08433013 -0.05990881
   0.04956577  0.0870589 ]]
--- Baseline (Standard Logistic Regression) ---
Log Loss Grupo 0 (Fácil): 0.4144
Log Loss Grupo 1 (Difícil): 0.8712
Diferença de Erro: 0.4568


AxisError: axis 0 is out of bounds for array of dimension 0