# Explication 1-1 pour le dataset Breast Cancer

In [24]:
import pandas as pd
from gurobipy import *

# Load the preprocessed Breast Cancer dataset.

df = pd.read_csv("breastcancer_processed.csv")

features = [
    "ClumpThickness",
    "UniformityOfCellSize",
    "UniformityOfCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses"
]

X = df[features]

# Logistic-regression coefficients (hard-coded, one per feature).

logreg_weights = {
    "ClumpThickness": 0.416,
    "UniformityOfCellSize": 0.134,
    "UniformityOfCellShape": 0.247,
    "MarginalAdhesion": 0.235,
    "SingleEpithelialCellSize": 0.11,
    "BareNuclei": 0.352,
    "BlandChromatin": 0.304,
    "NormalNucleoli": 0.193,
    "Mitoses": 0.226
}

# Model intercept; defined for reference, not used by the 1-1 model below.
bias = -8.377

# Instance to explain (positional row of X) and the column means used as
# the reference point.

i = 1
x = X.iloc[i]
mu = X.mean()

# Contribution of each feature relative to the mean:
# delta_f = w_f * (x_f - mu_f).

deltas = {
    f: logreg_weights[f] * (x[f] - mu[f])
    for f in features
}

# Pros have a strictly positive delta, cons a strictly negative one.
# A feature whose delta is exactly 0 belongs to neither list.
pros = [f for f, v in deltas.items() if v > 0]
cons = [f for f, v in deltas.items() if v < 0]

print("Pros :", pros)
print("Cons :", cons)
print("Deltas :", deltas)

# Valid 1-1 pairs: a pro can cover a con only if their deltas sum to a
# strictly positive value.

valid_pairs = [
    (p, c) for p in pros for c in cons
    if deltas[p] + deltas[c] > 0
]
# Set view of the same pairs so the membership tests in the constraint
# loops are O(1) instead of a linear scan of the list.
valid_pair_set = set(valid_pairs)

print(f"Nombre de paires valides : {len(valid_pairs)}")

# Gurobi 1-1 model: one binary variable per valid (pro, con) pair.

m = Model("Explication_1_1_LogReg")

c_vars = {
    (p, c): m.addVar(vtype=GRB.BINARY, name=f"c_{p}_{c}")
    for p, c in valid_pairs
}

m.update()

# Every con is covered by exactly one pro. A con with no valid pair yields
# the constraint 0 == 1, which makes the model infeasible (intended).
for c_feat in cons:
    m.addConstr(
        quicksum(c_vars[(p, c_feat)] for p in pros if (p, c_feat) in valid_pair_set) == 1,
        name=f"Cover_{c_feat}"
    )

# Every pro is used at most once.
for p_feat in pros:
    m.addConstr(
        quicksum(c_vars[(p_feat, c)] for c in cons if (p_feat, c) in valid_pair_set) <= 1,
        name=f"Unique_{p_feat}"
    )

# Pure feasibility problem: constant objective, solver log silenced.
m.setObjective(0, GRB.MINIMIZE)
m.Params.OutputFlag = 0
m.optimize()

# Result analysis

if m.status == GRB.OPTIMAL:
    print("\nExplication 1–1 trouvée :\n")
    for p, c in valid_pairs:
        # Binary values come back as floats; 0.5 is the usual rounding threshold.
        if c_vars[(p, c)].X > 0.5:
            net = deltas[p] + deltas[c]
            print(
                f"Pro {p} (Δ={deltas[p]:+.4f}) "
                f"/ Con {c} (Δ={deltas[c]:+.4f}) "
                f"→ Net {net:+.4f}"
            )

elif m.status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
    # With a constant objective the model cannot be unbounded, so the
    # ambiguous INF_OR_UNBD status (possible after presolve) means infeasible.
    print("\nAucune explication 1–1 possible.")

else:
    # Any other status (time limit, interruption, numerical trouble...):
    # report it instead of silently printing nothing.
    print(f"\nRésolution non concluante (statut Gurobi : {m.status}).")


Pros : ['ClumpThickness', 'UniformityOfCellSize', 'UniformityOfCellShape', 'MarginalAdhesion', 'SingleEpithelialCellSize', 'BareNuclei']
Cons : ['BlandChromatin', 'NormalNucleoli', 'Mitoses']
Deltas : {'ClumpThickness': np.float64(0.23205856515373363), 'UniformityOfCellSize': np.float64(0.11379209370424599), 'UniformityOfCellShape': np.float64(0.19383894582723274), 'MarginalAdhesion': np.float64(0.5099121522693997), 'SingleEpithelialCellSize': np.float64(0.41423133235724746), 'BareNuclei': np.float64(2.2722811127379208), 'BlandChromatin': np.float64(-0.13530893118594436), 'NormalNucleoli': np.float64(-0.16785065885797948), 'Mitoses': np.float64(-0.13632796486090773)}
Nombre de paires valides : 15

Explication 1–1 trouvée :

Pro ClumpThickness (Δ=+0.2321) / Con BlandChromatin (Δ=-0.1353) → Net +0.0967
Pro SingleEpithelialCellSize (Δ=+0.4142) / Con NormalNucleoli (Δ=-0.1679) → Net +0.2464
Pro BareNuclei (Δ=+2.2723) / Con Mitoses (Δ=-0.1363) → Net +2.1360


Dans le modèle 1-1, chaque con doit être compensé par exactement un pro.

Le modèle identifie toutes les paires pro–con dont la somme des deltas est strictement positive, car seules ces paires permettent à un pro de neutraliser un con.

Une explication 1-1 a été trouvée : ce patient est donc classé malin sur le plan clinique.

# Explication 1-m pour le dataset Breast Cancer

In [22]:
import pandas as pd
from gurobipy import *

# Load the preprocessed Breast Cancer dataset.
df = pd.read_csv("breastcancer_processed.csv")

features = [
    "ClumpThickness",
    "UniformityOfCellSize",
    "UniformityOfCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses"
]

# Target recoded as the complement of the "Benign" column; not used by the
# explanation model below.
y = 1 - df["Benign"]
X = df[features]

# Logistic-regression coefficients (hard-coded, one per feature).
logistic_weights = {
    "ClumpThickness": 0.416,
    "UniformityOfCellSize": 0.134,
    "UniformityOfCellShape": 0.247,
    "MarginalAdhesion": 0.235,
    "SingleEpithelialCellSize": 0.110,
    "BareNuclei": 0.352,
    "BlandChromatin": 0.304,
    "NormalNucleoli": 0.193,
    "Mitoses": 0.226
}

# Model intercept; only used in the printed report, never in the constraints.
bias = -8.377

# Instance to explain (positional row of X) and the column means used as
# the reference point.
i = 1
x = X.iloc[i]
mu = X.mean()

# Contribution of each feature relative to the mean:
# delta_f = w_f * (x_f - mu_f).
deltas = {f: logistic_weights[f] * (x[f] - mu[f]) for f in features}

# Split features into pros (delta > 0) and cons (delta < 0). The bias is
# not a feature, so it can never appear in either list.
pros = [f for f in deltas if deltas[f] > 0]
cons = [f for f in deltas if deltas[f] < 0]

print("Pros :", pros)
print("Cons :", cons)

# 1-m model: x_var[p, c] = 1 when pro p is the one compensating con c.
m = Model("BreastCancer_Explanation_1_m_without_bias")

x_var = m.addVars(pros, cons, vtype=GRB.BINARY, name="x")
m.update()

# Every con is compensated by exactly one pro.
for c in cons:
    m.addConstr(quicksum(x_var[p, c] for p in pros) == 1)

# Group validity: each pro must outweigh the sum of the cons assigned to it.
# epsilon turns the strict inequality "> 0" into a solver-friendly ">=".
epsilon = 1e-6
for p in pros:
    m.addConstr(
        deltas[p] + quicksum(deltas[c] * x_var[p, c] for c in cons) >= epsilon
    )

# Pure feasibility problem: constant objective, solver log silenced.
m.setObjective(0, GRB.MINIMIZE)
m.Params.OutputFlag = 0
m.optimize()

# Result analysis
if m.status == GRB.OPTIMAL:
    print("\nExplication (1–m) trouvée\n")
    for p in pros:
        # Binary values come back as floats; 0.5 is the usual rounding threshold.
        covered = [c for c in cons if x_var[p, c].X > 0.5]
        if covered:
            # NOTE(review): the label says "+BIAS" but the code adds -bias
            # (i.e. +8.377), and does so once per group. Computation kept
            # as-is; confirm the intended sign/baseline before relying on it.
            net = deltas[p] + sum(deltas[c] for c in covered) + (-bias)
            print(f"Pro {p} (Δ = {deltas[p]:+.4f})")
            print(f"  compense {covered}")
            print(f"  bilan net (+BIAS) = {net:+.4f}\n")

elif m.status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
    # With a constant objective the model cannot be unbounded, so the
    # ambiguous INF_OR_UNBD status (possible after presolve) means infeasible.
    print("Aucune explication (1–m) possible.")

else:
    # Any other status (time limit, interruption, numerical trouble...):
    # report it instead of silently printing nothing.
    print(f"Résolution non concluante (statut Gurobi : {m.status}).")


Pros : ['ClumpThickness', 'UniformityOfCellSize', 'UniformityOfCellShape', 'MarginalAdhesion', 'SingleEpithelialCellSize', 'BareNuclei']
Cons : ['BlandChromatin', 'NormalNucleoli', 'Mitoses']

Explication (1–m) trouvée

Pro UniformityOfCellShape (Δ = +0.1938)
  compense ['Mitoses']
  bilan net (+BIAS) = +8.4345

Pro BareNuclei (Δ = +2.2723)
  compense ['BlandChromatin', 'NormalNucleoli']
  bilan net (+BIAS) = +10.3461



Les cons vont dans le sens d'une tumeur bénigne et les pros dans le sens d'une tumeur maligne.

'BareNuclei' permet de compenser 'BlandChromatin' et 'NormalNucleoli'. 'UniformityOfCellShape' permet de compenser 'Mitoses'.
Ce patient est classé malin principalement sur des critères cliniques ; en y ajoutant l'a priori du modèle, il est fortement malin.

# Explication m-1 pour le dataset Breast Cancer

In [20]:
import pandas as pd
from gurobipy import *

# Load the preprocessed Breast Cancer dataset.
df = pd.read_csv("breastcancer_processed.csv")

features = [
    "ClumpThickness",
    "UniformityOfCellSize",
    "UniformityOfCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses"
]

# Target class: Malignant = 1 (complement of the "Benign" column).
# Not used by the explanation model below.
y = 1 - df["Benign"]
X = df[features]

# Logistic-regression coefficients (hard-coded) and intercept.
logistic_weights = {
    "ClumpThickness": 0.416,
    "UniformityOfCellSize": 0.134,
    "UniformityOfCellShape": 0.247,
    "MarginalAdhesion": 0.235,
    "SingleEpithelialCellSize": 0.110,
    "BareNuclei": 0.352,
    "BlandChromatin": 0.304,
    "NormalNucleoli": 0.193,
    "Mitoses": 0.226
}
# The intercept is only used in the printed report, never in the constraints.
bias = -8.377

# Instance to explain (positional row of X) and the column means used as
# the reference point.
i = 1
x = X.iloc[i]
mu = X.mean()

# Contributions: delta_f = w_f * (x_f - mu_f).
deltas = {f: logistic_weights[f] * (x[f] - mu[f]) for f in features}

# Split features into pros (delta > 0) and cons (delta < 0). The bias is
# not a feature, so it can never appear in either list.
pros = [f for f in deltas if deltas[f] > 0]
cons = [f for f in deltas if deltas[f] < 0]

print("Pros :", pros)
print("Cons :", cons)
print("Deltas :", deltas)

# m-1 model
m = Model("BreastCancer_Explanation_m_1_without_bias")

# Binary variables: y_var[p, c] = 1 when pro p helps compensate con c.
y_var = m.addVars(pros, cons, vtype=GRB.BINARY, name="y")
m.update()

# Each pro can be assigned to at most one con.
for p in pros:
    m.addConstr(quicksum(y_var[p, c] for c in cons) <= 1)

# Each con must be outweighed by the pros assigned to it.
# epsilon turns the strict inequality "> 0" into a solver-friendly ">=".
epsilon = 1e-6
for c in cons:
    m.addConstr(
        deltas[c] + quicksum(deltas[p] * y_var[p, c] for p in pros) >= epsilon
    )

# Pure feasibility problem: constant objective, solver log silenced.
m.setObjective(0, GRB.MINIMIZE)

m.Params.OutputFlag = 0
m.optimize()

# Result analysis
if m.status == GRB.OPTIMAL:
    print("\nExplication (m–1) trouvée\n")
    for c in cons:
        # Binary values come back as floats; 0.5 is the usual rounding threshold.
        assigned_pros = [p for p in pros if y_var[p, c].X > 0.5]
        sum_pros = sum(deltas[p] for p in assigned_pros)
        # NOTE(review): the label says "+BIAS" but the code adds -bias
        # (i.e. +8.377), and does so once per con. Computation kept as-is;
        # confirm the intended sign/baseline before relying on it.
        net_value = deltas[c] + sum_pros + (-bias)
        print(f"Argument Contre {c} (Δ={deltas[c]:+.4f}) compensé par : {assigned_pros}")
        print(f"  cumul Pros = {sum_pros:+.4f}, bilan net (+BIAS) = {net_value:+.4f}\n")

elif m.status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
    # With a constant objective the model cannot be unbounded, so the
    # ambiguous INF_OR_UNBD status (possible after presolve) means infeasible.
    print("Aucune explication (m–1) possible.")

else:
    # Any other status (time limit, interruption, numerical trouble...):
    # report it instead of silently printing nothing.
    print(f"Résolution non concluante (statut Gurobi : {m.status}).")


Pros : ['ClumpThickness', 'UniformityOfCellSize', 'UniformityOfCellShape', 'MarginalAdhesion', 'SingleEpithelialCellSize', 'BareNuclei']
Cons : ['BlandChromatin', 'NormalNucleoli', 'Mitoses']
Deltas : {'ClumpThickness': np.float64(0.23205856515373363), 'UniformityOfCellSize': np.float64(0.11379209370424599), 'UniformityOfCellShape': np.float64(0.19383894582723274), 'MarginalAdhesion': np.float64(0.5099121522693997), 'SingleEpithelialCellSize': np.float64(0.41423133235724746), 'BareNuclei': np.float64(2.2722811127379208), 'BlandChromatin': np.float64(-0.13530893118594436), 'NormalNucleoli': np.float64(-0.16785065885797948), 'Mitoses': np.float64(-0.13632796486090773)}

Explication (m–1) trouvée

Argument Contre BlandChromatin (Δ=-0.1353) compensé par : ['BareNuclei']
  cumul Pros = +2.2723, bilan net (+BIAS) = +10.5140

Argument Contre NormalNucleoli (Δ=-0.1679) compensé par : ['UniformityOfCellShape']
  cumul Pros = +0.1938, bilan net (+BIAS) = +8.4030

Argument Contre Mitoses (Δ=-0.13


Ici, chaque con doit être compensé par un ou plusieurs pros, et chaque pro peut être utilisé au plus une fois.

Une explication m-1 est trouvée, trois pros permettent de compenser les trois seuls cons. La décision finale est alors maligne.

# Explication mixte (1-m et m-1) pour le dataset Breast Cancer

In [18]:
import pandas as pd
from gurobipy import *

# Load the preprocessed Breast Cancer dataset.
df = pd.read_csv("breastcancer_processed.csv")

features = [
    "ClumpThickness",
    "UniformityOfCellSize",
    "UniformityOfCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses"
]

# Target class: Malignant = 1 (complement of the "Benign" column).
# Not used by the explanation model below.
y = 1 - df["Benign"]
X = df[features]

# Logistic-regression coefficients (hard-coded) and intercept.
logistic_weights = {
    "ClumpThickness": 0.416,
    "UniformityOfCellSize": 0.134,
    "UniformityOfCellShape": 0.247,
    "MarginalAdhesion": 0.235,
    "SingleEpithelialCellSize": 0.110,
    "BareNuclei": 0.352,
    "BlandChromatin": 0.304,
    "NormalNucleoli": 0.193,
    "Mitoses": 0.226
}
# The intercept is only used in the printed report, never in the constraints.
bias = -8.377

# Instance to explain (positional row of X) and the column means used as
# the reference point.
i = 1  # patient
x = X.iloc[i]
mu = X.mean()

# Contributions: delta_f = w_f * (x_f - mu_f).
deltas = {f: logistic_weights[f] * (x[f] - mu[f]) for f in features}

# Pros have a strictly positive delta, cons a strictly negative one.
pros = [f for f, v in deltas.items() if v > 0]
cons = [f for f, v in deltas.items() if v < 0]

print(f"Instance {i}")
print("Pros :", pros)
print("Cons :", cons)
print("Deltas :", deltas)

# Mixed model combining 1-m groups (one pro covers several cons) and
# m-1 groups (several pros cover one con). The bias is not a pivot candidate.
m = Model("BreastCancer_Explanation_Mixte_BIAS_exclu")

# Pivot variables
z_1m = m.addVars(pros, vtype=GRB.BINARY, name="z_1m")  # pro p is the pivot of a 1-m group
z_m1 = m.addVars(cons, vtype=GRB.BINARY, name="z_m1")  # con c is the pivot of an m-1 group

# Link variables
v = m.addVars(pros, cons, vtype=GRB.BINARY, name="v")  # p covers c inside p's 1-m group
w = m.addVars(pros, cons, vtype=GRB.BINARY, name="w")  # p helps c inside c's m-1 group

m.update()

# epsilon turns the strict inequality "> 0" into a solver-friendly ">=".
# BigM relaxes a validity constraint whenever its pivot is not selected.
epsilon = 1e-6
BigM = 1000

# Each con is either an m-1 pivot or covered by exactly one pro in a 1-m group.
for c in cons:
    m.addConstr(z_m1[c] + quicksum(v[p, c] for p in pros) == 1, name=f"CoverCons_{c}")

# Each pro is a 1-m pivot, or helps at most one con in m-1, or is unused.
for p in pros:
    m.addConstr(z_1m[p] + quicksum(w[p, c] for c in cons) <= 1, name=f"UniquePro_{p}")

# Link consistency: a link may only be active when its pivot is selected.
for p in pros:
    for c in cons:
        m.addConstr(v[p, c] <= z_1m[p])
        m.addConstr(w[p, c] <= z_m1[c])

# Group validity (bias ignored): a selected pivot's group must have a
# strictly positive net delta; unselected pivots are relaxed through BigM.
for p in pros:
    m.addConstr(deltas[p] + quicksum(deltas[c] * v[p, c] for c in cons) >= epsilon - BigM * (1 - z_1m[p]))
for c in cons:
    m.addConstr(deltas[c] + quicksum(deltas[p] * w[p, c] for p in pros) >= epsilon - BigM * (1 - z_m1[c]))

# Objective: minimise the number of pivots to get a compact explanation.
m.setObjective(quicksum(z_1m[p] for p in pros) + quicksum(z_m1[c] for c in cons), GRB.MINIMIZE)

m.Params.OutputFlag = 0
m.optimize()

# Result analysis
if m.status == GRB.OPTIMAL:
    print("\nSolution Mixte trouvée :\n")

    # NOTE(review): both reports below are labelled "Net + BIAS" but add
    # -bias (i.e. +8.377). Computation kept as-is; confirm the intended
    # sign/baseline before relying on these figures.

    # 1-m trade-offs
    for p in pros:
        # Binary values come back as floats; 0.5 is the usual rounding threshold.
        if z_1m[p].X > 0.5:
            my_cons = [c for c in cons if v[p, c].X > 0.5]
            net_val = deltas[p] + sum(deltas[c] for c in my_cons) + (-bias)
            print(f"[Type 1-m] Pro '{p}' (Δ={deltas[p]:+.4f}) couvre Cons {my_cons} (Net + BIAS: {net_val:+.4f})")

    # m-1 trade-offs
    for c in cons:
        if z_m1[c].X > 0.5:
            my_pros = [p for p in pros if w[p, c].X > 0.5]
            net_val = deltas[c] + sum(deltas[p] for p in my_pros) + (-bias)
            print(f"[Type m-1] Con '{c}' (Δ={deltas[c]:+.4f}) compensé par Pros {my_pros} (Net + BIAS: {net_val:+.4f})")

elif m.status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
    # The objective is a sum of binaries (bounded below by 0), so the model
    # cannot be unbounded: the ambiguous INF_OR_UNBD status means infeasible.
    print("Aucune explication mixte possible.")

else:
    # Any other status (time limit, interruption, numerical trouble...):
    # report it instead of silently printing nothing.
    print(f"Résolution non concluante (statut Gurobi : {m.status}).")


Instance 1
Pros : ['ClumpThickness', 'UniformityOfCellSize', 'UniformityOfCellShape', 'MarginalAdhesion', 'SingleEpithelialCellSize', 'BareNuclei']
Cons : ['BlandChromatin', 'NormalNucleoli', 'Mitoses']
Deltas : {'ClumpThickness': np.float64(0.23205856515373363), 'UniformityOfCellSize': np.float64(0.11379209370424599), 'UniformityOfCellShape': np.float64(0.19383894582723274), 'MarginalAdhesion': np.float64(0.5099121522693997), 'SingleEpithelialCellSize': np.float64(0.41423133235724746), 'BareNuclei': np.float64(2.2722811127379208), 'BlandChromatin': np.float64(-0.13530893118594436), 'NormalNucleoli': np.float64(-0.16785065885797948), 'Mitoses': np.float64(-0.13632796486090773)}

Solution Mixte trouvée :

[Type 1-m] Pro 'BareNuclei' (Δ=+2.2723) couvre Cons ['BlandChromatin', 'NormalNucleoli', 'Mitoses'] (Net + BIAS: +10.2098)


Le modèle a trouvé une explication de type 1-m.

BareNuclei permet de compenser les trois seuls cons. La tumeur est maligne cliniquement. Le biais favorisant le caractère malin, la décision est fortement maligne.