### Notebook contains accuracy metrics for black-white groups across all emotions and individually as well.

In [1]:
import torch
from emonet import EmoNet
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms
import pandas as pd
random.seed(42)

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EmoNet(n_expression=8)
model.load_state_dict(torch.load("D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\\cfd_finetuned_emonet_100_epochs.pth"))
model.to(device)
model.eval()

In [3]:
device

device(type='cuda')

In [4]:
#defining image transform
cfd_transform=transforms.Compose([ transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])])

In [5]:
emotion_map = {'N': 0, 'A': 1, 'F': 2, 'HC': 3, 'HO': 3}

In [6]:
def load_images(path):
    images_list = []
    label_list = []
    filenames = [f for f in os.listdir(path) if f.endswith('.jpg')]
    
    for file in filenames:
        img_path = os.path.join(path, file)
        last_char = file[-5]
        if last_char=='C' or last_char=='O':
            last_char='H'+last_char
        label = emotion_map[last_char]
        image = Image.open(img_path).convert("RGB")
        image = cfd_transform(image)
        images_list.append(image)
        label_list.append(label)

    images_tensor = torch.stack(images_list) if images_list else torch.empty(0)
    labels_tensor = torch.tensor(label_list, dtype=torch.long) if label_list else torch.empty(0, dtype=torch.long)

    return images_tensor, labels_tensor

### loading black and white datasets

In [7]:
path_abm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\Black\Angry"
path_fbm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\Black\Fear"
path_nbm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\Black\\Neutral"
path_hcbm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\Black\Happy_ClosedMouth"
path_hobm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\Black\Happy_OpenMouth"

path_abf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\Black\Angry"
path_fbf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\Black\Fear"
path_nbf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\Black\\Neutral"
path_hcbf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\Black\Happy_ClosedMouth"
path_hobf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\Black\Happy_OpenMouth"

abm_list,abm_labels=load_images(path_abm)
fbm_list,fbm_labels=load_images(path_fbm)
nbm_list,nbm_labels=load_images(path_nbm)
hcbm_list,hcbm_labels=load_images(path_hcbm)
hobm_list,hobm_labels=load_images(path_hobm)

abf_list,abf_labels=load_images(path_abf)
fbf_list,fbf_labels=load_images(path_fbf)
nbf_list,nbf_labels=load_images(path_nbf)
hcbf_list,hcbf_labels=load_images(path_hcbf)
hobf_list,hobf_labels=load_images(path_hobf)

#we've loaded corresponding images, now stack everything to create one whole tensor for black people.

black_images = torch.cat([
    abm_list, fbm_list, nbm_list, hcbm_list, hobm_list, 
    abf_list, fbf_list, nbf_list, hcbf_list, hobf_list
], dim=0)

black_labels=torch.cat([
abm_labels,fbm_labels,nbm_labels,hcbm_labels,hobm_labels,abf_labels,fbf_labels,nbf_labels,hcbf_labels,hobf_labels
])

print("Black image tensor shape:", black_images.shape)

Black image tensor shape: torch.Size([526, 3, 256, 256])


In [8]:
path_awm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\White\Angry"
path_fwm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\White\Fear"
path_nwm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\White\\Neutral"
path_hcwm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\White\Happy_ClosedMouth"
path_howm="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Male\White\Happy_OpenMouth"

path_awf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\White\Angry"
path_fwf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\White\Fear"
path_nwf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\White\\Neutral"
path_hcwf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\White\Happy_ClosedMouth"
path_howf="D:\Integrated_gap_gradients\ig2_CNN\gpu_env_ig2\Female\White\Happy_OpenMouth"

awm_list,awm_labels=load_images(path_awm)
fwm_list,fwm_labels=load_images(path_fwm)
nwm_list,nwm_labels=load_images(path_nwm)
hcwm_list,hcwm_labels=load_images(path_hcwm)
howm_list,howm_labels=load_images(path_howm)

awf_list,awf_labels=load_images(path_awf)
fwf_list,fwf_labels=load_images(path_fwf)
nwf_list,nwf_labels=load_images(path_nwf)
hcwf_list,hcwf_labels=load_images(path_hcwf)
howf_list,howf_labels=load_images(path_howf)


white_images = torch.cat([
    awm_list, fwm_list, nwm_list, hcwm_list, howm_list, 
    awf_list, fwf_list, nwf_list, hcwf_list, howf_list
], dim=0)

white_labels=torch.cat([
awm_labels,fwm_labels,nwm_labels,hcwm_labels,howm_labels,awf_labels,fwf_labels,nwf_labels,hcwf_labels,howf_labels
])

print("white image tensor shape:", white_images.shape)

white image tensor shape: torch.Size([464, 3, 256, 256])


### loading functions for evaluation

In [9]:
#function to extract relevant emotion logits, we only need 4 instead of 8
def extract_relevant_logits(output):
    logits=output["expression"]
    relevant_logits = torch.stack([
        logits[:, 0],  # N
        logits[:, 6],  # A
        logits[:, 4],  # F
        logits[:, 1],  # (HappyClosedMouth + HappyOpenMouth)
    ], dim=1) 

    return relevant_logits

In [10]:
def predict_emonet(image_path, model):
    image = Image.open(image_path).convert("RGB")
    image = cfd_transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        predicted_label = torch.argmax(extract_relevant_logits(output), dim=1).item()
    
    return predicted_label

In [11]:
def calculate_accuracy(image_tensor, label_tensor, model):
    correct = 0
    total = image_tensor.shape[0] 
    with torch.no_grad():
        for i in range(total):
            image = image_tensor[i].unsqueeze(0).to(device)
            true_label = label_tensor[i].item() 
            output = model(image)
            pred_label = torch.argmax(extract_relevant_logits(output), dim=1).item()

            if pred_label == true_label:
                correct += 1

    return correct / total * 100 if total > 0 else 0

### accuracy calculation for all black and white images

In [12]:
black_accuracy = calculate_accuracy(black_images, black_labels, model)
white_accuracy = calculate_accuracy(white_images, white_labels, model)

print(f"accuracy black imgs: {black_accuracy:.2f}%")
print(f"accuracy white imgs: {white_accuracy:.2f}%")

accuracy black imgs: 91.83%
accuracy white imgs: 90.95%


### accuracy calculation for all black and white angry images

In [13]:
black_angry_images = torch.cat([
    abm_list, abf_list
], dim=0)

black_angry_labels=torch.cat([
abm_labels,abf_labels
])

print("Black angry image tensor shape:", black_angry_images.shape)

Black angry image tensor shape: torch.Size([82, 3, 256, 256])


In [14]:
white_angry_images = torch.cat([
    awm_list, awf_list
], dim=0)

white_angry_labels=torch.cat([
awm_labels,awf_labels
])

print("white angry image tensor shape:", white_angry_images.shape)

white angry image tensor shape: torch.Size([72, 3, 256, 256])


In [15]:
black_angry_accuracy = calculate_accuracy(black_angry_images, black_angry_labels, model)
white_angry_accuracy = calculate_accuracy(white_angry_images, white_angry_labels, model)

print(f"accuracy black angry imgs: {black_angry_accuracy:.2f}%")
print(f"accuracy white angry imgs: {white_angry_accuracy:.2f}%")

accuracy black angry imgs: 85.37%
accuracy white angry imgs: 86.11%


### accuracy calculation for angry b and w males

In [16]:
black_angry_male_accuracy = calculate_accuracy(abm_list, abm_labels, model)
white_angry_male_accuracy = calculate_accuracy(awm_list,awm_labels, model)

print(f"accuracy black angry male imgs: {black_angry_male_accuracy:.2f}%")
print(f"accuracy white angry male imgs: {white_angry_male_accuracy:.2f}%")

accuracy black angry male imgs: 91.43%
accuracy white angry male imgs: 85.71%


### accuracy calculation for angry b and w females

In [17]:
black_angry_female_accuracy = calculate_accuracy(abf_list, abf_labels, model)
white_angry_female_accuracy = calculate_accuracy(awf_list,awf_labels, model)

print(f"accuracy black angry female imgs: {black_angry_female_accuracy:.2f}%")
print(f"accuracy white angry female imgs: {white_angry_female_accuracy:.2f}%")

accuracy black angry female imgs: 80.85%
accuracy white angry female imgs: 86.49%


In [18]:
abf_labels

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### accuracy calculation for all fear b and w images

In [19]:
black_fear_images = torch.cat([
    fbm_list, fbf_list
], dim=0)

black_fear_labels=torch.cat([
fbm_labels,fbf_labels
])

print("Black fear image tensor shape:", black_fear_images.shape)

Black fear image tensor shape: torch.Size([83, 3, 256, 256])


In [20]:
fbf_labels

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [21]:
white_fear_images = torch.cat([
    fwm_list, fwf_list
], dim=0)

white_fear_labels=torch.cat([
fwm_labels,fwf_labels
])

print("white fear image tensor shape:", white_fear_images.shape)

white fear image tensor shape: torch.Size([66, 3, 256, 256])


In [22]:
black_fear_accuracy = calculate_accuracy(black_fear_images, black_fear_labels, model)
white_fear_accuracy = calculate_accuracy(white_fear_images, white_fear_labels, model)

print(f"accuracy black fear imgs: {black_fear_accuracy:.2f}%")
print(f"accuracy white fear imgs: {white_fear_accuracy:.2f}%")

accuracy black fear imgs: 90.36%
accuracy white fear imgs: 81.82%


### accuracy calculation for male fear b and w images

In [23]:
black_fear_male_accuracy = calculate_accuracy(fbm_list, fbm_labels, model)
white_fear_male_accuracy = calculate_accuracy(fwm_list,fwm_labels, model)

print(f"accuracy black fear male imgs: {black_fear_male_accuracy:.2f}%")
print(f"accuracy white fear male imgs: {white_fear_male_accuracy:.2f}%")

accuracy black fear male imgs: 94.29%
accuracy white fear male imgs: 89.66%


### accuracy calculation for female fear b and w images

In [24]:
black_fear_female_accuracy = calculate_accuracy(fbf_list, fbf_labels, model)
white_fear_female_accuracy = calculate_accuracy(fwf_list,fwf_labels, model)

print(f"accuracy black fear female imgs: {black_fear_female_accuracy:.2f}%")
print(f"accuracy white fear female imgs: {white_fear_female_accuracy:.2f}%")

accuracy black fear female imgs: 87.50%
accuracy white fear female imgs: 75.68%


### all b-w neutral images accuracy calculation

In [25]:
black_neutral_images = torch.cat([nbm_list, nbf_list], dim=0)
black_neutral_labels = torch.cat([nbm_labels, nbf_labels])

white_neutral_images = torch.cat([nwm_list, nwf_list], dim=0)
white_neutral_labels = torch.cat([nwm_labels, nwf_labels])

print("Black neutral image tensor shape:", black_neutral_images.shape)
print("White neutral image tensor shape:", white_neutral_images.shape)

Black neutral image tensor shape: torch.Size([197, 3, 256, 256])
White neutral image tensor shape: torch.Size([183, 3, 256, 256])


In [26]:
black_neutral_accuracy = calculate_accuracy(black_neutral_images, black_neutral_labels, model)
white_neutral_accuracy = calculate_accuracy(white_neutral_images, white_neutral_labels, model)

print(f"Accuracy black neutral imgs: {black_neutral_accuracy:.2f}%")
print(f"Accuracy white neutral imgs: {white_neutral_accuracy:.2f}%")

Accuracy black neutral imgs: 90.36%
Accuracy white neutral imgs: 93.99%


### accuracy for neutral male b-w images

In [27]:
black_male_neutral_accuracy = calculate_accuracy(nbm_list, nbm_labels, model)
white_male_neutral_accuracy = calculate_accuracy(nwm_list, nwm_labels, model)

print(f"Accuracy black male neutral imgs: {black_male_neutral_accuracy:.2f}%")
print(f"Accuracy white male neutral imgs: {white_male_neutral_accuracy:.2f}%")

Accuracy black male neutral imgs: 93.55%
Accuracy white male neutral imgs: 96.77%


### accuracy for neutral female b-w images

In [28]:
black_female_neutral_accuracy = calculate_accuracy(nbf_list, nbf_labels, model)
white_female_neutral_accuracy = calculate_accuracy(nwf_list, nwf_labels, model)

print(f"Accuracy black female neutral imgs: {black_female_neutral_accuracy:.2f}%")
print(f"Accuracy white female neutral imgs: {white_female_neutral_accuracy:.2f}%")

Accuracy black female neutral imgs: 87.50%
Accuracy white female neutral imgs: 91.11%


### accuracy for all b-w happy images

In [29]:
black_happy_images = torch.cat([hcbm_list, hcbf_list,hobf_list,hobm_list], dim=0)
black_happy_labels = torch.cat([hcbm_labels, hcbf_labels,hobf_labels,hobm_labels])

white_happy_images = torch.cat([hcwm_list, hcwf_list,howf_list,howm_list], dim=0)
white_happy_labels = torch.cat([hcwm_labels, hcwf_labels,howf_labels,howm_labels])

print("Black happy image tensor shape:", black_happy_images.shape)
print("White happy image tensor shape:", white_happy_images.shape)

Black happy image tensor shape: torch.Size([164, 3, 256, 256])
White happy image tensor shape: torch.Size([143, 3, 256, 256])


In [30]:
black_happy_accuracy = calculate_accuracy(black_happy_images, black_happy_labels, model)
white_happy_accuracy = calculate_accuracy(white_happy_images, white_happy_labels, model)

print(f"Accuracy black happy imgs: {black_happy_accuracy:.2f}%")
print(f"Accuracy white happy imgs: {white_happy_accuracy:.2f}%")

Accuracy black happy imgs: 97.56%
Accuracy white happy imgs: 93.71%


### accuracy for b-w male happy images

In [31]:
black_happymale_images = torch.cat([hcbm_list,hobm_list], dim=0)
black_happymale_labels = torch.cat([hcbm_labels,hobm_labels])

white_happymale_images = torch.cat([hcwm_list,howm_list], dim=0)
white_happymale_labels = torch.cat([hcwm_labels,howm_labels])

print("Black happy image tensor shape:", black_happymale_images.shape)
print("White happy image tensor shape:", white_happymale_images.shape)

Black happy image tensor shape: torch.Size([68, 3, 256, 256])
White happy image tensor shape: torch.Size([71, 3, 256, 256])


In [32]:
black_happymale_accuracy = calculate_accuracy(black_happymale_images, black_happymale_labels, model)
white_happymale_accuracy = calculate_accuracy(white_happymale_images, white_happymale_labels, model)

print(f"Accuracy black happy male imgs: {black_happymale_accuracy:.2f}%")
print(f"Accuracy white happy male imgs: {white_happymale_accuracy:.2f}%")

Accuracy black happy male imgs: 98.53%
Accuracy white happy male imgs: 90.14%


### accuracy for b-w female happy images

In [33]:
black_happyfemale_images = torch.cat([hcbf_list,hobf_list], dim=0)
black_happyfemale_labels = torch.cat([hcbf_labels,hobf_labels])

white_happyfemale_images = torch.cat([hcwf_list,howf_list], dim=0)
white_happyfemale_labels = torch.cat([hcwf_labels,howf_labels])

print("Black happy image tensor shape:", black_happyfemale_images.shape)
print("White happy image tensor shape:", white_happyfemale_images.shape)

Black happy image tensor shape: torch.Size([96, 3, 256, 256])
White happy image tensor shape: torch.Size([72, 3, 256, 256])


In [34]:
black_happyfemale_accuracy = calculate_accuracy(black_happyfemale_images, black_happyfemale_labels, model)
white_happyfemale_accuracy = calculate_accuracy(white_happyfemale_images, white_happyfemale_labels, model)

print(f"Accuracy black happy female imgs: {black_happyfemale_accuracy:.2f}%")
print(f"Accuracy white happy female imgs: {white_happyfemale_accuracy:.2f}%")

Accuracy black happy female imgs: 96.88%
Accuracy white happy female imgs: 97.22%


### save to csv

In [35]:
accuracy_list=[black_accuracy, white_accuracy,black_angry_accuracy,white_angry_accuracy,black_angry_male_accuracy,white_angry_male_accuracy,black_angry_female_accuracy,white_angry_female_accuracy,black_fear_accuracy,white_fear_accuracy,black_fear_male_accuracy,white_fear_male_accuracy,black_fear_female_accuracy,white_fear_female_accuracy,black_happy_accuracy,white_happy_accuracy,black_happymale_accuracy,white_happymale_accuracy,black_happyfemale_accuracy,white_happyfemale_accuracy,black_neutral_accuracy,white_neutral_accuracy,black_male_neutral_accuracy,white_male_neutral_accuracy,black_female_neutral_accuracy,white_female_neutral_accuracy]
col_list = [
    "black_accuracy", "white_accuracy", "black_angry_accuracy", "white_angry_accuracy",
    "black_angry_male_accuracy", "white_angry_male_accuracy", "black_angry_female_accuracy", "white_angry_female_accuracy",
    "black_fear_accuracy", "white_fear_accuracy", "black_fear_male_accuracy", "white_fear_male_accuracy",
    "black_fear_female_accuracy", "white_fear_female_accuracy", "black_happy_accuracy", "white_happy_accuracy",
    "black_happymale_accuracy", "white_happymale_accuracy", "black_happyfemale_accuracy", "white_happyfemale_accuracy",
    "black_neutral_accuracy", "white_neutral_accuracy", "black_male_neutral_accuracy", "white_male_neutral_accuracy",
    "black_female_neutral_accuracy", "white_female_neutral_accuracy"
]


In [36]:
rounded_accuracy = [round(value, 3) for value in accuracy_list]

In [37]:
df=pd.DataFrame(rounded_accuracy,col_list,columns=['accuracy(%)'])
df.head()

Unnamed: 0,accuracy(%)
black_accuracy,91.825
white_accuracy,90.948
black_angry_accuracy,85.366
white_angry_accuracy,86.111
black_angry_male_accuracy,91.429


In [38]:
df.to_csv('metrics_bw.csv')

### FAIRNESS METRICS PRE-SUPPRESSION

In [39]:
from torch.utils.data import DataLoader, TensorDataset
def get_preds_in_batches_reduced(model, images, batch_size=32, device="cuda"):
    dataset = TensorDataset(images)
    loader = DataLoader(dataset, batch_size=batch_size)
    preds = []

    model.eval()
    with torch.no_grad():
        for (batch,) in loader:
            batch = batch.to(device)
            output = model(batch)
            logits = extract_relevant_logits(output)
            pred = logits.argmax(dim=1).cpu()
            preds.append(pred)

    return torch.cat(preds)

In [40]:
def compute_group_metrics(preds, labels, target_class):
    # True Positive Rate (TPR): correctly predicted as class y / all actual class y
    # False Positive Rate (FPR): predicted as class y but actual not y / all actual not y
    true_positive = ((preds == target_class) & (labels == target_class)).sum().item()
    false_positive = ((preds == target_class) & (labels != target_class)).sum().item()
    actual_positive = (labels == target_class).sum().item()
    actual_negative = (labels != target_class).sum().item()

    tpr = true_positive / actual_positive if actual_positive > 0 else 0
    fpr = false_positive / actual_negative if actual_negative > 0 else 0

    return tpr, fpr

In [41]:
def compute_equalized_odds_gap(model, black_images, black_labels, white_images, white_labels, target_class, device="cuda"):
    preds_b = get_preds_in_batches_reduced(model, black_images, device=device)
    preds_w = get_preds_in_batches_reduced(model, white_images, device=device)

    tpr_b, fpr_b = compute_group_metrics(preds_b, black_labels, target_class)
    tpr_w, fpr_w = compute_group_metrics(preds_w, white_labels, target_class)

    gap = abs(tpr_b - tpr_w) + abs(fpr_b - fpr_w)
    return gap, tpr_b,tpr_w,fpr_b,fpr_w

In [42]:
target_class = 1  # Angry is index 1 in our modified emotion set
gap1,tpr_b1,tpr_w1,fpr_b1,fpr_w1 = compute_equalized_odds_gap(model, black_images, black_labels, white_images, white_labels, target_class)
print(f"Equalized Odds Gap (Angry): {gap1:.3f}")

Equalized Odds Gap (Angry): 0.027


In [43]:
print(f"TPR Black Angry: {tpr_b1:.3f}, TPR White Angry: {tpr_w1:.3f}")
print(f"FPR Black Angry: {fpr_b1:.3f}, FPR White Angry: {fpr_w1:.3f}")

TPR Black Angry: 0.854, TPR White Angry: 0.861
FPR Black Angry: 0.014, FPR White Angry: 0.033


as observed above, the TPR for black angry and white angry images is almost similar and White individuals are more likely to be incorrectly predicted as angry, but the gap is small. therefore we observe close to no ethnic bias in the anger emotion for these 2 classes.

In [44]:
target_class = 2 #Fear is index 2
gap2,tpr_b2,tpr_w2,fpr_b2,fpr_w2 = compute_equalized_odds_gap(model, black_images, black_labels, white_images, white_labels, target_class)
print(f"Equalized Odds Gap (Fear): {gap2:.3f}")

Equalized Odds Gap (Fear): 0.114


In [45]:
print(f"TPR Black Fear: {tpr_b2:.3f}, TPR White Fear: {tpr_w2:.3f}")
print(f"FPR Black Fear: {fpr_b2:.3f}, FPR White Fear: {fpr_w2:.3f}")

TPR Black Fear: 0.904, TPR White Fear: 0.818
FPR Black Fear: 0.036, FPR White Fear: 0.008


for fear emotion across the 2 ethnicities, we observe that FPR in black images is ~ 4.5 times higher than for whites, which means more black images have been categorized as showing fear even when there is none, this could indicate some ethnic bias, as it predicts more black faces showing fear when there isn't any such thing.

In [46]:
target_class = 3 #Happy is index 3
gap3,tpr_b3,tpr_w3,fpr_b3,fpr_w3 = compute_equalized_odds_gap(model, black_images, black_labels, white_images, white_labels, target_class)
print(f"Equalized Odds Gap (Happy): {gap3:.3f}")

Equalized Odds Gap (Happy): 0.045


In [47]:
print(f"TPR Black Happy: {tpr_b3:.3f}, TPR White Happy: {tpr_w3:.3f}")
print(f"FPR Black Happy: {fpr_b3:.3f}, FPR White Happy: {fpr_w3:.3f}")

TPR Black Happy: 0.976, TPR White Happy: 0.937
FPR Black Happy: 0.022, FPR White Happy: 0.016


In [48]:
target_class = 0 #Neutral is index 0
gap0,tpr_b0,tpr_w0,fpr_b0,fpr_w0 = compute_equalized_odds_gap(model, black_images, black_labels, white_images, white_labels, target_class)
print(f"Equalized Odds Gap (Neutral): {gap0:.3f}")

Equalized Odds Gap (Neutral): 0.072


In [50]:
print(f"TPR Black N: {tpr_b3:.3f}, TPR White N: {tpr_w3:.3f}")
print(f"FPR Black N: {fpr_b3:.3f}, FPR White N: {fpr_w3:.3f}")

TPR Black N: 0.976, TPR White N: 0.937
FPR Black N: 0.022, FPR White N: 0.016
