In [None]:
import os

import numpy as np
import pandas as pd
import sklearn.feature_selection as fs
import torch
import clip
from helpers import *
import debias_clip as dclip

## Estimating the mutual information (MI) between feature dimensions and sensitive attributes, as described in "Are Gender-Neutral Queries Really Gender-Neutral? Mitigating Gender Bias in Image Search" (Wang et al., 2021)

def calc_feature_MI(features, labels, n_neighbors = 10, rs=1):
    """Estimate the mutual information between each feature and the labels.

    Thin wrapper around ``sklearn.feature_selection.mutual_info_classif``
    that always treats the features as continuous
    (``discrete_features=False``) and lets scikit-learn work on a copy of
    the input (``copy=True``).

    Args:
        features: Feature matrix, forwarded as the first positional argument.
        labels: Target labels, forwarded as the second positional argument.
        n_neighbors: Forwarded as ``n_neighbors``.
        rs: Forwarded as ``random_state``.

    Returns:
        The value returned by ``mutual_info_classif``.
    """
    estimator_options = {
        'discrete_features': False,
        'copy': True,
        'n_neighbors': n_neighbors,
        'random_state': rs,
    }
    return fs.mutual_info_classif(features, labels, **estimator_options)

def return_feature_MI_order(features, data, sensitive_attributes, n_neighbors = 10, rs=1):
    """Rank feature dimensions by mutual information with sensitive attributes.

    The selected attribute columns of ``data`` are combined row-wise into a
    single space-separated label (e.g. ``'1 -1'``), so several attributes
    form one intersectional class label. The mutual information between every
    feature column and that label is estimated with ``calc_feature_MI`` and
    the feature indices are returned sorted from highest to lowest MI.

    The labels, the raw MI values and the sorted MI values are printed.

    Args:
        features: Feature matrix with one row per row of ``data``.
        data: DataFrame holding the attribute columns.
        sensitive_attributes: List of column names in ``data`` to combine
            into the label.
        n_neighbors: Forwarded to ``calc_feature_MI``.
        rs: Random state forwarded to ``calc_feature_MI``.

    Returns:
        Array of feature indices ordered by decreasing mutual information.

    Raises:
        TypeError: If any selected attribute value is missing.
    """
    attribute_columns = data[sensitive_attributes]

    # A missing value cannot be turned into a meaningful class label. The
    # plain str.join used previously also raised TypeError here; keep the
    # exception type but say what is wrong.
    if attribute_columns.isna().any().any():
        raise TypeError(
            'sensitive attribute columns contain missing values: '
            + ', '.join(str(name) for name in sensitive_attributes)
        )

    # Stringify before joining so numeric/boolean attribute columns (such as
    # -1/1 integers) work as well as columns that already hold strings.
    labels = attribute_columns.astype(str).apply(' '.join, axis=1)
    print(labels)

    feature_MI = calc_feature_MI(features, labels, n_neighbors, rs)
    print(feature_MI)

    # argsort is ascending; reverse it to put the most informative feature first.
    feature_order = np.argsort(feature_MI)[::-1]
    print(feature_MI[feature_order])
    return feature_order

In [None]:
# Load the CelebA attribute table and cast every column to pandas' string
# dtype (return_feature_MI_order joins attribute values as text).
data = pd.read_csv('datasets/celeba/list_attr_celeba.csv').astype('string')
print(data.head())

# Rows 0..19,999: the split used for the mutual-information estimation.
MIclip_training = data.iloc[:20000]
MIclip_training.to_csv('datasets/celeba/MIclip_training.csv')

# Rows 20,000..34,999: second split, written out as alg_testing.csv.
img_testing = data.iloc[20000:35000]
img_testing.to_csv('datasets/celeba/alg_testing.csv')


In [None]:
# Embed every CelebA image with the baseline CLIP model (ViT-B/32) and store
# the resulting feature array on disk.
prefix = 'datasets/celeba/img_align_celeba/'
data['relative_path'] = prefix + data['image_id']

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device)

batch_size = 200

# `process_images` comes from `helpers`. The debiased-CLIP cell passes it a
# `device` keyword; do the same here instead of relying on the helper's
# default (not visible in this notebook), so both extractions are explicit
# about the device the model was loaded onto.
# The former `if True:` wrapper was a no-op and has been removed.
features = process_images(model, preprocess, data['relative_path'], batch_size=batch_size, device=device)

np.save('datasets/celeba/features.npy', features)


In [None]:
# Embed every CelebA image with the gender-debiased CLIP model
# ("ViT-B/16-gender") and store the resulting feature array on disk.
prefix = 'datasets/celeba/img_align_celeba/'
data['relative_path'] = prefix + data['image_id']

# NOTE(review): the baseline ViT-B/32 model is loaded again here although
# nothing in this cell uses it. It is kept so that `device`, `model` and
# `preprocess` are bound exactly as before; consider dropping it.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device)

device_d = "cuda" if torch.cuda.is_available() else "cpu"
model_debias, preprocess_debias = dclip.load("ViT-B/16-gender", device_d)

batch_size = 200

features_debias = process_images(model_debias, preprocess_debias, data['relative_path'], batch_size=batch_size, device=device_d)

# Persist the expensive result right away, before the model reload below, so
# a failure there cannot discard the computed features.
np.save('datasets/celeba/features_debias.npy', features_debias)

# Rebind the debiased model to a copy loaded on the CPU.
# NOTE(review): presumably done to release GPU memory after extraction - confirm.
device_d = 'cpu'
model_debias, preprocess_debias = dclip.load("ViT-B/16-gender", device_d)

In [None]:
# Rank the CLIP feature dimensions by mutual information with each sensitive
# attribute (or combination of attributes) and store every ranking on disk.

# np.save does not create missing directories. Make sure the output folder
# exists *before* the slow MI estimations start, and save each ranking as
# soon as it is available so partial progress is never lost.
os.makedirs('datasets/MI_orders', exist_ok=True)

# First 20,000 embeddings, lining up with MIclip_training (data.iloc[0:20000]).
MIfeatures = np.load('datasets/celeba/features.npy')[0:20000]

gender_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Male'])
np.save('datasets/MI_orders/gender.npy', gender_MI_order)

skintone_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Pale_Skin'])
np.save('datasets/MI_orders/skintone.npy', skintone_MI_order)

age_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Young'])
np.save('datasets/MI_orders/age.npy', age_MI_order)

gender_skintone_intersectional_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Male', 'Pale_Skin'])
np.save('datasets/MI_orders/gender_skintone.npy', gender_skintone_intersectional_MI_order)

intersectional_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Pale_Skin', 'Male', 'Young'])
np.save('datasets/MI_orders/intersectional.npy', intersectional_MI_order)

additional_concepts_MI_order = return_feature_MI_order(MIfeatures, MIclip_training, ['Pale_Skin', 'Male', 'Young', 'Attractive'])
np.save('datasets/MI_orders/additional_concepts.npy', additional_concepts_MI_order)


In [None]:
# Show the feature ranking computed for the 'Young' attribute
# (feature indices ordered from highest to lowest mutual information).
print(age_MI_order)