In [None]:
import torch
import clip
from PIL import Image

In [None]:
import os
import numpy as np
import itertools
from scipy.spatial import distance

In [None]:
# Root folder of the image sets. load_image_asset reads the '<A>-<X>'
# sub-folder beneath it (for example 'male-science').
DATA_PATH = 'img/gender-science'

## Load the CLIP model

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the model together with the preprocessing callable that
# load_image_asset applies to every PIL image before encoding it.
model, preprocess = clip.load("ViT-L/14", device=device)

## Load image generations

In [None]:
def load_image_asset(A='male', X='science'):
    """Encode every image in ``DATA_PATH/<A>-<X>`` with the CLIP image encoder.

    Args:
        A: First part of the folder name (the attribute, e.g. 'male',
            'female' or 'neutral').
        X: Second part of the folder name (the concept, e.g. 'science'
            or 'art').

    Returns:
        A numpy array with one row of image features per file in the folder,
        ordered by sorted file name.

    Raises:
        ValueError: If the folder contains no files.
        OSError: Propagated from ``os.listdir`` / ``Image.open`` when the
            folder or a file cannot be read.
    """
    path = os.path.join(DATA_PATH, f'{A}-{X}')

    # os.listdir returns entries in arbitrary order. get_associate_scores
    # pairs rows of different sets by index position, so the row order must
    # at least be reproducible across runs and machines.
    # NOTE(review): sorting is lexicographic -- confirm the file naming
    # (e.g. zero-padded indices) yields the prompt grouping that
    # get_associate_scores expects.
    filenames = sorted(os.listdir(path))
    if not filenames:
        raise ValueError(f'no image files found in {path!r}')

    images = []
    for filename in filenames:
        # The context manager closes the file handle once the image has been
        # converted to a tensor, instead of leaving it to the garbage collector.
        with Image.open(os.path.join(path, filename)) as image:
            images.append(preprocess(image).to(device))

    batch = torch.stack(images)
    # Inference only: no gradients are needed for feature extraction.
    with torch.no_grad():
        image_feature = model.encode_image(batch)
    return image_feature.cpu().numpy()

In [None]:
# Encode the six image sets: {neutral, male, female} x {science, art}.
# S_* hold the science embeddings and A_* the art embeddings; the suffix marks
# the attribute (n = neutral, a = male, b = female).
# Each call must pass the attribute as A and the concept as X: repeating the
# keyword A in one call is a SyntaxError.
S_n = load_image_asset(A='neutral', X='science')
S_a = load_image_asset(A='male', X='science')
S_b = load_image_asset(A='female', X='science')
A_n = load_image_asset(A='neutral', X='art')
A_a = load_image_asset(A='male', X='art')
A_b = load_image_asset(A='female', X='art')

## Compute association measures

In [None]:
def compute_pairwise_associate_score(S1, S2):
    """Return the mean cosine distance over all (row of S1, row of S2) pairs.

    Args:
        S1: 2-D array-like of shape (n, d), one embedding per row.
        S2: 2-D array-like of shape (m, d), one embedding per row.

    Returns:
        The mean of the n * m pairwise cosine distances (smaller means the two
        sets are closer), or NaN when either set has no rows.
    """
    # Upcast before reducing: the embeddings may arrive in reduced precision
    # (e.g. from a half-precision model), and dot products should not be
    # accumulated at that precision.
    first = np.asarray(S1, dtype=np.float64)
    second = np.asarray(S2, dtype=np.float64)

    # No pairs to average; return NaN explicitly rather than relying on the
    # mean of an empty array (which also yields NaN but emits a warning).
    if first.shape[0] == 0 or second.shape[0] == 0:
        return float('nan')

    # cdist computes the full (n, m) distance matrix in one vectorized call,
    # replacing the Python-level loop over every pair.
    return distance.cdist(first, second, metric='cosine').mean()

In [None]:
def get_associate_scores(S1, S2, num_images_per_prompt=5):
    """Score consecutive groups of S1 against the matching groups of S2.

    S1 is cut into consecutive groups of ``num_images_per_prompt`` rows. S2 is
    expected to hold ``scale = len(S2) // len(S1)`` times as many rows, so the
    group covering rows ``i:j`` of S1 is compared with rows
    ``i*scale:j*scale`` of S2.

    Args:
        S1: 2-D array of embeddings, one row per image (the smaller set).
        S2: 2-D array of embeddings with at least as many rows as S1.
        num_images_per_prompt: Number of S1 rows that form one group.

    Returns:
        1-D numpy array holding one mean pairwise score per group of S1.

    Raises:
        ValueError: If ``num_images_per_prompt`` is not positive, S1 is empty,
            or S2 has fewer rows than S1.
    """
    if num_images_per_prompt <= 0:
        raise ValueError('num_images_per_prompt must be a positive integer')

    n_rows = S1.shape[0]
    if n_rows == 0:
        # Would otherwise surface as an unexplained ZeroDivisionError below.
        raise ValueError('S1 must contain at least one row')

    # S2 should be larger than S1
    scale = S2.shape[0] // n_rows
    if scale == 0:
        # With scale == 0 every S2 slice is empty and all scores become NaN.
        raise ValueError(
            f'S2 must have at least as many rows as S1 '
            f'(got {S2.shape[0]} and {n_rows})'
        )

    scores = [
        compute_pairwise_associate_score(
            S1[start:start + num_images_per_prompt],
            S2[start * scale:(start + num_images_per_prompt) * scale],
        )
        for start in range(0, n_rows, num_images_per_prompt)
    ]
    return np.array(scores)

In [None]:
def get_differential_association(X_n, X_a, X_b, Y_n, Y_a, Y_b):
    """Print the four mean association scores and return their differential.

    For each concept (X and Y) the neutral set is scored against the two
    attribute sets with get_associate_scores and the group scores are averaged.

    Returns:
        (mean(X_n vs X_a) - mean(X_n vs X_b))
        - (mean(Y_n vs Y_a) - mean(Y_n vs Y_b))
    """
    # Evaluated in the same order in which the summary line reports them.
    pairings = ((X_n, X_a), (X_n, X_b), (Y_n, Y_a), (Y_n, Y_b))
    x_a_mean, x_b_mean, y_a_mean, y_b_mean = [
        get_associate_scores(neutral, attributed).mean()
        for neutral, attributed in pairings
    ]

    print(x_a_mean, x_b_mean, y_a_mean, y_b_mean)

    x_gap = x_a_mean - x_b_mean
    y_gap = y_a_mean - y_b_mean
    return x_gap - y_gap
    

In [None]:
# Differential association of the science sets versus the art sets. The call
# also prints the four mean association scores before returning the value.
get_differential_association(S_n, S_a, S_b, A_n, A_a, A_b)

In [None]:
def get_effect_size(X_n, X_a, X_b, Y_n, Y_a, Y_b):
    """Return the difference of the mean score gaps, scaled by their spread.

    For each concept the per-group gap is the score of the neutral set against
    the first attribute set minus its score against the second attribute set
    (both from get_associate_scores).

    Returns:
        (mean of the X gaps - mean of the Y gaps) divided by the standard
        deviation (numpy's default ``std``) of the X and Y gaps pooled together.
    """
    def _score_gap(neutral, first, second):
        # Per-group difference between the two attribute scores.
        return get_associate_scores(neutral, first) - get_associate_scores(neutral, second)

    x_gaps = _score_gap(X_n, X_a, X_b)
    y_gaps = _score_gap(Y_n, Y_a, Y_b)

    pooled_std = np.concatenate((x_gaps, y_gaps)).std()
    mean_difference = x_gaps.mean() - y_gaps.mean()
    return mean_difference / pooled_std

In [None]:
# Effect size: difference between the mean per-group score gaps of the science
# and art sets, divided by the standard deviation of all gaps pooled together.
get_effect_size(S_n, S_a, S_b, A_n, A_a, A_b)