In [16]:
import torch
import clip
from PIL import Image
import logging
import pandas as pd
import glob 
import os
from io import BytesIO
from PIL import UnidentifiedImageError
import subprocess
from weat.test import Test
import torch
import clip
# device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device("cuda:2")
print(device)
model, preprocess = clip.load("ViT-L/14", device=device)

cuda:2


In [17]:
class dwebpException(Exception):
    pass

def dwebp(file: str):
    webp = subprocess.run(
        f"dwebp  {file} -quiet -o -", shell=True, capture_output=True
    )
    if webp.returncode != 0:
        raise dwebpException(webp.stderr.decode())
    else:
        return Image.open(BytesIO(webp.stdout))

def load_dir(path):
    tmp = []
    
    for file in glob.glob(path):
        # 파일 확장자가 .json이면 무시
        if os.path.splitext(file)[1].lower() == ".json":
            continue

        with torch.no_grad():
            try:
                img = Image.open(file)
            except UnidentifiedImageError:
                if os.path.splitext(file)[1].lower() == ".webp":
                    img = dwebp(file)
                  
                else:
                    raise
            prep = preprocess(img).unsqueeze(0).to(device)
            emb = model.encode_image(prep)
            tmp.append(emb.cpu())
    return tmp

In [18]:
def ieat_calc(X_Image_Paths, Y_Image_Paths, A_texts, B_texts, Names):
    df = pd.DataFrame(columns=['Name', 'X', 'Y', 'A', 'B', 'n_t', 'n_a', 'p_i', 'd_i'])
    for i in range(len(Names[0])):
        X_image = torch.cat(load_dir(f'{X_Image_Paths[i]}*'))
        Y_image = torch.cat(load_dir(f'{Y_Image_Paths[i]}*'))

        with torch.no_grad():
            A_text = model.encode_text(A_texts[i]).to("cpu")
            B_text = model.encode_text(B_texts[i]).to("cpu")
                
        test = Test(X_image, Y_image, A_text, B_text)
        out = test.run()
        result = {
            'Name': Names[0][i],
            'X': Names[1][i],
            'Y': Names[2][i],
            'A': Names[3][i],
            'B': Names[4][i],
            'n_t': X_image.shape[0],
            'n_a': A_text.shape[0], 
            'p_i': out[1],
            'd_i': out[0]
        }
        df = df.append(result, ignore_index=True)
    return df

In [30]:
import json
import torch
import clip

def load_caption(load_dir, model, device):
    # 1. JSON 파일 읽기
    with open(load_dir+'captions.json', 'r') as f:
        data = json.load(f)

    # 2. "caption" 키의 값만 추출
    captions = [item['caption'] for item in data]

    return model.encode_text(clip.tokenize(captions).to(device))


### Naive image-caption bias

In [39]:
X_image = torch.cat(load_dir('./gender/male/*'))
Y_image = torch.cat(load_dir('./gender/female/*'))

A_text = load_caption('./gender/science/', model, device).detach().to("cpu")
B_text = load_caption('./gender/liberal-arts/', model, device).detach().to("cpu")

test = Test(X_image, Y_image, A_text, B_text)
out = test.run()

df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_text.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

09/07 08:48:17 PM: Computing cosine similarities...
09/07 08:48:17 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
09/07 08:48:17 PM: Computing pval...
09/07 08:48:17 PM: Using non-parametric test
09/07 08:48:17 PM: Drawing 9999 samples (and biasing by 1)
09/07 08:48:17 PM: pval: 0.3603
09/07 08:48:17 PM: computing effect size...
09/07 08:48:17 PM: esize: 0.0805895


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Science,Arts,40,21,0.3603,0.08059


### Swapped image-caption bias

In [33]:
# X_image = torch.cat(load_dir('./gender/male/*'))
# Y_image = torch.cat(load_dir('./gender/female/*'))

# A_text = load_caption('./gender/science/', model, device).detach().to("cpu")
# B_text = load_caption('./gender/liberal-arts/', model, device).detach().to("cpu")

X_image = torch.cat(load_dir('./gender/science/*'))
Y_image = torch.cat(load_dir('./gender/liberal-arts/*'))

A_text = load_caption('./gender/male/', model, device).detach().to("cpu")
B_text = load_caption('./gender/female/', model, device).detach().to("cpu")

test = Test(X_image, Y_image, A_text, B_text)
out = test.run()

df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_text.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

09/07 08:39:00 PM: Computing cosine similarities...
09/07 08:39:00 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
09/07 08:39:00 PM: Computing pval...
09/07 08:39:00 PM: Using non-parametric test
09/07 08:39:00 PM: Drawing 9999 samples (and biasing by 1)
09/07 08:39:00 PM: pval: 0.0007
09/07 08:39:00 PM: computing effect size...
09/07 08:39:00 PM: esize: 0.932319


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Science,Arts,21,40,0.0007,0.932319


### FarconVAE Neutralized image-caption bias

In [38]:
import torch
# X_image = torch.cat(load_dir('./gender/male/*'))
# Y_image = torch.cat(load_dir('./gender/female/*'))

# A_text = load_caption('./gender/science/', model, device).detach().to("cpu")
# B_text = load_caption('./gender/liberal-arts/', model, device).detach().to("cpu")

X_image = torch.cat(load_dir('./gender/science/*'))
Y_image = torch.cat(load_dir('./gender/liberal-arts/*'))

# A_text = load_caption('./gender/male/', model, device).detach().to("cpu")
# B_text = load_caption('./gender/female/', model, device).detach().to("cpu")
A_text = torch.load('/data1/bubble3jh/farcon/git_FarconVAE/neut_embeddings/imgs/gender/male_ViT-L14.pt').detach().to("cpu")
B_text = torch.load('/data1/bubble3jh/farcon/git_FarconVAE/neut_embeddings/imgs/gender/female_ViT-L14.pt').detach().to("cpu")


test = Test(X_image, Y_image, A_text, B_text)
out = test.run()

df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_text.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

09/07 08:44:13 PM: Computing cosine similarities...
09/07 08:44:13 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
09/07 08:44:14 PM: Computing pval...
09/07 08:44:14 PM: Using non-parametric test
09/07 08:44:14 PM: Drawing 9999 samples (and biasing by 1)
09/07 08:44:14 PM: pval: 0.2102
09/07 08:44:14 PM: computing effect size...
09/07 08:44:14 PM: esize: 0.256716


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Science,Arts,21,21,0.2102,0.256716


In [15]:
X_image = torch.cat(load_dir('./ieat/data/experiments/gender/male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/gender/female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/engineering/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/care/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Engineering', 'B': 'Caregiving', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

RuntimeError: torch.cat(): expected a non-empty list of Tensors

In [8]:
X_image = torch.cat(load_dir('./gender/male/*'))
Y_image = torch.cat(load_dir('./gender/female/*'))
A_image = torch.cat(load_dir('./gender/career/*'))
B_image = torch.cat(load_dir('./gender/family/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Career', 'B': 'Family', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

NameError: name 'preprocess' is not defined

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/science/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/liberal-arts/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/engineering/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/care/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Engineering', 'B': 'Caregiving', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/career/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/family/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Career', 'B': 'Family', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/arab-muslim/other-people/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/arab-muslim/arab-muslim/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/valence/pleasant/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/valence/unpleasant/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'other-people', 'Y': 'arab-muslim', 'A': 'pleasant', 'B': 'unpleasant', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/valence/pleasant/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/valence/unpleasant/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'european-american', 'Y': 'african-american', 'A': 'pleasant', 'B': 'unpleasant', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/asian/european-american/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/asian/asian-american/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/valence/pleasant/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/valence/unpleasant/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'european-american', 'Y': 'asian-american', 'A': 'pleasant', 'B': 'unpleasant', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/weapon/white/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/weapon/black/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/valence/pleasant/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/valence/unpleasant/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'white', 'Y': 'black', 'A': 'pleasant', 'B': 'unpleasant', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/weapon/white/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/weapon/black/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/weapon/tool-modern/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/weapon/weapon-modern/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'white', 'Y': 'black', 'A': 'tool', 'B': 'weapon', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/weapon/white/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/weapon/black/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/weapon/tool/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/weapon/weapon/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'white', 'Y': 'black', 'A': 'tool', 'B': 'weapon', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/skin-tone/light/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/skin-tone/dark/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/weapon/tool/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/weapon/weapon/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'white', 'Y': 'black', 'A': 'tool', 'B': 'weapon', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

In [None]:
X_image = torch.cat(load_dir('./ieat/data/experiments/sexuality/straight/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/sexuality/gay/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/valence/pleasant/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/valence/unpleasant/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'straight', 'Y': 'gay', 'A': 'pleasant', 'B': 'unpleasant', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df