In [106]:
from diff_exp.data.attribute_celeba_dataset import Dataset, default_args
from omegaconf import OmegaConf
from tqdm import tqdm

In [107]:
def get_dataset(split):
    # Blond hair
    args = OmegaConf.create(default_args())
    args.target_attr = "Blond_Hair"
    args.data_dir = "../data"
    args.split = split
    
    blond_dataset = Dataset(**args)
    blond_idxs = [idx for idx, (x,y) in enumerate(tqdm(blond_dataset)) if int(y) == 1]
    print("Num blond:", len(blond_idxs))

    # Black hair
    args = OmegaConf.create(default_args())
    args.target_attr = "Black_Hair"
    args.data_dir = "../data"
    args.split = split
    black_hair_dataset = Dataset(**args)
    
    black_hair_idxs = [idx for idx, (x, y) in enumerate(tqdm(black_hair_dataset)) if int(y) == 1]
    print("Num black:", len(black_hair_idxs))

    both = set(blond_idxs).intersection(set(black_hair_idxs))

    out = set()
    out.update(blond_idxs)
    out.update(black_hair_idxs)
    out = out - both

    out = list(out)
    out.sort()

    return out

In [108]:
args = OmegaConf.create(default_args())
args.target_attr = "Blond_Hair"
args.data_dir = "../data"
args.split = "train"
blond_dataset = Dataset(**args)


In [109]:
train_set = get_dataset("train")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 162770/162770 [00:04<00:00, 33667.96it/s]


Num blond: 24267


100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 162770/162770 [00:04<00:00, 33578.45it/s]

Num black: 38906





In [111]:
valid_set = get_dataset("valid")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 19867/19867 [00:01<00:00, 10977.69it/s]


Num blond: 3056


100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 19867/19867 [00:00<00:00, 33607.28it/s]

Num black: 4144





In [112]:
with open("blond_black_hair_train.txt", "w") as f:
    lines = [str(x) for x in train_set]
    lines = "\n".join(lines)
    f.write(lines)

In [113]:
with open("blond_black_hair_valid.txt", "w") as f:
    lines = [str(x) for x in valid_set]
    lines = "\n".join(lines)
    f.write(lines)

## Filter with eval classifier

In [39]:
import dlib
import cv2
import yaml
import numpy as np
from PIL import Image
from diff_exp.data.attribute_celeba_dataset import default_args, Dataset, _CELEBA_ATTRS
from omegaconf import OmegaConf
from diff_exp.transforms_utils import get_transform
import yaml
from tqdm import tqdm
from diff_exp.utils import TransformDataset, tensor2pil
from torchvision import transforms as tr
import torch as th
from einops import rearrange
from tqdm import trange, tqdm
from matplotlib import pyplot as plt

import sys
import torch
import facer

In [42]:
from diff_exp.data.attribute_celeba_dataset import default_args, Dataset, _CELEBA_ATTRS

args = default_args()
args = OmegaConf.create(args)
# train
args.data_dir = "../data"
args.target_attr = "Black_Hair"
args.filter_path = "blond_black_hair_train.txt"

print(OmegaConf.to_yaml(args))
dataset = Dataset(**args)

target_attr: Black_Hair
data_dir: ../data
split: train
filter_path: blond_black_hair_train.txt



In [41]:
from diff_exp.data.attribute_celeba_dataset import default_args, Dataset, _CELEBA_ATTRS

args = default_args()
args = OmegaConf.create(args)
# valid
args.data_dir = "../data"
args.target_attr = "Black_Hair"
args.filter_path = "blond_black_hair_valid.txt"
args.split = "valid"

print(OmegaConf.to_yaml(args))
dataset = Dataset(**args)

target_attr: Black_Hair
data_dir: ../data
split: valid
filter_path: blond_black_hair_valid.txt



In [43]:
transform_str = """
- - to_tensor
- - center_crop
  - size: 178
- - resize
  - size: 64
- - normalize
  - mean: 0.5, 0.5, 0.5
  - std: 0.5, 0.5, 0.5
""".strip()
transform = yaml.load(transform_str, yaml.Loader)
transform = OmegaConf.create(transform)
transform = get_transform(transform)

dataset = Dataset(**args)
dataset = TransformDataset(dataset, transform)

In [45]:
# calibrated_1.245.pt
import torch as th
from diff_exp.models.efficientnet import get_model, default_args
import yaml
from diff_exp.transforms_utils import get_transform
from diff_exp.utils import TransformDataset

model_args = default_args()
model_args = OmegaConf.create(model_args)
print(model_args)
ckpt_path = "/home/anon/artifacts/test_classifiers/blond_black_hair_64x64_cls_calibrated.pt"
model = get_model(model_args)
ckpt = th.load(ckpt_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

{'size': 's', 'num_classes': 2}


<All keys matched successfully>

In [27]:
dataset = Dataset(
    target_attr="Black_Hair",
    data_dir="../data",
    split="train",
    filter_path="/home/anon/Documents/DiffusionExtrapolation-code/diff_exp/data/blond_black_hair_extreme_ensemble/train.txt"
)
transform_str = """
- - to_tensor
- - center_crop
  - size: 178
- - resize
  - size: 64
- - normalize
  - mean: 0.5, 0.5, 0.5
  - std: 0.5, 0.5, 0.5
""".strip()
transform = yaml.safe_load(transform_str)
transform = OmegaConf.create(transform)
transform = get_transform(transform)

dataset = TransformDataset(dataset, transform)

In [20]:
import torch

In [46]:
all_preds = []
device = "cuda:0"

loader = th.utils.data.DataLoader(dataset, pin_memory=True, batch_size=64, shuffle=False, drop_last=False, num_workers=10)
model.eval()
model = model.to(device)
for x, y in tqdm(loader):
    x = x.to(device)
    with torch.no_grad():
        out = model(x).cpu()
    all_preds.append(out)

all_preds = th.cat(all_preds).softmax(-1)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 988/988 [00:07<00:00, 125.85it/s]


## Send through farl for detecting attributes (unsuccessful)

In [50]:
from diff_exp.data.attribute_celeba_dataset import Dataset, default_args as data_args, _CELEBA_ATTRS
from diff_exp.utils import mkdirs4file, TransformDataset
from diff_exp.transforms_utils import get_transform
from omegaconf import OmegaConf
from tqdm import tqdm
import os.path as osp
from PIL import Image
from diff_exp.models.efficientnet import default_args as model_args, get_model
import torch as th
from torch.utils.data import DataLoader
import numpy as np
import yaml
from matplotlib import pyplot as plt
import torchvision

import sys
import torch
import facer
from einops import rearrange
import torchvision.transforms as tr

In [51]:
def get_attributes(loader, face_detector, face_attr):
    # Age attribute
    predicted_attributes = []
    all_indices = []
    all_labels = []
    #    labels = face_attr.labels
    idx_start = 0
    for batch, y in tqdm(loader, desc="loading"):
        batch = batch.to(device)
        indices = th.arange(idx_start, idx_start + len(batch))
        idx_start += len(batch)
        
        with torch.inference_mode():
            faces = face_detector(batch)
    
        with torch.inference_mode():
            faces = face_attr(batch, faces)
    
    
        face_attrs = faces["attrs"].cpu()
        indices = indices[faces["image_ids"].cpu()]
        y = y[faces["image_ids"].cpu()]
        
        predicted_attributes.append(face_attrs)
        all_indices.append(indices)
        all_labels.append(y)

    predicted_attributes = th.cat(predicted_attributes)
    all_indices = th.cat(all_indices)
    all_labels = th.cat(all_labels)

    return predicted_attributes, all_indices, all_labels
        

In [52]:
ds_args = data_args()
ds_args = OmegaConf.create(ds_args)
ds_args.data_dir = "../data"
ds_args.filter_path = "blond_black_hair_train.txt"
ds_args.target_attr = "Black_Hair"
dataset = Dataset(**ds_args)

In [53]:
n_blond_hair = sum(1 for x, y in dataset if int(y) == 0)
n_black_hair = sum(1 for x, y in dataset if int(y) == 1)

In [54]:
print("Blond hair:", n_blond_hair)
print("Black hair:", n_black_hair)
print("Ratio blond hair:", n_blond_hair / (n_blond_hair + n_black_hair))

Blond hair: 24265
Black hair: 38904
Ratio blond hair: 0.3841282907755386


In [55]:
def transform(x):
    x = np.array(x)
    x = th.tensor(x)
    x = rearrange(x, "h w c -> c h w")
    return x

transformed_dataset = TransformDataset(dataset, transform)
len(dataset)
loader = th.utils.data.DataLoader(transformed_dataset, batch_size=64, shuffle=False, drop_last=False, pin_memory=True)

In [56]:
device = "cuda:1"
face_detector = facer.face_detector("retinaface/mobilenet", device=device)
face_attr = facer.face_attr("farl/celeba/224", device=device)

In [57]:
celeba_attributes, celeba_indices, celeba_labels = get_attributes(loader, face_detector, face_attr)

loading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 988/988 [02:36<00:00,  6.30it/s]


In [58]:
young_idx = _CELEBA_ATTRS.index("Young")
smiling_idx = _CELEBA_ATTRS.index("Smiling")
bald_idx = _CELEBA_ATTRS.index("Bald")
blond_hair_idx = _CELEBA_ATTRS.index("Blond_Hair")
black_hair_idx = _CELEBA_ATTRS.index("Black_Hair")

In [59]:
blond_idxs = []
for idx, pred, label in zip(celeba_indices, celeba_attributes, celeba_labels):
    if pred[blond_hair_idx].item() > 0.9 and label.item() == 0:
        blond_idxs.append(int(idx))

In [60]:
black_hair_idxs = []
for idx, pred, label in zip(celeba_indices, celeba_attributes, celeba_labels):
    if pred[black_hair_idx].item() > 0.5 and label.item() == 1:
        black_hair_idxs.append(int(idx))

In [61]:
print("Blond hair (FaRL):", len(blond_idxs))
print("Black hair (FaRL):", len(black_hair_idxs))

Blond hair (FaRL): 13049
Black hair (FaRL): 1681


In [63]:
len(black_hair_idxs)

1681

In [83]:
low_prob_black_idxs = [idx for idx in black_hair_idxs if celeba_attributes[idx][black_hair_idx].item() < 0.001 ]

In [87]:
low_prob_blond_idxs = [idx for idx in blond_idxs if celeba_attributes[idx][blond_hair_idx].item() < 0.5 ]

In [88]:
len(low_prob_black_idxs)

223

In [89]:
len(low_prob_blond_idxs)

8200

In [103]:
faulty_blond_idxs = list(range(8))
faulty_black_hair_idxs = list(range(8))

In [104]:
for idx in faulty_black_hair_idxs:
    idx = low_prob_black_idxs[idx]
    img = dataset[idx][0]
    save_path = f"faulty_black_hair/{idx}.png"
    mkdirs4file(save_path)
    img.save(save_path)
    

In [105]:
for idx in faulty_blond_idxs:
    idx = low_prob_blond_idxs[idx]
    img = dataset[idx][0]
    save_path = f"faulty_blond_hair/{idx}.png"
    mkdirs4file(save_path)
    img.save(save_path)
    

In [None]:
for idx in range(100):
    _ = idx
    idx = low_prob_black_idxs[idx]
    img = dataset[idx][0]
    display(dataset[idx][0])
    print(celeba_attributes[idx][black_hair_idx].item())
    print(_)
    