In [2]:
# 加载数据集
from PIL import Image
import json
from pathlib import Path
from torchvision import transforms

dataset_dir = '/home/humw/Datasets/new-CelebA-HQ'

def load_data(data_dir, image_size=512, resample=2):
    import numpy as np
    def image_to_numpy(image):
        return np.array(image).astype(np.uint8)
    # more robust loading to avoid loaing non-image files
    images = [] 
    for i in list(Path(data_dir).iterdir()):
        if not i.suffix in [".jpg", ".png", ".jpeg"]:
            continue
        else:
            images.append(image_to_numpy(Image.open(i).convert("RGB")))
    images = [Image.fromarray(i).resize((image_size, image_size), resample) for i in images]
    images = np.stack(images)
    # from B x H x W x C to B x C x H x W
    images = torch.from_numpy(images).permute(0, 3, 1, 2).float()
    assert images.shape[-1] == images.shape[-2]
    return images

train_aug = [
        transforms.Resize(224, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
    ]
tensorize_and_normalize = [
    transforms.Normalize([0.5*255]*3,[0.5*255]*3),
]
all_trans = train_aug + tensorize_and_normalize
all_trans = transforms.Compose(all_trans)
    
# 加载模型
from transformers.models.clip.modeling_clip import CLIPVisionModelWithProjection
import torch
import os
import torch.nn.functional as F

device = "cuda:5"
torch_dtype = torch.float16
pretrained_model_name_or_path = '/home/humw/Pretrains/clip-vit-large-patch14'
model = CLIPVisionModelWithProjection.from_pretrained(pretrained_model_name_or_path).to(device, dtype=torch_dtype).eval()
model.to(torch_dtype)
id_embeds_dict = {}
# 获取图像编码
person_id_list = sorted(os.listdir(dataset_dir))
for person_id in person_id_list:
    person_id_dir = os.path.join(dataset_dir, person_id, "set_B")
    clean_data = load_data(person_id_dir, 224, 2)
    original_data = clean_data.to(device).requires_grad_(False).to(dtype=torch_dtype)
    tran_original_data = all_trans(original_data)
    ori_embeds = model(tran_original_data, output_hidden_states=True).hidden_states[-2]
    id_embeds_dict[person_id] = ori_embeds
# 计算两两之间的编码余弦损失距离，距离越大越好
id_map_id = dict()
id_map_loss = dict()
for person_id_i in person_id_list:
    id_map_id[person_id_i] = -1
    id_map_loss[person_id_i] = -2
    for person_id_j in person_id_list:
        tmp = -F.cosine_similarity(id_embeds_dict[person_id_i], id_embeds_dict[person_id_j], -1).mean() # 越近越小，最小-1，越远越大，最大1
        if tmp > id_map_loss[person_id_i]:
            id_map_id[person_id_i] = person_id_j
            id_map_loss[person_id_i] = tmp

Some weights of the model checkpoint at /home/humw/Pretrains/clip-vit-large-patch14 were not used when initializing CLIPVisionModelWithProjection: ['text_model.encoder.layers.6.layer_norm1.weight', 'text_model.encoder.layers.6.self_attn.k_proj.bias', 'text_model.encoder.layers.11.mlp.fc2.bias', 'text_model.encoder.layers.4.self_attn.out_proj.bias', 'text_model.encoder.layers.10.mlp.fc1.bias', 'text_model.encoder.layers.9.self_attn.k_proj.weight', 'text_model.encoder.layers.5.mlp.fc1.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.7.self_attn.out_proj.weight', 'text_model.encoder.layers.6.self_attn.q_proj.weight', 'text_model.encoder.layers.2.self_attn.q_proj.bias', 'text_model.encoder.layers.3.layer_norm1.bias', 'text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.weight', 'text_model.encoder.layers.4.mlp.fc2.weight', 'text_model.encoder.layers.8.self_attn.k_proj.bias', 'text_model.encoder.layers.1

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 5 has a total capacity of 23.69 GiB of which 8.81 MiB is free. Process 1758892 has 23.68 GiB memory in use. Of the allocated memory 21.22 GiB is allocated by PyTorch, and 2.14 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# 保存json文件
dict = id_map_id
json.dump(dict,open('/data1/humw/Codes/FaceOff/max_clip_cosine_distance_map_new-CelebA-HQ.json','w'), indent=4)