In [21]:
import datasets
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel
import torch
import numpy as np
import os

In [22]:
# Fix PyTorch's global RNG seed so any torch-side randomness is repeatable across runs.
torch.manual_seed(42)

<torch._C.Generator at 0x2079a7fe5b0>

In [23]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [24]:
# Load the pretrained CLIP checkpoint and place both networks on the selected device.
# `model` is the full CLIP model used for image-text scoring; `vision_model` is loaded
# from the same checkpoint and is only referenced by the disabled encoder-embedding code.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
vision_model = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
# The processor builds the model inputs from raw text/images; it is not moved to `device`.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

In [25]:
# Age buckets used as zero-shot classes. The position of a bucket in this list is the
# class index stored in "zs_age_clip" (presumably matching the dataset's own age label
# order -- verify against the dataset card).
age_brackets = [
    "0-2",
    "3-9",
    "10-19",
    "20-29",
    "30-39",
    "40-49",
    "50-59",
    "60-69",
    "more than 70",
]
# One text prompt per bucket, in the same order as age_brackets.
age_texts = ["A person in the {} age group".format(bracket) for bracket in age_brackets]

In [26]:
def get_embedding_and_zs(sample):
    """Annotate a batch with CLIP zero-shot age predictions and image embeddings.

    Written for ``Dataset.map(..., batched=True)``: ``sample`` is a batch dict whose
    ``"image"`` entry holds the images of the batch. Relies on the notebook-level
    ``processor``, ``model``, ``device`` and ``age_texts``.

    Args:
        sample: Batch dict containing an ``"image"`` column.

    Returns:
        The same dict with two columns added:
        ``"zs_age_clip"`` -- for each image, the index (into ``age_texts``) of the
        prompt with the highest image-text similarity, as a list of ints;
        ``"proj_embeddings"`` -- the projected image embeddings (``image_embeds``)
        as a NumPy array on the host, one row per image (512 values per row for
        this checkpoint).
    """
    inputs = processor(text=age_texts, images=sample["image"], return_tensors="pt", padding=True).to(device)

    # Pure inference: disabling autograd avoids building a graph for every batch and
    # keeps the tensors handed back to `map` detached.
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image holds the image-text similarity scores; argmax over the text
    # axis picks the best-matching age prompt for each image.
    age_pred = outputs.logits_per_image.argmax(dim=1)
    sample["zs_age_clip"] = age_pred.tolist()

    # Store the projected embeddings on the host so nothing device-resident is
    # returned to the dataset writer.
    sample["proj_embeddings"] = outputs.image_embeds.cpu().numpy()

    # Disabled: embeddings from the vision encoder output (not the projection) - dim 768
    # inputs = processor(images=sample["image"], return_tensors="pt", padding=True).to(device)
    # with torch.no_grad():
    #     outputs = vision_model(**inputs)
    # sample["vm_embeddings"] = outputs.pooler_output.cpu().numpy()

    # Disabled: shift the labels by 2 when classes 0 and 1 have been filtered out
    # sample["age"] = [age - 2 for age in sample["age"]]

    return sample

In [27]:
# Build the training set in one pipeline: load, shuffle, then annotate with CLIP outputs.
train_ds = (
    datasets.load_dataset('HuggingFaceM4/FairFace', '1.25', split='train', verification_mode="no_checks")
    # Fixed seed so the row order (and every array saved from it) is repeatable.
    .shuffle(seed=42)
    # .filter(lambda sample: sample["age"] not in {0, 1})  # Filter out the first two classes
    # Batches of 32 images each gain the "zs_age_clip" and "proj_embeddings" columns.
    .map(get_embedding_and_zs, batched=True, batch_size=32)
)

In [28]:
# Number of rows in the processed training set
len(train_ds)

86744

In [29]:
# List the columns of the first row; the map step adds "zs_age_clip" and "proj_embeddings".
train_ds[0].keys()

dict_keys(['image', 'age', 'gender', 'race', 'service_test', 'zs_age_clip', 'proj_embeddings'])

In [30]:
# Shape check: each row stores a single flat projection-embedding vector
np.array(train_ds[0]["proj_embeddings"]).shape

(512,)

In [31]:
# np.array(train_ds[0]["vm_embeddings"]).shape

In [32]:
# Make a dir for embeddings and zs.
# exist_ok=True makes this cell safe to re-run and removes the gap between
# "check that it exists" and "create it".
os.makedirs("embeddings", exist_ok=True)

In [33]:
# Save projection embeddings
# The whole "proj_embeddings" column is converted to one NumPy array and written to disk;
# rows follow the shuffled order of train_ds.
np.save("embeddings/train_project_embeddings.npy", np.array(train_ds["proj_embeddings"]))

In [34]:
# Save encoder embeddings
# np.save("embeddings/train_encoder_embeddings.npy", np.array(train_ds["vm_embeddings"]))

In [35]:
# Save age ground truth
# (the dataset's own "age" column, read from the same train_ds as the saved embeddings)
np.save("embeddings/train_age.npy", np.array(train_ds["age"]))

In [36]:
# Save CLIP zero-shot age predictions (the "zs_age_clip" column), not the ground truth
np.save("embeddings/train_zs_age.npy", np.array(train_ds["zs_age_clip"]))

In [37]:
# Load the validation split and carve it into a validation set and a test set
N_VALID = 6_000  # rows kept for validation; the remainder of the split becomes the test set
test_valid_ds = datasets.load_dataset('HuggingFaceM4/FairFace', '1.25', split="validation", verification_mode="no_checks")
# Shuffle before slicing so both parts are random samples; the fixed seed keeps the split repeatable
test_valid_ds = test_valid_ds.shuffle(seed=42)# .filter(lambda sample: sample["age"] not in {0, 1}) # Filter out the first two classes
# select() takes any iterable of indices, so a range is passed directly instead of a list
valid_ds = test_valid_ds.select(range(N_VALID))
test_ds = test_valid_ds.select(range(N_VALID, len(test_valid_ds)))
# Add the "zs_age_clip" and "proj_embeddings" columns to both parts
valid_ds = valid_ds.map(get_embedding_and_zs, batched=True, batch_size=16)
test_ds = test_ds.map(get_embedding_and_zs, batched=True, batch_size=16)

Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

In [None]:
# Validation set: write each derived column to its own .npy file.
# Every array is read from valid_ds, so row i refers to the same image in all files.
val_exports = {
    # Projection embeddings
    "embeddings/val_project_embeddings.npy": "proj_embeddings",
    # Encoder embeddings (disabled: the "vm_embeddings" column is not computed)
    # "embeddings/val_encoder_embeddings.npy": "vm_embeddings",
    # Age ground truth
    "embeddings/val_age.npy": "age",
    # CLIP zero-shot age predictions
    "embeddings/val_zs_age.npy": "zs_age_clip",
}
for npy_path, column_name in val_exports.items():
    np.save(npy_path, np.array(valid_ds[column_name]))

In [39]:
# Test set: write each derived column to its own .npy file.
# Every array is read from test_ds, so row i refers to the same image in all files.
test_exports = {
    # Projection embeddings
    "embeddings/test_project_embeddings.npy": "proj_embeddings",
    # Encoder embeddings (disabled: the "vm_embeddings" column is not computed)
    # "embeddings/test_encoder_embeddings.npy": "vm_embeddings",
    # Age ground truth
    "embeddings/test_age.npy": "age",
    # CLIP zero-shot age predictions
    "embeddings/test_zs_age.npy": "zs_age_clip",
}
for npy_path, column_name in test_exports.items():
    np.save(npy_path, np.array(test_ds[column_name]))