In [4]:
import torch
import open_clip
import numpy as np
import pandas as pd
from PIL import Image

In [3]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# create_model_and_transforms returns (model, train transform, inference
# transform). Embedding extraction must use the inference transform (the third
# value): the train transform applies random-crop augmentation, which would
# make the embedding of the same image change from run to run.
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32",
    pretrained="laion2b_s34b_b79k"
)
model = model.to(device)
# Switch to inference mode; as the cell's last expression this also displays
# the module summary.
model.eval()

CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (patch_dropout): Identity()
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): ModuleList(
        (0-11): 12 x ResidualAttentionBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ls_1): Identity()
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): GELU(approximate='none')
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ls_2): Identity()
        )
      )
    )
    (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine

In [None]:
def embed_single_image(path: str) -> np.ndarray:
    """Return the L2-normalised CLIP embedding of a single image file.

    Uses the notebook-level ``preprocess``, ``model`` and ``device``.

    Args:
        path: Location of an image file that PIL can open.

    Returns:
        The image embedding as a float32 NumPy array with the batch axis
        removed, scaled to unit L2 norm.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been decoded, rather than leaving it open until garbage collection.
    with Image.open(path) as raw:
        img = raw.convert("RGB")

    # preprocess produces one image tensor; add a leading batch axis of size 1.
    img_tensor = preprocess(img).unsqueeze(0).to(device)

    with torch.no_grad():
        emb = model.encode_image(img_tensor).float()
        emb = emb / emb.norm(dim=-1, keepdim=True)

    # Drop the batch axis again and hand back a host-side array.
    return emb.squeeze(0).cpu().numpy()

In [None]:
from typing import List

def embed_many_images(paths: List[str], batch_size: int = 16) -> np.ndarray:
    """Embed image files with CLIP in mini-batches.

    Uses the notebook-level ``preprocess``, ``model`` and ``device``.

    Args:
        paths: Image file locations; embeddings come back in this order.
        batch_size: Number of images sent through the model per forward pass.

    Returns:
        2-D float32 array with one L2-normalised embedding per row, row ``i``
        belonging to ``paths[i]``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``paths`` is empty.
    """
    # Fail early with a clear message instead of an opaque error from
    # range() or np.vstack() further down.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    if len(paths) == 0:
        raise ValueError("paths is empty; there is nothing to embed")

    all_vecs = []

    for start in range(0, len(paths), batch_size):
        imgs = []
        for p in paths[start:start + batch_size]:
            # Close each file right after decoding so a long path list does not
            # accumulate open file handles.
            with Image.open(p) as raw:
                imgs.append(preprocess(raw.convert("RGB")))

        batch_tensor = torch.stack(imgs, dim=0).to(device)  # (B, 3, H, W)

        with torch.no_grad():
            emb = model.encode_image(batch_tensor).float()  # (B, D)
            emb = emb / emb.norm(dim=-1, keepdim=True)      # normalize per row

        all_vecs.append(emb.cpu().numpy())

    return np.vstack(all_vecs)


In [None]:

# Load images.csv and embed every file listed in its "path" column;
# row i of E corresponds to paths[i].
df = pd.read_csv(
    "/Users/alakarthika/Documents/Personal_Projects/WeakHero/assets/files/images.csv"
)
paths = df["path"].to_list()
E = embed_many_images(paths)




In [None]:
import os

# Create the embeddings folder if it is missing, then store the image
# embedding matrix E inside it.
os.makedirs(
    name="/Users/alakarthika/Documents/Personal_Projects/WeakHero/assets/files/embeddings",
    exist_ok=True,
)
np.save(
    file="/Users/alakarthika/Documents/Personal_Projects/WeakHero/assets/files/embeddings/images.npy",
    arr=E,
)

In [15]:
import json

In [13]:
# Tokenizer looked up by the same architecture name ("ViT-B-32") that was used
# when the model was created.
tokenizer = open_clip.get_tokenizer("ViT-B-32")

In [10]:
def encode_text_list(texts, model, tokenizer, device="cpu"):
    """Encode texts into unit-length embeddings.

    Args:
        texts: Input handed straight to ``tokenizer``.
        model: Object exposing an ``encode_text`` method.
        tokenizer: Callable that turns ``texts`` into a tensor of tokens.
        device: Device the tokens are moved to before encoding.

    Returns:
        float32 NumPy array of embeddings, each scaled to unit L2 norm along
        the last axis.
    """
    with torch.no_grad():
        token_ids = tokenizer(texts)
        token_ids = token_ids.to(device)
        features = model.encode_text(token_ids)
        features = features.float()
        # Divide every embedding by its own length.
        lengths = features.norm(dim=-1, keepdim=True)
        unit_features = features / lengths
    on_host = unit_features.cpu()
    return on_host.numpy()

In [11]:
def embed_characters(df):
    """Build one averaged, unit-length text embedding per character row.

    Every non-null cell in a column whose name starts with ``prompt_`` is
    encoded with ``encode_text_list`` (using the notebook-level ``model``,
    ``tokenizer`` and ``device``). The prompt embeddings of a row are averaged
    and the mean is re-normalised to unit length.

    Args:
        df: Table with an ``id`` column and any number of ``prompt_*`` columns.

    Returns:
        Dict mapping each row's ``id`` to its embedding as a list of floats.
        An empty table yields an empty dict.

    Raises:
        ValueError: If a row has no non-null prompt. Averaging zero prompt
            embeddings would otherwise produce a NaN vector.
    """
    # Which columns hold prompts does not depend on the row, so decide once.
    prompt_cols = [
        c for c in df.columns if isinstance(c, str) and c.startswith("prompt_")
    ]

    out = {}
    for _, row in df.iterrows():
        prompts = [str(row[c]) for c in prompt_cols if pd.notna(row[c])]
        if not prompts:
            raise ValueError(
                f"character {row['id']!r} has no non-null prompt_* value to embed"
            )
        embs = encode_text_list(prompts, model, tokenizer, device)
        avg = embs.mean(axis=0)
        # The mean of unit vectors is generally shorter than 1; rescale it.
        avg /= np.linalg.norm(avg)
        out[row["id"]] = avg.tolist()
    return out

In [16]:
# Embed every character described in characters.csv and write the
# id -> embedding mapping out as JSON.
chars = pd.read_csv(
    "/Users/alakarthika/Documents/Personal_Projects/WeakHero/characters.csv"
)
char_embeds = embed_characters(chars)

with open(
    "/Users/alakarthika/Documents/Personal_Projects/WeakHero/assets/files/embeddings/char_embeds.json",
    "w",
) as f:
    json.dump(obj=char_embeds, fp=f)


In [17]:
# Encode every entry of the "label" column in explain_labels.csv and write the
# label -> embedding mapping out as JSON.
labels = pd.read_csv(
    "/Users/alakarthika/Documents/Personal_Projects/WeakHero/explain_labels.csv"
)
label_vecs = encode_text_list(
    labels["label"].to_list(),
    model,
    tokenizer,
    device,
)

# Pair each label with its embedding row, converted to a plain list for JSON.
label_embeds = dict(zip(labels["label"], (vec.tolist() for vec in label_vecs)))
with open(
    "/Users/alakarthika/Documents/Personal_Projects/WeakHero/assets/files/embeddings/label_embeds.json",
    "w",
) as f:
    json.dump(obj=label_embeds, fp=f)