In [2]:
CLARIFAI_PAT = "288150d2774a422ea6163e6ce152e85f"
API_BASE = "https://api.clarifai.com/v2/models"
TEXT_MODEL_ID = "multilingual-multimodal-clip-embed"
IMAGE_MODEL_ID = "image-embedder-clip"


In [3]:
import requests

# Test Clarifai PAT
headers = {
    "Authorization": f"Key {CLARIFAI_PAT}"
}

response = requests.get(API_BASE, headers=headers)

print("Status code:", response.status_code)
print("First 300 characters of response:")
print(response.text[:300])


Status code: 200
First 300 characters of response:
{"status":{"code":10000,"description":"Ok","req_id":"6211ed362feb451fbc9cd0c481080076"},"models":[{"id":"Devstral-Small-2505-unsloth-bnb-4bit","name":"Devstral-Small-2505-unsloth-bnb-4bit","created_at":"2025-06-27T08:33:31.434848Z","modified_at":"2025-09-04T19:39:24.780811Z","app_id":"completion","m


In [4]:
import requests

headers = {"Authorization": f"Key {CLARIFAI_PAT}"}
resp = requests.get(API_BASE, headers=headers)

models = resp.json().get("models", [])
for m in models:
    if "clip" in m["id"].lower():   # look for models with 'clip' in their ID
        print(m["id"])

image-embedder-clip
CLIP-ViT-L-14-DataComp-XL-s13B-b90K
general-english-image-caption-clip
multilingual-multimodal-clip-embed


In [5]:
import json, requests

prompt = "neon cyberpunk street at night, rain"

url = f"{API_BASE}/{TEXT_MODEL_ID}/outputs"
headers = {
    "Authorization": f"Key {CLARIFAI_PAT}",
    "Content-Type": "application/json"
}
body = {
    "user_app_id": {"user_id": "clarifai", "app_id": "main"},   # <-- key addition
    "inputs": [{"data": {"text": {"raw": prompt}}}]
}

resp = requests.post(url, headers=headers, data=json.dumps(body))
j = resp.json()
print("HTTP:", resp.status_code, "| Clarifai:", j.get("status", {}).get("description"))

# Try to pull the vector safely
outs = j.get("outputs", [])
if outs and "embeddings" in outs[0].get("data", {}):
    vec = outs[0]["data"]["embeddings"][0]["vector"]
    print("Embedding length:", len(vec))
    print("First 5 numbers:", vec[:5])
else:
    print("Details:", j)   # show exact reason if it still fails


HTTP: 503 | Clarifai: Model is deploying, this can take several minutes depending on size of model and configuration of your compute. Please try sending your request again in a few seconds with backoff.
Details: {'status': {'code': 21351, 'description': 'Model is deploying, this can take several minutes depending on size of model and configuration of your compute. Please try sending your request again in a few seconds with backoff.', 'details': 'Model is currently deploying or scaling up.', 'req_id': '30af2a9e608e424aa7e8863db26e64de'}, 'outputs': []}


In [None]:
img_url = "https://images.unsplash.com/photo-1508057198894-247b23fe5ade"

url = f"{API_BASE}/{IMAGE_MODEL_ID}/outputs"
headers = {"Authorization": f"Key {CLARIFAI_PAT}", "Content-Type": "application/json"}
body = {
    "user_app_id": {"user_id": "clarifai", "app_id": "main"},  # public model owner
    "inputs": [{"data": {"image": {"url": img_url}}}]
}

resp = requests.post(url, headers=headers, data=json.dumps(body))
j = resp.json()

outs = j.get("outputs", [])
if outs and "embeddings" in outs[0].get("data", {}):
    vec_img = outs[0]["data"]["embeddings"][0]["vector"]
    print("Image embedding length:", len(vec_img))  # e.g., 512
    print("First 5 numbers:", vec_img[:5])
else:
    print("Clarifai said:", j.get("status", {}), "\nDetails:", j)


Image embedding length: 512
First 5 numbers: [0.0013861761, 0.004325815, 0.032025, -0.0024806731, -0.01681301]


In [None]:
import numpy as np

# Convert to numpy arrays
t = np.array(vec)
i = np.array(vec_img)

# Cosine similarity = dot product / (lengths of vectors)
cos_sim = np.dot(t, i) / (np.linalg.norm(t) * np.linalg.norm(i))

print("Cosine similarity:", cos_sim)


Cosine similarity: 0.14864956524004505


In [None]:
import json, requests, numpy as np

urls = [
  "https://images.unsplash.com/photo-1508057198894-247b23fe5ade",
  "https://images.unsplash.com/photo-1519681393784-d120267933ba",
]

def embed_image(u):
    url = f"{API_BASE}/{IMAGE_MODEL_ID}/outputs"
    headers = {"Authorization": f"Key {CLARIFAI_PAT}", "Content-Type": "application/json"}
    body = {"user_app_id": {"user_id":"clarifai","app_id":"main"}, "inputs":[{"data":{"image":{"url":u}}}]}
    j = requests.post(url, headers=headers, data=json.dumps(body)).json()
    return j["outputs"][0]["data"]["embeddings"][0]["vector"]

t = np.array(vec)  # your text embedding from earlier
scores = []
for u in urls:
    i = np.array(embed_image(u))
    cos = np.dot(t, i) / (np.linalg.norm(t) * np.linalg.norm(i))
    scores.append((u, float(cos)))

# show ranked results
scores.sort(key=lambda x: x[1], reverse=True)
for u, s in scores:
    print(f"{s:.3f}  {u}")

print("\nBEST MATCH:", scores[0][0])


0.198  https://images.unsplash.com/photo-1519681393784-d120267933ba
0.149  https://images.unsplash.com/photo-1508057198894-247b23fe5ade

BEST MATCH: https://images.unsplash.com/photo-1519681393784-d120267933ba


In [1]:
urls = [
    "https://simplycanvasart.co.uk/cdn/shop/products/210132575_0_1200x1200.jpg?v=1641348073"
    "https://www.meisterdrucke.us/kunstwerke/1000px/Xavier_Sager_-_Fat_cat_wearing_monocle_alongside_various_less_favoured_pussies_-_(MeisterDrucke-400023).jpg"
    "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/i/b5cc603f-d7b7-406d-8ff0-a546ffacf6af/d5cig6x-43f61fa0-c360-4e22-90b7-2e531c87f37d.png/v1/fill/w_965,h_462,q_80,strp/humanoid_cat_with_tophat_and_monocle_by_rainbowcrash1234_d5cig6x-fullview.jpg",
    "https://www.arthipo.com/image/cache/catalog/artists-painters/l/louis-wain/lswn87-ht4535-louis-wain-cat-wearing-a-monocle-600x315w.webp",
    "https://i.pinimg.com/736x/8a/58/00/8a5800d00d6e8882b6aa94b4d0c3c23c.jpg",
]

In [6]:
import json, requests, numpy as np

prompt = "a cat wearing a top hat and a monocle"

url = f"{API_BASE}/{TEXT_MODEL_ID}/outputs"
headers = {"Authorization": f"Key {CLARIFAI_PAT}", "Content-Type": "application/json"}
body = {
    "user_app_id": {"user_id": "clarifai", "app_id": "main"},
    "inputs": [{"data": {"text": {"raw": prompt}}}]
}

j = requests.post(url, headers=headers, data=json.dumps(body)).json()
text_vec = np.array(j["outputs"][0]["data"]["embeddings"][0]["vector"])

print("Text embedding length:", len(text_vec))
print("First 5 numbers:", text_vec[:5])


Text embedding length: 512
First 5 numbers: [ 0.01978545  0.00077702 -0.06900492 -0.04456287 -0.01495147]


In [7]:
def embed_image(url_img):
    url = f"{API_BASE}/{IMAGE_MODEL_ID}/outputs"
    headers = {"Authorization": f"Key {CLARIFAI_PAT}", "Content-Type": "application/json"}
    body = {
        "user_app_id": {"user_id": "clarifai", "app_id": "main"},
        "inputs": [{"data": {"image": {"url": url_img}}}]
    }
    j = requests.post(url, headers=headers, data=json.dumps(body)).json()
    try:
        return np.array(j["outputs"][0]["data"]["embeddings"][0]["vector"])
    except:
        print("Failed on:", url_img)
        return None

# Loop through your list of URLs and get embeddings
img_vecs = []
for u in urls:
    v = embed_image(u)
    if v is not None:
        img_vecs.append((u, v))

print(f"Got embeddings for {len(img_vecs)} images")


Got embeddings for 3 images


In [8]:
good = [u for u, _ in img_vecs]
bad = [u for u in urls if u not in good]
print("Succeeded:", *good, sep="\n- ")
print("\nFailed:", *bad, sep="\n- ")


Succeeded:
- https://simplycanvasart.co.uk/cdn/shop/products/210132575_0_1200x1200.jpg?v=1641348073https://www.meisterdrucke.us/kunstwerke/1000px/Xavier_Sager_-_Fat_cat_wearing_monocle_alongside_various_less_favoured_pussies_-_(MeisterDrucke-400023).jpghttps://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/i/b5cc603f-d7b7-406d-8ff0-a546ffacf6af/d5cig6x-43f61fa0-c360-4e22-90b7-2e531c87f37d.png/v1/fill/w_965,h_462,q_80,strp/humanoid_cat_with_tophat_and_monocle_by_rainbowcrash1234_d5cig6x-fullview.jpg
- https://www.arthipo.com/image/cache/catalog/artists-painters/l/louis-wain/lswn87-ht4535-louis-wain-cat-wearing-a-monocle-600x315w.webp
- https://i.pinimg.com/736x/8a/58/00/8a5800d00d6e8882b6aa94b4d0c3c23c.jpg

Failed:


In [10]:
import numpy as np

# 1) put your text embedding into a NumPy array
t = np.array(text_vec)

# 2) cosine similarity for each (url, image_vector)
scores = []
for u, v in img_vecs:
    v = np.array(v)
    cos = np.dot(t, v) / (np.linalg.norm(t) * np.linalg.norm(v))
    scores.append((u, float(cos)))

# 3) sort best → worst and print
scores.sort(key=lambda x: x[1], reverse=True)

if not scores:
    print("No image embeddings to rank.")
else:
    for i, (u, s) in enumerate(scores, 1):
        print(f"{i}. {s:.3f}  {u}")
    print("\nBEST MATCH:", scores[0][0])

1. 0.272  https://simplycanvasart.co.uk/cdn/shop/products/210132575_0_1200x1200.jpg?v=1641348073https://www.meisterdrucke.us/kunstwerke/1000px/Xavier_Sager_-_Fat_cat_wearing_monocle_alongside_various_less_favoured_pussies_-_(MeisterDrucke-400023).jpghttps://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/i/b5cc603f-d7b7-406d-8ff0-a546ffacf6af/d5cig6x-43f61fa0-c360-4e22-90b7-2e531c87f37d.png/v1/fill/w_965,h_462,q_80,strp/humanoid_cat_with_tophat_and_monocle_by_rainbowcrash1234_d5cig6x-fullview.jpg
2. 0.267  https://i.pinimg.com/736x/8a/58/00/8a5800d00d6e8882b6aa94b4d0c3c23c.jpg
3. 0.254  https://www.arthipo.com/image/cache/catalog/artists-painters/l/louis-wain/lswn87-ht4535-louis-wain-cat-wearing-a-monocle-600x315w.webp

BEST MATCH: https://simplycanvasart.co.uk/cdn/shop/products/210132575_0_1200x1200.jpg?v=1641348073https://www.meisterdrucke.us/kunstwerke/1000px/Xavier_Sager_-_Fat_cat_wearing_monocle_alongside_various_less_favoured_pussies_-_(MeisterDrucke-400023).jpghttps://images-wix

In [None]:
# 1) Upload the JSON file from your computer
from google.colab import files, output
uploaded = files.upload()

# 2) Load JSON
import json, io
fname = next(iter(uploaded.keys()))
data = json.load(io.BytesIO(uploaded[fname]))

urls = []

def grab_links(node):
    if isinstance(node, dict):
        if isinstance(node.get("imageLinks"), list):
            for u in node["imageLinks"]:
                if isinstance(u, str): urls.append(u)
        if isinstance(node.get("image"), str):
            urls.append(node["image"])
        for v in node.values():
            grab_links(v)
    elif isinstance(node, list):
        for v in node:
            grab_links(v)

grab_links(data)

# 4) Clean up: dedupe, only http(s), keep a small set for quick testing
urls = [u for u in dict.fromkeys(urls) if u.startswith(("http://","https://"))]
print("Found image URLs:", len(urls))
for i, u in enumerate(urls[:10], 1):
    print(f"{i}. {u}")
test_urls = urls[:12]


Saving example.json to example (1).json
Found image URLs: 145
1. https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/e819d60e-3b57-49e7-b82a-8cb09afd0ea6/d34fpvw-1bf01763-2554-4339-b01f-cbb1b4580339.jpg/v1/fill/w_762,h_1049,q_70,strp/monocle_cat_by_tonedeafjeff_d34fpvw-pre.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTIzOCIsInBhdGgiOiJcL2ZcL2U4MTlkNjBlLTNiNTctNDllNy1iODJhLThjYjA5YWZkMGVhNlwvZDM0ZnB2dy0xYmYwMTc2My0yNTU0LTQzMzktYjAxZi1jYmIxYjQ1ODAzMzkuanBnIiwid2lkdGgiOiI8PTkwMCJ9XV0sImF1ZCI6WyJ1cm46c2VydmljZTppbWFnZS5vcGVyYXRpb25zIl19.BcR_3o1LtVh2U14GB5DOej-mQTlbiNczHv7uoqExTVE
2. https://st.deviantart.net/eclipse/popups/hover-component/2024/deviation-2x.png
3. https://st.deviantart.net/eclipse/popups/hover-component/2024/dreamup-2x.png
4. https://st.deviantart.net/eclipse/popups/hover-component/2024/status-update-2x.png
