In [1]:
import os
import json
import torch
import requests
import numpy as np
from PIL import Image
from io import BytesIO
from transformers import SiglipProcessor, SiglipVisionModel

# === Load the SigLIP image processor and vision encoder ===
# Local directory holding the pretrained SigLIP files.
model_path = r"D:\images retrieve from db\siglip_model"
processor = SiglipProcessor.from_pretrained(model_path)
# eval() switches the module to inference mode and returns the module itself,
# so chaining it leaves `model` bound to the loaded network.
model = SiglipVisionModel.from_pretrained(model_path).eval()

# === Image -> embedding vector ===
def extract_embedding(image: Image.Image):
    """Return a mean-pooled SigLIP embedding for ``image`` as a NumPy array.

    The image is preprocessed with the module-level ``processor``, run through
    the module-level ``model`` with gradient tracking disabled, and the
    ``last_hidden_state`` is averaged over dimension 1. Size-1 dimensions are
    squeezed away before converting to NumPy.

    NOTE(review): dimension 1 is presumably the patch/token axis, giving a 1-D
    vector for a single image -- confirm against the model's output shape.
    """
    batch = processor(images=image, return_tensors="pt")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        hidden_states = model(**batch).last_hidden_state

    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().numpy()

# === Fetch all items from paginated API ===
def fetch_all_items(api_url):
    """Collect the items from every page of a paginated JSON endpoint.

    Pages are requested one at a time, starting at 1, through a ``page`` query
    parameter. Each response body is read as JSON; the items are taken from
    ``body["data"]["items"]`` and the page count from ``body["totalPages"]``.
    Fetching stops at the first page with no items, or once ``page`` reaches
    ``totalPages`` (treated as 1 when missing or null).

    Args:
        api_url: Base URL of the items endpoint.

    Returns:
        A list with the items of all fetched pages, in page order.

    Raises:
        requests.HTTPError: If a page responds with an HTTP error status.
        requests.RequestException: On connection problems or timeouts.
    """
    all_items = []
    page = 1

    while True:
        print(f"üîÑ Fetching page {page}...")
        # Let requests build the query string so an api_url that already has
        # query arguments still works; the timeout prevents a stalled server
        # from hanging the script indefinitely.
        response = requests.get(api_url, params={"page": page}, timeout=10)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()

        # `or` fallbacks also cover keys that are present but null.
        items = (data.get("data") or {}).get("items") or []
        if not items:
            break

        all_items.extend(items)

        total_pages = data.get("totalPages") or 1
        if page >= total_pages:
            break
        page += 1

    print(f"‚úÖ Total items fetched: {len(all_items)}")
    return all_items

# === Main logic ===
# Download every item's image, embed it, and store the vector as
# embeddings/<category>/<item id>.npy.
api_url = "https://clothes-server-production.up.railway.app/api/v1/items"
items = fetch_all_items(api_url)

os.makedirs("embeddings", exist_ok=True)


def _path_component(value):
    """Return ``value`` as text usable as a single file or directory name."""
    # Category and id come from a remote API: neutralise path separators so a
    # value cannot point outside the embeddings directory.
    text = str(value).replace("/", "_").replace("\\", "_")
    # "." and ".." would resolve to the current / parent directory.
    return "_" if text in {".", ".."} else text


for item in items:
    item_id = item.get("_id")
    img_url = item.get("img")
    # `or ""` covers an explicit null categoryField, which would otherwise
    # raise on .strip() outside the try block and abort the whole run.
    category = str(item.get("categoryField") or "").strip()

    # Without an id every such item would be written to the same "None.npy".
    if not item_id or not img_url or not category:
        print(f"[DEBUG] item_id: {item.get('_id')}, img: '{img_url}', categoryField: '{item.get('categoryField')}'")
        continue
    try:
        # Load image
        img_response = requests.get(img_url, timeout=10)
        img_response.raise_for_status()
        # convert() returns a new image, so the decoded source can be closed.
        with Image.open(BytesIO(img_response.content)) as raw_image:
            image = raw_image.convert("RGB")

        # Extract embedding
        embedding = extract_embedding(image)

        # Save in correct folder
        folder = os.path.join("embeddings", _path_component(category))
        os.makedirs(folder, exist_ok=True)
        save_path = os.path.join(folder, f"{_path_component(item_id)}.npy")
        np.save(save_path, embedding)

        print(f"‚úÖ Saved: {save_path}")
    except Exception as e:
        # Best-effort batch job: report the failing item and keep going.
        print(f"‚ùå Failed for item '{item.get('name')}' (ID: {item_id}): {e}")


  from .autonotebook import tqdm as notebook_tqdm


üîÑ Fetching page 1...
üîÑ Fetching page 2...
‚úÖ Total items fetched: 19
‚úÖ Saved: embeddings\Jeans\6807f88fee022d1fb845eb20.npy
‚úÖ Saved: embeddings\Jeans\6807f8c7ee022d1fb845eb2c.npy
‚úÖ Saved: embeddings\Jeans\6807f924ee022d1fb845eb37.npy
‚úÖ Saved: embeddings\Jeans\6807f957ee022d1fb845eb3a.npy
‚úÖ Saved: embeddings\Jeans\6807f9e2ee022d1fb845eb54.npy
‚úÖ Saved: embeddings\Jeans\6807fa3bee022d1fb845eb63.npy
‚úÖ Saved: embeddings\Jeans\6807fb25ee022d1fb845eb7e.npy
‚úÖ Saved: embeddings\Jeans\6807fb47ee022d1fb845eb81.npy
‚úÖ Saved: embeddings\Jeans\6807fbbbee022d1fb845eb84.npy
‚úÖ Saved: embeddings\Sweatshirts\6807fe8cee022d1fb845ebd4.npy
‚úÖ Saved: embeddings\Sweatshirts\6807ff02ee022d1fb845ebda.npy
‚úÖ Saved: embeddings\Sweatshirts\6807ff2aee022d1fb845ebdd.npy
‚úÖ Saved: embeddings\Sweatshirts\6807ff4dee022d1fb845ebe0.npy
‚úÖ Saved: embeddings\Tshirts\6807ffefee022d1fb845ebeb.npy
‚úÖ Saved: embeddings\Tshirts\6808004bee022d1fb845ec06.npy
‚úÖ Saved: embeddings\Tshirts\68080083ee0