In [None]:
import html
from io import BytesIO
from urllib.parse import quote

import pandas as pd
import torch
from google.cloud import storage
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# 1. Load CLIP model
# The model weights and the matching pre-processor are fetched from the same
# checkpoint id; the weights are requested via `use_safetensors=True`.
clip_checkpoint = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(clip_checkpoint, use_safetensors=True)
processor = CLIPProcessor.from_pretrained(clip_checkpoint)

# 2. Init GCS client + bucket
# `bucket_name` stays a separate variable because the public URLs built later
# in the script embed it.
bucket_name = "swipe-bucket"
client = storage.Client()
bucket = client.bucket(bucket_name)

# 3. Candidate labels
# Maps each CLIP text prompt to the short type name stored in the results.
# The mapping is explicit so the stored names are exact: later filtering keys
# on the "abstract" name, which stripping words out of the prompt with
# str.replace() does not produce ("an abstract photo" -> "abstract photo").
label_types = {
    "a photo of nature": "nature",
    "a photo of a city": "city",
    "an abstract photo": "abstract",
}
labels = list(label_types)

results = []

# 4. Loop through ALL images in bucket
image_suffixes = (".jpg", ".jpeg", ".png")
for blob in bucket.list_blobs():
    # Compare case-insensitively so names such as "IMG_0001.JPG" are kept.
    if not blob.name.lower().endswith(image_suffixes):
        continue

    img_bytes = blob.download_as_bytes()
    # Release the decoder handle as soon as the RGB copy has been made.
    with Image.open(BytesIO(img_bytes)) as raw_image:
        image = raw_image.convert("RGB")

    # Run through CLIP. This is inference only, so skip autograd bookkeeping.
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits_per_image.softmax(dim=1)[0]

    # Get top prediction (argmax directly on the tensor; no list round-trip).
    top_idx = int(probs.argmax())
    predicted_type = label_types[labels[top_idx]]

    # Build public URL. Object names may contain spaces, quotes, '#', '?' etc.,
    # so percent-encode the path (keeping "/" as the folder separator).
    object_path = quote(blob.name, safe="/")
    image_url = f"https://storage.googleapis.com/{bucket_name}/{object_path}"

    # The table is later written with escape=False, so the visible link text
    # must be HTML-escaped here to keep object names from injecting markup.
    link_text = html.escape(blob.name)

    results.append({
        "image_id": f"<a href='{image_url}' target='_blank'>{link_text}</a>",
        "predicted_type": predicted_type
    })

# 5. Build DataFrame
# Naming the columns up front keeps them present even when no image was
# classified, so the filter below does not raise KeyError on an empty frame.
df = pd.DataFrame(results, columns=["image_id", "predicted_type"])

# 6. Drop all abstract images
# Match on the prefix rather than exact equality: the short name derived from
# the prompt "an abstract photo" may still carry a trailing " photo", in which
# case an `!= "abstract"` comparison would never drop anything.
is_abstract = df["predicted_type"].str.startswith("abstract")
filtered_df = df[~is_abstract]

# 7. Save HTML with clickable links
# escape=False is required so the <a> tags in "image_id" render as links.
filtered_df.to_html("filtered_images.html", escape=False, index=False)

# 8. Upload HTML back to GCS
out_blob = bucket.blob("outputs/filtered_images.html")
out_blob.upload_from_filename("filtered_images.html")

print("Clickable filtered results uploaded to:")
print(f"https://storage.googleapis.com/{bucket_name}/outputs/filtered_images.html")


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
