In [1]:
import os
import pandas as pd
import torch
import clip
from PIL import Image
import shutil

# -------------------------------
# Submission identifiers; both are embedded in every exported file name,
# and GROUP_NAME also names the final archive.
GROUP_NAME = "ELITE_CODERS"
APPROACH_NAME = "CLIP"

# Input locations, all rooted at the local dataset folder.
BASE_DIR = r"C:/Users/priya/OneDrive/Desktop/mediaval"
IMAGE_DIR = os.path.join(BASE_DIR, "newsimages_25_v1.1", "newsimages")
CSV_PATH = os.path.join(BASE_DIR, "subset.csv")

# Folder that receives the resized PNGs and is later archived.
# Created up front; an already-existing folder is not an error.
OUTPUT_DIR = os.path.join(BASE_DIR, "RET_CLIP_SMALLS")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------------------------------
# Load data
# The CSV is read without a header row, so the column names are supplied
# here. Rows lacking an image ID or an article title are discarded, and
# missing tags are replaced by an empty string.
df = (
    pd.read_csv(
        CSV_PATH,
        header=None,
        names=[
            "article_id",
            "article_url",
            "article_title",
            "article_tags",
            "image_id",
            "image_url",
        ],
    )
    .dropna(subset=["image_id", "article_title"])
    .copy()
)
df["article_tags"] = df["article_tags"].fillna("")

# -------------------------------
# Load the CLIP ViT-B/32 model together with its preprocessing transform,
# on the GPU when CUDA is available and on the CPU otherwise.
# NOTE: neither `model` nor `preprocess` is used by the export steps visible
# in this cell; they are kept for later embedding work.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# -------------------------------
# Save images in CSV order
def _id_to_str(value):
    """Return *value* rendered as an ID string suitable for a file name.

    pandas parses an integer column that contains missing values as float64,
    so an ID such as 76 can arrive here as 76.0; formatting that directly
    would yield "76.0.jpg". Whole-number floats are therefore rendered as
    integers. Every other value is passed through ``str`` unchanged.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


# Only the two ID columns are needed, so iterate them directly instead of
# building a full row Series per record with iterrows().
for article_id, image_id in zip(df["article_id"], df["image_id"]):
    img_path = os.path.join(IMAGE_DIR, f"{_id_to_str(image_id)}.jpg")
    if not os.path.exists(img_path):
        print(f"⚠️ Missing: {img_path}")
        continue
    save_name = f"{_id_to_str(article_id)}_{GROUP_NAME}_{APPROACH_NAME}.png"
    try:
        # The context manager closes the source file handle deterministically,
        # even when conversion or resizing raises.
        with Image.open(img_path) as source:
            # Every output is forced to RGB at a fixed 460x260 size; the
            # aspect ratio of the source image is not preserved.
            resized = source.convert("RGB").resize((460, 260))
        resized.save(os.path.join(OUTPUT_DIR, save_name))
        print(f"✅ Saved {save_name}")
    except Exception as e:
        # Best-effort batch export: report the failure and move on to the
        # next image rather than aborting the whole run.
        print(f"❌ Error processing {img_path}: {e}")

# -------------------------------
# Create ZIP
# root_dir and base_dir are passed separately so that archive entries are
# stored as "RET_CLIP_SMALLS/<file>". Passing OUTPUT_DIR as root_dir alone
# would place the files at the archive root with no enclosing folder,
# contradicting the message printed below.
zip_path = shutil.make_archive(
    os.path.join(BASE_DIR, GROUP_NAME),
    'zip',
    root_dir=os.path.dirname(OUTPUT_DIR),
    base_dir=os.path.basename(OUTPUT_DIR),
)
# make_archive returns the path of the file it actually wrote, so report that
# rather than a separately reconstructed name.
print(f"✅ Created ZIP at {zip_path} with folder RET_CLIP_SMALLS inside")


✅ Saved 76_ELITE_CODERS_CLIP.png
✅ Saved 238_ELITE_CODERS_CLIP.png
✅ Saved 346_ELITE_CODERS_CLIP.png
✅ Saved 493_ELITE_CODERS_CLIP.png
✅ Saved 865_ELITE_CODERS_CLIP.png
✅ Saved 1004_ELITE_CODERS_CLIP.png
✅ Saved 1336_ELITE_CODERS_CLIP.png
✅ Saved 1817_ELITE_CODERS_CLIP.png
✅ Saved 2307_ELITE_CODERS_CLIP.png
✅ Saved 2362_ELITE_CODERS_CLIP.png
✅ Saved 2384_ELITE_CODERS_CLIP.png
✅ Saved 2482_ELITE_CODERS_CLIP.png
✅ Saved 2509_ELITE_CODERS_CLIP.png
✅ Saved 2563_ELITE_CODERS_CLIP.png
✅ Saved 2715_ELITE_CODERS_CLIP.png
✅ Saved 2844_ELITE_CODERS_CLIP.png
✅ Saved 4409_ELITE_CODERS_CLIP.png
✅ Saved 4609_ELITE_CODERS_CLIP.png
✅ Saved 5291_ELITE_CODERS_CLIP.png
✅ Saved 5796_ELITE_CODERS_CLIP.png
✅ Saved 5800_ELITE_CODERS_CLIP.png
✅ Saved 5923_ELITE_CODERS_CLIP.png
✅ Saved 6074_ELITE_CODERS_CLIP.png
✅ Saved 6172_ELITE_CODERS_CLIP.png
✅ Saved 6469_ELITE_CODERS_CLIP.png
✅ Saved 7304_ELITE_CODERS_CLIP.png
✅ Saved 7374_ELITE_CODERS_CLIP.png
✅ Saved 8192_ELITE_CODERS_CLIP.png
✅ Saved 8399_ELITE_CODERS_