In [4]:
import os
import pandas as pd
import torch
import clip
from PIL import Image
import shutil

# -------------------------------
# Submission identity: both values are embedded in every output file name,
# and GROUP_NAME also names the ZIP archive.
GROUP_NAME = "ssn-coder"
APPROACH_NAME = "CLIP"

# Input locations, all resolved relative to the dataset root.
BASE_DIR = r"C:/Users/priya/OneDrive/Desktop/mediaval"
IMAGE_DIR = os.path.join(BASE_DIR, "newsimages_25_v1.1", "newsimages")
CSV_PATH = os.path.join(BASE_DIR, "subset.csv")

# Folder that collects the resized images before they are zipped.
# It is created up front; an already existing folder is reused as-is.
OUTPUT_DIR = os.path.join(BASE_DIR, "RET_CLIP_SMALLS")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------------------------------
# Load data
# The CSV is read with header=None, so the column names are assigned
# positionally. Rows lacking an image id or a title are dropped, and missing
# tags are replaced by empty strings.
df = (
    pd.read_csv(
        CSV_PATH,
        header=None,
        names=[
            "article_id",
            "article_url",
            "article_title",
            "article_tags",
            "image_id",
            "image_url",
        ],
    )
    .dropna(subset=["image_id", "article_title"])
    .assign(article_tags=lambda frame: frame["article_tags"].fillna(""))
)

# -------------------------------
# Load CLIP (if you still want embeddings for later use)
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# NOTE(review): model and preprocess are not referenced again in the visible
# part of this cell — confirm they are needed before paying the load cost.
model, preprocess = clip.load("ViT-B/32", device=device)

# -------------------------------
# Save images in CSV order
# Every image listed in the CSV is re-encoded as a fixed-size RGB PNG named
# "<image_id>_<GROUP_NAME>_<APPROACH_NAME>.png" inside OUTPUT_DIR.
TARGET_SIZE = (460, 260)  # (width, height) given to resize(); aspect ratio is not preserved

# Only the image_id column is read, so iterate it directly instead of building
# a full row Series per record with iterrows().
for image_id in df["image_id"]:
    img_path = os.path.join(IMAGE_DIR, f"{image_id}.jpg")
    if not os.path.exists(img_path):
        print(f"⚠️ Missing: {img_path}")
        continue
    save_name = f"{image_id}_{GROUP_NAME}_{APPROACH_NAME}.png"
    try:
        # The context manager releases the source file handle even when
        # decoding or conversion fails part-way through.
        with Image.open(img_path) as source:
            resized = source.convert("RGB").resize(TARGET_SIZE)
        resized.save(os.path.join(OUTPUT_DIR, save_name))
    except Exception as e:
        # Deliberately broad: one unreadable image must not abort the batch.
        print(f"❌ Error processing {img_path}: {e}")
    else:
        print(f"✅ Saved {save_name}")

# -------------------------------
# Create ZIP
# Writes BASE_DIR/<GROUP_NAME>.zip with the images stored under a top-level
# folder named after OUTPUT_DIR. Using OUTPUT_DIR itself as root_dir would put
# the PNGs at the archive root, contradicting the message printed below.
output_parent, output_folder = os.path.split(OUTPUT_DIR)
# make_archive returns the path of the archive it wrote; report that value
# rather than a separately computed one.
zip_path = shutil.make_archive(
    os.path.join(BASE_DIR, GROUP_NAME),
    "zip",
    root_dir=output_parent,
    base_dir=output_folder,
)
print(f"✅ Created ZIP at {zip_path} with folder {output_folder} inside")


✅ Saved 5977476D371A08807B962A128E753011_ssn-coder_CLIP.png
✅ Saved 6C30AEF73293D3F34AE4CAFD0F29E342_ssn-coder_CLIP.png
✅ Saved 1F8169E5565E063C9C1C1066FDCFBCAA_ssn-coder_CLIP.png
✅ Saved EF0F14170DD0061E4A62B4EF138AC5D1_ssn-coder_CLIP.png
✅ Saved 07F4480EF56DE13D65EA0EA0ADD9C5B4_ssn-coder_CLIP.png
✅ Saved C1F5526A302529FEA89E9554A0F0AB02_ssn-coder_CLIP.png
✅ Saved 1952F1BE97776D9D21304544BE0C204D_ssn-coder_CLIP.png
✅ Saved 77E3512C450F4192D3820DDBBF07A18A_ssn-coder_CLIP.png
✅ Saved 9960790450CFF4CCD6407DA32A37B6C0_ssn-coder_CLIP.png
✅ Saved 08A3010B02B9A89302C23241B8224A5F_ssn-coder_CLIP.png
✅ Saved E0FD4A80607B2F5028248B6E43E40B7B_ssn-coder_CLIP.png
✅ Saved 7845611BA0D98562CA08CABBDD319287_ssn-coder_CLIP.png
✅ Saved 34ED4943C90E5F5B5DC0E292FF4B8B72_ssn-coder_CLIP.png
✅ Saved B7E731340306F4D34738B734D145658C_ssn-coder_CLIP.png
✅ Saved 859AD1D83C651652147607FAFA1F6761_ssn-coder_CLIP.png
✅ Saved CD298FD790187F1E7C7E602C060C5878_ssn-coder_CLIP.png
✅ Saved 50621AB5D22E039ADFC3B4D52C9C635B