In [None]:
# Core utilities + data handling
import os
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
import zipfile
from io import BytesIO
import ast
from collections import Counter

# Visualization
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from PIL import Image
from IPython.display import display

# Downloading / I/O
import requests
import gdown

# Models / embeddings
import clip
import torch
import torch.nn as nn
from torchvision import transforms

# Side length (in pixels) shared by the resize and center-crop transforms below
size_ = 512
resize, center_crop = transforms.Resize(size_), transforms.CenterCrop(size_)

# Run on CUDA when torch reports it as available; otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

#### Load Data

In [None]:
# Download the dataset archive from Google Drive.
# The download is skipped when the archive already exists locally, so re-running
# the notebook does not fetch it again (delete the file to force a fresh download).
file_id = '1KJot5VeUSCUg11IuDYqmlS024MTqhUnQ'
url = f'https://drive.google.com/uc?id={file_id}'

output = 'Visual-Interestingness-Decoded-Dataset.zip'

if Path(output).exists():
    print(f"Archive already present, skipping download: {output}")
else:
    gdown.download(url, output, quiet=False)

In [None]:
# Open the downloaded ZIP archive (a file on disk) in read mode
with zipfile.ZipFile(output, "r") as z:
    # Extract every member into the current directory. `extract` returns the path
    # it actually created, which tells us exactly which files came from the archive.
    extracted_paths = [Path(z.extract(member, Path("./"))) for member in z.infolist()]

# Keep only the CSV files that were extracted from the archive.
# Searching the whole working directory instead would also pick up unrelated CSVs,
# which would shift the positions the loading cell relies on.
csv_files = sorted(
    {path for path in extracted_paths if path.is_file() and path.name.endswith(".csv")}
)

# Print how many CSV files were found
print("CSVs found:", len(csv_files))

# Print the paths of the CSV files (for inspection)
for p in csv_files:
    print(p)

In [None]:
# Load the three tables. They are selected by their position in the sorted
# `csv_files` list, so check the count first and the columns afterwards: a missing
# or extra CSV would otherwise silently assign the wrong table to a name.
if len(csv_files) < 3:
    raise ValueError(f"Expected at least 3 CSV files, found {len(csv_files)}: {csv_files}")

df_images = pd.read_csv(csv_files[0])
df_relative_int = pd.read_csv(csv_files[1])
df_single_int = pd.read_csv(csv_files[2])

# Columns that the rest of the notebook reads from each table
for _table_name, _table, _required in (
    ("df_images", df_images, {"ImageID", "ImageURL", "ImageDescription"}),
    ("df_relative_int", df_relative_int, {"ImageID1", "ImageID2", "Human_Answers", "GPT_Answers"}),
    ("df_single_int", df_single_int, {"ImageID", "Human_Answers", "Human_Explanations"}),
):
    _missing = _required - set(_table.columns)
    if _missing:
        raise ValueError(
            f"{_table_name} is missing expected columns {sorted(_missing)}; "
            f"check the order of csv_files: {csv_files}"
        )

In [None]:
# Preview the first rows of the image metadata table to check that it loaded as expected
df_images.head()

#### Example Image

In [None]:
# Select a specific image and retrieve its metadata
test_ID = 51654698266
matches = df_images[df_images["ImageID"] == test_ID]
if matches.empty:
    # Fail with a clear message instead of an IndexError from .iloc[0]
    raise ValueError(f"ImageID {test_ID} not found in df_images")
row = matches.iloc[0]

print("Image Description:", row.ImageDescription)

# Download and load the image from its URL.
# The timeout prevents the cell from hanging forever; raise_for_status surfaces
# HTTP errors here rather than as a confusing image-decoding error below.
response = requests.get(row.ImageURL, timeout=30)
response.raise_for_status()
url_img = Image.open(BytesIO(response.content)).convert("RGB")

In [None]:
# Show the resized, center-cropped example image in a single titled panel
fig, axs = plt.subplots(figsize=(5, 5))
axs.imshow(center_crop(resize(url_img)))
axs.set_title('URL Image')
axs.set_axis_off()
fig.tight_layout()
plt.show()

#### Single Image Interestingness

In [None]:
# Keep the single-image annotation row(s) whose ImageID matches the test image
row = df_single_int.loc[df_single_int["ImageID"] == test_ID]

# Utility to ensure values are proper Python lists
# (handles Series, already-parsed lists, stringified lists, and missing values)
def to_list(x):
    """Coerce an annotation cell into a Python list.

    Parameters
    ----------
    x : pd.Series, list, str, or scalar
        A Series is reduced to its first element (an empty Series yields ``[]``).
        A list is returned unchanged. A string is parsed as a Python literal.

    Returns
    -------
    list
        * ``[]`` for an empty Series, ``None`` or a float NaN (missing CSV cell).
        * The parsed list for a stringified list; a parsed tuple/set is converted
          to a list; any other parsed literal is wrapped in a one-element list.
        * ``[x]`` for a string that is not a valid Python literal, and for any
          other scalar.
    """
    if isinstance(x, pd.Series):
        if x.empty:
            # Nothing selected: report "no values" instead of failing on .iloc[0]
            return []
        x = x.iloc[0]
    if isinstance(x, list):
        return x
    # `x != x` is only true for NaN, which is how pandas reads an empty CSV cell
    if x is None or (isinstance(x, float) and x != x):
        return []
    if isinstance(x, str):
        try:
            parsed = ast.literal_eval(x)
        except (ValueError, SyntaxError):
            # Plain text such as "first" is not a literal; keep it as a single item
            return [x]
        if isinstance(parsed, list):
            return parsed
        if isinstance(parsed, (tuple, set)):
            return list(parsed)
        # A scalar literal (e.g. "3" or "'first'") must still come back as a list
        return [parsed]
    return [x]

# Print one "answer - 'explanation'" line per annotation of a single source
def pretty_print(name, answers, explanations):
    """Print a header for ``name`` followed by its answer/explanation pairs.

    Both inputs are normalised with ``to_list`` and paired positionally; pairing
    stops at the end of the shorter of the two lists.
    """
    pairs = zip(to_list(answers), to_list(explanations))

    print(f"\n{name}:")
    for answer, explanation in pairs:
        print(f"  {answer} - '{explanation}'")

# Show the answers and explanations of the human annotators and of each model in turn
annotation_sources = [
    ("Human", "Human_Answers", "Human_Explanations"),
    ("GPT-4o", "GPT_Answers", "GPT_Explanations"),
    ("DeepSeek-VL2", "DeepSeek_Answers", "DeepSeek_Explanations"),
    ("Llama 3.2", "Llama_Answers", "Llama_Explanations"),
]
for label, answers_col, explanations_col in annotation_sources:
    pretty_print(label, row[answers_col], row[explanations_col])


#### Relative Image Interestingness

In [None]:
# Compute a majority decision ("first" or "second") from a list of answers
def majority_vote(answers):
    """Return the label chosen by the majority of ``answers``.

    Parameters
    ----------
    answers : list, str, or pd.Series
        Raw answers; normalised to a list with ``to_list``.

    Returns
    -------
    str or None
        ``"first"`` or ``"second"``, whichever occurs more often. Labels other
        than these two are ignored. ``None`` is returned when no valid label is
        present or when both labels occur equally often: a tie has no majority,
        and picking a side would depend only on the order of the answers.
    """
    answers = to_list(answers)          # Ensure input is a proper list

    # Count only the two valid labels
    counts = Counter(a for a in answers if a in ("first", "second"))
    if not counts:
        return None

    ranked = counts.most_common()
    # most_common() orders equal counts by first occurrence, so detect ties explicitly
    if len(ranked) > 1 and ranked[0][1] == ranked[1][1]:
        return None
    return ranked[0][0]

In [None]:
# Keep every relative-annotation row in which the test image is one of the two images
contains_test_image = (
    (df_relative_int["ImageID1"] == test_ID)
    | (df_relative_int["ImageID2"] == test_ID)
)
rows = df_relative_int[contains_test_image]

# Attach the URL of each image of the pair: left-join the image table on the
# pair's ID column, rename the joined URL column, and drop the duplicated key
for id_col, url_col in (("ImageID1", "ImageURL1"), ("ImageID2", "ImageURL2")):
    rows = (
        rows
        .merge(
            df_images[["ImageID", "ImageURL"]],
            left_on=id_col,
            right_on="ImageID",
            how="left",
        )
        .rename(columns={"ImageURL": url_col})
        .drop(columns=["ImageID"])
    )

# Majority preference ("first" / "second") of the human annotators and of GPT
for majority_col, answers_col in (
    ("Human_Majority", "Human_Answers"),
    ("GPT_Majority", "GPT_Answers"),
):
    rows[majority_col] = rows[answers_col].apply(majority_vote)

In [None]:
def _load_cropped(url):
    """Download the image at `url` and return it resized and center-cropped."""
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of failing in Image.open
    return center_crop(resize(Image.open(BytesIO(resp.content))))


# One subplot row per image pair (left: first image, right: second image).
# The grid is sized from the data so that any number of pairs can be shown.
n_pairs = len(rows)

if n_pairs == 0:
    print("No image pairs to display.")
else:
    # squeeze=False keeps `ax` two-dimensional even when there is a single pair;
    # 2.4 inches per row reproduces the 5 x 12 figure used for five pairs
    fig, ax = plt.subplots(n_pairs, 2, figsize=(5, 2.4 * n_pairs), squeeze=False)

    for i in range(n_pairs):
        pair = rows.iloc[i]

        # Load, resize, and center-crop both images in the pair
        img1 = _load_cropped(pair["ImageURL1"])
        img2 = _load_cropped(pair["ImageURL2"])

        # Display the image pair
        ax[i, 0].imshow(img1, cmap="gray")
        ax[i, 0].axis("off")
        ax[i, 1].imshow(img2, cmap="gray")
        ax[i, 1].axis("off")

        # Highlight the image preferred by the human majority (none if undecided)
        preferred_col = {"first": 0, "second": 1}.get(pair["Human_Majority"])
        if preferred_col is not None:
            rect = mpatches.Rectangle((0, 0), size_, size_, linewidth=10,
                                      edgecolor="g", facecolor="none")
            ax[i, preferred_col].add_patch(rect)

        # Print majority decisions for this image pair
        print(
            f"Image {i+1}:\n"
            f"Human Majority: {pair['Human_Majority']}\n"
            f"GPT Majority: {pair['GPT_Majority']}\n"
        )

    # Adjust layout and render the figure
    plt.tight_layout()
    plt.show()


#### Computational Model (Relative Interestingness)

In [None]:
# Load the CLIP ViT-L/14 model and its preprocessing pipeline
# (`preprocess` is applied to each PIL image before it is passed to
# `clip_model.encode_image` in the embedding cell further down)
clip_model, preprocess = clip.load("ViT-L/14", device=device)

# Scoring head for relative interestingness: a single linear layer over an embedding
class BaseNetwork(nn.Module):
    """Linear model that maps an ``input_dim``-dimensional vector to one score."""

    def __init__(self, input_dim):
        super().__init__()
        # One output unit: the predicted score
        self.fc1 = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the score(s) for input ``x`` (last dimension ``input_dim``)."""
        score = self.fc1(x)
        return score

# Initialize the model, load pretrained weights, and set it to evaluation mode
RI_model = BaseNetwork(input_dim=768).to(device)
# map_location places the stored tensors on `device`, so the weights also load on a
# CPU-only machine in case the checkpoint was saved from a GPU
RI_model.load_state_dict(torch.load("RI_model.pth", map_location=device))
RI_model.eval()

In [None]:
def _download_image(url):
    """Download the image at `url` and return it as RGB, resized and center-cropped."""
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of failing in Image.open
    return center_crop(resize(Image.open(BytesIO(resp.content)).convert("RGB")))


# Collect all image URLs from both columns and remove duplicates.
# dict.fromkeys keeps first-seen order, so the list is identical on every run
# (the iteration order of a set of strings can differ between sessions).
# Non-string entries (e.g. NaN where the left merge found no URL) are dropped.
image_urls = [
    url
    for url in dict.fromkeys(rows["ImageURL1"].tolist() + rows["ImageURL2"].tolist())
    if isinstance(url, str)
]

# Download, resize, and center-crop all unique images
images = [_download_image(url) for url in image_urls]

# Extract CLIP image embeddings (kept on the CPU between the two steps)
image_embeddings = []
with torch.no_grad():
    for img in images:
        img_input = preprocess(img).unsqueeze(0).to(device)
        embedding = clip_model.encode_image(img_input)
        image_embeddings.append(embedding.cpu())

# Predict relative-interestingness scores for each image
ri_scores = []
with torch.no_grad():
    for emb in image_embeddings:
        # Cast to float32 before the linear layer, whose weights are float32
        emb = emb.to(device).float()
        score = RI_model(emb).item()
        ri_scores.append(score)

# Create lookup tables from image URLs to scores and loaded images
url_to_ri_score = dict(zip(image_urls, ri_scores))
url_to_image = dict(zip(image_urls, images))


In [None]:
# Sort images by predicted RI score (highest first)
sorted_images = sorted(
    url_to_ri_score.items(), key=lambda x: x[1], reverse=True
)

# Visualize the most interesting images: up to six, fewer if fewer were scored
top_k = min(6, len(sorted_images))

if top_k == 0:
    print("No scored images to display.")
else:
    # squeeze=False keeps a 2-D axes array even for a single image; take its only row.
    # 3 inches per panel reproduces the 18 x 5 figure used for six images.
    fig, axs = plt.subplots(1, top_k, figsize=(3 * top_k, 5), squeeze=False)
    axs = axs[0]
    for i in range(top_k):
        url, score = sorted_images[i]
        img = url_to_image[url]

        axs[i].imshow(img)
        axs[i].set_title(f"RI Score: {score:.4f}")
        axs[i].axis("off")

    plt.tight_layout()
    plt.show()
