In [None]:
import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import gdown
import clip
import torch
import torch.nn as nn
from torchvision import transforms
import requests
from IPython.display import display

# Used for visualization purposes only: every displayed image goes through
# `resize` followed by `center_crop`, both configured with the same `size_`.
size_ = 512
resize, center_crop = transforms.Resize(size_), transforms.CenterCrop(size_)

### Load Data

In [None]:
# Download the dataset (flickrUser_definition.csv) from Google Drive.
# The download is skipped when the file already exists locally, so that
# re-running the notebook does not fetch the dataset again.
file_id = '1wX8Ti3opqCS_AnXU88sixHElbcS_Lj03'
url = f'https://drive.google.com/uc?id={file_id}'
# The file will be saved as 'flickrUser_definition.csv'
output = 'flickrUser_definition.csv'

if os.path.isfile(output):
    print(f'{output} already exists, skipping download.')
else:
    gdown.download(url, output, quiet=False)

In [None]:
# Load 'flickrUser_definition.csv' into a DataFrame and preview its first rows
df_definition = pd.read_csv('flickrUser_definition.csv')
df_definition.head()

### Data-driven Definition of Common Interest

In [None]:
# Common Interestingness (cf. Figure 2)
# Number of distinct users per partition, ordered from most to fewest users
unique_users = (
    df_definition.groupby(['partition'])['user_id']
    .nunique()
    .reset_index(name='unique_users')
    .sort_values(by='unique_users', ascending=False)
    .reset_index(drop=True)
)

# Bar plot of unique users per partition
bar_ax = unique_users['unique_users'].plot.bar(figsize=(20, 5))
bar_ax.set_xlabel('Partition')
bar_ax.set_ylabel('Unique Users')
# Bars are positioned at 0..n-1; label each one with its partition name
bar_ax.set_xticks(np.arange(0, len(unique_users), 1))
bar_ax.set_xticklabels(unique_users['partition'])
bar_ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

### What Makes an Image Commonly Interesting?

In [None]:
# Split the dataset into three groups based on the trend_group:
# 0 -> common, 1 -> interplay, 2 -> subjective
df_common, df_interplay, df_subjective = (
    df_definition[df_definition['trend_group'] == group_id]
    for group_id in (0, 1, 2)
)

In [None]:
# Perceptual Features (cf. Table 1)
def _main_feature_share(frame):
    """Relative frequency of each 'VILA_main_feature' value in `frame`, rounded and multiplied by 100."""
    return frame.value_counts(subset=['VILA_main_feature'], normalize=True).round(4) * 100


def _aesthetic_quartiles(frame):
    """25%/50%/75% quantiles of 'VILA_aesthetic_score' in `frame`, rounded and multiplied by 100."""
    return frame['VILA_aesthetic_score'].describe()[['25%', '50%', '75%']].round(4) * 100


def _side_by_side(common, interplay, subjective):
    """Put the three group series next to each other and add Delta = Common - Subjective."""
    table = pd.concat([common, interplay, subjective], axis=1)
    table.columns = ['Common', 'Interplay', 'Subjective']
    table['Delta'] = table['Common'] - table['Subjective']
    return table


perceptual_features_common = _main_feature_share(df_common)
aesthetic_score_common = _aesthetic_quartiles(df_common)

perceptual_features_interplay = _main_feature_share(df_interplay)
aesthetic_score_interplay = _aesthetic_quartiles(df_interplay)

perceptual_features_subjective = _main_feature_share(df_subjective)
aesthetic_score_subjective = _aesthetic_quartiles(df_subjective)

# Main-feature table, ordered by how much more frequent a feature is in the common group
df_perceptual_features = _side_by_side(
    perceptual_features_common, perceptual_features_interplay, perceptual_features_subjective
).sort_values(by='Delta', ascending=False)
df_perceptual_features.index.names = ['Scores from VILA']
display(df_perceptual_features)

# Aesthetic-score quartile table (kept in quantile order)
df_aesthetic_score = _side_by_side(
    aesthetic_score_common, aesthetic_score_interplay, aesthetic_score_subjective
)
df_aesthetic_score.index.names = ['Scores from VILA']
display(df_aesthetic_score)

In [None]:
# Check if the owner of the image is a photographer.
# Builds a list with one flag per row of df_definition: 1 when the owner's
# occupation contains "photographer" (case-insensitive), 0 otherwise.
# Every value is converted to text first, so non-string entries are handled;
# `na=False` guarantees that missing occupations never count as a match.
is_photographer = (
    df_definition['favimg_owner_occupation']
    .astype(str)
    .str.lower()
    .str.contains('photographer', regex=False, na=False)
    .astype(int)
    .tolist()
)

In [None]:
# Add the 'is_photographer' column to a copy of the dataset
# (`assign` returns a new DataFrame, so df_definition itself is left untouched)
df_photographer = df_definition.assign(is_photographer=is_photographer)

In [None]:
# Percentage of photographers in each group.
# 'is_photographer' holds 0/1 flags, so its mean is the fraction of photographers.
# Using the mean (instead of looking up the count for label 1) also works for a
# group that contains no photographers at all, and the fixed two-decimal format
# avoids floating-point noise in the printed percentage.
trend_group_labels = {
    0: 'common interesting',
    1: 'interplay',
    2: 'subjective interesting',
}
for trend_group, group_label in trend_group_labels.items():
    flags_in_group = df_photographer.loc[
        df_photographer['trend_group'] == trend_group, 'is_photographer'
    ]
    photographer_share = flags_in_group.mean() * 100
    print(f'Photographers in {group_label} group: {photographer_share:.2f}%')

In [None]:
# Connotative Features (cf. Table 3)
# Share of each CLIP emotion category per group, rounded and multiplied by 100
connotative_features_common = df_common.value_counts(subset=['CLIP_emotion_main_category'], normalize=True).round(4) * 100
connotative_features_interplay = df_interplay.value_counts(subset=['CLIP_emotion_main_category'], normalize=True).round(4) * 100
connotative_features_subjective = df_subjective.value_counts(subset=['CLIP_emotion_main_category'], normalize=True).round(4) * 100

connotative_features_positive = ['excitement', 'awe', 'contentment', 'amusement']
connotative_features_negative = ['sadness', 'disgust', 'anger', 'fear']


def _emotion_table(emotions, row_names, total_row_name):
    """Build the Common/Interplay/Subjective table for the given emotion categories.

    `emotions` selects the categories, `row_names` are the display names (same
    order as `emotions`), and `total_row_name` labels the appended column-sum row.
    """
    table = pd.concat(
        [
            connotative_features_common[emotions],
            connotative_features_interplay[emotions],
            connotative_features_subjective[emotions],
        ],
        axis=1,
    )
    table.columns = ['Common', 'Interplay', 'Subjective']
    table.index = row_names
    table['Delta'] = table['Common'] - table['Subjective']
    table = table.sort_values(by='Delta', ascending=False)
    # The sum row is appended after sorting so that it always stays at the bottom
    table.loc[total_row_name] = table.sum()
    table.index.names = ['Scores from CLIP']
    return table


df_connotative_features_positive = _emotion_table(
    connotative_features_positive,
    ['Excitement', 'Awe', 'Contentment', 'Amusement'],
    'Sum Positive',
)
display(df_connotative_features_positive)

df_connotative_features_negative = _emotion_table(
    connotative_features_negative,
    ['Sadness', 'Disgust', 'Anger', 'Fear'],
    'Sum Negative',
)
display(df_connotative_features_negative)

### Computational Model of Common Interestingness

#### FlickrUser Additional

In [None]:
# Download the dataset (flickrUser_additional.csv) from Google Drive.
# The download is skipped when the file already exists locally, so that
# re-running the notebook does not fetch the dataset again.
file_id = '1j8J6i14MqyRtlwPGPfNC5lxqIzX_e1N_'
url = f'https://drive.google.com/uc?id={file_id}'
# The file will be saved as 'flickrUser_additional.csv'
output = 'flickrUser_additional.csv'

if os.path.isfile(output):
    print(f'{output} already exists, skipping download.')
else:
    gdown.download(url, output, quiet=False)

In [None]:
# Load 'flickrUser_additional.csv' into a DataFrame and preview its first rows
df_additional = pd.read_csv('flickrUser_additional.csv')
df_additional.head()

In [None]:
# User examples (cf. Figure 7)
user_id = '130140542@N03'
SAMPLES_PER_GROUP = 3
# Fixed seed so that "Restart & Run All" always shows the same example images
SAMPLE_SEED = 0

df_user = df_additional[df_additional['user_id'] == user_id]


def sample_examples(rows):
    """Reproducibly draw up to SAMPLES_PER_GROUP rows (fewer if `rows` is smaller)."""
    return rows.sample(n=min(SAMPLES_PER_GROUP, len(rows)), random_state=SAMPLE_SEED)


# High, intermediate and low CI_R scores of this user's favourite images
df_user_common = sample_examples(df_user[df_user['ci_r_score'] > 0.6])
df_user_interplay = sample_examples(df_user[(df_user['ci_r_score'] < 0.3) & (df_user['ci_r_score'] > 0.2)])
df_user_subjective = sample_examples(df_user[df_user['ci_r_score'] < 0.1])

df_user_trends = pd.concat([df_user_common, df_user_interplay, df_user_subjective]).reset_index(drop=True)


# Open image url and display it
def open_image_url(url):
    """Download the image at `url` and return it resized and center-cropped.

    Raises a `requests` exception when the request times out or the server
    answers with an error status, instead of handing an error page to PIL.
    """
    from io import BytesIO

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # `response.content` is the fully downloaded, content-decoded body.
    # Converting to RGB keeps matplotlib from false-coloring grayscale images.
    img = Image.open(BytesIO(response.content)).convert('RGB')
    return center_crop(resize(img))


# Display the images
n_images = len(df_user_trends)
if n_images == 0:
    print(f'No example images found for user {user_id}.')
else:
    # squeeze=False always yields a 2-D array of Axes, even for a single image
    fig, axes = plt.subplots(1, n_images, figsize=(15, 5), squeeze=False)
    for i, axis in enumerate(axes[0]):
        img = open_image_url(df_user_trends.loc[i, 'favimg_url'])
        axis.imshow(img)
        axis.set_title(f'CI_R Score: {df_user_trends.loc[i, "ci_r_score"]:.3f}', fontsize=10)
        if i == 0:
            axis.set_ylabel(f'{user_id}', fontsize=10, fontweight='bold')
        axis.set_xticks([])
        axis.set_yticks([])
    plt.tight_layout()
    plt.show()

#### Use your own Images

In [None]:
# Load the CLIP model (on the GPU when one is available, otherwise on the CPU)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-L/14@336px", device=device)

# Define the CI_R model
class RegressionModel(nn.Module):
    """Linear regression head: maps a 768-dimensional feature vector to one value."""

    def __init__(self):
        super().__init__()
        # The layer stays wrapped in nn.Sequential so that the parameter names
        # remain 'fc.0.weight' / 'fc.0.bias'.
        linear_head = nn.Linear(768, 1)
        self.fc = nn.Sequential(linear_head)

    def forward(self, x):
        """Apply the linear head to `x` (last dimension must be 768)."""
        prediction = self.fc(x)
        return prediction

# Load the CI_R model
model_ci_r = RegressionModel().to(device)
# `map_location=device` remaps the stored tensors onto the device in use, so the
# checkpoint also loads on a CPU-only machine if it was saved from a GPU.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model_ci_r.load_state_dict(torch.load('CI_R_model.pth', map_location=device))
# Switch to inference mode
model_ci_r.eval()

In [None]:
# Define image path for the sample images
def list_image_paths(directory):
    """Return the paths of the regular, non-hidden files in `directory`, sorted by name.

    Sorting makes the order independent of the file system, and skipping hidden
    entries and sub-directories (e.g. '.DS_Store', '.ipynb_checkpoints') keeps
    files that are not sample images out of the list.
    """
    paths = []
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if name.startswith('.') or not os.path.isfile(path):
            continue
        paths.append(path)
    return paths


img_path_libertyStatue = list_image_paths('./sampleImages/libertyStatue/')
img_path_eiffelTower = list_image_paths('./sampleImages/eiffelTower/')

In [None]:
# Extract features from the images
def extract_clip_features(image_paths):
    """Encode every image in `image_paths` with the CLIP image encoder.

    Returns a NumPy array with one row of flattened image features per path.
    Raises ValueError when `image_paths` is empty.
    """
    if len(image_paths) == 0:
        raise ValueError('No image paths given - nothing to extract features from.')

    features = []
    for path in image_paths:
        # Close the file handle as soon as the image has been preprocessed
        with Image.open(path) as img:
            # Use `device` (the one CLIP was loaded on) so this also runs without a GPU
            image = preprocess(img).unsqueeze(0).to(device)
        with torch.no_grad():
            features.append(model.encode_image(image).cpu().numpy())
    return np.array(features).reshape(len(features), -1)


# Each folder is processed on its own, so the two folders may hold a different number of images
image_features_libertyStatue = extract_clip_features(img_path_libertyStatue)
image_features_eiffelTower = extract_clip_features(img_path_eiffelTower)

# Print the shape of the extracted features
print(f'Image features (Liberty Statue): {image_features_libertyStatue.shape}')
print(f'Image features (Eiffel Tower): {image_features_eiffelTower.shape}')

In [None]:
# Predict the CI_R score
def _ci_r_scores(image_features):
    """Run the CI_R model on a feature matrix and return the predictions as a flat NumPy array."""
    feature_tensor = torch.tensor(image_features, dtype=torch.float32).to(device)
    return model_ci_r(feature_tensor).cpu().detach().numpy().flatten()


def _ranked_by_score(img_paths, ci_scores):
    """Pair each image path with its CI score, highest score first."""
    ranking = pd.DataFrame({'img_path': img_paths, 'ci_score': ci_scores})
    return ranking.sort_values(by='ci_score', ascending=False)


ci_score_libertyStatue = _ci_r_scores(image_features_libertyStatue)
ci_score_eiffelTower = _ci_r_scores(image_features_eiffelTower)

# Create a dataframe with the image paths and their CI scores
df_ci_score_libertyStatue = _ranked_by_score(img_path_libertyStatue, ci_score_libertyStatue)
df_ci_score_eiffelTower = _ranked_by_score(img_path_eiffelTower, ci_score_eiffelTower)

In [None]:
# Display the images with their CI scores (cf. Figure 6)
def show_ranked_images(df_scores):
    """Show the images listed in `df_scores` in one row, each titled with its CI_R score.

    `df_scores` needs the columns 'img_path' and 'ci_score'; images are drawn in
    the row order of the DataFrame. Nothing is drawn when it has no rows.
    """
    n_images = df_scores.shape[0]
    if n_images == 0:
        print('No images to display.')
        return

    # squeeze=False always yields a 2-D array of Axes, even for a single image
    fig, axes = plt.subplots(1, n_images, figsize=(15, 5), squeeze=False)
    for axis, img_path, score in zip(axes[0], df_scores['img_path'], df_scores['ci_score']):
        with Image.open(img_path) as img:
            # Converting to RGB keeps matplotlib from false-coloring grayscale images
            axis.imshow(center_crop(resize(img.convert('RGB'))))
        axis.axis('off')
        axis.set_title(f'CI_R Score: {score:.3f}', fontsize=10)
    plt.tight_layout()
    plt.show()


show_ranked_images(df_ci_score_eiffelTower)
show_ranked_images(df_ci_score_libertyStatue)