In [1]:
import os
import torch
from PIL import Image
from torchvision import transforms
import random
import matplotlib.pyplot as plt
import textwrap
import torchvision.models as models

In [2]:
# Shared preprocessing pipeline: resize every image to a fixed 128x128 grid,
# then convert the PIL image to a tensor so images can be compared element-wise.
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [3]:
# os.listdir('./data/wikiart_sample/New_Realism')

In [None]:
# Flattened pixel tensor of every image in the folder, keyed by the image's file path.
images = {}

folder_path = './data/wikiart_sample_large/New_Realism'

# List the same directory the paths are joined against (single source of truth),
# and sort the listing: os.listdir returns entries in arbitrary order, which would
# make the dict order (and any tie-breaking downstream) vary between runs/machines.
for img_file in sorted(os.listdir(folder_path)):
    img_path = os.path.join(folder_path, img_file)
    # The context manager releases the file handle deterministically; convert()
    # returns an independent RGB copy that stays valid after the file is closed.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert('RGB')
    img_tensor = transform(img)
    # Collapse the image tensor to 1-D so two images can be correlated directly.
    img_flat = torch.flatten(img_tensor)
    images[img_path] = img_flat

In [None]:
def pearson_correlation(tensor1, tensor2):
    """Return the Pearson correlation coefficient between two tensors.

    Both tensors are treated as flat samples of equal size; all of their
    elements are used.

    The coefficient is computed as the dot product of the mean-centred
    tensors divided by the product of their Euclidean norms. Numerator and
    denominator therefore share the same normalisation, so identical inputs
    yield exactly 1.0. (The previous version divided an n-normalised
    covariance by `torch.std`, which defaults to the n-1 estimator, and so
    returned r * (n - 1) / n.)

    Args:
        tensor1: first sample (floating-point tensor).
        tensor2: second sample, with the same number of elements as `tensor1`.

    Returns:
        A 0-dim tensor holding r in [-1, 1]. If either input has zero
        variance the result is NaN (0 / 0), as before.
    """
    centered1 = tensor1 - torch.mean(tensor1)
    centered2 = tensor2 - torch.mean(tensor2)

    co_deviation = torch.sum(centered1 * centered2)
    # sqrt(sum(d1^2)) * sqrt(sum(d2^2)): the matching normaliser for co_deviation
    norm_product = torch.sqrt(torch.sum(centered1 ** 2)) * torch.sqrt(torch.sum(centered2 ** 2))

    return co_deviation / norm_product

In [None]:
# For each image path, record (path of its best-matching other image, that correlation),
# where "best" means the highest Pearson correlation of the flattened pixel tensors.
most_correlated = {}

for query_path, query_pixels in images.items():
    best_match, best_score = None, float('-inf')

    for candidate_path, candidate_pixels in images.items():
        if candidate_path == query_path:
            continue  # an image is never compared with itself

        score = pearson_correlation(query_pixels, candidate_pixels).item()
        # Strict '>' keeps the first candidate on ties and skips NaN scores
        if score > best_score:
            best_match, best_score = candidate_path, score

    most_correlated[query_path] = (best_match, best_score)

In [None]:
# Reload a previously saved selection of image paths so that the figures below
# show the same images on every run. The commented-out snippet records how such
# a selection can be sampled and written out (presumably how the file was made):
#
#     random_images = random.sample(list(most_correlated.keys()), 10)
#     with open('random_keys.txt', 'w') as f:
#         for key in random_images:
#             f.write(f"{key}\n")

with open('random_keys.txt', 'r') as f:
    # One path per line; strip the trailing newline and surrounding whitespace
    random_images = [line.strip() for line in f]

In [None]:
# Point the saved keys at the '_sample_large' dataset directory.
# Paths that already contain '_sample_large' are left untouched so that re-running
# this cell does not keep appending '_large' ('_sample_large_large', ...).
random_images = [
    im if '_sample_large' in im else im.replace('_sample', '_sample_large')
    for im in random_images
]

In [None]:
# Create a 2x10 plot: the top row shows the selected images, the bottom row shows
# the image whose flattened pixels correlate most strongly with the one above it.
fig, axes = plt.subplots(2, 10, figsize=(50, 16))

# Add labels for the rows (anchored to the y-axis label of each row's first panel)
axes[0, 0].annotate('Original', xy=(0, 0.5), xytext=(-axes[0,0].yaxis.labelpad - 5, 0),
                    xycoords=axes[0, 0].yaxis.label, textcoords='offset points',
                    size=28, ha='right', va='center', weight='bold')
axes[1, 0].annotate('Most Correlated', xy=(0, 0.5), xytext=(-axes[1,0].yaxis.labelpad - 5, 0),
                    xycoords=axes[1, 0].yaxis.label, textcoords='offset points',
                    size=28, ha='right', va='center', weight='bold')

# Plot the actual and most correlated images
for i, img_path in enumerate(random_images):
    # Open and show the actual image; the title is the file name without its
    # last four characters (the extension), wrapped at 20 characters.
    img = Image.open(img_path)
    axes[0, i].imshow(img)
    axes[0, i].set_aspect('auto')
    wrapped_title = textwrap.fill(os.path.basename(img_path[:-4]), 20)
    axes[0, i].set_title(wrapped_title, fontsize=20, weight = 'bold')
    axes[0, i].axis('off')

    # Open and show the most correlated image and correlation value
    most_corr_img_path, corr_value = most_correlated[img_path]
    most_corr_img = Image.open(most_corr_img_path)
    axes[1, i].imshow(most_corr_img)
    axes[1, i].set_aspect('auto')
    wrapped_title = textwrap.fill(os.path.basename(most_corr_img_path)[:-4], 20)
    axes[1, i].set_title(f"{wrapped_title}\nCorr: {corr_value:.2f}", fontsize=20, weight = 'bold')
    axes[1, i].axis('off')

plt.tight_layout()
# Save BEFORE showing: once plt.show() has displayed the figure, pyplot may no longer
# have a current figure, so a later plt.savefig() can write a blank image.
# Saving through `fig` also makes explicit which figure is written.
fig.savefig('simple_correlation_similarity.png')
plt.show()

In [None]:
# Load the pre-trained VGG16 model, keeping both the convolutional base (`features`)
# and the fully connected layers (`classifier`).
# (Alternative that keeps only the convolutional features:
#  models.vgg16(pretrained=True).features.eval())
# NOTE(review): `pretrained=True` is the legacy torchvision argument; newer releases
# prefer `weights=...`. Kept as-is to stay compatible with the installed version.
vgg16 = models.vgg16(pretrained=True)

# Remove the final classification layer so the model outputs the activations that
# feed it instead of class scores.
new_classifier = torch.nn.Sequential(*list(vgg16.classifier.children())[:-1])
vgg16.classifier = new_classifier

# Put the model in inference mode. Modules are created in training mode, and the
# VGG16 classifier contains Dropout layers: without eval() each forward pass would
# randomly zero activations, making the extracted features non-deterministic.
vgg16.eval()

# Disable training for all layers
for param in vgg16.parameters():
    param.requires_grad = False

In [None]:
# def extract_features(image, model):
#     x = image.unsqueeze(0)
#     for layer in model:
#         x = layer(x)
#         if isinstance(layer, nn.MaxPool2d):
#             x = x.view(x.size(0), -1)
#             return x

def extract_features(img_tensor, model):
    """Run a single image through `model` and return its output as a feature tensor.

    Args:
        img_tensor: one un-batched input tensor; a leading batch dimension of
            size 1 is added before the forward pass.
        model: a callable module applied to the batched input.

    Returns:
        The model output with all size-1 dimensions (including the batch
        dimension) squeezed out. The result never carries autograd history.
    """
    x = img_tensor.unsqueeze(0)
    # Pure inference: skip autograd bookkeeping so the returned features do not
    # hold a computation graph, even if the model's parameters require grad.
    with torch.no_grad():
        features = model(x)
    return features.squeeze()

In [None]:
# VGG16 feature tensor of every image in the folder, keyed by the image's file path.
images_features = {}

# `folder_path` is set in the pixel-loading cell earlier in the notebook. List that
# same directory (instead of repeating the literal) so listing and joining cannot
# drift apart, and sort the listing because os.listdir order is arbitrary.
for img_file in sorted(os.listdir(folder_path)):
    img_path = os.path.join(folder_path, img_file)
    # The context manager releases the file handle deterministically; convert()
    # returns an independent RGB copy that stays valid after the file is closed.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert('RGB')
    img_tensor = transform(img)
    feature_vector = extract_features(img_tensor, vgg16)
    images_features[img_path] = feature_vector

In [None]:
# For each image path, record (path of its best-matching other image, that correlation),
# where "best" means the highest Pearson correlation between the extracted feature tensors.
most_correlated_features = {}

for anchor_path, anchor_features in images_features.items():
    top_path = None
    top_corr = float('-inf')

    for other_path, other_features in images_features.items():
        if other_path == anchor_path:
            continue  # an image is never compared with itself

        current_corr = pearson_correlation(anchor_features, other_features).item()
        # Strict '>' keeps the first candidate on ties and skips NaN scores
        if current_corr > top_corr:
            top_corr = current_corr
            top_path = other_path

    most_correlated_features[anchor_path] = (top_path, top_corr)

In [None]:
# 2x10 grid: the top row shows the images listed in `random_images`, the bottom row
# shows, for each of them, the image with the most correlated feature tensor.
# `random_images` is reused as-is; a fresh selection could be drawn with
#     random.sample(list(most_correlated_features.keys()), 10)
fig, axes = plt.subplots(2, 10, figsize=(50, 16))

# Row captions, anchored to the y-axis label of the first panel in each row
for row_idx, row_caption in enumerate(('Original', 'Most Correlated')):
    first_ax = axes[row_idx, 0]
    first_ax.annotate(row_caption, xy=(0, 0.5),
                      xytext=(-first_ax.yaxis.labelpad - 5, 0),
                      xycoords=first_ax.yaxis.label, textcoords='offset points',
                      size=28, ha='right', va='center', weight='bold')

for col, img_path in enumerate(random_images):
    top_ax, bottom_ax = axes[0, col], axes[1, col]

    # Top panel: the selected image, titled with its file name minus the
    # last four characters, wrapped at 20 characters.
    top_ax.imshow(Image.open(img_path))
    top_ax.set_aspect('auto')
    top_ax.set_title(textwrap.fill(os.path.basename(img_path[:-4]), 20),
                     fontsize=20, weight='bold')
    top_ax.axis('off')

    # Bottom panel: its best feature-space match plus the correlation value
    most_corr_img_path, corr_value = most_correlated_features[img_path]
    bottom_ax.imshow(Image.open(most_corr_img_path))
    bottom_ax.set_aspect('auto')
    wrapped_title = textwrap.fill(os.path.basename(most_corr_img_path)[:-4], 20)
    bottom_ax.set_title(f"{wrapped_title}\nCorr: {corr_value:.2f}", fontsize=20, weight='bold')
    bottom_ax.axis('off')

plt.tight_layout()
plt.show()
# Saving is currently disabled:
# plt.savefig('feature_correlation_similarity.png')