In [1]:
import torch
from torch.utils.data import DataLoader, Subset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
from pytorch_msssim import ssim  # You can install this package for SSIM calculation in PyTorch
from concurrent.futures import ThreadPoolExecutor
import os

traindir = os.path.join("/work/u1887834/imagenet/", 'train')
imagenet_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()
    ]))
# dataloader = DataLoader(imagenet_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, sampler=None)

# Function to perform SSIM comparison between two batches
def compare_batch_to_batch(batch1, batch2, start_idx1, start_idx2, ssim_matrix):
    for i, img1 in enumerate(batch1):
        for j, img2 in enumerate(batch2):
            if start_idx1 + i == start_idx2 + j:  # Skip comparing the image with itself
                continue
            if start_idx1 + i > start_idx2 + j:  # Avoid duplicate comparisons
                continue
            score = ssim(img1.unsqueeze(0), img2.unsqueeze(0)).item()
            ssim_matrix[start_idx1 + i, start_idx2 + j] = score
            ssim_matrix[start_idx2 + j, start_idx1 + i] = score  # Matrix is symmetric

second_elements = np.array([x[1] for x in imagenet_dataset.imgs])
indices = np.where(second_elements == 0)[0]
torch_indices= torch.tensor(indices, dtype=torch.long)
first_category_subset = Subset(imagenet_dataset, torch_indices)
N = len(first_category_subset)

batch_size=50

first_category_dataloader = DataLoader(first_category_subset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, sampler=None)

# Preload all data into a list
all_batches = [batch.cuda() for (batch, _) in first_category_dataloader]

ssim_matrix = torch.zeros((N, N))

# Function to populate SSIM matrix
def perform_comparisons():
    with ThreadPoolExecutor(max_workers=16) as executor:
        futures = []
        for i, batch1 in enumerate(all_batches):
            for j, batch2 in enumerate(all_batches):
                if i > j:
                    continue
                future = executor.submit(compare_batch_to_batch, batch1, batch2, i * batch_size, j * batch_size, ssim_matrix)
                futures.append(future)

        # Wait for all futures to complete
        for future in futures:
            future.result()

# Perform the comparisons
perform_comparisons()

# Show a small portion of the matrix for illustration
ssim_matrix[:5, :5]


In [2]:
import torch
from torch.utils.data import DataLoader, Subset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
from pytorch_msssim import ssim  # You can install this package for SSIM calculation in PyTorch
from pytorch_msssim import SSIM
import os
# batch_size = 64
traindir = os.path.join("/work/u1887834/imagenet/", 'train')
imagenet_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()
    ]))
# dataloader = DataLoader(imagenet_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, sampler=None)

ssim_compiled = torch.compile(SSIM(data_range=1.0, size_average=False))


# num = 1


second_elements = np.array([x[1] for x in imagenet_dataset.imgs])
ssim_matrix_list = []
from tqdm import tqdm

for classe in tqdm(range(600, 1000), desc="ssim among all images in one class"):
    
    indices = np.where(second_elements == classe)[0]
    torch_indices= torch.tensor(indices, dtype=torch.long)
    category_subset = Subset(imagenet_dataset, torch_indices)
    N = len(category_subset)
    batch_size=N
    category_dataloader = DataLoader(category_subset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, sampler=None)

    ssim_matrix = torch.zeros((N, N))
    
    for i, (batch, _) in enumerate(category_dataloader):
        batch = batch.cuda()  # Move to GPU
        for x in range(len(batch)):
            for y in range(x + 1, len(batch)):  # Skip redundant comparisons
                # print(x, y)
                img1 = batch[x].unsqueeze(0)  # Convert to 4D tensor (NCHW)
                img2 = batch[y].unsqueeze(0)  # Convert to 4D tensor (NCHW)

                ssim_value = ssim_compiled(img1, img2)

                # Populate the SSIM matrix; calculate the absolute index based on batch and position within batch
                ssim_matrix[x, y] = ssim_value
                ssim_matrix[y, x] = ssim_value  # SSIM is symmetric
    del category_dataloader
    # Move ssim_matrix back to CPU if needed
    ssim_matrix = ssim_matrix.cpu()
    torch.save(ssim_matrix, f'/home/u1887834/Research/data/ssim_matrix_{classe}.pt')


ssim among all images in one class:   2%|â–Ž         | 10/400 [1:14:24<49:13:35, 454.40s/it]

In [4]:
len(ssim_matrix_list)

27

In [5]:
torch.save(ssim_matrix_list, './data/ssim_matrix_list_100_126.pt')

In [2]:
ssim_matrix0

tensor([[0.0000, 0.2224, 0.1475,  ..., 0.2355, 0.1468, 0.1239],
        [0.2224, 0.0000, 0.1813,  ..., 0.4116, 0.2524, 0.2100],
        [0.1475, 0.1813, 0.0000,  ..., 0.2146, 0.1287, 0.1181],
        ...,
        [0.2355, 0.4116, 0.2146,  ..., 0.0000, 0.2676, 0.2283],
        [0.1468, 0.2524, 0.1287,  ..., 0.2676, 0.0000, 0.1266],
        [0.1239, 0.2100, 0.1181,  ..., 0.2283, 0.1266, 0.0000]])