In [5]:
import torch
from torch.utils.data import Dataset, DataLoader

In [6]:
class COCOSegmentationDataset(Dataset):
    def __init__(self, images_dir, annotations_path, transform=None):
        """
        Args:
            images_dir (str): Path to the images directory.
            annotations_path (str): Path to the COCO annotations file.
            transform (callable, optional): Optional transform to be applied
                on an image.
        """
        self.images_dir = images_dir
        self.coco = COCO(annotations_path)
        self.image_ids = self.coco.getImgIds()
        self.transform = transform

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        # Load image
        image_id = self.image_ids[idx]
        image_info = self.coco.loadImgs(image_id)[0]
        image_path = os.path.join(self.images_dir, image_info['file_name'])
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_rgb = cv2.resize(image_rgb, (448, 448))

        # Load mask
        annotation_ids = self.coco.getAnnIds(imgIds=image_id)
        annotations = self.coco.loadAnns(annotation_ids)
        
        # Initialize an empty mask
        mask = np.zeros((image_info['height'], image_info['width']), dtype=np.uint8)
        
        # Populate the mask with annotations
        for annotation in annotations:
            category_id = annotation['category_id']
            binary_mask = self.coco.annToMask(annotation)
            mask[binary_mask == 1] = category_id
        mask = cv2.resize(mask, (112, 112), interpolation=cv2.INTER_NEAREST)

        # Apply transforms if any
        if self.transform:
            image_tensor = self.transform(image_rgb)
        else:
            image_tensor = torch.tensor(image_rgb)

        # Convert to PyTorch tensors
        image_tensor = image_tensor #.permute(2, 0, 1)  # CxHxW format
        mask = torch.tensor(mask, dtype=torch.long)  # Mask in HxW format

        return image_rgb, image_tensor, mask

In [7]:
import torchvision.transforms as T  # Optional for transformations
from torchvision import transforms
from pycocotools.coco import COCO

# Define the transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# dataset_train = COCOSegmentationDataset(images_dir = "../data/coco2017/train2017",
#                                   annotations_path = "../data/coco2017/annotations/instances_train2017.json",
#                                   transform=transform)

dataset_val = COCOSegmentationDataset(images_dir = "../data/coco2017/val2017",
                                  annotations_path = "../data/coco2017/annotations/instances_val2017.json",
                                  transform=transform)

# data_loader_train = DataLoader(
#   dataset_train,
#   batch_size=256,
#   num_workers=10,
#   pin_memory=True,
#   drop_last=False,
#   shuffle=False
# )
data_loader_val = DataLoader(
  dataset_val,
  batch_size=256,
  num_workers=10,
  pin_memory=True,
  drop_last=False,
  shuffle=False
)

loading annotations into memory...
Done (t=0.48s)
creating index...
index created!


In [8]:
# from tqdm import tqdm
# import os
# import cv2
# import numpy as np

# image_rgb_list = []
# mask_list = []
# for (image_rgb, _, mask) in tqdm(data_loader_val):
#     image_rgb_list.append(image_rgb)
#     mask_list.append(mask)
# image_rgb = torch.concat(image_rgb_list)
# masks = torch.concat(mask_list)

In [9]:
# masks = torch.load(f'./data/coco_dumps/masks_112.pth')
# masks.shape

In [10]:
# image_rgb = torch.load(f'./data/coco_dumps/image_rgb_448.pth')
# image_rgb.shape

In [11]:
# image_rgb = image_rgb.float() / 255.

In [12]:
paths_dict  = {
  #'random':'../dino/encoders/random/checkpoint/coco_dumps/coco_dump_28',
  #'stylegan':'../dino/encoders/stylegan-oriented-512/checkpoint/coco_dumps/coco_dump_28',
  #'shaders':'../dino/encoders/shaders/checkpoint/coco_dumps/coco_dump_28',
  #'shaders_mixup':'../dino/encoders/shaders_mixup/checkpoint/coco_dumps/coco_dump_28',
  #'shaders_kml':'../dino/encoders/shaders_kml/checkpoint/coco_dumps/coco_dump_28',
  #'shaders_kml_mixup':'../dino/encoders/shaders_kml_mixup/checkpoint/coco_dumps/coco_dump_28',
  #'places':'../dino/encoders/places/checkpoint/coco_dumps/coco_dump_28',
  'imagenet':'../dino/encoders_prev/imagenet/checkpoint/coco_dumps/coco_dump_28',
}

In [13]:
raw_features_dict = {}
for name, path in paths_dict.items():
  raw_features_dict[name] = torch.concat([torch.load(f'{path}/valfeat_{i}.pth', map_location='cpu') for i in range(20)])

  raw_features_dict[name] = torch.concat([torch.load(f'{path}/valfeat_{i}.pth', map_location='cpu') for i in range(20)])


In [14]:
from torchvision import transforms

features_dict = {}
for name, raw_feat in raw_features_dict.items():
  features_dict[name] = raw_feat[:, 1:, :].view(5000, 28, 28, 384)
  features_dict[name] = transforms.Resize((112, 112))(features_dict[name].permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
  # features_dict[name] = transforms.Resize((112, 112))(image_rgb.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)

In [15]:
features_dict[list(features_dict.keys())[0]].shape

torch.Size([5000, 112, 112, 384])

In [19]:
from sklearn.decomposition import PCA
from tqdm import tqdm
import math

def render_patch_pca(tokens, grid_size):
    tokens = tokens.flatten(0, 1)
    pca = PCA(n_components=3, random_state=0)
    pca.fit(tokens)
    projected_tokens = pca.transform(tokens)

    t = torch.tensor(projected_tokens)
    t_min = t.min(dim=0, keepdim=True).values
    t_max = t.max(dim=0, keepdim=True).values
    normalized_t = (t - t_min) / (t_max - t_min)

    array = normalized_t #(normalized_t * 255).byte().numpy()
    array = array.reshape(*grid_size, 3)

    return array

import numpy as np

def calculate_cluster_centroids(x, m):
    L, C = x.shape
    unique_clusters = np.unique(m)
    K = len(unique_clusters)
    
    centroids = torch.zeros((K+1, C))
    
    for k in unique_clusters:
        cluster_points = x[m == k]  # Extract points belonging to cluster k
        centroids[k] = cluster_points.mean(0)  # Compute mean for the cluster
    
    return centroids

def calculate_normalized_inertia(x, m, centroids):
    L, C = x.shape
    total_pixels = L
    
    inertia = 0.0
    for k in range(1, len(centroids)):
        cluster_points = x[m == k]  # Extract points belonging to cluster k
        inertia += torch.sum((cluster_points - centroids[k])**2)
    
    normalized_inertia = inertia / torch.sum((x - x.mean())**2)
    return normalized_inertia

inertia_dict = {}
for name, feat in features_dict.items():
  inertia_dict[name] = []
  for i in tqdm(range(5000)):
    pca_i = render_patch_pca(feat[i], (112, 112))
    # pca_i = feat[i] # RGB
    mask_i = dataset_val[i][2]
    
    u_i, mask_i = np.unique(mask_i, return_inverse=True)
    mask_i = np.arange(len(u_i))[mask_i].reshape(*dataset_val[i][2].shape)

    pca_i, mask_i = pca_i[mask_i > 0], mask_i[mask_i > 0]

    centroids_i = calculate_cluster_centroids(pca_i, mask_i)
    inertia_i = calculate_normalized_inertia(pca_i, mask_i, centroids_i)

    if not math.isnan(inertia_i):
        inertia_dict[name].append(inertia_i)

100%|██████████| 5000/5000 [07:50<00:00, 10.63it/s]


In [20]:
avg_inertia_dict = {name:np.array(x).mean() for name, x in inertia_dict.items()}

In [21]:
avg_inertia_dict # Inertia = 1 - R^2

# With background elimination
## {'RGB': 0.806041031875779}
## {'random': 0.633371627640186}
## {'stylegan': 0.5151794695156151}
## {'shaders': 0.45028412317973515}
## {'shaders_mixup': 0.486129928535604}
## {'fast2leaves_shaders_shaders': 0.441332687365011}
## {'fast2leaves_shaders_shaders_mixup-2-2-1': 0.4626139436879311}
## {'places': 0.37896990407915015}
## {'imagenet': 0.362603292054376}

{'imagenet': np.float64(0.3542138768916374)}