In [1]:
import os
from pathlib import Path

from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ImageEmbed:

    def __init__(self, category, path, embed):
        self.category = category
        self.path = path
        self.embed = embed

    def __repr__(self) -> str:
        return f"{self.path}, {self.embed}"

In [3]:
def get_list_of_files(path):

    path = Path(path)
    print(path)

    dirs = os.listdir(path)
    print("Total Dirs:", len(dirs))

    all_files = []
    total_files = 0
    for category_name in dirs:
        file_path_ls = list((path / category_name).glob("*.JPEG"))
        file_path_ls = [ImageEmbed(category_name, p, None) for p in  file_path_ls]
        all_files.extend(file_path_ls)

    print("Categories:", len(dirs), "- Files:", len(all_files))

    return all_files

In [4]:
train_paths = get_list_of_files("/media/work/WorkSpace/dataset/imagenet-mini/train")
val_paths = get_list_of_files("/media/work/WorkSpace/dataset/imagenet-mini/val")
pass

/media/work/WorkSpace/dataset/imagenet-mini/train
Total Dirs: 1000
Categories: 1000 - Files: 34745
/media/work/WorkSpace/dataset/imagenet-mini/val
Total Dirs: 1000
Categories: 1000 - Files: 3923


In [5]:
!ls "/media/work/WorkSpace/dataset/imagenet-mini/train/n02443484" | wc -l

40


In [6]:
# train_paths = train_paths[:5000]

In [7]:
train_paths[:5]

[/media/work/WorkSpace/dataset/imagenet-mini/train/n02443484/n02443484_15470.JPEG, None,
 /media/work/WorkSpace/dataset/imagenet-mini/train/n02443484/n02443484_10204.JPEG, None,
 /media/work/WorkSpace/dataset/imagenet-mini/train/n02443484/n02443484_11275.JPEG, None,
 /media/work/WorkSpace/dataset/imagenet-mini/train/n02443484/n02443484_11368.JPEG, None,
 /media/work/WorkSpace/dataset/imagenet-mini/train/n02443484/n02443484_12259.JPEG, None]

In [8]:
# Model loading.
model = torch.jit.load('../models_pt/v10_swin_base_patch4_window7_224_in22k.pt')
model.to('cuda')
model.eval()
embedding_fn = model

def get_embedding(path):
    # Load image and extract its embedding.
    input_image = Image.open(path).convert("RGB")
    convert_to_tensor = transforms.Compose([transforms.PILToTensor()])
    input_tensor = convert_to_tensor(input_image)
    input_batch = input_tensor.unsqueeze(0).to('cuda')
    with torch.no_grad():
        embedding = torch.flatten(embedding_fn(input_batch)[0]).cpu().data.numpy()
        return embedding

In [9]:
for i, v in enumerate(tqdm(train_paths)):
    v.embed = get_embedding(v.path)

 11%|█▏        | 3914/34745 [02:24<19:00, 27.03it/s]


KeyboardInterrupt: 

In [None]:
category_count = {}
for v in train_paths:
    c = category_count.get(v.category, 0)
    category_count[v.category] = c + 1
# category_count

In [None]:
arr = np.array([v.embed for v in train_paths])

In [None]:
distance_mat = euclidean_distances(arr, arr)
distance_mat.shape

(34745, 34745)

In [None]:
categry_distance = {}


for i, v in enumerate(tqdm(train_paths)):
    

    distance = distance_mat[i]
    indexes = np.argsort(distance)[:10]

    result = [(train_paths[i].category, distance[i]) for i in indexes]

    # skip self
    result = result[1:]

    min_len = min(5, category_count[v.category])

    result = sum([int(r[0] == v.category) for r in result[:min_len]]) / min_len

    rs_ls = categry_distance.get(v.category, [])
    rs_ls.append(result)
    categry_distance[v.category] = rs_ls

    # print(result)

    # break


100%|██████████| 34745/34745 [01:25<00:00, 407.09it/s]


In [None]:
np.argsort(distance_mat[1])[:5]

array([    1, 21997, 32099, 21587, 32092])

In [None]:
score_ls = []

for k, v_ls in categry_distance.items():
    score = sum(v_ls) / len(v_ls)
    score_ls.append(score)

sum(score_ls) / len(score_ls)

0.2670392385721423

In [None]:
# v11_swin_base_patch4_window7_224_in22k.pt - 0.7354730610007483
# 0.7473133925898999

# 0.26703436052336177