In [1]:
import fiftyone as fo
import pandas as pd
import os
import glob
from tqdm import tqdm
import timm
import torch
import torch.nn as nn
import numpy as np
import cv2
import pickle

# Side length passed as both width and height to the resize transform applied
# to every image before inference.
resize = 32
# The dataset root is taken from the DATASET_ROOT environment variable; a
# KeyError is raised here if that variable is not set.
ds_root = os.path.join(os.environ["DATASET_ROOT"], "bcd2022")
# NOTE(review): the directory name suggests 128-px patches from fold 0 — confirm.
image_root = os.path.join(ds_root, "patch_128_fold_0")
# Display the resolved image directory as the cell output.
image_root

'/media/node_ale/DATA/datasets/bcd2022/patch_128_fold_0'

In [2]:
# Show the names of the FiftyOne datasets that currently exist.
fo.list_datasets()

['bcd2022_moco']

In [3]:
# Create the working dataset. overwrite=True asks FiftyOne to replace any
# existing dataset that already uses this name.
dataset_name = "bcd2022_moco"
try:
    dataset = fo.Dataset(dataset_name, overwrite=True)
except ValueError as err:
    # Creation failed: fall back to the dataset that is already stored under
    # this name and report the reason.
    dataset = fo.load_dataset(dataset_name)
    print(err)

In [4]:
# Display the dataset summary (name, number of samples, field schema).
dataset

Name:        bcd2022_moco
Media type:  None
Num samples: 0
Persistent:  False
Tags:        []
Sample fields:
    id:       fiftyone.core.fields.ObjectIdField
    filepath: fiftyone.core.fields.StringField
    tags:     fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)

In [5]:
# Select a reproducible subset of at most 10000 PNG files from the "0"
# subdirectory of image_root.
# glob returns files in a filesystem-dependent order and the original shuffle
# was unseeded, so every kernel restart produced a different subset/order and
# any embeddings cached in emb.npy silently stopped matching the dataset.
# Sorting first and shuffling with a fixed seed makes the selection stable.
paths = sorted(glob.glob(os.path.join(image_root, "0", "*.png")))
np.random.default_rng(0).shuffle(paths)
paths = paths[:10000]

# Insert all samples in one batched call (order is preserved) instead of one
# database round-trip per sample. The trailing ';' suppresses the returned
# list of sample IDs in the cell output.
dataset.add_samples([fo.Sample(filepath=path) for path in paths]);

100%|████████████████████████████████████| 10000/10000 [00:10<00:00, 932.20it/s]


# Embeddings

In [6]:
# Run only the first time: the embeddings computed from this loader are saved to emb.npy by a later cell.
from albumentations.pytorch.transforms import ToTensorV2
import albumentations as A

# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs on machines without a usable GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

class DatasetInf:
    """Map-style dataset that loads grayscale images for inference.

    Each item is read with OpenCV in grayscale mode, resized to
    ``resize`` x ``resize`` (module-level setting), converted with
    ``A.ToFloat`` and returned as a tensor produced by ``ToTensorV2``.

    Args:
        path: indexable sequence of image file paths; ``len(dataset)`` is
            the length of this sequence.

    Raises:
        OSError: from ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, path):
        self.path = path
        # Build the preprocessing pipeline once instead of on every
        # __getitem__ call. Note that `resize` is therefore read at
        # construction time.
        self.transform = A.Compose(
            [A.Resize(width=resize, height=resize), A.ToFloat(), ToTensorV2()]
        )

    def __len__(self):
        return len(self.path)

    def __getitem__(self, index):
        image_path = self.path[index]
        # Flag 0 makes OpenCV decode the file as a single-channel image.
        img = cv2.imread(image_path, 0)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising;
            # surface the offending path instead of failing inside the transform.
            raise OSError(f"Could not read image: {image_path}")
        return self.transform(image=img)["image"]


# Wrap the sampled image paths in the inference dataset.
d = DatasetInf(paths)
# Batch the images for the model using 8 worker processes. No `shuffle`
# argument is passed; the embeddings are later handed to FiftyOne as a plain
# array, so the batch order presumably has to follow `paths` — confirm before
# enabling shuffling here.
loader = torch.utils.data.DataLoader(d, num_workers=8, batch_size=512)


In [7]:


def load_state_dict_improved(state_dict, model: nn.Module, replace_str=None, prepend=None):
    """Load a checkpoint into ``model`` while tolerating key mismatches.

    Every checkpoint key is optionally rewritten (``replace_str`` first, then
    ``prepend``). Entries whose rewritten key exists in the model replace the
    model's current value; keys found on only one side are reported with
    ``print`` and otherwise ignored.

    Args:
        state_dict: mapping from parameter name to value (the checkpoint).
        model: module that receives the weights.
        replace_str: optional ``(old, new)`` pair applied to every checkpoint
            key with ``str.replace``.
        prepend: optional prefix added to every checkpoint key, after the
            replacement step.

    Returns:
        Whatever ``model.load_state_dict`` returns for the merged state dict.
    """

    def _rewrite(name):
        # Apply the optional substring replacement, then the optional prefix.
        if replace_str is not None:
            name = name.replace(replace_str[0], replace_str[1])
        if prepend is not None:
            name = prepend + name
        return name

    renamed = {_rewrite(name): value for name, value in state_dict.items()}

    # Start from the model's own state so parameters missing from the
    # checkpoint keep their current values.
    merged = model.state_dict()
    for name in merged:
        if name in renamed:
            merged[name] = renamed[name]
        else:
            print(f"model {name} is not in checkpoint")

    for name in renamed:
        if name not in merged:
            print(f"checkpoint {name} is not in model")

    return model.load_state_dict(merged)

In [16]:


# Build the backbone with single-channel input and num_classes=0; the weights
# come from the checkpoint below, not from timm's pretrained files.
model = timm.create_model("tf_efficientnetv2_s_in21ft1k", in_chans=1, pretrained=False, num_classes=0)

# map_location="cpu" lets the checkpoint load regardless of which device it
# was saved from; the model is moved to `device` afterwards.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load(
    "output_moco/tf_efficientnetv2_s_in21ft1k/moco_0009.pth", map_location="cpu"
)
print(load_state_dict_improved(checkpoint, model))
model.eval()
model.to(device)

# Run the model over every batch without tracking gradients and collect the
# outputs as NumPy arrays, in loader order.
emb = []
with torch.no_grad():
    for img in tqdm(loader):
        output = model(img.to(device))
        emb.append(output.cpu().numpy())

# Stack the per-batch outputs into one (num_images, feature_dim) array and
# cache it on disk; np.save appends the ".npy" extension, giving emb.npy.
emb = np.vstack(emb)
np.save("emb", emb)
print(emb.shape)

checkpoint classifier.weight is not in model
checkpoint classifier.bias is not in model
<All keys matched successfully>


100%|███████████████████████████████████████████| 20/20 [00:01<00:00, 12.30it/s]


(10000, 1280)


In [17]:
import fiftyone.brain as fob

# Load the embeddings cached on disk.
emb = np.load("emb.npy")

# The array is handed to FiftyOne without any per-sample keys, so it must have
# exactly one row per dataset sample. Fail loudly if emb.npy is stale (e.g.
# computed for a different subset). This cannot detect a changed sample order.
assert len(emb) == len(dataset), (
    f"emb.npy has {len(emb)} rows but the dataset has {len(dataset)} samples; "
    "recompute the embeddings"
)

# Compute 2D representation
# NOTE(review): brain_key "mnist_test" looks copied from an MNIST example and
# does not describe this dataset — consider renaming it.
results = fob.compute_visualization(
    dataset,
    embeddings=emb,
    num_dims=2,
    method="umap",
    brain_key="mnist_test",
    verbose=True,
    seed=51,
)

Generating visualization...
UMAP(random_state=51, verbose=True)
Mon Jan  9 12:30:24 2023 Construct fuzzy simplicial set
Mon Jan  9 12:30:24 2023 Finding Nearest Neighbors
Mon Jan  9 12:30:24 2023 Building RP forest with 10 trees
Mon Jan  9 12:30:24 2023 NN descent for 13 iterations
	 1  /  13
	 2  /  13
	 3  /  13
	Stopping threshold met -- exiting after 3 iterations
Mon Jan  9 12:30:25 2023 Finished Nearest Neighbor Search
Mon Jan  9 12:30:25 2023 Construct embedding


Epochs completed:   0%|            0/500 [00:00]

Mon Jan  9 12:30:35 2023 Finished embedding


# Launch Session

In [18]:
# Show the default App port from the FiftyOne config.
fo.config.default_app_port

5151

In [19]:
# Launch the FiftyOne App on the dataset.
session = fo.launch_app(dataset)

# Build the plot for the visualization results and render it with width 720.
plot = results.visualize()
plot.show(width=720)

# Attach the plot to the App session (presumably so that selections in the
# plot and in the App stay linked — confirm).
session.plots.attach(plot)





FigureWidget({
    'data': [{'customdata': array(['63bbe25163da0aab13232e30', '63bbe25163da0aab13232e31',
    …