In [1]:
import sys
sys.path.append("../src")

import os
from dotenv import load_dotenv

import timm
import torch
import pyvips
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from timm.data import resolve_data_config
from torch.utils.data.dataloader import DataLoader
from timm.data.transforms_factory import create_transform

from utils import PatchingDataset, get_model

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from the .env file that sits one directory above the working
# directory, so the Hugging Face token does not have to be hard-coded here.
dotenv_path = os.path.join("..", ".env")
load_dotenv(dotenv_path)

# None when HF_TOKEN is not present in the environment.
hf_token = os.environ.get("HF_TOKEN")

In [3]:
# Build the model through the project helper imported from `utils`.
# NOTE(review): the positional arguments look like (model name, device,
# Hugging Face token) — confirm against the signature of utils.get_model.
model = get_model("UNI", "cpu", hf_token)

In [4]:
# Show which class `get_model` returned.
type(model)

timm.models.vision_transformer.VisionTransformer

In [5]:
# Display the pretrained configuration attached to the model (input size,
# interpolation, normalisation mean/std, ...).
# NOTE(review): the dataset below is created without an explicit transform —
# confirm that PatchingDataset preprocesses patches consistently with these
# values.
model.pretrained_cfg

{'url': 'https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz',
 'hf_hub_id': 'timm/vit_large_patch16_224.augreg_in21k_ft_in1k',
 'architecture': 'vit_large_patch16_224',
 'tag': 'augreg_in21k_ft_in1k',
 'custom_load': True,
 'input_size': (3, 224, 224),
 'fixed_input_size': True,
 'interpolation': 'bicubic',
 'crop_pct': 0.9,
 'crop_mode': 'center',
 'mean': (0.5, 0.5, 0.5),
 'std': (0.5, 0.5, 0.5),
 'num_classes': 1000,
 'pool_size': None,
 'first_conv': 'patch_embed.proj',
 'classifier': 'head'}

In [6]:
# Location of the patch files, expressed relative to the working directory.
patch_dir_parts = ("..", "..", "raw-data", "patches", "experiment-0", "10336")
data_dir = os.path.join(*patch_dir_parts)

# Every directory entry is counted here, whatever its file type.
img_names = os.listdir(data_dir)
print("num images:", len(img_names))

num images: 10282


In [7]:
# Wrap the patch directory in the project dataset, then serve it in its
# original order, five items per batch.
patching_dataset = PatchingDataset(data_dir)
patch_loader = DataLoader(
    dataset=patching_dataset,
    batch_size=5,
    shuffle=False,
)

In [8]:
# Column-oriented accumulator: both lists grow in lock-step (one entry per
# patch) so the dict can be passed straight to `pd.DataFrame`.
results = {
    "coords": [],
    "embedding": [],
}

# Number of batches to embed before stopping. Must be a positive int or None.
# 1 keeps this cell a quick smoke test (the loop previously ended in a bare
# `break`); set to None to embed every batch in `patch_loader`.
max_batches = 1

# Put the model in evaluation mode before running inference.
model.eval()

# inference_mode() disables autograd tracking for everything inside the block.
with torch.inference_mode():
    progress = tqdm(patch_loader, desc="Embedding Patches")
    # NOTE(review): `valid_img` is yielded by the loader but never used here —
    # confirm whether invalid patches should be filtered out before embedding.
    for batch_idx, (img, coords, valid_img) in enumerate(progress):
        # Move the model output to host memory and convert it to a NumPy array.
        embedding = model(img).cpu().numpy()

        # `extend` over the array adds one row (one embedding) per patch.
        results["coords"].extend(coords)
        results["embedding"].extend(embedding)

        # Checked at the end of the body so no extra batch is fetched from the
        # loader once the limit has been reached.
        if max_batches is not None and batch_idx + 1 >= max_batches:
            break

Embedding Patches:   0%|          | 0/2057 [00:00<?, ?it/s]


In [9]:
# Inspect the NumPy dtype of the first stored embedding.
results["embedding"][0].dtype

dtype('float32')

In [10]:
# One row per embedded patch; the columns follow the keys of `results`.
df = pd.DataFrame(data=results)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,coords,embedding
0,patch-0-224-0-224,"[0.643192, 0.2199363, 0.45604688, -2.6054518, ..."
1,patch-224-448-0-224,"[0.89091444, 0.32569528, 0.6282622, -2.4381626..."
2,patch-448-672-0-224,"[0.7543198, 0.48000032, 0.80012953, -2.4537742..."
3,patch-672-896-0-224,"[0.80567425, 0.3716954, 0.82282245, -2.481556,..."
4,patch-896-1120-0-224,"[0.80049336, 0.27832162, 0.7032731, -2.4518306..."


In [11]:
# NOTE(review): this cell repeats the preceding cell verbatim (it rebuilds the
# same frame from `results` and shows the same preview) — candidate for removal.
df = pd.DataFrame(results)

df.head()

Unnamed: 0,coords,embedding
0,patch-0-224-0-224,"[0.643192, 0.2199363, 0.45604688, -2.6054518, ..."
1,patch-224-448-0-224,"[0.89091444, 0.32569528, 0.6282622, -2.4381626..."
2,patch-448-672-0-224,"[0.7543198, 0.48000032, 0.80012953, -2.4537742..."
3,patch-672-896-0-224,"[0.80567425, 0.3716954, 0.82282245, -2.481556,..."
4,patch-896-1120-0-224,"[0.80049336, 0.27832162, 0.7032731, -2.4518306..."


In [12]:
# Check the Python type of the object stored in the first row's "embedding" cell.
type(df.iloc[0]["embedding"])

numpy.ndarray

In [13]:
# Write the frame to a Parquet file in the current working directory, to check
# that the array-valued "embedding" column can be serialised.
df.to_parquet("test.parquet")

In [14]:
# Read the file back and preview it to verify the round trip.
pd.read_parquet("test.parquet").head()

Unnamed: 0,coords,embedding
0,patch-0-224-0-224,"[0.643192, 0.2199363, 0.45604688, -2.6054518, ..."
1,patch-224-448-0-224,"[0.89091444, 0.32569528, 0.6282622, -2.4381626..."
2,patch-448-672-0-224,"[0.7543198, 0.48000032, 0.80012953, -2.4537742..."
3,patch-672-896-0-224,"[0.80567425, 0.3716954, 0.82282245, -2.481556,..."
4,patch-896-1120-0-224,"[0.80049336, 0.27832162, 0.7032731, -2.4518306..."
