In [1]:
import sys

sys.path.append("../src")

In [2]:
import os

import cv2
import pyvips
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from empatches import EMPatches

from utils import extract_coords

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_checkpoint(coords):
    """Return how many leading entries of ``coords`` share the first column.

    ``coords`` is walked in order and the index of the first entry whose
    element 2 differs from element 2 of the first entry is returned. With
    column-contiguous ordering this index equals the number of patches in
    the first column.

    Args:
        coords: Ordered sequence of indexable coordinate records; element 2
            is used as the column start (``adjust_coords`` names the layout
            ``(y1, y2, x1, x2)``).

    Returns:
        int: Index of the first entry that starts a new column, or
        ``len(coords)`` when every entry is in the same column (this is 0
        for an empty sequence).
    """
    # len() rather than truthiness: coords may be a NumPy array.
    if len(coords) == 0:
        return 0

    # Compare against the first patch rather than a literal 0 so grids that
    # do not start at offset 0 are handled too.
    first_column_start = coords[0][2]
    for i, coord in enumerate(coords):
        if coord[2] != first_column_start:
            return i

    # No column change found: everything lives in a single column.
    return len(coords)

def adjust_coords(coords, new_size):
    """Re-index an ordered patch list onto a dense grid of ``new_size`` cells.

    The i-th entry of ``coords`` is placed at grid row ``i % rows_per_column``
    and grid column ``i // rows_per_column``, where ``rows_per_column`` is
    whatever ``get_checkpoint(coords)`` reports. Apart from that call, only
    the order and number of entries in ``coords`` are used.

    Args:
        coords: Ordered sequence of coordinate records, with all patches of
            one column contiguous.
        new_size: Edge length each patch occupies in the output grid.

    Returns:
        list[tuple]: One ``(y1, y2, x1, x2)`` tuple per input entry, in the
        same order as ``coords``.

    Raises:
        ZeroDivisionError: If ``get_checkpoint`` reports 0 for a non-empty
            ``coords``.
    """
    total = len(coords)
    if total == 0:
        return []

    rows_per_column = get_checkpoint(coords)
    if rows_per_column is None:
        # No column boundary was found, so the whole input is one column.
        # Without this, ``i % None`` would raise TypeError.
        rows_per_column = total

    adjusted_coords = []
    for i in range(total):
        # divmod gives (column index, row index) for column-major ordering.
        col, row = divmod(i, rows_per_column)
        y1 = row * new_size
        x1 = col * new_size
        adjusted_coords.append((y1, y1 + new_size, x1, x1 + new_size))

    return adjusted_coords

In [4]:
# Read the parquet file "10490.parquet" from the UNI embeddings directory.
data_dir = os.path.join("..", "..", "raw-data", "embeddings", "experiment-0", "UNI")
df = pd.read_parquet(
    os.path.join(data_dir, "10490.parquet")
)

# Parse each patch name (e.g. "patch-0-224-0-224") with extract_coords.
df["processed_coords"] = df["coords"].map(extract_coords)

# Order rows by elements 2 and 3 of the parsed coordinates first, then by
# elements 0 and 1.
df = df.sort_values(
    by="processed_coords",
    key=lambda col: col.map(lambda c: (c[2], c[3], c[0], c[1])),
)

df.head()

Unnamed: 0,coords,embedding,processed_coords
0,patch-0-224-0-224,"[0.73210615, 0.6386739, -0.11966013, -2.685702...","(0, 224, 0, 224)"
9150,patch-224-448-0-224,"[0.9347259, 0.3088819, 0.49207827, -2.511515, ...","(224, 448, 0, 224)"
24900,patch-448-672-0-224,"[0.66905344, 0.8382592, 0.56285363, -2.6083739...","(448, 672, 0, 224)"
26550,patch-672-896-0-224,"[0.712735, 0.099611245, 0.5998612, -2.7894793,...","(672, 896, 0, 224)"
28200,patch-896-1120-0-224,"[0.9970455, 0.22914936, 0.63671064, -2.5378096...","(896, 1120, 0, 224)"


In [5]:
df.shape

(29100, 3)

In [6]:
test = df.iloc[0]["embedding"]
test.shape

(1024,)

In [7]:
test.reshape(1, 1, 1024).shape

(1, 1, 1024)

In [8]:
def merge_patches(patches, coords):
    """Assemble patches into one array by writing each at its coordinates.

    Args:
        patches: Sequence of arrays. ``patches[i]`` is assigned into the
            slice described by ``coords[i]``; the last-axis length of
            ``patches[0]`` sets the channel count of the result.
        coords: Sequence of ``(y1, y2, x1, x2)`` records. Elements 0-1 give
            the row slice and elements 2-3 the column slice.

    Returns:
        np.ndarray: ``float32`` array of shape
        ``(max y2, max x2, channels)``. Cells not covered by any coordinate
        stay zero.

    Raises:
        ValueError: If ``coords`` is empty, because the canvas size cannot
            be derived.
    """
    # len() rather than truthiness: coords may be a NumPy array.
    if len(coords) == 0:
        raise ValueError("merge_patches() needs at least one coordinate, got none")

    orig_height = max(c[1] for c in coords)
    orig_width = max(c[3] for c in coords)

    num_channels = patches[0].shape[-1]
    # Allocate as float32 directly; zeros(...).astype(float32) would first
    # build a float64 canvas twice the size and then copy it.
    merged_img = np.zeros((orig_height, orig_width, num_channels), dtype=np.float32)

    for i, coord in enumerate(coords):
        y1, y2, x1, x2 = coord[0], coord[1], coord[2], coord[3]
        merged_img[y1:y2, x1:x2, :] = patches[i]

    return merged_img


In [9]:
# Give every embedding a (1, 1, C) shape so it can be placed like a
# single-cell patch with C channels. -1 lets NumPy infer C from the vector
# instead of hard-coding 1024.
df["embedding"] = df["embedding"].map(lambda x: x.reshape(1, 1, -1))

df.iloc[0]["embedding"].shape

(1, 1, 1024)

In [10]:
# Edge length each patch occupies in the re-indexed grid; passed to
# adjust_coords below.
new_size = 1

# Array of the per-patch embedding arrays, in the DataFrame's row order.
embeddings = df["embedding"].values

In [11]:
len(embeddings)

29100

In [12]:
test = df.iloc[0]["coords"]

test

'patch-0-224-0-224'

In [13]:
extract_coords(test)

(0, 224, 0, 224)

In [14]:
# Parse every patch name with extract_coords, keeping the frame's current
# row order, and expose the result as a NumPy array.
coords = df["coords"].map(extract_coords).to_numpy()

coords

array([(0, 224, 0, 224), (224, 448, 0, 224), (448, 672, 0, 224), ...,
       (42784, 43008, 33376, 33600), (43008, 43232, 33376, 33600),
       (43232, 43456, 33376, 33600)], dtype=object)

In [15]:
# Largest end offsets over all patches: element 1 for the height, element 3
# for the width.
orig_height = max(coord[1] for coord in coords)
orig_width = max(coord[3] for coord in coords)

print(f"{orig_height} {orig_width}")

43456 33600


In [16]:
get_checkpoint(coords)

194

In [17]:
adjusted_coords = adjust_coords(coords, new_size)

In [18]:
adjusted_coords

[(0, 1, 0, 1),
 (1, 2, 0, 1),
 (2, 3, 0, 1),
 (3, 4, 0, 1),
 (4, 5, 0, 1),
 (5, 6, 0, 1),
 (6, 7, 0, 1),
 (7, 8, 0, 1),
 (8, 9, 0, 1),
 (9, 10, 0, 1),
 (10, 11, 0, 1),
 (11, 12, 0, 1),
 (12, 13, 0, 1),
 (13, 14, 0, 1),
 (14, 15, 0, 1),
 (15, 16, 0, 1),
 (16, 17, 0, 1),
 (17, 18, 0, 1),
 (18, 19, 0, 1),
 (19, 20, 0, 1),
 (20, 21, 0, 1),
 (21, 22, 0, 1),
 (22, 23, 0, 1),
 (23, 24, 0, 1),
 (24, 25, 0, 1),
 (25, 26, 0, 1),
 (26, 27, 0, 1),
 (27, 28, 0, 1),
 (28, 29, 0, 1),
 (29, 30, 0, 1),
 (30, 31, 0, 1),
 (31, 32, 0, 1),
 (32, 33, 0, 1),
 (33, 34, 0, 1),
 (34, 35, 0, 1),
 (35, 36, 0, 1),
 (36, 37, 0, 1),
 (37, 38, 0, 1),
 (38, 39, 0, 1),
 (39, 40, 0, 1),
 (40, 41, 0, 1),
 (41, 42, 0, 1),
 (42, 43, 0, 1),
 (43, 44, 0, 1),
 (44, 45, 0, 1),
 (45, 46, 0, 1),
 (46, 47, 0, 1),
 (47, 48, 0, 1),
 (48, 49, 0, 1),
 (49, 50, 0, 1),
 (50, 51, 0, 1),
 (51, 52, 0, 1),
 (52, 53, 0, 1),
 (53, 54, 0, 1),
 (54, 55, 0, 1),
 (55, 56, 0, 1),
 (56, 57, 0, 1),
 (57, 58, 0, 1),
 (58, 59, 0, 1),
 (59, 60, 0, 1),

In [19]:
list(coords)

[(0, 224, 0, 224),
 (224, 448, 0, 224),
 (448, 672, 0, 224),
 (672, 896, 0, 224),
 (896, 1120, 0, 224),
 (1120, 1344, 0, 224),
 (1344, 1568, 0, 224),
 (1568, 1792, 0, 224),
 (1792, 2016, 0, 224),
 (2016, 2240, 0, 224),
 (2240, 2464, 0, 224),
 (2464, 2688, 0, 224),
 (2688, 2912, 0, 224),
 (2912, 3136, 0, 224),
 (3136, 3360, 0, 224),
 (3360, 3584, 0, 224),
 (3584, 3808, 0, 224),
 (3808, 4032, 0, 224),
 (4032, 4256, 0, 224),
 (4256, 4480, 0, 224),
 (4480, 4704, 0, 224),
 (4704, 4928, 0, 224),
 (4928, 5152, 0, 224),
 (5152, 5376, 0, 224),
 (5376, 5600, 0, 224),
 (5600, 5824, 0, 224),
 (5824, 6048, 0, 224),
 (6048, 6272, 0, 224),
 (6272, 6496, 0, 224),
 (6496, 6720, 0, 224),
 (6720, 6944, 0, 224),
 (6944, 7168, 0, 224),
 (7168, 7392, 0, 224),
 (7392, 7616, 0, 224),
 (7616, 7840, 0, 224),
 (7840, 8064, 0, 224),
 (8064, 8288, 0, 224),
 (8288, 8512, 0, 224),
 (8512, 8736, 0, 224),
 (8736, 8960, 0, 224),
 (8960, 9184, 0, 224),
 (9184, 9408, 0, 224),
 (9408, 9632, 0, 224),
 (9632, 9856, 0, 224),

In [20]:
# Plain Python list of the per-patch embedding arrays, in DataFrame row
# order; this list is what merge_patches receives below.
patch_embeddings = df["embedding"].tolist()

# NOTE(review): this displays the entire list; consider showing a slice such
# as patch_embeddings[:3] to keep the notebook output small.
patch_embeddings

[array([[[ 0.73210615,  0.6386739 , -0.11966013, ...,  1.2097605 ,
          -0.10778331,  0.24512187]]], dtype=float32),
 array([[[ 0.9347259 ,  0.3088819 ,  0.49207827, ...,  1.2703462 ,
          -0.6076199 ,  1.517688  ]]], dtype=float32),
 array([[[ 0.66905344,  0.8382592 ,  0.56285363, ...,  0.95535314,
          -0.23885727, -0.5314308 ]]], dtype=float32),
 array([[[0.712735  , 0.09961125, 0.5998612 , ..., 0.61256605,
          0.23572394, 0.07184948]]], dtype=float32),
 array([[[ 0.9970455 ,  0.22914936,  0.63671064, ...,  0.7486357 ,
          -0.25068116, -0.24798729]]], dtype=float32),
 array([[[ 1.0043981 ,  0.7118375 ,  0.58220035, ...,  0.92733634,
          -0.25411823, -0.8034626 ]]], dtype=float32),
 array([[[ 0.9999612 ,  0.54993004,  0.7385348 , ...,  0.80360705,
          -0.22085707, -0.3327944 ]]], dtype=float32),
 array([[[ 0.66881984,  0.5808019 ,  0.92981154, ...,  0.61421686,
          -0.09401707, -0.02768268]]], dtype=float32),
 array([[[ 1.4986562 ,  0.6336

In [21]:
patch_embeddings[0].shape[-1]

1024

In [22]:
# Write each embedding into its cell of the re-indexed grid, giving one
# dense array with the embedding dimension as the last axis.
merged = merge_patches(patch_embeddings, adjusted_coords)

merged.shape

(194, 150, 1024)

In [23]:
# emp = EMPatches()
# merged = emp.merge_patches(patch_embeddings, adjusted_coords)

# merged.shape