In [1]:
from os.path import join, abspath
from os import listdir, makedirs
import pickle

import numpy as np
import pandas as pd
import h5py
import torch
from sklearn.cluster import KMeans

In [2]:
features_df = pd.read_csv("patch_dataframe.csv")

In [3]:
features_list = []
for f in listdir("_features_logs/"):
    with open(join("_features_logs", f), 'rb') as p:
        feature = pickle.load(p)
    features_list.extend(feature.cpu())

In [4]:
features_df.loc[:, "features"] = features_list
features_df["features"] = features_df["features"].apply(lambda tensor: tensor.tolist())
features_df = features_df.set_index(['file_name'], inplace=False)

In [5]:
save_dir = "."
kl = 9
R = 0.2

j = 0
mosaics = pd.DataFrame(columns=features_df.columns.append(pd.Index(["feature_cluster"])))
for slide_id in features_df.index.unique():
    print(f"started mosaic generation for {slide_id}...")
    kmeans = KMeans(n_clusters=kl, n_init=10, random_state=0)
    features = np.stack(features_df.loc[slide_id, "features"].values)
    kmeans.fit(features)
    features_df.loc[slide_id, 'feature_cluster'] = kmeans.labels_

    slide_df = features_df.loc[slide_id].copy()
    mosaic = pd.DataFrame(columns=slide_df.columns)
    for i in range(kl):
        cluster_patches = slide_df[slide_df.loc[slide_id, "feature_cluster"]==i]
        n_selected = max(1, int(len(cluster_patches) * R))

        km = KMeans(n_clusters=n_selected, n_init=10, random_state=0)
        loc_features = [[row["coord1"], row["coord2"]] for _, row in cluster_patches.iterrows()]
        ds = km.fit_transform(loc_features)

        c_selected_idx = []
        for idx in range(n_selected):
            sorted_idx = np.argsort(ds[:, idx])
            for sidx in sorted_idx:
                if sidx not in c_selected_idx:
                    c_selected_idx.append(sidx)
                    mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
                    break

    path = join(save_dir, "_mosaics_logs")
    makedirs(path, exist_ok=True)
    with open(join(path, f"mosaic{j}.pkl"), "wb") as f:
        pickle.dump(mosaic, f)
    j +=1

    mosaics = pd.concat([mosaics, mosaic], ignore_index=True)

mosaics.to_json(join(save_dir, "mosaics.json"))
# mosaics.to_hdf(join(save_dir, "mosaics.h5"), key="df", mode="w")
# features_df.to_hdf(join(save_dir, "features_with_cluster.h5"), key="df", mode="w")

print("mosaic generation completed successfully.")
    

started mosaic generation for slide1.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaics = pd.concat([mosaics, mosaic], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for slide3.svs...
started mosaic generation for slide2.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


mosaic generation completed successfully.
