In [1]:
from os.path import join, abspath
from os import listdir, makedirs
import pickle

import numpy as np
import pandas as pd
import h5py
import torch
from sklearn.cluster import KMeans

In [2]:
features_df = pd.read_csv("patch_dataframe.csv")

In [3]:
features_list = []
for f in listdir("_features_logs/"):
    with open(join("_features_logs", f), 'rb') as p:
        feature = pickle.load(p)
    features_list.extend(feature.cpu())

In [4]:
features_df.loc[:, "features"] = features_list
features_df["features"] = features_df["features"].apply(lambda tensor: tensor.tolist())
features_df = features_df.set_index(['file_name'], inplace=False)

In [5]:
save_dir = "."
kl = 9
R = 0.2

j = 0
mosaics = pd.DataFrame(columns=features_df.columns.append(pd.Index(["feature_cluster"])))
for slide_id in features_df.index.unique():
    print(f"started mosaic generation for {slide_id}...")
    kmeans = KMeans(n_clusters=kl, n_init=10, random_state=0)
    features = np.stack(features_df.loc[slide_id, "features"].values)
    kmeans.fit(features)
    features_df.loc[slide_id, 'feature_cluster'] = kmeans.labels_

    slide_df = features_df.loc[slide_id].copy()
    mosaic = pd.DataFrame(columns=slide_df.columns)
    for i in range(kl):
        cluster_patches = slide_df[slide_df.loc[slide_id, "feature_cluster"]==i]
        n_selected = max(1, int(len(cluster_patches) * R))

        km = KMeans(n_clusters=n_selected, n_init=10, random_state=0)
        loc_features = [[row["coord1"], row["coord2"]] for _, row in cluster_patches.iterrows()]
        ds = km.fit_transform(loc_features)

        c_selected_idx = []
        for idx in range(n_selected):
            sorted_idx = np.argsort(ds[:, idx])
            for sidx in sorted_idx:
                if sidx not in c_selected_idx:
                    c_selected_idx.append(sidx)
                    mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
                    break

    path = join(save_dir, "_mosaics_logs")
    makedirs(path, exist_ok=True)
    with open(join(path, f"mosaic{j}.pkl"), "wb") as f:
        pickle.dump(mosaic, f)
    j +=1

    mosaics = pd.concat([mosaics, mosaic], ignore_index=True)

mosaics.to_json(join(save_dir, "mosaics.json"))
# mosaics.to_hdf(join(save_dir, "mosaics.h5"), key="df", mode="w")
# features_df.to_hdf(join(save_dir, "features_with_cluster.h5"), key="df", mode="w")

print("mosaic generation completed successfully.")
    

started mosaic generation for O09-03495.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaics = pd.concat([mosaics, mosaic], ignore_index=True)


started mosaic generation for O10-12717.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for O14-02301.svs...
started mosaic generation for O16-11870.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for O16-18464.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for O17-01529.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for O18-12772.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S08-31466.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S09-15833.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S09-19750.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S10-00647.svs...
started mosaic generation for S10-09633.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S11-15267.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S11-24835.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S12-07776.svs...
started mosaic generation for S12-22754.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S12-28303.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S12-35886.svs...
started mosaic generation for S13-07627.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S13-08586.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S13-11381.svs...
started mosaic generation for S13-15979.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S13-31494.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S13-33035.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-01774.svs...
started mosaic generation for S14-04504.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-12226.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-12770.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-12928.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-13440.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-13994.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S14-15197.svs...
started mosaic generation for S14-16118.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-00942.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-02790.svs...
started mosaic generation for S15-03056.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-05781.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-05979.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-06857.svs...
started mosaic generation for S15-10140.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-12411.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-12838.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-13623.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-14834.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-15318.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-20350.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-27036.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S15-28568.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-00356.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-01567.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-06544.svs...
started mosaic generation for S16-08071.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-11632.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-12501.svs...
started mosaic generation for S16-19596.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-20727.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-23506.svs...
started mosaic generation for S16-28041.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-28729.svs...
started mosaic generation for S16-29941.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-30001.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-30545.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S16-32975.svs...
started mosaic generation for S17-00215.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-00970.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-08236.svs...
started mosaic generation for S17-08865.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-08918.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-14190.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-14334.svs...
started mosaic generation for S17-20728.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S17-22163.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-03825.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-03934.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-05363.svs...
started mosaic generation for S18-09519.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-10074.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-12553.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-15441.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-18704.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-25097.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-27506.svs...
started mosaic generation for S18-30424.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)
  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


started mosaic generation for S18-31022.svs...
started mosaic generation for S18-32412.svs...


  mosaic = pd.concat([mosaic, cluster_patches.iloc[sidx:sidx+1]], ignore_index=True)


mosaic generation completed successfully.
