In [31]:
import os, torch, numpy as np, pandas as pd
import pickle
import scipy.sparse as sps
from torch_geometric.utils import subgraph, add_remaining_self_loops
from torch_cluster import radius_graph
from collections import Counter
from torch_geometric.data import Data 
from sklearn.preprocessing import LabelEncoder

def return_true_label(patch_info,analysis_type):
    """Add an integer ``y_true`` label column to ``patch_info`` and return it.

    Class names are taken from ``patch_info.columns[6:]`` plus the extra name
    ``'unassigned'``. Names are numbered by their position in sorted order
    (the same codes ``LabelEncoder`` would assign), after which
    ``'unassigned'`` is overridden to ``-1``.

    Parameters
    ----------
    patch_info : pandas.DataFrame
        Per-patch table. Must contain an ``annotation`` column (and, for
        ``analysis_type == 'macro'``, one column per tissue type listed
        below). The frame is modified in place.
    analysis_type : str
        When equal to ``'macro'``, ``annotation`` is first overwritten with a
        tissue-type name for every row whose tissue-type column is > 0.

    Returns
    -------
    pandas.DataFrame
        The same ``patch_info`` object, with the new ``y_true`` column.
    """
    tissue_types=['dermis','subcutaneous tissue','epidermis','hole']
    # Sorted unique names -> consecutive integer codes.
    class_names=np.unique(np.array(patch_info.columns[6:].tolist()+['unassigned']))
    classes_dict={str(name):code for code,name in enumerate(class_names)}
    classes_dict['unassigned']=-1
    if analysis_type=='macro':
        # Later entries of tissue_types win when several columns are > 0 for
        # the same row, because each assignment overwrites the previous one.
        for k in tissue_types:
            patch_info.loc[patch_info[k]>0.,'annotation']=k
    mapped=patch_info['annotation'].map(classes_dict)
    # Annotations that are missing or not a known class map to NaN; treat them
    # as 'unassigned' (-1) so downstream integer casts never see NaN.
    patch_info['y_true']=mapped.fillna(-1).astype('int64')
    return patch_info

# modify, add unassigned
def create_graph_data(basename="163_A1a",
                      analysis_type="tumor",
                      radius=256,
                      min_component_size=600):
    """Build per-component graph datasets for one slide and pickle them.

    Loads ``cnn_embeddings/<analysis_type>/<basename>.pkl``, labels its
    ``patch_info`` table with ``return_true_label``, connects patches whose
    (x, y) positions are within ``radius*sqrt(2)`` of each other, and splits
    the resulting graph into connected components. Every component with at
    least ``min_component_size`` nodes becomes one ``Data`` object carrying
    features, edges, labels, positions, an "unlabelled" mask
    (``y_true == -1``) and a fixed-seed 80/10/10 train/val/test node split.
    The list of ``Data`` objects is written to
    ``graph_datasets/<analysis_type>/<basename>.pkl``.

    Parameters
    ----------
    basename : str
        Slide identifier used for both the input and output file name.
    analysis_type : str
        Sub-directory name; also forwarded to ``return_true_label``.
    radius : float
        Base neighbourhood radius; the graph uses ``radius*sqrt(2)``.
    min_component_size : int
        Components with fewer nodes than this are dropped.

    Returns
    -------
    None
        The result is only written to disk.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; only use on trusted files.
    embeddings=torch.load(os.path.join("cnn_embeddings",analysis_type,f"{basename}.pkl"))
    embeddings['patch_info']=return_true_label(embeddings['patch_info'],analysis_type)
    patch_info=embeddings['patch_info']

    # Use the GPU for the neighbour search when one is present, otherwise fall
    # back to the CPU instead of crashing.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
    xy=torch.tensor(patch_info[['x','y']].values).float().to(device)
    X=torch.tensor(embeddings['embeddings'])
    y_true=torch.tensor(patch_info['y_true'].values.flatten()).long()
    n_nodes=xy.shape[0]

    # presumably sqrt(2) is there so diagonal neighbours on a grid with spacing
    # `radius` are linked too -- TODO confirm against the patch stride.
    G=radius_graph(xy, r=radius*np.sqrt(2), batch=None, loop=True).detach().cpu()
    G=add_remaining_self_loops(G,num_nodes=n_nodes)[0]
    xy=xy.detach().cpu()

    # Connected components of the patch graph (explicit shape so isolated
    # trailing nodes can never shrink the adjacency matrix).
    edges=G.numpy().astype(int)
    adjacency=sps.coo_matrix((np.ones_like(edges[0]),(edges[0],edges[1])),
                             shape=(n_nodes,n_nodes))
    n_components,components=sps.csgraph.connected_components(adjacency)
    comp_count=Counter(components.tolist())
    components=torch.LongTensor(components)

    datasets=[]
    for i in range(n_components):
        if comp_count[i]<min_component_size:
            continue
        in_component=components==i
        G_new=subgraph(in_component,G,relabel_nodes=True)[0]
        xy_new=xy[in_component]
        X_new=X[in_component]
        y_new=y_true[in_component]

        # Same permutation as `np.random.seed(42); np.random.shuffle(...)`,
        # but drawn from a private generator so the global NumPy RNG state is
        # left untouched.
        n_sub=X_new.shape[0]
        perm=np.arange(n_sub)
        np.random.RandomState(42).shuffle(perm)
        n_train,n_val=int(0.8*n_sub),int(0.9*n_sub)
        split_masks=[]
        for chosen in (perm[:n_train],perm[n_train:n_val],perm[n_val:]):
            member=np.zeros(n_sub,dtype=bool)
            member[chosen]=True
            split_masks.append(torch.tensor(member))
        train_idx,val_idx,test_idx=split_masks

        dataset=Data(x=X_new, edge_index=G_new, y_true=y_new, edge_attr=None, pos=xy_new)
        dataset.mask=y_new==-1  # True where the node has no assigned label
        dataset.train_mask=train_idx
        dataset.val_mask=val_idx
        dataset.test_mask=test_idx
        dataset.id=basename
        dataset.component=i
        datasets.append(dataset)

    out_dir=os.path.join('graph_datasets',analysis_type)
    os.makedirs(out_dir,exist_ok=True)
    with open(os.path.join(out_dir,f"{basename}.pkl"),'wb') as fh:
        pickle.dump(datasets,fh)

In [32]:
# Single-slide run of the graph builder on one "macro" slide.
create_graph_data(
    basename="108_A1c",
    analysis_type="macro",
    radius=256,
    min_component_size=600,
)

In [34]:
import tqdm,glob
# Build graph datasets for every embedding file. The analysis type is the name
# of the file's parent directory and the basename is the file name without its
# extension; os.path is used so this does not depend on "/" separators or on
# ".pkl" appearing only at the end of the path.
for f in tqdm.tqdm(sorted(glob.glob("cnn_embeddings/*/*.pkl"))):
    analysis_type=os.path.basename(os.path.dirname(f))
    basename=os.path.splitext(os.path.basename(f))[0]
    create_graph_data(basename=basename,
                      analysis_type=analysis_type,
                      radius=256,
                      min_component_size=600)

100%|██████████| 114/114 [12:09<00:00,  6.40s/it]


In [2]:
# GENERATE THUMBNAILS

108_A1c.pkl  15_A1a.pkl  37_A2eX.pkl  44_A1c.pkl  5_A1eX.pkl  85_A1b.pkl
108_A1d.pkl  20_B1d.pkl  3_A2b.pkl    46_A2b.pkl  60_A1c.pkl  90_A2b.pkl
125_A2b.pkl  36_B2e.pkl  3_A2c.pkl    47_A1c.pkl  60_A1d.pkl  91_A2b.pkl
125_A2d.pkl  37_A1c.pkl  41_A2b.pkl   53_A1b.pkl  66_A1b.pkl  9_A7b.pkl
153_A1d.pkl  37_A2d.pkl  43_A2b.pkl   5_A1d.pkl   78_A2d.pkl


In [4]:
import tifffile, os, pandas as pd, cv2, numpy as np, pickle
def generate_thumbnail(graph_dataset_file,compression=16):
    """Write one downsampled image crop per graph in a graph-dataset pickle.

    The analysis type is taken from the name of the file's parent directory
    and the slide basename from the file name without its extension. The
    matching ``<basename>_ASAP.tif`` image is read, and for each graph the
    bounding box of ``graph.pos`` is cropped out of it and shrunk by
    ``compression`` along both axes. The list of thumbnails is pickled to
    ``thumbnails/<analysis_type>/<basename>.pkl``.

    Parameters
    ----------
    graph_dataset_file : str
        Path to a pickle holding a list of graph objects with a ``pos``
        tensor attribute.
    compression : float
        Downscaling factor; each crop is resized by ``1/compression``.

    Returns
    -------
    None
        The thumbnails are only written to disk.
    """
    analysis_type=os.path.basename(os.path.dirname(graph_dataset_file))
    basename=os.path.splitext(os.path.basename(graph_dataset_file))[0]
    img_dirname="new_skin_layers/Skin_Layer_ASAP_TIFF" if analysis_type=='macro' else "new_test_slides/ASAP_Tiff"
    img=tifffile.imread(os.path.join(img_dirname,f"{basename}_ASAP.tif"))
    # NOTE(review): unpickling runs arbitrary code; only use on trusted files.
    graph_dataset=pd.read_pickle(graph_dataset_file)
    thumbnails=[]
    for graph in graph_dataset:
        lower=graph.pos.min(0).values.numpy()
        upper=graph.pos.max(0).values.numpy()
        xmin,ymin,xmax,ymax=np.hstack([lower,upper]).astype(int)
        # NOTE(review): the first pos coordinate indexes image axis 0 here, and
        # the max position is an exclusive bound -- if pos stores patch
        # top-left corners the last row/column of patches is cropped off.
        # Confirm both against how patches were extracted.
        arr=img[xmin:xmax,ymin:ymax,:]
        thumbnails.append(cv2.resize(arr,None,fx=1/compression,fy=1/compression,interpolation=cv2.INTER_CUBIC))
    out_dir=os.path.join("thumbnails",analysis_type)
    os.makedirs(out_dir,exist_ok=True)
    with open(os.path.join(out_dir,f"{basename}.pkl"),'wb') as fh:
        pickle.dump(thumbnails,fh)

In [7]:
import tqdm, glob
# Produce thumbnails for every pickled graph dataset.
graph_dataset_files=glob.glob("graph_datasets/*/*.pkl")
for graph_dataset_file in tqdm.tqdm(graph_dataset_files):
    generate_thumbnail(graph_dataset_file,compression=16)

100%|██████████| 114/114 [20:25<00:00, 10.75s/it]
