In [2]:
import os
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
from tifffile import imwrite
from PIL import Image
from tqdm import tqdm
from copy import deepcopy
import json
from skimage.color import gray2rgb


In [24]:
LEVEL = 1

In [3]:
adata = sc.read_h5ad("/data/projects/robin/segmentation/xenium_outs/merged_processed.h5ad")

In [5]:
adata.obs["Slide_ID"].value_counts()

Slide_ID
0011216    502183
0011287    447138
0011695    428990
0011546    407882
0011707    396753
0018775    380044
0011284    360671
0011762    307083
Name: count, dtype: int64

In [6]:
savedir = "data_level_1/"

In [7]:
savedir_adata = os.path.join(savedir, "adata")
savedir_imgs = os.path.join(savedir, "dapi")
savedir_adata, savedir_imgs

('data_level_1/adata', 'data_level_1/dapi')

In [21]:
sample = [s.split("_")[0] for s in adata.obs['Patient_Sample_ID']]
adata.obs["sample"] = sample
sample_set = adata.obs["sample"].unique()
sample_set

array(['X40', 'X39', 'X38', 'X37', 'X36', 'X35', 'X34', 'X33', 'X63',
       'X61', 'X62', 'X60', 'X59', 'X58', 'X57', 'X32', 'X31', 'X30',
       'X29', 'X28', 'X27', 'X26', 'X25', 'X56', 'X55', 'X54', 'X53',
       'X52', 'X50', 'X51', 'X49', 'X8', 'X7', 'X6', 'X4', 'X5', 'X2',
       'X3', 'X1', 'X48', 'X47', 'X46', 'X45', 'X44', 'X43', 'X42', 'X41',
       'X16', 'X15', 'X14', 'X12', 'X13', 'X11', 'X10', 'X9', 'X24',
       'X23', 'X22', 'X21', 'X20', 'X19', 'X18', 'X17'], dtype=object)

In [25]:
def cap(val, index = 0):
    if val < 0:
        val = 0
    # elif val > img.shape[index]:
    #     val = img.shape[index]
    return val

In [None]:
for sample in tqdm(sample_set):
    
    sub = adata[adata.obs["sample"]==sample]
    ID = sub.obs.Slide_ID.unique()[0]
    scale_factor = sub.uns["spatial"][ID]["scalefactors"][f"tissue_dapi_LEVEL{LEVEL}_scalef"]
    #ax=sc.pl.embedding(sub, basis="spatial", s=20, show=False)
    ylim=(sub.obsm["spatial"][:,1].min()-350, sub.obsm["spatial"][:,1].max()+350)
    xlim=(sub.obsm["spatial"][:,0].min()-350, sub.obsm["spatial"][:,0].max()+350)
    ylim = tuple(np.array(ylim)*scale_factor)
    xlim = tuple(np.array(xlim)*scale_factor)
    ylim = tuple([cap(val, 1) for val in ylim])
    xlim = tuple([cap(val, 0) for val in xlim])

    #plt.show()
    #plt.clf()

    img = sub.uns["spatial"][ID]["images"][f"dapi_LEVEL{LEVEL}"]
    cur_coords = np.concatenate([xlim, ylim])
    img_cropped = img[int(cur_coords[2]): int(np.ceil(cur_coords[3])),\
                     int(cur_coords[0]): int(np.ceil(cur_coords[1]))]

    sub.uns["spatial"][ID]["images"][f"dapi_LEVEL{LEVEL}"+"_cropped"] = img_cropped
    sub.uns["spatial"][ID]["scalefactors"]["tissue_dapi_LEVEL{LEVEL}_cropped_scalef"] = 1

    sub.obsm["spatial"][:,0] = sub.obsm["spatial"][:,0]*scale_factor-int(cur_coords[0])
    sub.obsm["spatial"][:,1] = sub.obsm["spatial"][:,1]*scale_factor-int(cur_coords[2])

    keys = list(sub.uns["spatial"].keys())
    newdict = deepcopy(sub.uns["spatial"])
    for key in keys:
        if key!=ID:
            del newdict[key]
    sub.uns["spatial"] = newdict
    
    img_to_save = Image.fromarray((sub.uns["spatial"][ID]["images"][f"dapi_LEVEL{LEVEL}"+"_cropped"]*255).astype(np.uint8))
    im_arr = (sub.uns["spatial"][ID]["images"][f"dapi_LEVEL{LEVEL}"+"_cropped"]*255).astype(np.uint8)
    folder = sub.obs.Slide_ID.unique()[0]
    if not os.path.exists(os.path.join(savedir_imgs, folder)):
        os.mkdir(os.path.join(savedir_imgs, folder))
    if not os.path.exists(os.path.join(savedir_adata, folder)):
        os.mkdir(os.path.join(savedir_adata, folder))
    img_to_save.save(os.path.join(savedir_imgs, f"{sample.split('_')[1]}.png"),"PNG")
    imwrite(os.path.join(savedir_imgs, f"{sample.split('_')[1]}.tiff"), im_arr)
    sub.write(os.path.join(savedir_adata, f"{sample.split('_')[1]}.h5ad"))

  sub.obsm["spatial"][:,0] = sub.obsm["spatial"][:,0]*scale_factor-int(cur_coords[0])
