In [1]:
import sys
sys.path.append("../src")
from utils import *

In [2]:
import pickle

In [3]:
import PIL
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Input locations: the clustered dataset inside `datadir` (presumably provided
# by the star import from utils), the per-sample annotation CSV folders, and
# the processed TIFF images.
adata_file = os.path.join(datadir, "anca_samples_clustered.h5ad")
annots_folder = "../annotations_visium"
img_folder = "../tif_processed"

# Raise Pillow's pixel-count limit so that very large TIFFs can be opened.
PIL.Image.MAX_IMAGE_PIXELS = 933120000

In [5]:
# Read the clustered dataset and collect the sample names in category order.
data = sc.read(adata_file)
samples = list(data.obs["Sample"].cat.categories)

In [6]:
# Record the pixel height of each sample's processed TIFF, keyed by sample name.
sizes = {}
for sample in samples:
    tif_path = os.path.join(img_folder, sample + ".tif")
    # Opening the image only needs the header to report its size; the context
    # manager makes sure the file handle is released after each sample.
    with PIL.Image.open(tif_path) as img:
        sizes[sample] = img.size[1]  # PIL reports size as (width, height)

In [7]:
# Save the per-sample image heights to a pickle file inside `datadir`.
sizes_path = os.path.join(datadir, "image_sizes.pkl")
with open(sizes_path, "wb") as sizes_file:
    pickle.dump(sizes, sizes_file, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
def map_coords(x, y, scale_factor=9.913354971):
    """Rescale a coordinate pair by dividing both values by ``scale_factor``.

    Parameters
    ----------
    x, y : number or array-like supporting ``/``
        Coordinates to rescale.
    scale_factor : float, optional
        Divisor applied to both coordinates.

    Returns
    -------
    tuple
        ``(x / scale_factor, y / scale_factor)``.
    """
    return x / scale_factor, y / scale_factor

In [9]:
# Label every spot that falls inside the bounding box of an annotated shape.
# Spots outside all shapes keep the default label "not_annotated"; when boxes
# overlap, the shape processed last wins.
count = 0
data.obs["annot_v1"] = "not_annotated"
for sample in data.obs["Sample"].unique():
    sub = data[data.obs["Sample"]==sample]

    # Per-sample lookups do not depend on the shape, so fetch them once here
    # instead of once per shape.
    sample_spots = sub.obsm["spatial"]
    sample_index = sub.obs.index
    spot_x, spot_y = sample_spots[:, 0], sample_spots[:, 1]

    # Combined factor between the TIFF pixel grid and the coordinates stored in
    # obsm["spatial"]: sizes[sample] / hires_size relates the TIFF height to the
    # hires image height.
    # NOTE(review): assumes tissue_hires_scalef maps spot coordinates onto the
    # hires image, as in Space Ranger output -- confirm.
    spatial_meta = data.uns["spatial"][sample]
    hires_scale_factor = spatial_meta["scalefactors"]['tissue_hires_scalef']
    hires_size = spatial_meta["images"]["hires"].shape[0]
    scale_tif_to_hires = sizes[sample]/hires_size
    tif_scale_factor = hires_scale_factor*scale_tif_to_hires

    sample_dir = os.path.join(annots_folder, sample)
    annotations = os.listdir(sample_dir)
    for file in annotations:
        # Only real CSV files: a substring test would also pick up names such
        # as "x.csv.bak".
        if not file.endswith(".csv"):
            continue
        # The file name (without extension) is the annotation type.
        type_ = file[:-len(".csv")]
        label = type_.capitalize()
        shapes = pd.read_csv(os.path.join(sample_dir, file), index_col=0)

        # Rows sharing an index value are the vertices of one shape.
        for shape_n in shapes.index.unique():
            shape = shapes[shapes.index==shape_n]

            # axis-1 is used as x and axis-0 as y; map all vertices at once.
            shape_x, shape_y = map_coords(shape["axis-1"], shape["axis-0"], tif_scale_factor)
            min_x, max_x = shape_x.min(), shape_x.max()
            min_y, max_y = shape_y.min(), shape_y.max()

            # Boolean mask of the spots inside the shape's bounding box
            # (inclusive on all four edges).
            inside = (
                (spot_x >= min_x) & (spot_x <= max_x)
                & (spot_y >= min_y) & (spot_y <= max_y)
            )
            idxs = sample_index[inside]

            data.obs.loc[idxs, "annot_v1"] = label
            count = count+1
            # NOTE(review): the stored ID is count + 1 after the increment, so
            # shape IDs start at 2. Kept unchanged so previously written
            # outputs stay comparable.
            data.obs.loc[idxs, "count_v1"] = count+1

In [10]:
# Number of spots per annotation label (also reveals misspelled labels).
data.obs["annot_v1"].value_counts()

not_annotated    10226
Crescent           306
Normal             168
Scarred             47
Cresent             16
Name: annot_v1, dtype: int64

In [11]:
# Merge the misspelled "Cresent" label into "Crescent". The result is assigned
# back to the column: calling replace(..., inplace=True) on a selected column
# is chained assignment, which warns on recent pandas and leaves data.obs
# unchanged under Copy-on-Write.
data.obs["annot_v1"] = data.obs["annot_v1"].replace({"Cresent": "Crescent"})

In [12]:
# Re-check the label counts after merging the misspelled label.
data.obs["annot_v1"].value_counts()

not_annotated    10226
Crescent           322
Normal             168
Scarred             47
Name: annot_v1, dtype: int64

In [13]:
# Write the annotated dataset to a new file inside `datadir`.
annotated_file = os.path.join(datadir, "anca_samples_annotated_v1.h5ad")
data.write(annotated_file)