## Scope
Starting point: the output of the AtoMx software, exported as flat files (.csv) in one or more folders. This notebook reads, inspects and annotates the AtoMx output, then writes it out per sample.

Requisites:
One or more folders "Exp1", "Exp2".. each containing 3 .csv outputs
- counts_file : ExpN_exprMat_file.csv
- meta_file : ExpN_metadata_file.csv
- fov_file : ExpN_fov_positions_file.csv

A "slide_mapping.tsv" file containing the mapping of the different slides "Exp1", "Exp2"...
i.e. the information on which samples are on which slide. For example: 
name	folder	time_point	individual	tissue	dataset
projX_1	Exp1	TP1	i2	spleen	cosmX
projX_1	Exp1	TP2	i1	spleen	cosmX
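Note: "name" is a short, unique alias for the slide; it is needed because the folder name ("Exp1") may be a very long string and/or not unique.
The time point, individual, tissue and dataset columns are used to annotate the AnnData for later integration. (The code below reads the individual from a column named "mouse".)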

A mapping file : FOV_mapping_corrected.tsv
contains the info of which FOV belongs to which sample on the slide
name	folder	id	FOV_idx	time_point	mouse	tissue	dataset
TOM1	ThomasOttoMouseSlide1060324	1	1	TP_2	m1	spleen	cosmX
TOM1	ThomasOttoMouseSlide1060324	1	2	TP_2	m1	spleen	cosmX

pos_file : "{slide_orig}_fov_positions_file.csv"

Optional
img_file : "{slide}_IF.jpg"

## Instructions


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import os
#import anndata as ad
import squidpy as sq

import warnings
warnings.filterwarnings('ignore')

In [None]:
def read_cosmx(folder, sample, slide):
    """Load the flat-file export of one slide through the modified importer.

    The three input files are expected in ``<folder>/<sample>/`` and to be
    named ``<sample>_exprMat_file.csv``, ``<sample>_metadata_file.csv`` and
    ``<sample>_fov_positions_file.csv``.

    Parameters
    ----------
    folder : str
        Directory that contains the per-slide export folders.
    sample : str
        Name of the export folder; also the prefix of the three file names.
    slide : str
        Passed through unchanged as the ``slide`` argument of the importer.

    Returns
    -------
    Whatever ``sq.read.nanostring_mod`` returns (presumably an AnnData
    object -- confirm against the modified importer).
    """
    # File-name suffix for each importer argument; the prefix is the sample name.
    suffix_by_argument = {
        "counts_file": "_exprMat_file.csv",
        "meta_file": "_metadata_file.csv",
        "fov_file": "_fov_positions_file.csv",
    }
    file_arguments = {
        argument: sample + suffix
        for argument, suffix in suffix_by_argument.items()
    }
    export_dir = f"{folder}/{sample}/"

    # NOTE: nanostring_mod is a modified version of the importer script.
    return sq.read.nanostring_mod(path=export_dir, slide=slide, **file_arguments)

In [None]:
def plot_fovs_to_slide(pos_file, img_file):
    """Plot the FOV positions on top of the slide image and save the figure.

    Every FOV listed in the positions file is drawn as a square labelled with
    its FOV id: cyan when ``"<slide>_<FOV>"`` is a key of
    ``adata.uns["spatial"]``, red otherwise. The background image is stretched
    to the axis limits of the scatter plot, so it has to share the coordinate
    system of the FOV positions.

    The function reads the notebook-level globals ``adata``, ``slide`` and
    ``sample_dir``; they must be defined before it is called.

    Parameters
    ----------
    pos_file : str
        CSV file with (at least) the columns ``FOV``, ``X_mm`` and ``Y_mm``.
    img_file : str
        Background image, read with ``plt.imread``.

    Side effects
    ------------
    Prints the number of matched FOVs and the axis limits, and writes
    ``<sample_dir>IF_images/<slide>_fov_positions.from.csv.png``.
    """
    coords = pd.read_csv(pos_file)
    coords["FOV"] = coords["FOV"].astype(str)

    # Plain concatenation rather than a "^" regex replace: in a regex
    # replacement template, backslashes or group references contained in the
    # slide name would be interpreted instead of being copied literally.
    spatial_keys = f"{slide}_" + coords["FOV"]
    coords["in_metaexpr"] = spatial_keys.isin(adata.uns["spatial"])

    ffovs, totfovs = int(coords["in_metaexpr"].sum()), len(coords)
    print(f"Found {ffovs} FOVs in metadata out of {totfovs} FOVs in positions file")

    col = np.where(coords["in_metaexpr"], "cyan", "red")

    figure, ax = plt.subplots()
    ax.scatter(coords["X_mm"], coords["Y_mm"], s=30, alpha=0.36, marker="s", c=col)
    for fov, x_mm, y_mm in zip(coords["FOV"], coords["X_mm"], coords["Y_mm"]):
        ax.annotate(fov, (x_mm, y_mm),
                    textcoords="offset points", xytext=(-1, -1),
                    fontsize=3, fontweight="bold", fontfamily="sans-serif", color="white")

    # Capture the limits chosen for the scatter before adding the image, so
    # the image is stretched to exactly the area spanned by the FOVs.
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()
    print(xmin, xmax, ymin, ymax)
    # Draw on the explicit axes rather than on pyplot's implicit current axes.
    ax.imshow(plt.imread(img_file), zorder=0, extent=[xmin, xmax, ymin, ymax])

    img_out = f"{sample_dir}IF_images/{slide}_fov_positions.from.csv.png"
    # The background image may live elsewhere, so make sure the target exists.
    os.makedirs(os.path.dirname(img_out), exist_ok=True)
    figure.savefig(img_out, format="png", dpi=900)

In [None]:
def annotate_fovs(mapping_file, adata, slide, slide_orig):
    """Annotate the cells in ``adata.obs`` using the FOV mapping table.

    ``adata.obs["slide_name"]`` is set to ``slide`` for every cell. Then, for
    each row of the mapping table whose ``name`` equals ``slide`` and whose
    ``folder`` equals ``slide_orig``, the cells whose ``fov`` equals the row's
    ``FOV_idx`` (compared as a string) receive the row's ``folder``,
    ``time_point``, ``mouse``, ``tissue`` and ``dataset`` values in the columns
    ``sample``, ``time_point``, ``mouse``, ``tissue`` and ``dataset``.

    Parameters
    ----------
    mapping_file : str
        Tab-separated table with a header row containing the columns
        ``name``, ``folder``, ``FOV_idx``, ``time_point``, ``mouse``,
        ``tissue`` and ``dataset``.
    adata : object exposing an ``obs`` DataFrame with a ``fov`` column
        Modified in place.
    slide : str
        Slide alias, matched against the ``name`` column.
    slide_orig : str
        Export folder name, matched against the ``folder`` column.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Target columns in adata.obs and, position by position, their source
    # columns in the mapping table ("sample" is filled from "folder").
    obs_columns = ["sample", "time_point", "mouse", "tissue", "dataset"]
    map_columns = ["folder", "time_point", "mouse", "tissue", "dataset"]

    adata.obs["slide_name"] = slide

    fov_table = pd.read_csv(mapping_file, sep="\t", header=0)
    on_this_slide = (fov_table["name"] == slide) & (fov_table["folder"] == slide_orig)

    for record in fov_table[on_this_slide].to_dict(orient="records"):
        cells_in_fov = adata.obs["fov"] == str(record["FOV_idx"])
        adata.obs.loc[cells_in_fov, obs_columns] = [record[column] for column in map_columns]

    return adata

In [None]:
def split_anndata(slides_meta_file, adata, slide, slide_orig):
    """Write one file per sample found on the given slide.

    For each row of the slide table whose ``name`` equals ``slide`` and whose
    ``folder`` equals ``slide_orig``, the cells of ``adata`` whose
    ``time_point``, ``tissue`` and ``mouse`` annotations all match the row are
    selected and written to
    ``<sample_dir><slide>/<time_point>_<mouse>_<tissue>_raw.hdf5``.

    The function reads the notebook-level global ``sample_dir``; it must be
    defined before the call.

    Parameters
    ----------
    slides_meta_file : str
        Tab-separated table with a header row containing the columns
        ``name``, ``folder``, ``time_point``, ``tissue`` and ``mouse``.
    adata : annotated data object
        Must carry ``time_point``, ``tissue`` and ``mouse`` columns in ``obs``
        and support boolean row indexing and ``.write(filename=...)``.
    slide : str
        Slide alias, matched against the ``name`` column.
    slide_orig : str
        Export folder name, matched against the ``folder`` column.

    Returns
    -------
    None. Each subset is printed and written to disk.
    """
    conditions = pd.read_csv(slides_meta_file, header=0, sep="\t")
    on_this_slide = (conditions["name"] == slide) & (conditions["folder"] == slide_orig)

    out_dir = f"{sample_dir}{slide}"
    # Make the function usable on its own; harmless when the caller already
    # created the directory.
    os.makedirs(out_dir, exist_ok=True)

    for row in conditions[on_this_slide].to_dict(orient="records"):
        tgt_tp, tgt_tis, tgt_mo = row["time_point"], row["tissue"], row["mouse"]

        # str() so that values parsed as numbers by read_csv (e.g. a
        # time_point column holding 1, 2, ...) do not break the join.
        name = "_".join(str(part) for part in (tgt_tp, tgt_mo, tgt_tis))
        tgt_file = f"{out_dir}/{name}_raw.hdf5"
        print(f"=========================\tWriting {name} to {tgt_file}")

        # The raw (uncoerced) values are used for the comparison so that they
        # match what was stored in adata.obs.
        subdata = adata[(adata.obs["time_point"] == tgt_tp) &
                        (adata.obs["tissue"] == tgt_tis) &
                        (adata.obs["mouse"] == tgt_mo)]

        print(subdata)

        # Save the subset as hdf5 for later loading.
        subdata.write(filename=tgt_file)

## Looping across the slides - START HERE

In [None]:
# Root directory of the dataset; paths below are built by plain concatenation,
# so the trailing "/" is required.
sample_dir = "/mnt/share/Projects/Proj_AN1_P.chabaudi/Datasets/CosmX/"
# Mapping tables, both located directly under sample_dir.
mapping_f = "FOV_mapping_corrected.tsv"  # FOV -> sample
mapping_s = "slide_mapping.tsv"          # slide -> samples

# One row per sample; the distinct "name" values are the slides to process.
conditions = pd.read_csv(f"{sample_dir}{mapping_s}", sep="\t", header=0)
slide_list = conditions["name"].unique()
print(slide_list)

# Explicit iterator: the following cells call next() on it, so re-running
# them processes one slide after the other.
iter_slides = iter(slide_list)
# NOTE(review): pm is not used in the cells shown here -- confirm whether it
# is still needed.
pm = {}

In [None]:
# Advance to the next slide; next() raises StopIteration once every slide in
# slide_list has been processed.
slide = next(iter_slides)

# A slide alias must map to exactly one export folder.
folders = conditions.loc[conditions["name"] == slide, "folder"].unique()
if len(folders) != 1:
    # Stop here instead of only printing: carrying on would either fail later
    # with a NameError or, worse, silently reuse slide_orig from the
    # previously processed slide and read the wrong export.
    raise ValueError(
        "Too many or too few working directories were parsed "
        f"for slide {slide!r}: {list(folders)}"
    )
slide_orig = folders[0]

print("Processing slide ", slide)

# Inputs of the FOV overview plot: the positions file of the export and the
# optional background image of the slide.
pos_file = f"{sample_dir}{slide_orig}/{slide_orig}_fov_positions_file.csv"
img_file = f"{sample_dir}IF_images/{slide}_IF.jpg"

In [None]:
## Reading the slide files
# and finding which FOVs are on which tissue
adata = read_cosmx(folder=sample_dir, sample=slide_orig, slide=slide)
print(adata)

# Check the positions of the FOVs on the slide in order to decide which FOVs
# belong to which sample.
# This is partly a workaround for a bug in the AtoMx export, which re-indexes
# and shuffles the FOV numbers irrespective of the numbers selected by the user.
# FOVs drawn in red exist in the positions file but have no counts or
# metadata, because they failed QC in AtoMx and were discarded.
# The background image must use the same coordinate system as the FOV
# positions (same origin and maximum for x and y): it must not be cropped,
# but an expanded image is OK.
plot_fovs_to_slide(pos_file=pos_file, img_file=img_file)


In [None]:
# The FOV mapping table (mapping_f) is compiled from the picture produced by
# the previous cell. annotate_fovs updates adata in place, so its return value
# is not needed here.
annotate_fovs(
    mapping_file=f"{sample_dir}{mapping_f}",
    adata=adata,
    slide=slide,
    slide_orig=slide_orig,
)
print(adata.obs)

# Split the annotated data into one file per sample, inside a folder named
# after the slide.
os.makedirs(sample_dir + slide, exist_ok=True)
split_anndata(
    slides_meta_file=f"{sample_dir}{mapping_s}",
    adata=adata,
    slide=slide,
    slide_orig=slide_orig,
)