In [2]:
import numpy as np
import networkx as nx
import pandas as pd
import anndata as ad
from anndata import AnnData
import scanpy as sc

In [3]:
# Input files: spatial counts (experiment 0) and the annotated Yao scRNAseq subsample.
spatial_counts_path = 'C:/Users/mdichgan/Documents/Helmholtz/send_to_Jakob/spatial/counts_CPc_exp0_BA28.h5ad'
sc_reference_path = 'C:/Users/mdichgan/Documents/Helmholtz/send_to_Jakob/sc/Yao_150kcells_subsample_with_annotations_sparse_subset.h5ad'

adata_exp0 = ad.read_h5ad(spatial_counts_path)
adata_Yao = ad.read_h5ad(sc_reference_path)

In [4]:
# Expose the scRNAseq annotation column "label" under the name "celltype",
# the obs key that the code further down reads cell types from.
adata_Yao.obs["celltype"] = adata_Yao.obs["label"]

In [5]:
# Normalize total counts per cell and keep the result in the "lognorm" layer.
# NOTE: log1p is currently disabled (commented out), so despite its name the
# "lognorm" layer holds normalized but not log-transformed values.
for dataset in (adata_exp0, adata_Yao):
    sc.pp.normalize_total(dataset)
    # sc.pp.log1p(dataset)
    dataset.layers["lognorm"] = dataset.X

In [7]:
#TODO: fix the NumbaDeprecationWarning - maybe with @numba.jit(nopython=True)?

def get_knn_mixing_score(adata_st: AnnData, adata_sc: AnnData, obs_key: str = "celltype", k: int = 45, ct_filter_factor: float = 2) -> None:
    """Write a per-cell kNN mixing score into ``adata_st.obs["score"]``.

    The spatial and single cell datasets are concatenated and a joint PCA is
    computed. For every cell type that occurs in both datasets with enough
    cells per modality, a neighbors graph is built on the cells of that type.
    For each cell, the fraction of its graph neighbors that are scRNAseq cells
    is compared with the expected fraction (the share of scRNAseq cells among
    all cells of that type). The score is 1 if the observed fraction equals the
    expected one and decreases linearly to 0 when none or all of the neighbors
    are scRNAseq cells.

    Parameters
    ----------
    adata_st : AnnData
        Annotated ``AnnData`` object with counts from spatial data. Needs a
        ``"lognorm"`` layer and cell type labels in ``adata_st.obs[obs_key]``.
    adata_sc : AnnData
        Annotated ``AnnData`` object with counts scRNAseq data. Needs a
        ``"lognorm"`` layer and cell type labels in ``adata_sc.obs[obs_key]``.
    obs_key : str
        Column in ``.obs`` of both objects that holds the cell type labels.
    k : int
        Number of neighbors used to build the neighbors graph.
    ct_filter_factor : float
        A cell type is only scored if each modality contributes more than
        ``ct_filter_factor * k`` cells of that type.

    Returns
    -------
    None
        The function works in place: ``adata_st.obs["score"]`` receives the
        score (0 for cells whose cell type was not scored) and a ``"modality"``
        column is added to ``.obs`` of both input objects.
    """

    adata_st.obs["modality"] = "spatial"
    adata_sc.obs["modality"] = "sc"
    adata = ad.concat([adata_st, adata_sc])

    adata_st.obs["score"] = np.zeros(adata_st.n_obs)

    # Use the "lognorm" layer as expression matrix for the PCA
    adata.X = adata.layers["lognorm"]

    # Calculate PCA (Note: we could also think about pca per cell type...)
    assert (adata.obsm is None) or ('X_pca' not in adata.obsm), "PCA already exists."
    sc.tl.pca(adata)

    # Cell types that are actually observed in both modalities. Using observed
    # labels (instead of the categories) skips categories without any cells.
    sc_cts = set(adata_sc.obs[obs_key].dropna().unique())
    st_cts = set(adata_st.obs[obs_key].dropna().unique())
    shared_cts = sc_cts & st_cts

    labels = adata.obs[obs_key]
    modality = adata.obs["modality"].to_numpy()
    min_cells = ct_filter_factor * k  # open question from the original author: why this factor?

    for ct in shared_cts:
        ct_mask = (labels == ct).to_numpy()
        is_sc = modality[ct_mask] == "sc"
        n_sc = int(is_sc.sum())
        n_st = int(is_sc.size - n_sc)

        # Both modalities must be present (otherwise the expected value is 0 or 1
        # and the score is undefined) and large enough for a meaningful graph.
        if n_sc == 0 or n_st == 0 or min(n_sc, n_st) <= min_cells:
            continue

        # Copy the subset: computing neighbors on a view would trigger an implicit copy.
        a = adata[ct_mask].copy()

        # Expected share of sc neighbors under perfect mixing
        # (open question from the original author: is this a sensible expected value?)
        exp_val = n_sc / (n_sc + n_st)

        sc.pp.neighbors(a, n_neighbors=k)
        graph = nx.Graph(incoming_graph_data=a.obsp["connectivities"])

        # Observed share of sc cells among the graph neighbors of each cell.
        # Nodes are the integer row positions of ``a``.
        sc_ratio = np.full(a.n_obs, np.nan)
        for node in graph.nodes():
            degree = graph.degree(node)
            if degree:  # cells without any neighbor keep NaN
                sc_ratio[node] = is_sc[list(graph.neighbors(node))].sum() / degree

        # Piecewise linear score: 0 at ratio 0, 1 at ratio == exp_val, 0 at ratio 1
        score = np.where(
            sc_ratio <= exp_val,
            sc_ratio / exp_val,
            (1 - sc_ratio) / (1 - exp_val),
        )

        # Spatial cells keep their relative order through concat and subsetting,
        # so the scores can be written back by position.
        st_mask = (adata_st.obs[obs_key] == ct).to_numpy()
        adata_st.obs.loc[st_mask, "score"] = score[~is_sc]


In [11]:
# Scratch version of the setup part of get_knn_mixing_score, run at top level
# so that the intermediate objects can be inspected.
adata_st, adata_sc = adata_exp0, adata_Yao
obs_key = "celltype"
k = 45
ct_filter_factor = 2


# Tag every cell with its modality and stack both datasets
adata_st.obs["modality"] = "spatial"
adata_sc.obs["modality"] = "sc"
adata = ad.concat([adata_st, adata_sc], join='inner')

adata_st.obs["score"] = np.zeros(adata_st.n_obs)

# Use the "lognorm" layer as expression matrix for the PCA
adata.X = adata.layers["lognorm"]

# Calculate PCA (Note: we could also think about pca per cell type...)
assert (adata.obsm is None) or ('X_pca' not in adata.obsm), "PCA already exists."
sc.tl.pca(adata)

# get cell type groups (read from obs_key instead of a hard-coded column name)
sc_cts = set(adata_sc.obs[obs_key].cat.categories)
st_cts = set(adata_st.obs[obs_key].cat.categories)
shared_cts = list(sc_cts.intersection(st_cts))

In [12]:
# Step-by-step scoring of a single cell type, using the objects from the setup cell.
ct = "Astro"
# Does every modality have more than ct_filter_factor * k cells of this type?
# (open question: why this factor?)
enough_cells = (adata.obs.loc[adata.obs[obs_key]==ct,"modality"].value_counts() > (ct_filter_factor * k)).all()

# Copy the subset: writing neighbors and scores into a view would trigger an implicit copy.
a = adata[adata.obs[obs_key]==ct].copy()
# Expected share of sc neighbors = share of sc cells of this type (is this a sensible expected value?)
exp_val = (a.obs.loc[a.obs["modality"]=="sc"].shape[0])/a.obs.shape[0]
sc.pp.neighbors(a,n_neighbors=k)
G = nx.Graph(incoming_graph_data=a.obsp["connectivities"])
nx.set_node_attributes(G, {i:a.obs["modality"].values[i] for i in range(G.number_of_nodes())}, "modality")

ct_df = np.zeros(a.obs.shape[0])
# Piecewise linear score: 0 at ratio 0, 1 at ratio == exp_val, 0 at ratio 1
f = np.vectorize(lambda x: x/exp_val if x>=0 and x<=exp_val else x/(exp_val-1)+1/(1-exp_val))

for i, cell in enumerate(G.nodes()):
    ct_df[i] = sum(1 for neighbor in G.neighbors(cell) if G.nodes[neighbor]["modality"]=="sc")  # number of sc neighbors
    ct_df[i] = ct_df[i]/G.degree(cell)      # ratio: number of sc neighbors / all neighbors

a.obs["score"] = f(ct_df)
# Write the scores of the spatial cells back (aligned on the obs names)
adata_st.obs.loc[adata_st.obs[obs_key] == ct, "score"] = a.obs.loc[a.obs["modality"]=="spatial","score"]


In [10]:
# Inspect the annotations (including the score) of the spatial Astro cells
is_astro = adata_exp0.obs["celltype"] == "Astro"
adata_exp0.obs.loc[is_astro]

Unnamed: 0,cell_id,ct_majority,ct_majority_cert,celltype,n_counts,n_genes,area,modality,score
Cell_45,1058,Astro,0.504065,Astro,123.0,48,1082.00,spatial,0.273932
Cell_57,1272,Astro,1.000000,Astro,77.0,27,724.60,spatial,0.360025
Cell_118,1308,Astro,0.463576,Astro,151.0,41,1862.00,spatial,0.000000
Cell_122,1191,Astro,0.914439,Astro,187.0,58,1742.00,spatial,0.000000
Cell_126,1394,Astro,1.000000,Astro,88.0,38,616.30,spatial,0.802997
...,...,...,...,...,...,...,...,...,...
Cell_23239,21597,Astro,1.000000,Astro,17.0,13,91.91,spatial,0.000000
Cell_23254,22693,Astro,1.000000,Astro,14.0,11,52.76,spatial,0.134052
Cell_23262,22052,Astro,1.000000,Astro,11.0,7,157.20,spatial,0.108628
Cell_23263,20868,Astro,1.000000,Astro,34.0,15,319.30,spatial,0.000000


In [8]:
# Test get_knn_mixing_score on the spatial / scRNAseq pair (writes adata_exp0.obs["score"])
get_knn_mixing_score(adata_st=adata_exp0, adata_sc=adata_Yao)

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit(
  @numba.jit(
  @numba.jit()


In [9]:
# Show the per-cell annotations including the new "score" column.
# (`.obs.loc["score"]` would look up a cell *named* "score" and raise a KeyError.)
adata_exp0.obs

Unnamed: 0,cell_id,ct_majority,ct_majority_cert,celltype,n_counts,n_genes,area,modality,score
Cell_0,1855,CA3,0.859629,CA3,862.0,104,7410.0,spatial,0.106910
Cell_1,21448,L6 CT CTX,1.000000,L6 CT CTX,35.0,20,361.7,spatial,0.153600
Cell_2,1464,Endo,1.000000,Endo,85.0,28,681.2,spatial,0.000000
Cell_3,1646,CA3,0.594203,CA3,276.0,71,2597.0,spatial,0.000000
Cell_4,21882,Oligo,1.000000,Oligo,39.0,16,485.3,spatial,0.000000
...,...,...,...,...,...,...,...,...,...
Cell_23277,22767,Micro-PVM,1.000000,Micro-PVM,2.0,2,,spatial,0.780965
Cell_23278,22837,Endo,1.000000,Endo,2.0,1,,spatial,0.793459
Cell_23279,23113,Endo,1.000000,Endo,1.0,1,,spatial,0.000000
Cell_23280,23114,CR,1.000000,CR,2.0,2,,spatial,0.000000
