In [5]:
import numpy as np
import networkx as nx
import pandas as pd
import anndata as ad
from anndata import AnnData
import scanpy as sc


In [None]:
def _modality_mixing_score(ratio: float, exp_val: float) -> float:
    """Map a neighbourhood sc-fraction onto a triangular score in [0, 1].

    The score is 0 when the neighbourhood consists of a single modality
    (``ratio`` <= 0 or >= 1), 1 when ``ratio`` equals the expected fraction
    ``exp_val``, and linear in between on either side of ``exp_val``.
    """
    if ratio <= 0 or ratio >= 1:
        return 0.0
    if ratio <= exp_val:
        return ratio / exp_val
    # Same line as x / (exp_val - 1) + 1 / (1 - exp_val), written in closed form.
    return (1 - ratio) / (1 - exp_val)


def get_modality_ratio_score(
    adata_st: AnnData, adata_sc: AnnData, obs_key: str = "celltype", k: int = 45
) -> pd.DataFrame:
    """Score, per cell, how well spatial and single-cell data mix among its neighbours.

    Both datasets are concatenated, a joint PCA is computed on the ``"lognorm"``
    layer, and for every cell type present in both datasets a neighbour graph
    is built on the cells of that type. For each cell, the fraction of its
    graph neighbours that come from the single-cell dataset is compared with
    the fraction expected from the cell type's overall composition.

    Parameters
    ----------
    adata_st
        Spatial dataset. Must provide ``layers["lognorm"]`` and ``obs[obs_key]``.
        It is not modified.
    adata_sc
        Single-cell dataset with the same requirements. It is not modified.
    obs_key
        Column in ``.obs`` holding the cell type labels.
    k
        ``n_neighbors`` passed to ``scanpy.pp.neighbors``.

    Returns
    -------
    pandas.DataFrame
        One row per cell of every shared cell type, with the columns
        ``celltype``, ``cell_id`` (observation name), ``ratio`` (fraction of
        neighbours from the single-cell dataset), ``exp_val`` (fraction of
        single-cell cells within that cell type) and ``score`` (see
        ``_modality_mixing_score``). ``ratio`` and ``score`` are NaN for a
        cell without any graph neighbour.

    Raises
    ------
    AssertionError
        If the concatenated object already carries ``obsm["X_pca"]``.
    KeyError
        If the ``"lognorm"`` layer or ``obs_key`` is missing.
    """
    # Label the origin of every cell through concat itself so that the
    # caller's objects are left untouched. join="inner" restricts the result
    # to the variables shared by both datasets.
    adata = ad.concat(
        [adata_st, adata_sc], join="inner", keys=["spatial", "sc"], label="modality"
    )

    # Use the log-normalised values as the expression matrix.
    adata.X = adata.layers["lognorm"]

    # Joint PCA over both modalities.
    # NOTE(review): a PCA per cell type would be an alternative worth evaluating.
    assert (adata.obsm is None) or ('X_pca' not in adata.obsm), "PCA already exists."
    sc.tl.pca(adata)

    # Cell types that actually occur in both datasets. Observed values are
    # used instead of categorical categories, so unused categories cannot
    # produce empty groups and non-categorical columns work as well.
    sc_cts = set(adata_sc.obs[obs_key].dropna().unique())
    st_cts = set(adata_st.obs[obs_key].dropna().unique())
    shared_cts = sorted(sc_cts & st_cts, key=str)

    rows = []
    for ct in shared_cts:
        # Copy the subset: neighbors() writes into the object, which a view
        # would only allow through an implicit copy.
        a = adata[adata.obs[obs_key] == ct].copy()
        is_sc = (a.obs["modality"] == "sc").to_numpy()
        exp_val = is_sc.sum() / a.n_obs

        sc.pp.neighbors(a, n_neighbors=k)
        # Node i of the graph corresponds to row i of ``a``.
        G = nx.Graph(incoming_graph_data=a.obsp["connectivities"])

        for node in G.nodes():
            # The connectivity graph is symmetrised, so the degree is not
            # necessarily equal to k.
            total_cells = G.degree(node)
            if total_cells == 0:
                ratio = np.nan
                score = np.nan
            else:
                number_modality_sc = sum(
                    1 for neighbor in G.neighbors(node) if is_sc[neighbor]
                )
                ratio = number_modality_sc / total_cells
                score = _modality_mixing_score(ratio, exp_val)
            rows.append(
                {
                    "celltype": ct,
                    "cell_id": a.obs_names[node],
                    "ratio": ratio,
                    "exp_val": exp_val,
                    "score": score,
                }
            )

    # Build the frame once; growing a DataFrame row by row is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    return pd.DataFrame(
        rows, columns=["celltype", "cell_id", "ratio", "exp_val", "score"]
    )
