In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from anndata import AnnData
import anndata
import matplotlib as mpl
import matplotlib.pyplot as plt
from tifffile import imread
from scipy.ndimage import gaussian_filter
from scipy.sparse import csr_matrix, isspmatrix
import itertools

In [None]:
# Load the spatial count data (used as ``adata_sp`` in the functions below).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
adata_exp0 = anndata.read_h5ad('C:/Users/mdichgan/Documents/Helmholtz/send_to_Jakob/spatial/counts_CPc_exp0_BA28.h5ad')

In [None]:
# Load the single-cell reference data (used as ``adata_sc`` in the functions below).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
adata_Yao = anndata.read_h5ad(
    'C:/Users/mdichgan/Documents/Helmholtz/send_to_Jakob/sc/Yao_150kcells_subsample_with_annotations_sparse_subset.h5ad')

In [None]:
# Copy the "label" annotation to "celltype", the default obs key of get_negative_marker_dict.
adata_Yao.obs["celltype"] = adata_Yao.obs["label"]

In [None]:
#helper function
def check_crop_exists(x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray):
    """Resolve the region to analyse from crop coordinates or from an image.

    A complete crop (all four coordinates given) takes precedence. If the crop
    is incomplete, the full extent of ``image`` is used instead. If neither a
    complete crop nor an image is available, a ValueError is raised.

    Parameters
    ----------
    x_min: int, x_max: int, y_min: int, y_max: int
        crop coordinates; may be None when ``image`` is provided
    image: np.ndarray
        image whose extent is used when the crop is incomplete. Axis 0 is
        treated as y (rows) and axis 1 as x (columns), as displayed by imshow.

    Returns
    -------
    list
        ``[[x_min, x_max], [y_min, y_max]]``, the layout expected by the
        ``range`` argument of ``np.histogram2d``

    Raises
    ------
    ValueError
        if the crop is incomplete and no image was provided
    """
    crop_is_complete = all(coord is not None for coord in (x_min, x_max, y_min, y_max))
    if crop_is_complete:
        return [[x_min, x_max], [y_min, y_max]]

    if image is None:
        raise ValueError("please provide an image or crop")

    # Image arrays are indexed (row, column) = (y, x): the x extent is the
    # number of columns (shape[1]) and the y extent the number of rows (shape[0]).
    return [[0, image.shape[1]], [0, image.shape[0]]]

In [None]:
def matrix_colorbar_plot(matrix: np.ndarray, title: str, x_min: int, x_max: int, y_min: int, y_max: int, smooth: float = 0, show_ticks: bool = False):
    """Display (smoothed) matrix as an image with a colorbar and title.

    A new figure is created for every call.

    Parameters
    ----------
    matrix: np.ndarray
        2D data; row 0 is drawn at ``y_min`` (top of the plot)
    title: str
        title of the plot
    x_min: int, x_max: int, y_min: int, y_max: int
        data coordinates covered by the matrix (used as the image extent)
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    # gaussian_filter returns an array of the input dtype, so an integer matrix
    # (e.g. a count matrix) would be truncated when smoothed. Filter as float.
    matrix = gaussian_filter(np.asarray(matrix, dtype=float), sigma=smooth)

    fig = plt.figure()
    ax = fig.add_subplot(title=title)
    # y_max is passed as "bottom" and y_min as "top": y increases downwards,
    # matching the image convention used by the other plots in this notebook.
    img = ax.imshow(matrix, interpolation='nearest', extent=[x_min, x_max, y_max, y_min])
    fig.colorbar(img, ax=ax)

    if not show_ticks:
        ax.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False)

In [None]:
def get_negative_marker_dict(adata_sp: AnnData, adata_sc: AnnData, key: str='celltype'):
    """Get dictionary of negative marker genes for different celltypes.

    A gene is a negative marker of a celltype when fewer than 0.5 % of the
    scRNAseq cells of that celltype have a value > 0 for it in ``adata_sc.X``.
    Only genes present in both data sets and celltypes with at least 10 cells
    in BOTH data sets are considered. The inputs are not modified.

    Parameters
    ----------
    adata_sp : AnnData
        Annotated ``AnnData`` object with counts from spatial data
    adata_sc : AnnData
        Annotated ``AnnData`` object with counts scRNAseq data
    key : str
        Celltype key in adata_sp.obs and adata_sc.obs

    Returns
    -------
    neg_marker_dict : dict[string: list]
        Dictionary with celltypes as keys and corresponding negative marker genes as values
    """

    # Set threshold parameters
    min_number_cells = 10    # minimum number of cells belonging to a cluster to consider it in the analysis
    max_ratio_cells = 0.005  # maximum ratio of cells expressing a marker to call it a negative marker gene-ct pair

    # Restrict the spatial data to genes that also exist in the single cell data
    adata_sp = adata_sp[:, adata_sp.var_names.isin(adata_sc.var_names)]

    # Subset adata_sc to genes of spatial data. Copy explicitly: a layer is
    # attached below, and writing to a view would trigger an implicit copy.
    adata_sc = adata_sc[:, adata_sp.var_names].copy()

    adata_sc.layers["raw"] = adata_sc.X

    # TMP fix for sparse matrices, ideally we don't convert, and instead have calculations for sparse/non-sparse
    if isspmatrix(adata_sc.layers["raw"]):
        adata_sc.layers["raw"] = adata_sc.layers["raw"].toarray()

    # Get cell types that we find in both modalities (missing labels are not cell types)
    in_both = adata_sc.obs[key].isin(adata_sp.obs[key])
    shared_celltypes = adata_sc.obs.loc[in_both, key].dropna().unique()

    # Filter cell types by minimum number of cells in EACH modality
    celltype_count_sc = adata_sc.obs[key].value_counts()
    celltype_count_sp = adata_sp.obs[key].value_counts()
    celltypes = [
        ct for ct in shared_celltypes
        if celltype_count_sc.loc[ct] >= min_number_cells and celltype_count_sp.loc[ct] >= min_number_cells
    ]

    # Filter cells to eligible cell types
    adata_sc = adata_sc[adata_sc.obs[key].isin(celltypes)]
    genes = adata_sc.var_names

    # Get ratio of positive cells per cell type
    pos_exp_sc = pd.DataFrame(adata_sc.layers["raw"] > 0, columns=genes)
    pos_exp_sc['celltype'] = list(adata_sc.obs[key])
    ratio_celltype_sc = pos_exp_sc.groupby('celltype').mean()

    # Get gene-cell type pairs with negative marker expression
    neg_marker_mask = pd.DataFrame(
        np.array(ratio_celltype_sc < max_ratio_cells), columns=genes, index=ratio_celltype_sc.index
    )
    neg_marker_dict = {celltype: list(row.index[row]) for celltype, row in neg_marker_mask.iterrows()}

    return neg_marker_dict


In [None]:
# Smoke test for get_negative_marker_dict: number of celltypes that received a negative-marker list
dict_exp0 = get_negative_marker_dict(adata_exp0, adata_Yao, key='celltype')
len(dict_exp0)

In [None]:
def get_neg_mark_read_in_corr_ct(adata_sp: AnnData, adata_sc: AnnData):
    """Add column in_correct_celltype to the spots table.

    The table ``adata_sp.uns["spots"]`` is modified in place and returned. It
    must contain the columns "Gene" and "celltype". Each spot is labelled with
    one of three strings:

    * "False"   - the celltype of the spot is a key of the negative marker dict
                  and the gene is a negative marker for that celltype
    * "True"    - the gene is not a negative marker for any celltype, or the
                  celltype is a key of the dict and the gene is not a negative
                  marker for it
    * "Unknown" - everything else

    Parameters
    ----------
    adata_sp : AnnData
        Annotated ``AnnData`` object with counts from spatial data
    adata_sc : AnnData
        Annotated ``AnnData`` object with counts scRNAseq data

    Returns
    -------
    pandas DataFrame
        the spots table including the new column
    """
    spots = adata_sp.uns["spots"]

    markers_by_celltype = get_negative_marker_dict(adata_sp, adata_sc)
    known_celltypes = list(markers_by_celltype)
    any_neg_marker = set(itertools.chain.from_iterable(markers_by_celltype.values()))

    # Default label, then mark everything we can vouch for as "True"
    spots["in_correct_celltype"] = "Unknown"
    never_negative = ~spots["Gene"].isin(any_neg_marker)
    in_known_celltype = spots["celltype"].isin(known_celltypes)
    spots.loc[never_negative | in_known_celltype, "in_correct_celltype"] = "True"

    # Finally overrule with "False" wherever a negative marker sits in its celltype
    for ct, neg_genes in markers_by_celltype.items():
        violates = spots["Gene"].isin(neg_genes) & (spots["celltype"] == ct)
        spots.loc[violates, 'in_correct_celltype'] = "False"

    return spots

In [None]:
# Test get_neg_mark_read_in_corr_ct and print the coordinate extent of the spots
# (x min, x max, y min, y max) to help choosing crops below.
df_exp0 = get_neg_mark_read_in_corr_ct(adata_sp=adata_exp0, adata_sc=adata_Yao)
print(df_exp0['x'].min(), df_exp0['x'].max(), df_exp0['y'].min(), df_exp0['y'].max())

In [None]:
def plot_spots(spots: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, show_ticks: bool = False):
    """Plot gene spots on the current matplotlib axes.

    Spot is red if entry in 'in_correct_celltype' is "False", blue if "True",
    grey if "Unknown".

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with (at least) the columns x, y, in_correct_celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates; if incomplete, the extent of ``image`` is used
    image : NDArray
        background image drawn below the spots; may be None when a complete
        crop is given, in which case only the spots are drawn
    show_ticks : bool
        default False, show no ticks or labels

    Raises
    ------
    ValueError
        if neither a complete crop nor an image is provided
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)
    x_min, x_max, y_min, y_max = np.ravel(crop).tolist()

    # Marker size shrinks with the squared crop width, so zoomed-out plots get smaller dots
    s_factor = 150000 / ((x_max - x_min) ** 2)

    # y axis is inverted (y_max at the bottom) to match the image orientation
    plt.axis([x_min, x_max, y_max, y_min])
    if image is not None:
        plt.imshow(image, cmap="binary_r")

    # Keep only the spots inside the crop
    in_crop = (spots['x'] >= x_min) & (spots['x'] <= x_max) & (spots['y'] >= y_min) & (spots['y'] <= y_max)
    visible = spots.loc[in_crop]

    # (value of in_correct_celltype, relative marker size, color, legend label); drawn in this order
    categories = [
        ("Unknown", 0.3, "grey", "insufficient information"),
        ("True", 0.5, "blue", "correctly assigned read"),
        ("False", 1, "red", "wrongly assigned read"),
    ]
    for status, rel_size, color, label in categories:
        subset = visible.loc[visible['in_correct_celltype'] == status]
        plt.scatter(subset["x"], subset["y"], s=rel_size * s_factor, color=color, label=label)

    lgnd = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), scatterpoints=1, markerscale=2)

    # Give all legend markers the same fixed size, independent of s_factor
    for handle in lgnd.legend_handles:
        handle.set_sizes([10])

    if not show_ticks:
        plt.tick_params(which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
    

In [None]:
# Read the background image that is passed as ``image`` to the plotting functions below, and show it.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
image = imread(r"C:\Users\mdichgan\Documents\Helmholtz\send_to_Jakob\tif_images\background.tiff")
plt.imshow(image)

In [None]:
# Spots of a 2000 x 2000 crop drawn on top of the background image
plot_spots(spots=df_exp0, x_min=10000, x_max=12000, y_min=10000, y_max=12000, image=image, show_ticks=True)

In [None]:
def get_neg_spot_density(spots: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Get density array of negative spots.

    Per bin, the density is the number of spots labelled "False" divided by the
    number of spots labelled "True" or "False". "Unknown" spots are ignored
    since they could be either. Bins without "True"/"False" spots get 0.

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with (at least) the columns x, y, in_correct_celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates; if incomplete, the extent of ``image`` is used
    image : NDArray
        image whose extent is the fallback region; may be None when a complete crop is given
    bins : int or array_like or [int, int] or [array, array]
        The bin specification:
        If int, the number of bins for the two dimensions (nx=ny=bins).
        If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins).
        If [int, int], the number of bins in each dimension (nx, ny = bins).
        If [array, array], the bin edges in each dimension (x_edges, y_edges = bins).
        A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges.

    Returns
    -------
    H : array of floats
        density of negative marker reads per bin, rows = y bins, columns = x bins
    range : range of binning
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)
    status = spots['in_correct_celltype']

    true_spots = spots.loc[status == "True"]
    H_t = np.histogram2d(true_spots['x'], true_spots['y'], bins, crop)[0].T

    false_spots = spots.loc[status == "False"]
    H_f = np.histogram2d(false_spots['x'], false_spots['y'], bins, crop)[0].T

    # Divide only where there are spots; empty bins keep the 0 from ``out``.
    # This avoids 0/0 without changing numpy's global error state.
    H_total = H_t + H_f
    H = np.divide(H_f, H_total, out=np.zeros_like(H_f), where=H_total > 0)

    return H, crop
    

In [None]:
# Negative-spot density on a 20 x 20 grid for an explicit crop (no image needed)
A, r = get_neg_spot_density(spots=df_exp0, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)
A.dtype

In [None]:
def plot_neg_spot_density(spots: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int,
                       image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False): 
    """Plot the density of negative spots as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_neg_spot_density`` and draws
    it with ``matrix_colorbar_plot``.

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with columns: gene, x, y, celltype, in_correct_celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    density, crop = get_neg_spot_density(spots, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=density,
        title="negative spot density",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )



In [None]:
# Same crop twice: once smoothed (sigma = 1), once unsmoothed with ticks
plot_neg_spot_density(spots=df_exp0, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=50, smooth=1)
plot_neg_spot_density(spots=df_exp0, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=50, show_ticks=True)

In [None]:
def get_spot_density(spots: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Count, per bin, the spots whose 'in_correct_celltype' is not "Unknown".

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with columns: gene, x, y, celltype, in_correct_celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)

    Returns
    -------
    H : array of floats
        number of classified spots per bin, rows = y bins, columns = x bins
    range : range of binning
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)

    # Open question from the author: does excluding "Unknown" spots make sense here?
    is_classified = spots['in_correct_celltype'] != "Unknown"
    classified = spots.loc[is_classified]

    counts = np.histogram2d(classified['x'], classified['y'], bins, crop)[0]
    return counts.T, crop


In [None]:
# Spot counts on a 20 x 20 grid for an explicit crop
get_spot_density(spots=df_exp0, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)


In [None]:
def plot_spot_density(spots: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int,
                      image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False):
    """Plot the spot density as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_spot_density`` and draws it
    with ``matrix_colorbar_plot``.

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with columns: gene, x, y, celltype, in_correct_celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    counts, crop = get_spot_density(spots, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=counts,
        title="spots density",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )

In [None]:
# Spot density of a larger (8000 x 8000) crop on a 50 x 50 grid
plot_spot_density(spots=df_exp0, x_min=2000, x_max=10000, y_min=2000, y_max=10000, image=None, bins=50, show_ticks=True)

In [None]:
def get_cells_df(spots: pd.DataFrame):
    """Get cell location DataFrame.

    Spots whose 'in_correct_celltype' is "Unknown" are discarded. The remaining
    spots are grouped by (cell, celltype), and the mean x / y of each group is
    used as the location of the cell.

    Parameters
    ----------
    spots : pd.DataFrame
        pd.DataFrame with (at least) the columns cell, celltype, x, y, in_correct_celltype

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: cell, celltype, x, y (one row per cell/celltype pair)
    """
    is_classified = spots["in_correct_celltype"] != "Unknown"
    classified = spots.loc[is_classified]

    # Mean spot position per (cell, celltype); the group keys become ordinary columns again
    centroids = classified.groupby(["cell", "celltype"])[["x", "y"]].mean()
    return centroids.reset_index()

In [None]:
# Cell locations derived from the classified spots; used by all cell-level functions below
df_cells = get_cells_df(spots=df_exp0)
df_cells

In [None]:
# Number of distinct cell ids; compare with len(df_cells), which has one row per (cell, celltype) pair.
len(df_cells["cell"].unique())
# df_cells.loc[df_cells["cell"]==5.0]

In [None]:
# Attach the per-cell metadata of adata_exp0.obs to the cell locations.
# Both frames have a "celltype" column, so the merge suffixes them with
# _x (df_cells) and _y (obs); keep the df_cells one under its original name.
# drop/rename return new frames, so their result has to be assigned.
df_t = (
    pd.merge(df_cells, adata_exp0.obs, left_on="cell", right_on="cell_id", how="inner")
    .drop(columns=["celltype_y"])
    .rename(columns={"celltype_x": "celltype"})
)
df_t

#TODO: assign it to adata_exp0.obs, ?different lengths

In [None]:
# Visual check: draw the cell locations of a 1000 x 1000 crop on top of the background image.
df = df_cells  
# Note the order of the names on the left: x_min, x_max, y_max, y_min
x_min, x_max, y_max, y_min = 11000, 12000, 12000, 11000

fig, ax = plt.subplots()
# y axis is inverted (y_max first) to match the image orientation
ax.axis([x_min, x_max, y_max, y_min])        
ax.imshow(image,cmap = "binary_r") 


# keep only the cells inside the crop
df = df.loc[(df['x']>= x_min) & (df['x']<=x_max) & (df['y']>=y_min) & (df['y']<=y_max)]

ax.scatter(df['x'], df['y'], s = 0.6, color = "red")

In [None]:
def get_cell_density(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Count the cells per bin.

    Parameters
    ----------
    df_cells : pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)

    Returns
    -------
    H : array of floats
        number of cells per bin, rows = y bins, columns = x bins
    range : range of binning
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)

    counts = np.histogram2d(df_cells['x'], df_cells['y'], bins, crop)[0]
    return counts.T, crop

In [None]:
# Cell counts on a 20 x 20 grid for an explicit crop
get_cell_density(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)

In [None]:
def plot_cell_density(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int,
                      image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False):
    """Plot the cell density as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_cell_density`` and draws it
    with ``matrix_colorbar_plot``.

    Parameters
    ----------
    df_cells : pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    counts, crop = get_cell_density(df_cells, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=counts,
        title="cell density",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )

    

In [None]:
# Cell density on a 40 x 40 grid for an explicit crop
plot_cell_density(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=40)

In [None]:
def get_celltype_density(df_cells: pd.DataFrame, celltype: str, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Get celltype density.

    Per bin, the density is the number of cells of ``celltype`` divided by the
    number of all cells in that bin. Bins without cells get 0.

    Parameters
    ----------
    df_cells : pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    celltype : str
        celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates; if incomplete, the extent of ``image`` is used
    image : NDArray
        image whose extent is the fallback region; may be None when a complete crop is given
    bins : int or array_like or [int, int] or [array, array]
        The bin specification:
        If int, the number of bins for the two dimensions (nx=ny=bins).
        If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins).
        If [int, int], the number of bins in each dimension (nx, ny = bins).
        If [array, array], the bin edges in each dimension (x_edges, y_edges = bins).
        A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges.

    Returns
    -------
    H : array of floats
        fraction of cells of ``celltype`` per bin, rows = y bins, columns = x bins
    range : range of binning
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)

    H_total = np.histogram2d(df_cells['x'], df_cells['y'], bins, crop)[0]

    of_celltype = df_cells.loc[df_cells["celltype"] == celltype]
    H_celltype = np.histogram2d(of_celltype['x'], of_celltype['y'], bins, crop)[0]

    # Divide only where there are cells; empty bins keep the 0 from ``out``
    # (0 cells, therefore also 0 % celltype). This avoids 0/0 warnings without
    # relying on numpy's global error state.
    H = np.divide(H_celltype, H_total, out=np.zeros_like(H_celltype), where=H_total > 0)

    # histogram2d puts x on axis 0; transpose so that rows are y bins
    return H.T, crop

In [None]:
# Non-square binning (10 x bins, 20 y bins) to check the axis order of the result
A = get_celltype_density(df_cells=df_cells, celltype="CA3", x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[10, 20])[0]
A.shape


In [None]:
def plot_celltype_density(df_cells: pd.DataFrame, celltype: str, x_min: int, x_max: int, y_min: int, y_max: int,
                      image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False):
    """Plot the density of one celltype as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_celltype_density`` and draws
    it with ``matrix_colorbar_plot``.

    Parameters
    ----------
    df_cells: pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    celltype : str
        celltype
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : int or array_like or [int, int] or [array, array]
        bin specification, passed on to ``np.histogram2d`` (number of bins or
        bin edges, either shared by or separate for the two dimensions)
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    fraction, crop = get_celltype_density(df_cells, celltype, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=fraction,
        title="celltype density",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )

    

In [None]:
plot_celltype_density(df_cells,"CA3", 10000,12500,10000,12500,image=None,bins = 20)
# TODO: plot the complete image
# TODO: subplots for a list of celltypes

In [None]:
# Scratch example: reduce a stack of matrices along axis 0 with max / argmax
# and map the argmax indices back to labels.
a = np.array([
    [[1.0, 1, 1], [1, 1, 1]],
    [[0, 1, 0], [2, 0, 3]],
])
print(a)
b = a.max(axis=0)
print(b)
c = a.argmax(axis=0)
print(c)
print(np.dtype(c[1, 0]))
z = ['a', 'b']

result = np.asarray(z)[c]
print(result)

In [None]:
def get_number_of_celltypes(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Get number of celltypes

    Per bin, counts how many distinct celltypes have at least one cell in it.

    Parameters
    ---------
    df_cells : pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates; if incomplete, the extent of ``image`` is used
    image : NDArray
        image whose extent is the fallback region; may be None when a complete crop is given
    bins : [int,int]
        the number of bins in each dimension (nx, ny)

    Returns
    -------
    H : array of ints
        number of celltypes per bin, rows = y bins, columns = x bins
    range : range of binning
    """
    # TODO (from the author): allow selecting a subset of celltypes

    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)
    celltypes = df_cells["celltype"].unique()

    # One boolean (ny, nx) matrix per celltype: is the celltype present in the bin?
    presence = [
        get_celltype_density(df_cells, ct, x_min, x_max, y_min, y_max, image, bins)[0] > 0
        for ct in celltypes
    ]

    if presence:
        n_celltypes = np.sum(presence, axis=0)
    else:
        # No cells at all: get_celltype_density returns (ny, nx) matrices, keep that shape
        n_celltypes = np.zeros((bins[1], bins[0]), dtype=int)

    return n_celltypes, crop

In [None]:
# Number of celltypes per bin on a 20 x 20 grid
get_number_of_celltypes(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[20, 20])[0]

In [409]:
def plot_get_number_of_celltypes(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False):    
    """Plot the number of celltypes per bin as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_number_of_celltypes`` and
    draws it with ``matrix_colorbar_plot``.

    Parameters
    ----------
    df_cells: pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : [int,int]
        the number of bins in each dimension
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    n_celltypes, crop = get_number_of_celltypes(df_cells, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=n_celltypes,
        title="number of celltypes",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )

In [None]:
# Number of celltypes per bin on a 40 x 40 grid
plot_get_number_of_celltypes(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[40, 40])

In [None]:
def get_major_celltype_perc(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins):
    """Get major celltype percentage.

    Per bin, returns the largest celltype fraction, i.e. the fraction of cells
    that belong to the most frequent celltype of that bin (0 for empty bins).

    Parameters
    ---------
    df_cells : pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates; if incomplete, the extent of ``image`` is used
    image : NDArray
        image whose extent is the fallback region; may be None when a complete crop is given
    bins : [int,int]
        the number of bins in each dimension (nx, ny)

    Returns
    -------
    H : array of floats
        major celltype percentage, rows = y bins, columns = x bins
    range : range of binning
    """
    crop = check_crop_exists(x_min, x_max, y_min, y_max, image)
    celltypes = df_cells["celltype"].unique()

    # One (ny, nx) fraction matrix per celltype
    fractions = [
        get_celltype_density(df_cells, ct, x_min, x_max, y_min, y_max, image, bins)[0]
        for ct in celltypes
    ]

    if fractions:
        major_fraction = np.max(fractions, axis=0)
    else:
        # No cells at all: get_celltype_density returns (ny, nx) matrices, keep that shape
        major_fraction = np.zeros((bins[1], bins[0]))

    # TODO (from the author): also return the name of the major celltype per bin,
    # e.g. np.take(celltypes, np.argmax(fractions, axis=0))

    return major_fraction, crop

In [None]:
# Only the returned binning range is inspected here
get_major_celltype_perc(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[20, 20])[1]

In [None]:
def plot_get_major_celltype_perc(df_cells: pd.DataFrame, x_min: int, x_max: int, y_min: int, y_max: int, image: np.ndarray, bins, smooth: float = 0, show_ticks: bool = False):    
    """Plot the major celltype percentage as an image with colorbar.

    Thin wrapper: computes the matrix with ``get_major_celltype_perc`` and
    draws it with ``matrix_colorbar_plot``.

    Parameters
    ----------
    df_cells: pd.DataFrame
        pd.DataFrame with columns: cell, celltype, x, y
    x_min : int, x_max : int, y_min : int, y_max : int
        crop coordinates
    image : NDArray
        read from image of dapi stained cell-nuclei
    bins : [int,int]
        the number of bins in each dimension
    smooth : float = 0
        sigma parameter of scipy.ndimage.gaussian_filter function
    show_ticks : bool
        default False, show no ticks or labels
    """
    major_fraction, crop = get_major_celltype_perc(df_cells, x_min, x_max, y_min, y_max, image, bins)
    left, right, top, bottom = np.ravel(crop).tolist()

    matrix_colorbar_plot(
        matrix=major_fraction,
        title="major celltype percentage",
        x_min=left,
        x_max=right,
        y_min=top,
        y_max=bottom,
        smooth=smooth,
        show_ticks=show_ticks,
    )

    

In [None]:
# Major celltype percentage on a 40 x 40 grid
plot_get_major_celltype_perc(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[40, 40])

In [410]:
def get_correlate_mats(mat_1: np.ndarray, mat_2: np.ndarray):
    """Pearson correlation coefficient between two matrices, compared element by element.

    Both matrices are flattened to 1D before the correlation is computed, so
    they must contain the same number of elements.

    Parameters
    ----------
    mat_1: np.ndarray
        first matrix
    mat_2: np.ndarray
        second matrix

    Returns
    -------
    corr_coeff : float
        off-diagonal entry of the 2 x 2 correlation matrix returned by np.corrcoef
    """
    flat_1, flat_2 = mat_1.ravel(), mat_2.ravel()
    corr_matrix = np.corrcoef(flat_1, flat_2)
    return corr_matrix[0, 1]

In [None]:
# Correlation between negative-spot density and cell density on the same 20 x 20 grid
A = get_neg_spot_density(spots=df_exp0, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)[0]
B = get_cell_density(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)[0]
get_correlate_mats(mat_1=A, mat_2=B)

In [411]:
# Correlation between cell density and number of celltypes on the same 20 x 20 grid
A = get_cell_density(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=20)[0]
B = get_number_of_celltypes(df_cells=df_cells, x_min=10000, x_max=12500, y_min=10000, y_max=12500, image=None, bins=[20, 20])[0]
get_correlate_mats(mat_1=A, mat_2=B)

0.850739572055364