In [47]:
import anndata as ad
from pathlib import Path
from scETM import evaluate
import scanpy as sc
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.metrics import pairwise_distances
# Configure scanpy figure parameters for this notebook: font size, figure size,
# display DPI and the DPI used when figures are saved.
sc.set_figure_params(fontsize=10, figsize=(10, 10), dpi=120, dpi_save=250)

In [3]:
# Directory that is globbed for `*.h5ad` result files further down; it is also
# passed to `evaluate` as the plot directory.
root = Path('../results/transfer')

In [9]:
# Clustering resolutions handed to `evaluate` for every file / embedding pair.
EVAL_RESOLUTIONS = [0.01, 0.02, 0.03, 0.045, 0.067, 0.10, 0.15, 0.23, 0.34]

# `Path.glob` does not guarantee any ordering, so the matches are sorted to keep
# the order of the printed headers and logged metrics stable between runs.
for fpath in sorted(root.glob("*.h5ad")):
    adata = ad.read_h5ad(fpath)
    # Run the evaluation once per embedding name; the same string also tags the
    # printed header and the plot file name.
    for name in ('delta', 'theta'):
        print(f'{fpath.name}_{name}')
        evaluate(adata, name, resolutions=EVAL_RESOLUTIONS, plot_dir=str(root), plot_fname=f'{fpath.name}_{name}')

9] INFO - scETM.eval_utils: Resolution:  0.02	ARI:  0.4125	NMI:  0.5040	bARI:  0.1142	# labels: 2
[2021-04-30 21:40:44,043] INFO - scETM.eval_utils: Resolution:  0.03	ARI:  0.7999	NMI:  0.7571	bARI:  0.0670	# labels: 4
[2021-04-30 21:40:44,260] INFO - scETM.eval_utils: Resolution: 0.045	ARI:  0.8707	NMI:  0.8288	bARI:  0.0647	# labels: 5
[2021-04-30 21:40:44,466] INFO - scETM.eval_utils: Resolution: 0.067	ARI:  0.8751	NMI:  0.8319	bARI:  0.0659	# labels: 5
[2021-04-30 21:40:44,641] INFO - scETM.eval_utils: Resolution:   0.1	ARI:  0.9358	NMI:  0.8805	bARI:  0.0548	# labels: 6
[2021-04-30 21:40:44,834] INFO - scETM.eval_utils: Resolution:  0.15	ARI:  0.9372	NMI:  0.8819	bARI:  0.0550	# labels: 6
[2021-04-30 21:40:45,013] INFO - scETM.eval_utils: Resolution:  0.23	ARI:  0.9372	NMI:  0.8819	bARI:  0.0550	# labels: 6
[2021-04-30 21:40:45,225] INFO - scETM.eval_utils: Resolution:  0.34	ARI:  0.8589	NMI:  0.8543	bARI:  0.0706	# labels: 9
[2021-04-30 21:40:45,226] INFO - scETM.eval_utils: Calc

In [103]:
def silhouette(intra, inter):
    """Return a silhouette-like contrast between two sets of distances.

    The score is ``(mean(inter) - mean(intra)) / max(mean(intra), mean(inter))``,
    where each mean is taken over the whole input via its ``.mean()`` method.

    Parameters
    ----------
    intra : array-like with ``.mean()``
        Distances within a group.
    inter : array-like with ``.mean()``
        Distances from that group to the other group.

    Returns
    -------
    scalar
        Positive when the between-group mean exceeds the within-group mean.
        There is no guard against a zero denominator.
    """
    within = intra.mean()
    between = inter.mean()
    # Same selection rule as the built-in `max(within, between)`: the second
    # value wins only when it is strictly greater.
    scale = between if between > within else within
    return (between - within) / scale

def get_silhouette(adata, name1, name2):
    """Score how well two cell-type groups separate in several representations.

    Cells whose ``adata.obs.cell_types`` value is contained in ``name1`` form
    group 1, those contained in ``name2`` form group 2. For each of ``adata.X``,
    ``adata.obsm['delta']`` and ``adata.obsm['theta']`` the pairwise distances
    among the selected cells are computed and one score per group is derived
    with ``silhouette`` (within-group block vs. cross-group block). Every score
    is printed as it is computed.

    Parameters
    ----------
    adata : AnnData-like
        Must provide ``obs.cell_types``, ``obs_names``, ``X`` and the ``obsm``
        entries ``'delta'`` and ``'theta'``.
    name1, name2 : str or list of str
        Cell-type label(s) defining group 1 and group 2. A single string is
        treated as a one-element list.

    Returns
    -------
    pandas.DataFrame
        A single column (labelled ``0``) of scores indexed by
        ``'<representation>;<comma-joined group labels>'``.
    """
    if isinstance(name1, str):
        name1 = [name1]
    if isinstance(name2, str):
        name2 = [name2]

    cell_types = adata.obs.cell_types
    # `isin` yields a plain boolean mask whatever the column dtype is; an
    # element-wise `apply` on a categorical column may not return booleans.
    mask1 = cell_types.isin(name1).to_numpy()
    mask2 = cell_types.isin(name2).to_numpy()
    n1 = int(mask1.sum())
    # Group-1 cells come first, followed by group-2 cells, so the distance
    # matrix below can be cut into blocks at row/column n1.
    sliced_adata = adata[adata.obs_names[mask1].append(adata.obs_names[mask2])]

    label1, label2 = ", ".join(name1), ", ".join(name2)
    result = dict()
    for name in ('X', 'delta', 'theta'):
        mat = sliced_adata.X if name == 'X' else sliced_adata.obsm[name]
        d = pairwise_distances(mat)
        # d11 / d22: within-group blocks; d12 / d21: cross-group blocks.
        # NOTE(review): the within-group blocks contain each cell's distance to
        # itself on the diagonal, which lowers the within-group mean — confirm
        # this is intended.
        d11, d22, d12, d21 = d[:n1, :n1], d[n1:, n1:], d[:n1, n1:], d[n1:, :n1]
        sh1 = silhouette(d11, d12).mean()
        sh2 = silhouette(d22, d21).mean()
        print(f'{name:5s} sh: {label1}: {sh1:8.5f}; {label2}: {sh2:8.5f}')
        result[f'{name};{label1}'], result[f'{name};{label2}'] = sh1, sh2
    return pd.DataFrame(list(result.values()), index=list(result.keys()))

# Per-file silhouette tables, grouped by the prefix of the result file name.
B_macro, T_macro = defaultdict(list), defaultdict(list)
# `Path.glob` does not guarantee any ordering; sorting keeps the printed report
# and the order of the collected tables stable between runs.
for fpath in sorted(root.glob("*.h5ad")):
    print(f'===== Reading {fpath} =====')
    adata = ad.read_h5ad(fpath)
    # Plain boolean array so it can index each matrix below row-wise.
    beta_mask = (adata.obs.cell_types == 'beta').to_numpy()

    # mean & std of the norm of pancreatic beta cells
    for mat, name in ((adata.X, 'X'), (adata.obsm['delta'], 'delta'), (adata.obsm['theta'], 'theta')):
        norm_beta_cells = np.linalg.norm(mat[beta_mask, :], axis=1)
        print(f'{name:5s} norm_beta_cells\tmean:{np.mean(norm_beta_cells):11.4g}\tstd:{np.std(norm_beta_cells):11.4g}')

    # Group key = file name up to the first underscore (presumably the dataset
    # the model was trained on — verify against the naming scheme).
    trained_dataset = fpath.name.split('_')[0]
    B_macro[trained_dataset].append(get_silhouette(adata, 'B_cell', 'macrophage'))
    T_macro[trained_dataset].append(get_silhouette(adata, 'T_cell', 'macrophage'))

===== Reading ..\results\transfer\FACSWithoutPancreas_MP_scETM_seed1_04_23-01_09_00.h5ad =====
X     norm_beta_cells	mean:       1560	std:       1040
delta norm_beta_cells	mean:      18.16	std:      4.557
theta norm_beta_cells	mean:     0.4037	std:    0.06401
X     sh: B_cell:  0.53528; macrophage:  0.20938
delta sh: B_cell:  0.41528; macrophage:  0.57005
theta sh: B_cell:  0.32665; macrophage:  0.62917
X     sh: T_cell:  0.69233; macrophage:  0.18929
delta sh: T_cell:  0.36717; macrophage:  0.61127
theta sh: T_cell:  0.39905; macrophage:  0.67471
===== Reading ..\results\transfer\FACSWithoutPancreas_MP_scETM_seed2_04_22-13_13_43.h5ad =====
X     norm_beta_cells	mean:       1560	std:       1040
delta norm_beta_cells	mean:       12.8	std:      3.315
theta norm_beta_cells	mean:     0.4044	std:     0.1702
X     sh: B_cell:  0.53528; macrophage:  0.20938
delta sh: B_cell:  0.48809; macrophage:  0.57373
theta sh: B_cell:  0.53028; macrophage:  0.59462
X     sh: T_cell:  0.69233; macrophage:

In [137]:
# For each collection and each group key, print the mean and standard deviation
# of the collected scores, laid out as representation x group label.
for entries in (B_macro, T_macro):
    print('\n =====')
    for name, dfs in entries.items():
        # One row per collected table, one column per '<representation>;<labels>' key.
        per_file = pd.concat(dfs, axis=1).T
        print(name)
        # Keep only the 'mean' and 'std' rows of the describe() summary.
        df = per_file.describe().loc[['mean', 'std']].T
        # Split each key at ';' into a tuple so the second level can be unstacked.
        df.index = df.index.map(lambda key: tuple(key.split(';')))
        print(df.unstack(1))


 =====
FACSWithoutPancreas
           mean                  std           
         B_cell macrophage    B_cell macrophage
X      0.535284   0.209381  0.000000   0.000000
delta  0.472786   0.592375  0.051582   0.035532
theta  0.446744   0.606953  0.106624   0.019278
FACS
           mean                  std           
         B_cell macrophage    B_cell macrophage
X      0.535284   0.209381  0.000000   0.000000
delta  0.522926   0.615144  0.033135   0.004251
theta  0.588698   0.655189  0.070755   0.024844
MousePancreas
           mean                  std           
         B_cell macrophage    B_cell macrophage
X      0.535284   0.209381  0.000000   0.000000
delta  0.101949   0.518422  0.052256   0.059006
theta  0.045195   0.740718  0.029888   0.105129

 =====
FACSWithoutPancreas
           mean                  std           
         T_cell macrophage    T_cell macrophage
X      0.692327   0.189289  0.000000   0.000000
delta  0.442187   0.624283  0.066026   0.023761
theta  0.4790