In [21]:
import scanpy as sc
import scvi
from pathlib import Path
import matplotlib.pyplot as plt

In [4]:
# Project directories, resolved relative to the notebook's working directory.
DATA = Path("..") / "data"
MODEL = Path("..") / "model"

In [None]:
# Pin the global scvi-tools seed before any stochastic step (model training,
# posterior sampling, differential expression) so that a fresh
# Restart & Run All reproduces the same numbers.
scvi.settings.seed = 0

# Restore the previously trained model from disk and take the AnnData object
# attached to it; every subset below is sliced from this `adata`.
vae = scvi.model.SCVI.load(MODEL / "trained2.model")
adata = vae.adata

In [7]:
# Keep only the cells annotated as aDNT or nDNT (this is a view of `adata`).
adata_dnt = adata[adata.obs["cell_type"].isin(["aDNT", "nDNT"])]

In [None]:
# Materialise the view so the subset can be registered with scvi-tools.
adata_dnt = adata_dnt.copy()

# Register the subset: counts are read from the "counts" layer, "batch" is the
# batch column and the mitochondrial percentage is a continuous covariate.
scvi.model.SCVI.setup_anndata(
    adata_dnt,
    layer="counts",
    batch_key="batch",
    continuous_covariate_keys=["pct_counts_mt"],
)

# Fit a fresh SCVI model on the DNT subset with default settings.
vae_dnt = scvi.model.SCVI(adata_dnt)
vae_dnt.train()

In [None]:
# Persist the DNT model together with its AnnData, replacing any earlier save.
vae_dnt.save(
    MODEL / "vae_dnt",
    save_anndata=True,
    overwrite=True,
)

In [None]:
# Reload the DNT model from disk (lets later cells run without retraining).
vae_dnt = scvi.model.SCVI.load(
    MODEL / "vae_dnt"
)

In [9]:
# Work on the AnnData object attached to the (re)loaded DNT model.
adata_dnt = vae_dnt.adata

In [10]:
# Cache model outputs on the AnnData object:
#   X_scVi - latent representation (used for the neighbour graph below)
#   X_norm - normalized expression returned by the model
adata_dnt.obsm["X_scVi"] = vae_dnt.get_latent_representation()
adata_dnt.obsm["X_norm"] = vae_dnt.get_normalized_expression()

In [None]:
# Build the neighbour graph on the scVI latent space stored in obsm["X_scVi"].
sc.pp.neighbors(
    adata_dnt,
    use_rep="X_scVi",
)

In [12]:
# Leiden clustering at resolution 0.1, then a UMAP embedding; both reuse the
# neighbour graph computed on the latent space.
sc.tl.leiden(adata_dnt, resolution=0.1)
sc.tl.umap(adata_dnt)

In [None]:
# UMAP of the DNT subset coloured by the cell-type annotation.
sc.pl.umap(
    adata_dnt,
    color=["cell_type"],
)

In [None]:
# Differential expression with aDNT as group1 and nDNT as group2, with batch
# correction enabled.
# NOTE(review): only batchid1 is given (b1, b2); confirm that leaving batchid2
# at its default is intended.
de_dnt = vae_dnt.differential_expression(
    groupby="cell_type",
    group1=["aDNT"],
    group2="nDNT",
    batch_correction=True,
    batchid1=["b1", "b2"],
)

In [15]:
# Keep confidently differential genes with a negative mean log fold change.
# Presumably negative lfc_mean means higher in group2 (nDNT); that matches the
# result being stored under markers["nDNT"] in the next cell - TODO confirm.
df_markers = de_dnt.loc[
    de_dnt["proba_de"].gt(0.95)
    & de_dnt["bayes_factor"].gt(1)
    & de_dnt["lfc_mean"].lt(0)
]

In [16]:
# Marker dictionary keyed by cell type; start with the first ten nDNT hits
# in the order they appear in the filtered DE table.
markers = {"nDNT": df_markers.index[:10].tolist()}

In [None]:
# Mirror comparison: nDNT as group1 and aDNT as group2, with the same batch
# correction settings as the first comparison.
de_dnt2 = vae_dnt.differential_expression(
    groupby="cell_type",
    group1=["nDNT"],
    group2="aDNT",
    batch_correction=True,
    batchid1=["b1", "b2"],
)

In [18]:
# Same thresholds as for df_markers, applied to the mirrored comparison; the
# surviving genes are stored as aDNT markers in the next cell.
df_markers2 = de_dnt2.loc[
    de_dnt2["proba_de"].gt(0.95)
    & de_dnt2["bayes_factor"].gt(1)
    & de_dnt2["lfc_mean"].lt(0)
]

In [19]:
# Add the first ten aDNT hits to the marker dictionary.
markers["aDNT"] = df_markers2.index[:10].tolist()

In [20]:
def to_curly(gene: str) -> str:
    """Wrap a gene symbol in dollar signs, e.g. ``"CD4"`` -> ``"$CD4$"``.

    The result is used as a tick label on the dot plots in this notebook.
    NOTE(review): matplotlib treats ``$...$`` as math text, so symbols with
    characters such as ``_`` or ``^`` may render as markup - confirm this is
    acceptable for the gene names in use.
    """
    return f"${gene}$"

# Flatten the marker dictionary, in key order, into dollar-wrapped tick labels.
symbols = []
for name in markers:
    symbols += [to_curly(gene) for gene in markers[name]]

In [None]:
# Dot plot of the aDNT / nDNT marker genes, grouped by cell type.
# use_raw=True reads expression from `adata_dnt.raw`; standard_scale="var"
# rescales each gene across the groups. show=False returns the axes dict so
# the tick labels can be restyled before the figure is drawn.
ax = sc.pl.dotplot(
    adata_dnt,
    markers,
    groupby="cell_type",
    use_raw=True,
    standard_scale="var",
    show=False,
)

# Build the tick labels from the dict that was just plotted rather than from
# the notebook-wide `symbols` list: later cells rebind `symbols` for other
# marker sets, so re-running this cell afterwards would put the wrong gene
# names on this axis.
ax["mainplot_ax"].set_xticklabels(
    [to_curly(gene) for genes in markers.values() for gene in genes]
)
plt.show()

In [23]:
# Slice out the nDNT cells only (a view of the full `adata`).
adata_ndnt = adata[adata.obs["cell_type"].eq("nDNT")]

In [None]:
# Materialise the view so the subset can be registered with scvi-tools.
adata_ndnt = adata_ndnt.copy()

# Same registration as for the combined DNT model: counts layer, batch column,
# mitochondrial percentage as a continuous covariate.
scvi.model.SCVI.setup_anndata(
    adata_ndnt,
    layer="counts",
    batch_key="batch",
    continuous_covariate_keys=["pct_counts_mt"],
)

# Fit an SCVI model on nDNT cells only, with default settings.
vae_ndnt = scvi.model.SCVI(adata_ndnt)
vae_ndnt.train()

In [49]:
# Attach the latent representation and normalized expression to the nDNT
# object before it is saved with the model in the next cell.
adata_ndnt.obsm["X_scVi"] = vae_ndnt.get_latent_representation()
adata_ndnt.obsm["X_norm"] = vae_ndnt.get_normalized_expression()

In [61]:
# Persist the nDNT model together with its AnnData, replacing any earlier save.
vae_ndnt.save(
    MODEL / "vae_ndnt",
    overwrite=True,
    save_anndata=True,
)

In [None]:
# Reload the nDNT model and continue with the AnnData attached to it.
vae_ndnt = scvi.model.SCVI.load(
    MODEL / "vae_ndnt"
)
adata_ndnt = vae_ndnt.adata

In [25]:
# Recompute the cached model outputs on the reloaded object:
#   X_scVi - latent representation (used for the neighbour graph below)
#   X_norm - normalized expression returned by the model
adata_ndnt.obsm["X_scVi"] = vae_ndnt.get_latent_representation()
adata_ndnt.obsm["X_norm"] = vae_ndnt.get_normalized_expression()

In [26]:
# Neighbour graph on the nDNT latent space stored in obsm["X_scVi"].
sc.pp.neighbors(
    adata_ndnt,
    use_rep="X_scVi",
)

In [27]:
# Leiden clustering at resolution 0.1, then a UMAP embedding with
# min_dist=0.3 (the other two subsets use the default min_dist).
sc.tl.leiden(adata_ndnt, resolution=0.1)
sc.tl.umap(adata_ndnt, min_dist=0.3)

In [None]:
# UMAP of the nDNT cells coloured by Leiden cluster.
sc.pl.umap(
    adata_ndnt,
    color=["leiden"],
)

In [None]:
# Differential expression grouped by Leiden cluster; no explicit group1/group2
# is given, and the result carries a "comparison" column that is grouped on
# further below.
de_ndnt = vae_ndnt.differential_expression(groupby="leiden")

In [31]:
# Keep confidently differential genes whose mean log fold change exceeds 1.
ndnt_markers = de_ndnt.loc[
    de_ndnt["proba_de"].gt(0.95)
    & de_ndnt["bayes_factor"].gt(1)
    & de_ndnt["lfc_mean"].gt(1)
]

In [33]:
# Group the filtered hits by the comparison they came from.
markers_group = ndnt_markers.groupby(by="comparison")

In [34]:
# For every comparison keep the first ten genes, in DE-table order.
markers2 = {}
for name in markers_group.groups:
    markers2[name] = markers_group.get_group(name).index[:10].tolist()

In [35]:
# Flatten the nDNT marker dictionary, in key order, into dollar-wrapped
# tick labels.
symbols = []
for name in markers2:
    symbols += [to_curly(gene) for gene in markers2[name]]

In [None]:
# Dot plot of the per-cluster nDNT marker genes, grouped by Leiden cluster.
# use_raw=True reads expression from `adata_ndnt.raw`; standard_scale="var"
# rescales each gene across the groups. show=False returns the axes dict so
# the tick labels can be restyled before the figure is drawn.
ax = sc.pl.dotplot(
    adata_ndnt,
    markers2,
    groupby="leiden",
    use_raw=True,
    standard_scale="var",
    show=False,
)

# Build the tick labels from the dict that was just plotted rather than from
# the notebook-wide `symbols` list, which other cells rebind for different
# marker sets; this keeps the labels correct regardless of cell run order.
ax["mainplot_ax"].set_xticklabels(
    [to_curly(gene) for genes in markers2.values() for gene in genes]
)
plt.show()

In [37]:
# Slice out the aDNT cells only (a view of the full `adata`).
adata_adnt = adata[adata.obs["cell_type"].eq("aDNT")]

In [None]:
# Materialise the view so the subset can be registered with scvi-tools.
adata_adnt = adata_adnt.copy()

# Same registration as for the other models: counts layer, batch column,
# mitochondrial percentage as a continuous covariate.
scvi.model.SCVI.setup_anndata(
    adata_adnt,
    layer="counts",
    batch_key="batch",
    continuous_covariate_keys=["pct_counts_mt"],
)

# Fit an SCVI model on aDNT cells only, with default settings.
vae_adnt = scvi.model.SCVI(adata_adnt)
vae_adnt.train()

In [69]:
# Persist the aDNT model together with its AnnData, replacing any earlier save.
vae_adnt.save(
    MODEL / "vae_adnt",
    overwrite=True,
    save_anndata=True,
)

In [None]:
# Reload the aDNT model and continue with the AnnData attached to it.
vae_adnt = scvi.model.SCVI.load(
    MODEL / "vae_adnt"
)
adata_adnt = vae_adnt.adata

In [39]:
# Cache the model outputs on the AnnData object: the latent representation
# (used for the neighbour graph below) and the normalized expression.
adata_adnt.obsm["X_scVi"] = vae_adnt.get_latent_representation()
# Stored under "X_norm" (previously "X_normalized") so the key matches the one
# used for the DNT and nDNT objects in this notebook.
adata_adnt.obsm["X_norm"] = vae_adnt.get_normalized_expression()

In [40]:
# Neighbour graph on the aDNT latent space stored in obsm["X_scVi"].
sc.pp.neighbors(
    adata_adnt,
    use_rep="X_scVi",
)

In [41]:
# Leiden clustering at resolution 0.1, then a UMAP embedding; both reuse the
# neighbour graph computed on the latent space.
sc.tl.leiden(
    adata_adnt,
    resolution=0.1,
)
sc.tl.umap(adata_adnt)

In [None]:
# UMAP of the aDNT cells coloured by Leiden cluster.
sc.pl.umap(
    adata_adnt,
    color=["leiden"],
)
plt.show()

In [None]:
# Differential expression grouped by Leiden cluster; the result carries a
# "comparison" column that is grouped on further below.
de_adnt = vae_adnt.differential_expression(
    groupby="leiden",
)

In [45]:
# Keep confidently differential genes whose mean log fold change exceeds 1.
# Note that this rebinds `de_adnt` to the filtered table.
de_adnt = de_adnt.loc[
    de_adnt["proba_de"].gt(0.95)
    & de_adnt["bayes_factor"].gt(1)
    & de_adnt["lfc_mean"].gt(1)
]

In [46]:
# Group the filtered hits by the comparison they came from.
adnt_group = de_adnt.groupby(by="comparison")

In [47]:
# For every comparison keep the first ten genes, in DE-table order.
adnt_markers = {
    name: adnt_group.get_group(name).index[:10].tolist()
    for name in adnt_group.groups
}

In [49]:
# Flatten the aDNT marker dictionary, in key order, into dollar-wrapped
# tick labels.
symbols = []
for name in adnt_markers:
    symbols += [to_curly(gene) for gene in adnt_markers[name]]

In [None]:
# Dot plot of the per-cluster aDNT marker genes, grouped by Leiden cluster.
# use_raw=True reads expression from `adata_adnt.raw`; standard_scale="var"
# rescales each gene across the groups. show=False returns the axes dict so
# the tick labels can be restyled before the figure is drawn.
ax = sc.pl.dotplot(
    adata_adnt,
    adnt_markers,
    groupby="leiden",
    use_raw=True,
    standard_scale="var",
    show=False,
)

# Build the tick labels from the dict that was just plotted rather than from
# the notebook-wide `symbols` list, which other cells rebind for different
# marker sets; this keeps the labels correct regardless of cell run order.
ax["mainplot_ax"].set_xticklabels(
    [to_curly(gene) for genes in adnt_markers.values() for gene in genes]
)
plt.show()