# In [None]:
import os
import argparse
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import squidpy as sq
from anndata import AnnData
import anndata

# Location of the RU1124 sample outputs; the annotated AnnData object is read
# from the .h5ad file stored directly under this directory.
basedir = "/scratch/wangm10/RU1124/"
h5ad_path = os.path.join(basedir, 'RU1124_celltypist.h5ad')
adata = anndata.read_h5ad(h5ad_path)

# Per-cell QC overview: four histograms drawn side by side, one per column of
# ``adata.obs``.
fig, axs = plt.subplots(1, 4, figsize=(15, 4))

# (panel title, adata.obs column) for each subplot, left to right.
# NOTE(review): "conf_score" is presumably the CellTypist prediction
# confidence -- confirm against the annotation step that wrote it.
qc_panels = [
    ("Total transcripts per cell", "total_counts"),
    ("Unique transcripts per cell", "n_genes_by_counts"),
    ("Nucleus area", "nucleus_area"),
    ("Confidence score", "conf_score"),
]

# Every panel is drawn through the same call so each histogram is explicitly
# bound to its own axes. The previous fourth panel used ``sns[3].histplot``,
# which subscripts the seaborn module and raises TypeError, and it never
# passed ``ax=``.
for panel_ax, (panel_title, obs_column) in zip(axs, qc_panels):
    panel_ax.set_title(panel_title)
    sns.histplot(
        adata.obs[obs_column],
        kde=False,
        ax=panel_ax,
    )

# CellTypist labels: restrict the UMAP to the most frequent predicted labels.
# NOTE: despite its name, ``top_five_labels`` holds the 7 most frequent labels
# (``nlargest(7)``); the name is kept as-is in case later code refers to it.
label_counts = adata.obs['predicted_labels'].value_counts()
top_five_labels = label_counts.nlargest(7).index

# Keep only the cells whose predicted label is among the retained ones, and
# work on an independent copy rather than a view of ``adata``.
is_top_label = adata.obs['predicted_labels'].isin(top_five_labels)
filtered_adata = adata[is_top_label].copy()

# Cast the label column to plain strings before plotting.
# NOTE(review): presumably done so labels that were filtered out no longer
# appear as categories in the legend -- confirm.
string_labels = filtered_adata.obs['predicted_labels'].astype(str)
filtered_adata.obs['predicted_labels'] = string_labels

# Two UMAP panels: one coloured by Leiden cluster, one by predicted label.
umap_colour_keys = ['leiden', "predicted_labels"]
sc.pl.umap(filtered_adata, color=umap_colour_keys, wspace=0.4, palette='Set1')