# Muon analysis for CD4 T cells

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
import muon.atac as ac
import muon.prot as pt
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import median_abs_deviation
import sc_toolbox as sct

In [None]:
print(sns.__version__)

In [None]:
import matplotlib
plt.rcParams['figure.dpi'] = 100
matplotlib.rcParams['pdf.fonttype'] = 42

In [None]:
# define working path
data_path = '/home/jupyter/data/preRA_teaseq/EXP-00243/totalVI/'
fig_path = '/home/jupyter/data/preRA_teaseq/EXP-00243/totalVI/figures/'
output_path = '/home/jupyter/data/preRA_teaseq/EXP-00243/totalVI/output_results/'

# Make sure the output folders exist: several cells below write figures with
# `figure.savefig(fig_path + ...)`, which fails when the folder is missing.
# exist_ok=True keeps the cell safe to re-run.
import os
for _out_dir in (fig_path, output_path):
    os.makedirs(_out_dir, exist_ok=True)

# define a project name (prefix of every saved figure)
proj_name = 'PreRA_TEASeq_CD4T_'

# scanpy figure settings: save into fig_path, no automatic saving
sc.settings.figdir = fig_path
sc.settings.autosave = False
sc.set_figure_params(vector_friendly=True, dpi_save=300)

In [None]:
# 10-colour palette (RGBA hex codes)
npg_color = [
    "#E64B35FF", "#4DBBD5FF", "#00A087FF", "#3C5488FF", "#F39B7FFF",
    "#8491B4FF", "#91D1C2FF", "#DC0000FF", "#7E6148FF", "#B09C85FF",
]

# 30-colour palette for cluster plots
cluster_colors = [
    "#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", "#B17BA6",
    "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", "#33A02C", "#B2DF8A",
    "#55A1B1", "#8DD3C7", "#A6761D", "#E6AB02", "#7570B3", "#BEAED4",
    "#666666", "#999999", "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3",
    "#808000", "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00",
]

# two-colour palette used for the `status` plots
ari_colors = [
    '#5AAA46',
    '#F59F00',
]

# load data

In [None]:
# load the three modalities data with motif
cd4_mdata = mu.read(data_path+'PreRA_teaseq_cd4_3modality.h5mu')

In [None]:
cd4_mdata

In [None]:
# update the aifi labels with majority voting
tea_aifi_labels = pd.read_csv('/home/jupyter/data/preRA_teaseq/EXP-00243/totalVI/output_results/PreRA_TEASeq_aifi_labels.csv',
                              index_col=0)
tea_aifi_labels.head()

In [None]:
# remove the orinigal labels
labels_cols = ['immunehealth_l1', 'immunehealth_l1_conf_score', 'immunehealth_l2',
               'immunehealth_l2_conf_score',	'immunehealth_l3',	'immunehealth_l3_conf_score']
cd4_mdata.obs = cd4_mdata.obs.drop(columns=labels_cols)

In [None]:
# add back the updated labels
cd4_mdata.obs = cd4_mdata.obs.merge(
    tea_aifi_labels, how='left', left_index=True, right_index=True)

In [None]:
cd4_mdata.update()

# rerun umap for each modality

## process rna

In [None]:
# work on the RNA modality
rna = cd4_mdata['rna']

In [None]:
# redo the normalization: start again from the stored counts layer,
# then normalize per cell and log1p-transform

rna.X = rna.layers['counts'].copy()
sc.pp.normalize_total(rna)
sc.pp.log1p(rna)

In [None]:
# rna.layers['counts'][1:25, 1:25].toarray()

In [None]:
# save log-normalised counts in the .raw slot:
rna.raw = rna
# scale; values are clipped at max_value=10
sc.pp.scale(rna, max_value=10)

In [None]:
# distribution of total counts per cell
p1 = sns.histplot(rna.obs["total_counts"], bins=100, kde=False)

In [None]:
rna

In [None]:
# %%time
# sc.pp.highly_variable_genes(rna, min_mean=0.0125, max_mean=3, min_disp=0.5)

In [None]:
# NOTE(review): the highly_variable_genes computation above is commented out,
# so this plot presumably relies on HVG statistics already stored in the
# loaded object — confirm
sc.pl.highly_variable_genes(rna)

In [None]:
# # run umap in all modality
# for m in cd4_mdata.mod.keys():
#     sc.tl.pca(cd4_mdata[m], svd_solver='arpack')
#     sc.pp.neighbors(cd4_mdata[m])
#     sc.tl.umap(cd4_mdata[m], random_state=1)

In [None]:
# redo the PCA
sc.tl.pca(rna, svd_solver='arpack')
sc.pl.pca_variance_ratio(rna, log=True)

In [None]:
# neighbor graph and UMAP for the RNA modality
sc.pp.neighbors(rna)
sc.tl.umap(rna)

In [None]:
# run clusters (Leiden, resolution 1)
sc.tl.leiden(rna, resolution=1)

In [None]:
# sub-cluster only cluster '3' of the 'leiden' result at resolution 0.5;
# the refined labels are written to 'leiden_c3'
sc.tl.leiden(rna, resolution=0.5, restrict_to=[
             'leiden', ['3']], key_added='leiden_c3')

In [None]:
cd4_mdata.update()

In [None]:
cd4_mdata

In [None]:
# plot the RNA umap coloured by cell-type annotations and save it
mu.pl.embedding(cd4_mdata, basis="rna:X_umap", legend_fontsize=6,
                color=["clean_l2_cell_types", 'immunehealth_l3'],
                save=proj_name+'cd4_rnaumap.pdf')

In [None]:
# plot the RNA umap coloured by immunehealth_l3 (not saved)
mu.pl.embedding(cd4_mdata, basis="rna:X_umap", color=['immunehealth_l3'])

In [None]:
# plot the RNA umap coloured by the refined Leiden clusters
mu.pl.embedding(cd4_mdata, basis="rna:X_umap", color=[
                'rna:leiden_c3'], legend_loc="on data")

## process ADT

In [None]:
# fetch the protein (ADT) modality
adt = cd4_mdata['prot']
adt

In [None]:
# remove isotype in the adt analysis: keep only the features flagged
# 'highly_variable' in adt.var
# NOTE(review): .copy() makes `adt` a separate object from cd4_mdata['prot'],
# so results computed on it are not stored in cd4_mdata unless copied back
adt = adt[:, adt.var[adt.var['highly_variable']].index].copy()

In [None]:
adt

In [None]:
cd4_mdata.update()

In [None]:
adt.var

In [None]:
# run pca on the filtered ADT data and inspect the explained variance
sc.tl.pca(adt)
sc.pl.pca_variance_ratio(adt, log=True)

In [None]:
# neighbor graph and UMAP on the protein (ADT) data
sc.pp.neighbors(adt)
sc.tl.umap(adt)

# `adt` may be a detached copy of the protein modality (it is when it was
# subset with `.copy()`), in which case the results above never reach
# cd4_mdata and 'prot:X_umap' / the prot neighbor graph are missing or stale.
# Copy them back; only the features were subset, so the cells still align.
prot_full = cd4_mdata['prot']
if adt is not prot_full:
    for _key in ('X_pca', 'X_umap'):
        if _key in adt.obsm:
            prot_full.obsm[_key] = adt.obsm[_key].copy()
    for _key in ('distances', 'connectivities'):
        if _key in adt.obsp:
            prot_full.obsp[_key] = adt.obsp[_key].copy()
    for _key in ('neighbors', 'umap'):
        if _key in adt.uns:
            prot_full.uns[_key] = adt.uns[_key]

In [None]:
cd4_mdata.update()

In [None]:
cd4_mdata

In [None]:
# plot the protein umap stored in the 'prot' modality, coloured by the
# cell-type annotations, and save it
mu.pl.embedding(cd4_mdata, basis="prot:X_umap",
                color=["clean_l2_cell_types",
                       'immunehealth_l2',  'immunehealth_l3'],
                save=proj_name+'cd4_adtumap.png',
                legend_loc="on data")

## process atac

In [None]:
# fetch the ATAC modality
atac = cd4_mdata['atac']
atac

In [None]:
# neighbor graph on the stored 'X_LSI' representation, then a UMAP
# NOTE(review): the plots below use the 'atac:X_ArchRumap' embedding,
# not the UMAP computed here
sc.pp.neighbors(atac, use_rep='X_LSI')
sc.tl.umap(atac)

In [None]:
# run clusters (Leiden, resolution 0.8)
sc.tl.leiden(atac, resolution=0.8)

In [None]:
cd4_mdata.update()

In [None]:
atac

In [None]:
# plot the ArchR umap with annotations and ATAC Leiden clusters; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=["clean_l2_cell_types", 'immunehealth_l2',
                       'immunehealth_l3', 'atac:leiden'],
                save=proj_name + 'cd4_ArchRumap.png',
                legend_loc="on data")

In [None]:
# plot the ArchR umap coloured by clean_l2_cell_types; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=["clean_l2_cell_types"],
                save=proj_name+'cd4_ArchRumap_clean_l2_cell_types.png')

In [None]:
# plot the ArchR umap coloured by the 'Clusters' column; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=["Clusters"],
                save=proj_name+'cd4_ArchRumap_Clusters.png')

In [None]:
# plot the ArchR umap with annotations, cohort and ATAC Leiden clusters
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=["clean_l2_cell_types", 'immunehealth_l3', 'cohort',
                       'atac:leiden'], legend_loc="on data")

In [None]:
# plot the ArchR umap coloured by cohort
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['cohort'], legend_loc="on data")

In [None]:
# plot the ArchR umap coloured by immunehealth_l3
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", color=['immunehealth_l3'])

## run 3wnn

In [None]:
cd4_mdata

In [None]:
# multimodal neighbor graph on the whole MuData object, stored under 'wnn'
mu.pp.neighbors(cd4_mdata, key_added='wnn')

In [None]:
# UMAP on the 'wnn' graph with a fixed random seed
mu.tl.umap(cd4_mdata, neighbors_key='wnn', random_state=10)

In [None]:
# Leiden clustering on the 'wnn' graph at resolution 0.8
sc.tl.leiden(cd4_mdata, resolution=0.8, neighbors_key='wnn',
             key_added='leiden_wnn_0.8')

# Every later cell (plots and the cluster -> label mapping) reads
# obs['leiden_wnn'], which this notebook never writes. Provide it from the
# clustering above when the loaded object does not carry one. An existing
# column is left untouched so previously curated cluster ids are not
# silently replaced.
if 'leiden_wnn' not in cd4_mdata.obs:
    cd4_mdata.obs['leiden_wnn'] = cd4_mdata.obs['leiden_wnn_0.8']

In [None]:
# per-modality weights on the WNN umap
mu.pl.umap(cd4_mdata, color=['rna:mod_weight',
           'prot:mod_weight', 'atac:mod_weight'], cmap='RdBu')

In [None]:
# IL21 RNA expression on the WNN umap; saved
mu.pl.umap(cd4_mdata, color=['rna:IL21'], save=proj_name+'IL21rna_umap.pdf')

In [None]:
# plot the ArchR umap coloured by clean_l2_cell_types; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=["clean_l2_cell_types"], legend_loc="none",
                save=proj_name+'clean_l2_cell_types_Archr_umap.pdf')

In [None]:
# plot the RNA umap coloured by immunehealth_l3; saved
mu.pl.embedding(cd4_mdata, basis="rna:X_umap", title='rna_umap', legend_loc=None,
                color=['immunehealth_l3'],
                save=proj_name+'immunehealth_l3_rnaumap.png')

In [None]:
# plot the protein umap coloured by immunehealth_l3; saved
mu.pl.embedding(cd4_mdata, basis="prot:X_umap", title='adt_umap', legend_loc=None,
                color=['immunehealth_l3'],
                save=proj_name+'immunehealth_l3_adtUmap.png')

In [None]:
# plot the ArchR umap coloured by immunehealth_l3; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", title='ATAC_umap', legend_loc=None,
                color=['immunehealth_l3'],
                save=proj_name+'immunehealth_l3_ATACUmap.png')

In [None]:
cd4_mdata

In [None]:
# WNN umap coloured by immunehealth_l2; saved
mu.pl.umap(cd4_mdata, color=["immunehealth_l2"],
           # palette=cluster_colors,
           save=proj_name + 'immunehealth_l2_3wnn_umap.pdf')

In [None]:
# WNN umap coloured by obs['leiden_wnn']; saved
# NOTE(review): the Leiden call in this notebook writes its result to
# 'leiden_wnn_0.8', not 'leiden_wnn' — confirm which column is intended
mu.pl.umap(cd4_mdata, color=["leiden_wnn"], legend_loc='on data',
           # palette=cluster_colors,
           save=proj_name + 'leiden_3wnn_umap.pdf')

### label cd4 t cells by clusters

In [None]:
# Collapse the WNN Leiden clusters into broad CD4 T cell labels.
# A single dict lookup replaces the chain of `.loc` assignments that wrote
# strings into an integer column (silent upcasting, deprecated in recent
# pandas) and then re-tested a column holding both ints and strings.
wnn_label_clusters = {
    'CD4 Naive': [0, 1, 2, 5, 12],
    'Treg': [8, 15],
    'CD4 CTL': [11],
    'CD4 Memory': [3, 6, 9, 10, 14, 4, 7, 13],
}
wnn_cluster_to_label = {
    cluster_id: label
    for label, cluster_ids in wnn_label_clusters.items()
    for cluster_id in cluster_ids
}

wnn_cluster_ids = cd4_mdata.obs['leiden_wnn'].astype('int')
# clusters that are not listed keep their numeric id as a string, as before
cd4_mdata.obs['3wnn_labels'] = wnn_cluster_ids.map(
    lambda cluster_id: wnn_cluster_to_label.get(cluster_id, str(cluster_id))
).astype('str')

In [None]:
# WNN umap coloured by the broad labels, using the npg palette; saved
mu.pl.umap(cd4_mdata, color=["3wnn_labels"], legend_loc='on data',
           palette=npg_color,
           save=proj_name + '3wnn_labels_types_3wnn_umap.pdf')

In [None]:
cd4_mdata.obs['cohort']

In [None]:
# derive `status` from `cohort`: 'Healthy' -> 'CON2', 'pre-RA' -> 'ARI';
# any other value is kept as its string form
cohort_to_status = {'Healthy': 'CON2', 'pre-RA': 'ARI'}
cd4_mdata.obs['status'] = (
    cd4_mdata.obs['cohort'].astype('str').replace(cohort_to_status)
)

In [None]:
# WNN umap of the broad labels with custom axis labels and title;
# show=False returns the object that is then labelled and saved by hand
p1 = mu.pl.umap(cd4_mdata, color=["3wnn_labels"], legend_loc='on data',
                palette=npg_color, show=False)
p1.set_xlabel("WNN UMAP1")
p1.set_ylabel("WNN UMAP2")
p1.set_title('Cell types')
p1.figure.savefig(fig_path+proj_name + '3wnn_labels_3wnn_umap.pdf')

In [None]:
cd4_mdata

In [None]:
# lower-case alias of the ArchR umap; the density plot below addresses it
# as basis='archrumap'
atac.obsm['X_archrumap'] = atac.obsm['X_ArchRumap']
atac.obs['status'] = cd4_mdata.obs['status']

In [None]:
# embedding density per status group
# NOTE(review): presumably embedding_density lower-cases the basis name,
# which is why the lower-case alias above is needed — confirm
sc.tl.embedding_density(atac, basis='ArchRumap', groupby='status')

In [None]:
sc.pl.embedding_density(
    atac, basis='archrumap', key='archrumap_density_status',
    save=proj_name + 'archrumap_status_density.pdf'
)

In [None]:
# plot the ArchR umap coloured by status
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", palette=ari_colors,
                color=['status'])

In [None]:
rna.obs['status'] = cd4_mdata.obs['status']

In [None]:
# plot IL21 in cells annotated 'cd4_naive', split by status; saved
with plt.rc_context({"figure.figsize": (2, 1), "figure.dpi": (300)}):
    sc.pl.violin(rna[cd4_mdata.obs['clean_l2_cell_types'] == 'cd4_naive'],
                 'IL21', groupby='status', size=0, save=proj_name+'cd4na_IL21_rna.pdf')

In [None]:
rna.obs['3wnn_labels'] = cd4_mdata.obs['3wnn_labels']

In [None]:
cd4_mdata.var_names

In [None]:
# plot IL21
# sc.pl.violin(cd4_mdata, 'rna:IL21', groupby='3wnn_labels', size=0, save=proj_name+'cd4na_IL21_rna.pdf')

In [None]:
rna.obs['status']

In [None]:
# Two stacked WNN umap panels: cell types (top) and status (bottom).
# Number of Axes & plot size
ncols = 1
nrows = 2
figsize = 4
# extra figure height, as a fraction of one panel. This assignment was
# commented out although `wspace` is used below, which raised a NameError.
wspace = 0.5
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize, nrows * figsize + figsize * wspace),
)

# top panel: cell types, no legend
mu.pl.umap(cd4_mdata, color=["clean_l2_cell_types"],
           frameon=True, ax=axs[0], outline_width=0, legend_loc=None, show=False)
axs[0].set_xlabel("")
axs[0].set_ylabel("")
axs[0].set_title('cell types')
# axs[0].figure is the whole figure, so this file also contains the
# (still empty) bottom panel
axs[0].figure.savefig(fig_path+proj_name + 'clean_l2_cell_types_3wnn_umap.pdf')

# bottom panel: status
mu.pl.umap(cd4_mdata, color=["status"], frameon=True, ax=axs[1],
           outline_width=0, show=False)
axs[1].set_xlabel("WNN UMAP1")
axs[1].set_ylabel("WNN UMAP2")
axs[1].figure.savefig(fig_path+proj_name + 'status_3wnn_umap.pdf')

# combined figure with both panels
fig.savefig(fig_path+proj_name + 'clean_l2_cell_types_status_3wnn_umap.pdf')

In [None]:
# CD62L protein on the WNN umap
mu.pl.umap(cd4_mdata, color=["prot:CD62L"])

In [None]:
# WNN umap coloured by immunehealth_l3; saved
mu.pl.umap(cd4_mdata, color=[
    'immunehealth_l3'],
    save=proj_name + 'immunehealth_l3_3wnn_umap.png')

In [None]:
cd4_mdata

In [None]:
# WNN umap: labels, clusters and selected protein / RNA markers;
# colour scales are clipped at the 1st and 99th percentile; saved
mu.pl.umap(cd4_mdata, legend_loc="on data",
           color=['3wnn_labels', 'immunehealth_l3', 'leiden_wnn',
                  'prot:CD49d', 'prot:CD25', 'prot:CD185', 'prot:CD279',
                  'rna:FAS', 'rna:FOXP3', 'rna:IL2RA'],
           vmin='p1', vmax='p99',
           save=proj_name + 'wsnn_res.0.8_3wnn_umap.pdf')

In [None]:
# plot the ArchR umap with the same kind of markers; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", vmin='p1', vmax='p99',
                color=["clean_l2_cell_types", 'prot:CD49d',
                       'prot:CD25', 'rna:FAS', 'rna:FOXP3', 'rna:IL2RA'],
                save=proj_name + 'cd4_ArchRumap_markers.png',
                legend_loc="on data")

In [None]:
cd4_mdata

In [None]:
# plot the ArchR umap for CD5 protein and NR4A1 RNA (5th-95th percentile
# colour range); saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", vmin='p5', vmax='p95',
                color=['prot:CD5', 'rna:NR4A1'],
                save=proj_name + 'cd4_ArchRumap_cd5_markers.png',
                legend_loc="on data")

In [None]:
# NOTE(review): 'Archr_Clusters_0_8' is read here, but the cell that merges
# the ArchR cluster table into cd4_mdata.obs comes later in this notebook;
# this presumably works only if the loaded object already has the column
cd4_mdata['prot'].obs['Archr_Clusters_0_8'] = cd4_mdata.obs['Archr_Clusters_0_8']

In [None]:
# CD5 protein per ArchR cluster
sc.pl.violin(cd4_mdata['prot'], 'CD5', groupby='Archr_Clusters_0_8')

## analyze archr clusters

In [None]:
# rebind the working variables to the modalities stored in cd4_mdata
rna = cd4_mdata['rna']
adt = cd4_mdata['prot']

In [None]:
# load cluster identity in archr and load in mudata
# (the first CSV column is used as the index)
archr_cluster = pd.read_csv(
    '/home/jupyter/data/preRA_teaseq/output_results/cd4_t/atac/PreRA_teaseq_cd4_t_atac_Clusters_0.8_cluster_barcodes.csv', index_col=0)
archr_cluster

In [None]:
# Attach the ArchR cluster assignment to every cell (left join on the index).
# This notebook writes its result back to the file it loads, so on a re-run
# the cluster columns may already be in obs. Merging again would then create
# '_x'/'_y' suffixed duplicates and break every later lookup of the
# unsuffixed name. Drop stale copies first, as done for the label columns
# at the top of the notebook.
stale_cols = cd4_mdata.obs.columns.intersection(archr_cluster.columns)
cd4_mdata.obs = cd4_mdata.obs.drop(columns=stale_cols).merge(
    archr_cluster, how='left', left_index=True, right_index=True)

In [None]:
cd4_mdata

In [None]:
# cluster names with the 'C' replaced by 'T'
# NOTE(review): overwritten further down by a version that removes the 'C'
cd4_mdata.obs['atac_cluster'] = cd4_mdata.obs['Archr_Clusters_0_8'].str.replace(
    'C', 'T').copy()

In [None]:
# plot the ArchR umap by ArchR cluster and l2 cell type
# NOTE(review): the next cell saves under the same file name and
# overwrites this figure
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", save=proj_name+'cohort_umap.png',
                color=['Archr_Clusters_0_8', 'l2_cell_types'], legend_loc="on data")

In [None]:
# plot the ArchR umap by cohort, ArchR cluster and l2 cell type; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", save=proj_name+'cohort_umap.png',
                color=['cohort', 'Archr_Clusters_0_8', 'l2_cell_types'], legend_loc="on data")

In [None]:
# 12-colour palette for the ATAC cluster plots
tcluster_colors = ['#8B97A7', "#9E5780", "#E79CA3",
                   "#542654", "#8cd4b7",
                   "#AB9736", "#28594d",  "#B34205", "#1a7585", "#e9616d", "#85d2ff", "#0f2a43"]

In [None]:
# cluster names with the 'C' removed (replaces the 'T' version set above)
cd4_mdata.obs['atac_cluster'] = cd4_mdata.obs['Archr_Clusters_0_8'].str.replace(
    'C', '').copy()
# cd4_mdata.obs.loc[cd4_mdata.obs['3wnn_labels'].isin(['C2', 'C0','C0','C0',]), '3wnn_labels'] = 'CD4 Naive'
# cd4_mdata.obs.loc[cd4_mdata.obs['3wnn_labels'].isin([8, 15]), '3wnn_labels'] = 'Treg'
# cd4_mdata.obs.loc[cd4_mdata.obs['3wnn_labels'].isin([11]), '3wnn_labels'] = 'CD4 CTL'
# cd4_mdata.obs.loc[cd4_mdata.obs['3wnn_labels'].isin([3, 6, 9,10, 14, 4, 7, 13]), '3wnn_labels'] = 'CD4 Memory'
# cd4_mdata.obs['3wnn_labels'] = cd4_mdata.obs['3wnn_labels'].astype('str')

In [None]:
# plot the ArchR umap by ATAC cluster, labels on the data; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", palette=tcluster_colors,
                save=proj_name+'atac_cluster_umap.pdf',
                color=['atac_cluster'], legend_loc="on data")

In [None]:
# plot the ArchR umap by ATAC cluster (same call and file name as the
# previous cell)
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", palette=tcluster_colors,
                save=proj_name+'atac_cluster_umap.pdf',
                color=['atac_cluster'], legend_loc="on data")

In [None]:
# same plot with the default legend placement; saved separately
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", palette=tcluster_colors,
                save=proj_name+'atac_cluster_legend.pdf',
                color=['atac_cluster'])

In [None]:
# list the protein features whose name contains 'CD49'
cd4_mdata['prot'].var.index[cd4_mdata['prot'].var.index.str.contains('CD49')]

In [None]:
# plot the dotplot to differentiate the clusters (surface proteins)
markers = ['CD62L', 'CD162', 'CD45RO', 'CD45RA',
           'CD25', 'KLRG1', 'CD183', 'CD49d']
# the grouping column has to be present in the modality's own obs
cd4_mdata['prot'].obs['atac_cluster'] = cd4_mdata.obs['atac_cluster']
p1 = sc.pl.dotplot(cd4_mdata['prot'], markers,  groupby='atac_cluster', swap_axes=True,
                   standard_scale='var', dendrogram=True,
                   save=proj_name+'atac_cluster_dotplot.pdf')

In [None]:
# plot the dotplot to differentiate the clusters (RNA markers)
markers = ['PLCG2', 'CD27', 'FAS']
cd4_mdata['rna'].obs['atac_cluster'] = cd4_mdata.obs['atac_cluster']
# Display order of the clusters. At this point 'atac_cluster' holds the ArchR
# cluster names with the leading 'C' removed ('1', '2', ...). The order used
# to be spelled with a 'T' prefix, which matches none of those categories and
# makes the dotplot reject `categories_order`.
# NOTE(review): the list names 11 clusters; it must cover every category
# present in 'atac_cluster' — confirm against the data
cluster_order = ['2', '5', '7', '4', '8', '3', '11', '1', '9', '6', '10']
p1 = sc.pl.dotplot(cd4_mdata['rna'], markers,  groupby='atac_cluster', swap_axes=True,
                   standard_scale='var', categories_order=cluster_order)

In [None]:
# ArchR umap coloured by immunehealth_l3
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['immunehealth_l3'])

In [None]:
# WNN umap coloured by ArchR cluster; saved
mu.pl.umap(cd4_mdata, color=["Archr_Clusters_0_8"], legend_loc='on data',
           # palette=cluster_colors,
           save=proj_name + 'Archr_Clusters_0_8_3wnn_umap.pdf')

In [None]:
# RNA umap coloured by ATAC cluster; saved
sc.pl.umap(rna, color=["atac_cluster"], legend_loc='on data',
           title='RNA',
           save=proj_name + 'atac_cluster_rna_umap.pdf')

In [None]:
# protein umap coloured by ATAC cluster; saved
adt.obs['atac_cluster'] = cd4_mdata.obs['atac_cluster'].copy()
mu.pl.umap(adt, color=["atac_cluster"], legend_loc='on data',
           title='Surface protein',
           save=proj_name + 'atac_cluster_adt_umap.pdf')

In [None]:
mu.pl.umap(cd4_mdata, color=['immunehealth_l2'])

In [None]:
# plot the ArchR umap for CXCR5 and IL21 RNA; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['rna:CXCR5', 'rna:IL21'],
                ncols=2,  # vmin ='p1', vmax ='p99',
                save=proj_name+'_Archr_Clusters_0_8_umap_tfh_genes.pdf', legend_loc="on data")

In [None]:
# plot the ArchR umap for a panel of protein and RNA markers plus the
# ArchR clusters (4 panels per row, 1st-99th percentile colour range); saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['prot:CD25',
                       'prot:CD45RA', 'prot:CD45RO', 'prot:CD185', 'prot:CD279', 'prot:CD62L', 'rna:CXCR5',
                       'rna:GZMB', 'rna:IL21',  'rna:PDCD1', 'rna:SELL', 'rna:CCR7', 'rna:SELPLG',
                       'Archr_Clusters_0_8'],
                ncols=4, vmin='p1', vmax='p99',
                save=proj_name+'rna_Archr_Clusters_0_8_umap.png', legend_loc="on data")

In [None]:
# plot the ArchR umap for protein markers only; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['prot:CD45RA', 'prot:CD45RO', 'prot:CD25',
                       'prot:CD185', 'prot:CD279', 'prot:KLRG1'],
                ncols=3, vmin='p1', vmax='p99',
                save=proj_name+'Archr_Clusters_0_8_protein_umap.png', legend_loc="on data")

In [None]:
# plot the ArchR umap for the CD62L and CD162 surface proteins; saved.
# The features are named explicitly: the shared `markers` variable holds the
# RNA genes of the preceding dotplot cell at this point, which does not match
# the protein figure this cell saves.
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['prot:CD62L', 'prot:CD162'], vmin='p1', vmax='p99',
                ncols=2,
                save=proj_name+'Archr_Clusters_0_8_protein_cd62l_CD162_umap.png', legend_loc="on data")

In [None]:
# plot the ArchR umap for CD62L / CD162 protein next to the SELL / SELPLG
# RNA; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['prot:CD62L', 'prot:CD162',
                       'rna:SELL', 'rna:SELPLG'],  # vmin ='p1', vmax ='p99',
                ncols=2,
                save=proj_name+'Archr_Clusters_0_8_rna_protein_cd62l_CD162_umap.png')

In [None]:
# plot the ArchR umap for SELL / SELPLG RNA only
# NOTE(review): saved under the same 'protein_cd62l_CD162_umap' file name as
# an earlier cell in this notebook, so one figure overwrites the other
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap",
                color=['rna:SELL', 'rna:SELPLG'], vmin='p1', vmax='p99',
                ncols=2,
                save=proj_name+'Archr_Clusters_0_8_protein_cd62l_CD162_umap.png', legend_loc="on data")

In [None]:
# protein violins per ArchR cluster
# NOTE(review): a later cell in this block saves to the same file name
markers = ['CD62L', 'CD162']
sc.pl.violin(adt, markers, xlabel='Surface Protein',
             groupby='Archr_Clusters_0_8',
             save=proj_name + 'Archr_Clusters_0_8_protein_cd62l_CD162_violin.png')

In [None]:
# PLCG2 per ArchR cluster, read from rna.raw (use_raw=True); saved
markers = ['PLCG2']
sc.pl.violin(rna, markers, xlabel='', use_raw=True,
             groupby='Archr_Clusters_0_8',
             save=proj_name + 'Archr_Clusters_0_8_rna_plcg2_violin.png')

In [None]:
# protein violins per ArchR cluster without an x label (same file name as
# the 'Surface Protein' version above)
markers = ['CD62L', 'CD162']
sc.pl.violin(adt, markers, xlabel='',
             groupby='Archr_Clusters_0_8',
             save=proj_name + 'Archr_Clusters_0_8_protein_cd62l_CD162_violin.png')

In [None]:
# SELL / SELPLG RNA per ArchR cluster; saved
sc.pl.violin(rna, ['SELL', 'SELPLG'],
             groupby='Archr_Clusters_0_8', save=proj_name+'Archr_Clusters_0_8_protein_cd62l_CD162_rna_violin.png')

In [None]:
# protein violins restricted to the 'pre-RA' cohort; `markers` is the list
# assigned most recently above
sc.pl.violin(cd4_mdata[cd4_mdata.obs['cohort'] == 'pre-RA']['prot'], markers, title='at-risk',
             groupby='Archr_Clusters_0_8', save=proj_name+'Archr_Clusters_0_8_protein_cd62l_CD162_violin_at_risk.png')

In [None]:
# protein violins restricted to the 'Healthy' cohort
sc.pl.violin(cd4_mdata[cd4_mdata.obs['cohort'] == 'Healthy']['prot'], markers, title='Healthy',
             groupby='Archr_Clusters_0_8', save=proj_name+'Archr_Clusters_0_8_protein_cd62l_CD162_violin_healthy.png')

In [None]:
sc.pl.dotplot(prot, ['CD62L', 'CD162'], "Archr_Clusters_0_8", standard_scale="var",
              save=proj_name+'_rna_PLCG2_dotpolt.png')

In [None]:
sc.pl.violin(rna, ['SELL', 'SELPLG', 'CCR7'],
             groupby='Archr_Clusters_0_8')

In [None]:
prot.obs['subject_id'] = cd4_mdata.obs['subject_id'].copy()
sc.pl.violin(prot, ['CD62L'], rotation=90,
             groupby='subject_id')

In [None]:
sc.pl.scatter(prot, x='CD62L', y='CD45RA',
              color='Archr_Clusters_0_8', legend_loc='on data')

In [None]:
# list the distinct immunehealth_l3 labels that contain 'CD4'
cd4_mdata.obs.loc[cd4_mdata.obs['immunehealth_l3'].str.contains(
    'CD4'), ['immunehealth_l3']].drop_duplicates()

In [None]:
# subset the object to the listed memory CD4 T cell labels
cd4mem_labels = ['CM CD4 T cell', 'GZMB- CD27+ EM CD4 T cell', 'KLRF1- GZMB+ CD27- memory CD4 T cell',
                 'GZMB- CD27- EM CD4 T cell', 'ISG+ memory CD4 T cell']
cd4mem_mdata = cd4_mdata[cd4_mdata.obs['immunehealth_l3'].isin(cd4mem_labels)]
cd4mem_mdata

In [None]:
# for celltype in cd4mem_labels:
#     sc.pl.dotplot(cd4_mdata[cd4_mdata.obs['immunehealth_l3']==celltype]['prot'],
#                   'CD62L', groupby='subject_id')

In [None]:
# markers = {'T-cell': 'CD62L', 'B-cell': 'CD62L', 'myeloid': 'CD62L'}
# dotplot of the protein markers per immunehealth_l3 label in the memory
# subset; `markers` is whatever list was assigned last in an earlier cell
sc.pl.DotPlot(cd4mem_mdata['prot'], markers, groupby='immunehealth_l3').show()

In [None]:
# CD62L protein per immunehealth_l3 label
sc.pl.violin(adt, ['CD62L'],
             groupby='immunehealth_l3')

In [None]:
# `cd4na_mdata` was never defined in this notebook (NameError). Build the
# naive CD4 subset with the same 'clean_l2_cell_types' == 'cd4_naive'
# selection that the IL21 violin plot uses.
# NOTE(review): confirm this is the intended definition of the naive subset
# the grouping column has to be present in the protein modality's own obs
cd4_mdata['prot'].obs['subject_id'] = cd4_mdata.obs['subject_id']
cd4na_mdata = cd4_mdata[cd4_mdata.obs['clean_l2_cell_types'] == 'cd4_naive']
sc.pl.violin(cd4na_mdata['prot'], ['CD183', 'CD52', 'CD161', 'CD95', 'TIGIT', 'HLA_A_B_C', 'CD62L'],
             groupby='subject_id')

In [None]:
# check which of these names are present among the protein features
pd.Series(['CD183', 'CD52', 'CD161', 'CD95', 'CD183', 'TIGIT']
          ).isin(cd4_mdata['prot'].var_names)

In [None]:
# plot the ArchR umap coloured by clean_l2_cell_types; saved
mu.pl.embedding(cd4_mdata, basis="atac:X_ArchRumap", color=[
                "clean_l2_cell_types"], save=proj_name+'cd4_ArchRumap_cell_type.png')

In [None]:
cd4_mdata.update()

In [None]:
# test for marker genes in atac clusters
# use rna
rna = cd4_mdata['rna']
rna.obs['Archr_Clusters_0_8'] = cd4_mdata.obs['Archr_Clusters_0_8'].copy()
rna

In [None]:
rna.X

In [None]:
# make the grouping column plain strings before testing
rna.obs['Archr_Clusters_0_8'] = rna.obs['Archr_Clusters_0_8'].astype('str')

In [None]:
# test on degs: Wilcoxon test per ArchR cluster, stored under its own key
sc.tl.rank_genes_groups(rna, groupby='Archr_Clusters_0_8',
                        method='wilcoxon', key_added='Archr_Clusters_0_8_wilcoxon')

In [None]:
# dotplot of the top 5 genes per cluster from the test above; saved
cluster_name = 'Archr_Clusters_0_8'
sc.pl.rank_genes_groups_dotplot(
    rna, groupby=cluster_name, standard_scale="var", n_genes=5, key=cluster_name + "_wilcoxon",
    save='_' + proj_name + cluster_name+'_naive_top_genes_dotplot_scale.png'
)

In [None]:
# dotplot of selected genes per ArchR cluster; saved
sc.pl.dotplot(rna, ['PLCG2', 'MALAT1', 'FYB1', 'BCL2'], "Archr_Clusters_0_8", standard_scale="var",
              save=proj_name+'_rna_PLCG2_dotpolt.png')

In [None]:
# test on protein
adt = cd4_mdata['prot']
adt.obs['Archr_Clusters_0_8'] = cd4_mdata.obs['Archr_Clusters_0_8'].copy()

In [None]:
adt

In [None]:
# run daps in all adts except the isotypes
# `mask_var` has to be a boolean mask over the features or the name of a
# boolean column in adt.var; the list of feature names passed before is
# neither and is rejected by scanpy. The 'highly_variable' flag is the column
# this notebook already uses to leave the isotype controls out of the ADT
# analysis.
# NOTE(review): the previous list was ['CD169', 'HLA_DR_DP_DQ', 'CD131',
# 'HLA_DR_DP_DQ']. If those ADTs should also be left out, pass
# adt.var['highly_variable'].to_numpy() & ~adt.var_names.isin([...]) instead.
sc.tl.rank_genes_groups(adt, groupby='Archr_Clusters_0_8', mask_var='highly_variable',
                        method='wilcoxon', key_added='Archr_Clusters_0_8_wilcoxon_vari')

In [None]:
# adt.uns['Archr_Clusters_0_8_wilcoxon_vari']

In [None]:
# dotplot of the top 5 ADTs per ArchR cluster from the protein test; saved
cluster_name = 'Archr_Clusters_0_8'
sc.pl.rank_genes_groups_dotplot(
    cd4_mdata['prot'], groupby=cluster_name, standard_scale="var", n_genes=5, key='Archr_Clusters_0_8_wilcoxon_vari',
    save='_' + proj_name + cluster_name+'_naive_top_adt_dotplot_scale.pdf'
)

In [None]:
# save the three modalities data
# NOTE(review): this writes to the same path the object was loaded from at
# the top of the notebook, i.e. the input file is overwritten
cd4_mdata.update()
cd4_mdata.write(data_path+'PreRA_teaseq_cd4_3modality.h5mu')