# Build a MuData object to store the preRA TEA-seq data

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
import muon.prot as pt
import seaborn as sns
from mudata import MuData

In [None]:
from matplotlib import pyplot as plt
# Render figures at 100 dots per inch by default.
plt.rcParams.update({'figure.dpi': 100})

In [None]:
# Default figure size, given as [width, height].
plt.rcParams.update({'figure.figsize': [10, 8]})

In [None]:
# Directory holding the input .h5mu/.h5ad files and receiving the files
# written by this notebook. The trailing slash matters: file names are
# appended with plain string concatenation.
data_path = '/home/jupyter/data/preRA_teaseq/EXP-00243/totalVI/'

# Construct the three-modality dataset

In [None]:
# Load the QC-filtered MuData file; its 'RNA' and 'prot' modalities are
# used below.
prera_tea_2mod = mu.read(
    data_path + 'PreRA_teaseq_seurat_qc_filtered_cells_lsi.h5mu')

In [None]:
# Show the summary of the loaded object.
prera_tea_2mod

In [None]:
# Make clean names for the ADT (protein) features: every '-' or '.' in a
# feature name is replaced with '_'.
prera_tea_2mod['prot'].var.index = prera_tea_2mod['prot'].var.index.str.replace(
    '-|\\.', '_', regex=True)

In [None]:
# List the cleaned protein feature names to check the result.
prera_tea_2mod['prot'].var.index.to_list()

In [None]:
# Rebuild the MuData with the RNA modality stored under the lowercase key
# 'rna' (it is 'RNA' in the loaded object); 'prot' keeps its name.
# Modalities are copied so later edits do not touch `prera_tea_2mod`.
prera_tea = MuData({"rna": prera_tea_2mod['RNA'].copy(),
                    "prot": prera_tea_2mod['prot'].copy()})
# Carry over the cell-level metadata. It is copied, as the 5-modality
# rebuild further down does, so the two MuData objects do not share a
# single DataFrame.
prera_tea.obs = prera_tea_2mod.obs.copy()
# NOTE(review): the global .obsm/.obsp/.uns of `prera_tea_2mod` are not
# carried over here, unlike in the later 5-modality rebuild -- confirm that
# any joint embedding needed by the UMAP plots is present downstream.

In [None]:
# Show the summary of the rebuilt rna/prot object.
prera_tea

## Add the ATAC data to the MuData object

In [None]:
# Load the ATAC peak matrix. It is handled below as a single modality
# (.obs/.var access, then stored under the 'atac' key).
atac = mu.read(
    data_path+'preRA_teaseq_totalvi_mocha_peakmatrix_l2_celltype.h5ad')

In [None]:
# Replace the obs index with the values of the 'barcodes' column so cells
# are keyed by barcode -- presumably the same identifiers used by
# `prera_tea.obs_names`; the difference check further down verifies this.
atac.obs.index = atac.obs['barcodes']

In [None]:
# Peak annotations as a plain table with the peak identifiers in a 'peaks'
# column. Naming the index before resetting it guarantees the column name
# even when the var index already carries a name (renaming an 'index'
# column afterwards would silently do nothing in that case).
var_df = atac.var.rename_axis('peaks').reset_index()
var_df

In [None]:
# Cells present in the rna/prot MuData but absent from the ATAC matrix;
# an empty result means every cell has ATAC data.
prera_tea.obs_names.difference(atac.obs_names)

In [None]:
# Attach a copy of the ATAC matrix as a third modality under the key 'atac'.
prera_tea.mod['atac'] = atac.copy()

In [None]:
# prera_tea.var_names_make_unique()

In [None]:
# Refresh the MuData-level annotations now that a modality was added.
prera_tea.update()

In [None]:
# Show the summary of the object, now with the 'atac' modality attached.
prera_tea

In [None]:
# Save the rna/prot/atac object to disk.
prera_tea.write(data_path+'PreRA_teaseq_qc_filtered_cells_3modality.h5mu')

## remove BR2024 from the analysis

In [None]:
# Reload the three-modality data written above, so this section can be run
# without rebuilding the object.
prera_tea = mu.read(data_path+'PreRA_teaseq_qc_filtered_cells_3modality.h5mu')

In [None]:
# Distinct subject IDs present in the cell metadata.
prera_tea.obs['subject_id'].unique()

In [None]:
# Keep only the cells whose subject is not BR2024; the copy makes the
# filtered result an independent object.
prera_tea_fl = prera_tea[prera_tea.obs['subject_id'].ne('BR2024')].copy()

In [None]:
# Peek at the protein matrix: densify the block of the first 50 cells by
# the first 50 features. The slices start at 0 so the first cell and the
# first feature are included.
prera_tea_fl['prot'].X[:50, :50].toarray()

In [None]:
# Feature names of the protein modality after filtering.
prera_tea_fl['prot'].var_names

In [None]:
# Save the three-modality object with the BR2024 cells removed.
prera_tea_fl.write(
    data_path+'PreRA_teaseq_qc_filtered_cells_3modality_rmBR2024.h5mu')

# Add ATAC gene scores and TF activity to the MuData object

In [None]:
# Load the three-modality data without BR2024 (the file written in the
# previous section); `prera_tea` is rebound to this filtered object.
prera_tea = mu.read(
    data_path + 'PreRA_teaseq_qc_filtered_cells_3modality_rmBR2024.h5mu')

In [None]:
# Load the TF scores and the gene-score matrix from the ATAC output
# directory.
atac_path = '/home/jupyter/data/preRA_teaseq/output_results/atac/'
tf_activity = sc.read_h5ad(atac_path + 'preRA_teaseq_TFscores.h5ad')
gene_scores = sc.read_h5ad(atac_path + 'preRA_teaseq_GeneScoreMatrix.h5ad')

In [None]:
# Show the summary of the filtered three-modality object.
prera_tea

In [None]:
# Reset the cell names of the gene-score and TF matrices to match the
# MuData: each name is split on '#' and the piece right after the first
# '#' is kept.
# NOTE(review): assumes every name looks like '<prefix>#<barcode>'; a name
# without '#' would become NaN -- confirm against the inputs.
gene_scores.obs.index = gene_scores.obs.index.str.split('#').str[1]
tf_activity.obs.index = tf_activity.obs.index.str.split('#').str[1]

In [None]:
# Remove the number from the TF names by keeping only the text before the
# first '_'.
# NOTE(review): names sharing the same text before '_' collapse to one
# label -- check `tf_activity.var_names.is_unique` if that matters.
tf_activity.var.index = tf_activity.var.index.str.split('_').str[0]

In [None]:
# Cells present in the MuData but absent from the TF-score matrix; an empty
# result means every cell has TF scores. Cells that exist only in the TF
# matrix are not reported by this check.
prera_tea.obs_names.difference(tf_activity.obs_names)

In [None]:
# Recreate the MuData with the gene-score and TF-score matrices added as
# two extra modalities next to rna/prot/atac.
# The score matrices are restricted to cells that exist in `prera_tea`:
# the global annotations copied below have one row per `prera_tea` cell, so
# any extra cell in the score matrices (for example from a subject that was
# filtered out earlier) would make the shapes disagree. When the score
# matrices contain no extra cells the masks select everything.
prera_tea_5m = MuData({
    "rna": prera_tea['rna'].copy(),
    "prot": prera_tea['prot'].copy(),
    "atac": prera_tea['atac'].copy(),
    "genescores": gene_scores[
        gene_scores.obs_names.isin(prera_tea.obs_names)].copy(),
    "tf": tf_activity[
        tf_activity.obs_names.isin(prera_tea.obs_names)].copy(),
})
# Carry over the global (cross-modality) annotations of the source object.
prera_tea_5m.obs = prera_tea.obs.copy()
prera_tea_5m.uns = prera_tea.uns.copy()
prera_tea_5m.obsm = prera_tea.obsm.copy()
prera_tea_5m.obsp = prera_tea.obsp.copy()

In [None]:
# Use the values of the 'name' column as the feature names of the
# gene-score modality, so features can be addressed by name (for example
# 'BCL6' and 'CD3D' in the cells below).
prera_tea_5m['genescores'].var.index = prera_tea_5m['genescores'].var['name']

In [None]:
# Refresh the MuData-level annotations after renaming the gene-score
# features.
prera_tea_5m.update()

In [None]:
# UMAP coloured by the 'l2_cell_types' annotation, with the legend labels
# drawn on the plot itself.
mu.pl.umap(prera_tea_5m, color=['l2_cell_types'], legend_loc="on data")

In [None]:
# Show the summary of the five-modality object.
prera_tea_5m

In [None]:
# Look up BCL6 in the gene-score and the TF modality. A notebook cell only
# renders its last expression, so the first lookup is printed explicitly;
# otherwise its result would be computed and discarded.
print(prera_tea_5m['genescores'].var.loc[
    prera_tea_5m['genescores'].var.index == 'BCL6'])
prera_tea_5m['tf'].var.loc[prera_tea_5m['tf'].var.index == 'BCL6']

In [None]:
# Small, high-resolution UMAP of the 'manual_labels' annotation. The
# figure-size and dpi overrides apply only inside the `with` block.
with plt.rc_context(rc={"figure.figsize": (4, 4), "figure.dpi": 300}):
    mu.pl.umap(
        prera_tea,
        color=['manual_labels'],
        # palette=cluster_colors_32,
        legend_loc="on data",
        legend_fontsize='small',
        # save=proj_name + '_manual_labels_legend.pdf',
        show=True,
    )
# Disabled figure-export steps, kept for reference:
# p1.set_xlabel("WNN UMAP1")
# p1.set_ylabel("WNN UMAP2")
# p1.set_title('Cell types')
# plt.tight_layout()
# p1.figure.savefig(fig_path+proj_name + '_manual_labels.pdf')

In [None]:
# UMAP coloured by the CD3 feature of the protein modality
# ('<modality>:<feature>' selects a feature from one modality).
mu.pl.umap(prera_tea_5m, color=['prot:CD3'])

In [None]:
# UMAP coloured by RNA expression of CD3D. CD3 is a protein complex rather
# than a single gene, so 'CD3' is a valid feature name only for the antibody
# in the protein modality; in the RNA modality a subunit gene has to be
# named. CD3D matches the gene-score plot of the same marker.
# NOTE(review): assumes the RNA features are named by gene symbol -- confirm.
mu.pl.umap(prera_tea_5m, color=['rna:CD3D'])

In [None]:
# UMAP coloured by the CD3D gene score; the colour scale is clipped to the
# 5th-95th percentile range so outliers do not dominate it.
mu.pl.umap(prera_tea_5m, color=['genescores:CD3D'], vmin='p5', vmax='p95')

In [None]:
# UMAP coloured by the MAF TF score; the colour scale is clipped to the
# 5th-95th percentile range so outliers do not dominate it.
mu.pl.umap(prera_tea_5m, color=['tf:MAF'], vmin='p5', vmax='p95')

In [None]:
# Save the five-modality object (rna, prot, atac, genescores, tf) with
# BR2024 removed.
prera_tea_5m.write(
    data_path+'PreRA_teaseq_qc_filtered_cells_5modality_rmBR2024.h5mu')