In [None]:
# Project root; the exploratory single-cell data directory sits beneath it.
working_dir = "/data/projects/dschaub/ANCA-GN_transcriptomics"
data_dir = f"{working_dir}/data/single-cell/exploratory"

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to them take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

import sys
import os

# Change into the project root before the local `utils` imports below.
# NOTE(review): presumably this is what makes the `utils` package importable
# from the notebook — confirm. `working_dir` must already be defined.
os.chdir(working_dir)

import yaml
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns
import mudata as md
import pandas as pd
import muon as mu
import numpy as np
import scanpy as sc
from matplotlib import gridspec
import scanpy.external as sce
from scipy import sparse
from matplotlib.colors import LinearSegmentedColormap
import matplotlib as mpl

from utils.utils import *
from utils.plotting import *

# Global display configuration for the whole notebook.
# Set scanpy's verbosity to its lowest level (0) to keep cell output quiet.
sc.settings.verbosity = 0
# Apply scanpy's figure defaults, with the figure resolution set to 80 dpi.
sc.set_figure_params(dpi=80)
# Apply seaborn's "white" style.
# NOTE(review): this runs after the scanpy call and both adjust global
# plotting defaults, so the order may matter — confirm before reordering.
sns.set(style="white")

## Load data

In [None]:
# Read the processed T-cell MuData object from the data directory.
path = os.path.join(data_dir, "ANCA_exploratory_27PK27PB_Tcells_processed.h5mu")
mudata = md.read_h5mu(path)

# Keep direct handles to the two modalities ("rna" and "cite") used below.
mod_rna, mod_cite = (mudata.mod[modality] for modality in ("rna", "cite"))

In [None]:
# Folder holding the CSV tables of the Harmony run (presumably exported from R,
# going by the folder name). Each table is read with its first column as index.
# `metadata` is loaded alongside the two embeddings even though the cells
# visible here only use the embeddings.
harmony_r_path = os.path.join(data_dir, "ANCA_exploratory_27PK27PB_Tcells_HarmonyR")
metadata, harmony_embeddings, umap_embeddings = (
    pd.read_csv(f"{harmony_r_path}/{table}.csv", index_col=0)
    for table in ("metadata", "harmony_embeddings", "umap_embeddings")
)

## Add embeddings

In [None]:
# Attach the imported embeddings to the RNA modality. Rows are selected by
# `mod_rna.obs_names`, so each table is put into the same order as the
# AnnData observations before being stored as a plain array.
for obsm_key, embedding_table in (
    ("X_harmony", harmony_embeddings),
    ("X_umap", umap_embeddings),
):
    mod_rna.obsm[obsm_key] = embedding_table.loc[mod_rna.obs_names].values

In [None]:
# UMAP from the coordinates currently stored in obsm["X_umap"], one panel each
# for sample, tissue and patient; wspace widens the gap between panels.
sc.pl.umap(mod_rna, color=["sample", "tissue", "patient"], wspace=0.8)

## Leiden clustering

In [None]:
# Build the neighbourhood graph on the Harmony embedding in obsm["X_harmony"].
sc.pp.neighbors(mod_rna, use_rep="X_harmony")
# Compute a UMAP with scanpy from that graph.
# NOTE(review): obsm["X_umap"] was already filled with the imported embedding
# in an earlier cell, and the next line reads the result from the same key, so
# the imported coordinates appear to be replaced here — confirm this is intended.
sc.tl.umap(mod_rna)
# Keep a detached copy of the coordinates now stored under "X_umap".
umap_sc = mod_rna.obsm["X_umap"].copy()

In [None]:
# Same three-panel UMAP (sample, tissue, patient), drawn again after the
# neighbours/UMAP step above so it reflects the current obsm["X_umap"].
sc.pl.umap(mod_rna, color=["sample", "tissue", "patient"], wspace=0.8)

In [None]:
# Resolution grid 0.3, 0.4, ..., 1.9. Integer tenths are divided by 10 so each
# value is the same float as the corresponding decimal literal and therefore
# formats identically in the key below (e.g. "leiden_0.3", "leiden_1.0").
resolutions = [tenths / 10 for tenths in range(3, 20)]

# One Leiden run per resolution, each stored under its own "leiden_<res>" key.
for res in resolutions:
    sc.tl.leiden(mod_rna, resolution=res, key_added=f"leiden_{res}")

In [None]:
# Plot the clustering results with the project helper (brought in by the
# wildcard imports from `utils`). NOTE(review): presumably it visualises the
# "leiden_<res>" columns created above, and the empty `rep_name` matches keys
# without a representation prefix — confirm against the helper's definition.
plot_leiden_results(mod_rna, rep_name="")

In [None]:
# Bare expression: the notebook displays the repr of `mod_rna`, a quick check
# of what the object holds at this point.
mod_rna

## Scale data

In [None]:
# Run the project helper `scale` (from the wildcard `utils` imports) on both
# modalities; the CITE call passes layer="clr".
# NOTE(review): the helper is not visible here — what it reads, and where it
# stores the scaled values, must be confirmed in `utils`.
scale(mod_rna)
scale(mod_cite, layer="clr")

# After scaling, point `.X` at copies of the "log1p" (RNA) and "clr" (CITE)
# layers, so `.X` holds those values rather than whatever `scale` may have left
# there. Both layers must already exist on the loaded objects.
mod_rna.X = mod_rna.layers["log1p"].copy()
mod_cite.X = mod_cite.layers["clr"].copy()

## Save data

In [None]:
# Destination file for the Harmony-integrated T-cell object.
save_path = os.path.join(data_dir, "ANCA_exploratory_27PK27PB_Tcells_HarmonyR.h5mu")

# Assemble a new MuData from copies of the two updated modalities (this rebinds
# `mudata`, replacing the object loaded at the start) and write it to disk.
mudata = md.MuData(
    {
        name: modality.copy()
        for name, modality in (("rna", mod_rna), ("cite", mod_cite))
    }
)
mudata.write_h5mu(save_path)