## Python part

In [None]:
# Reload edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

import sys
import os

# Relative chdir: re-running this cell in the same kernel moves up another
# four levels. Presumably this lands in the repository root so that `utils`
# and the relative `data/...` paths below resolve — verify against the
# notebook's location.
os.chdir("../../../../")

import pandas as pd
import scanpy as sc
import mudata as md

# Star import; the cells below call `adata2raw` and `gzip_file`, which are
# not defined in this notebook and presumably come from here.
from utils.py2r import *

In [None]:
# Read the processed multimodal object; the path is relative to the
# current working directory.
path = "data/anca/exploratory/ANCA_27PK27PB_T_processed.h5mu"
mudata = md.read_h5mu(path)

# Pull out the two modalities that the following cells work on.
mod_rna, mod_cite = (mudata.mod[modality] for modality in ("rna", "cite"))

In [None]:
# Export the RNA modality into `save_dir` via the project helper, then make
# sure every exported file except the metadata is present as a fresh .gz.
save_dir = "data/anca/exploratory/ANCA_27PK27PB_T_processed_r"
adata2raw(mod_rna, save_dir)

# Snapshot the directory once. Files whose name contains "meta" are left
# untouched (neither removed nor compressed).
exported = sorted(name for name in os.listdir(save_dir) if "meta" not in name)

# Pass 1: delete archives left over from an earlier run. This must finish
# before anything is compressed: with a single interleaved loop, a stale
# "x.gz" listed after "x" would cause the archive just written for "x" to be
# deleted again, because os.listdir() gives no ordering guarantee.
for name in exported:
    if name.endswith(".gz"):
        print(f"Removing {name}")
        os.remove(os.path.join(save_dir, name))

# Pass 2: compress everything that is not already an archive.
for name in exported:
    if not name.endswith(".gz"):
        print(f"Zipping {name}")
        gzip_file(os.path.join(save_dir, name))

## R part

In [None]:
# Set the BLAS thread count to 1 before the analysis libraries are loaded.
RhpcBLASctl::blas_set_num_threads(1)
library(Seurat)
library(dplyr)
# library(tidyverse)
library(here)
# library(readxl)
# library(Matrix)
library(IMSBSeuratHelpers)
library(harmony)
library(ggplot2)
# library(sctransform)
library(future)
library(data.table)
library(xlsx)
# future backend: "multicore" plan restricted to a single worker.
plan("multicore", workers = 1)
# Size limit for globals handed to futures: 20 * 1000 * 1024^2 bytes.
options(future.globals.maxSize = 20 * 1000 * 1024^2) # 20GB
# Display limits for matrices / data frames rendered in the notebook.
options(repr.matrix.max.rows = 600, repr.matrix.max.cols = 200)

In [None]:
# Load the two previously saved Harmony-integrated objects and combine them
# into one reference object for comparison.
seu_K <- readRDS(file = "/data/projects/ANCA_Atlas/Integrated_objects/Harmony_ANCA_28PK_T.rds")
seu_B <- readRDS(file = "/data/projects/ANCA_Atlas/Integrated_objects/Harmony_ANCA_28PB_T.rds")
seu_old <- merge(x = seu_K, y = seu_B)

In [None]:
# Display the `data` slot of the RNA assay of the merged reference object.
seu_old@assays$RNA@data

In [None]:
# Print the internal structure (slots) of the merged reference object.
str(seu_old)

In [None]:
# Display the `counts` slot of the RNA assay of the merged reference object.
seu_old@assays$RNA@counts

In [None]:
# Directory written by the Python part: 10X-style matrix files plus metadata.csv.
path <- "/data/projects/dschaub/anca-project/data/anca/exploratory/ANCA_27PK27PB_T_processed_r"

# Per-cell metadata; the first CSV column becomes the row names.
metadata <- read.csv(file = paste0(path, "/metadata.csv"), row.names = 1)

# Count matrix from the same directory.
raw_data <- Read10X(data.dir = path)

seu <- CreateSeuratObject(
    counts = raw_data,
    project = "ANCA",
    meta.data = metadata
)

In [None]:
# Display the raw counts of the newly created object.
seu@assays$RNA@counts

In [None]:
# Normalize with Seurat's default settings.
seu <- NormalizeData(object = seu)

In [None]:
# Use the "patient" metadata column as the active identity.
Idents(seu) <- "patient"

In [None]:
# Select 2000 variable features with the "vst" method.
seu <- FindVariableFeatures(seu, selection.method = "vst", nfeatures = 2000)

In [None]:
# Scale with defaults; regression is intentionally not enabled here
# (previously considered: vars.to.regress = c("nFeature_RNA", "nCount_RNA", "frac.mito")).
seu <- ScaleData(object = seu)

In [None]:
# PCA restricted to the variable features selected above.
variable_genes <- VariableFeatures(object = seu)
seu <- RunPCA(seu, features = variable_genes, verbose = TRUE)

In [None]:
# Harmony integration grouped by "patient", using dimensions 1:30.
seu <- RunHarmony(
    seu,
    "patient",
    dims.use = 1:30,
    plot_convergence = TRUE
)

In [None]:
# UMAP computed on the first 30 Harmony dimensions.
seu <- RunUMAP(seu, reduction = "harmony", dims = 1:30)

In [None]:
# Unlabelled UMAP scatter with small points and a square panel.
umap_plot <- DimPlot(seu, reduction = "umap", label = FALSE, pt.size = 0.1)
umap_plot + theme(aspect.ratio = 1)

In [None]:
# Single output location for everything handed back to the Python part.
out_dir <- "/data/projects/dschaub/anca-project/data/anca/exploratory/ANCA_27PK27PB_T_harmony_r"
# No earlier cell creates this directory, and write.csv()/saveRDS() fail when
# the target directory is missing. This is a no-op if it already exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# Access the metadata from the Seurat object (recommended for Seurat v3.0 and above)
metadata <- seu[[]]

# Write the metadata to a CSV file and keep the full object as RDS.
write.csv(metadata, file = file.path(out_dir, "metadata.csv"), row.names = TRUE)
saveRDS(seu, file = file.path(out_dir, "seurat.rds"))

In [None]:
str(seu)

In [None]:
# Cell embeddings of the "harmony" and "umap" reductions (cells as row names).
harmony_embeddings <- seu[["harmony"]]@cell.embeddings
umap_embeddings <- seu[["umap"]]@cell.embeddings

# Write both embeddings with row names so the Python part can align on cell ids.
write.csv(harmony_embeddings, file = file.path(out_dir, "harmony_embeddings.csv"), row.names = TRUE)
write.csv(umap_embeddings, file = file.path(out_dir, "umap_embeddings.csv"), row.names = TRUE)

## Second Python part

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os

os.chdir("../../../../")

import yaml
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns
import mudata as md
import muon as mu
import numpy as np
import scanpy as sc
from matplotlib import gridspec
import scanpy.external as sce
from scipy import sparse
from matplotlib.colors import LinearSegmentedColormap

from utils.utils import *
from utils.plotting import *

sc.settings.verbosity = 0
import matplotlib as mpl

In [None]:
# Start again from the processed object that was exported to R.
path = "data/anca/exploratory/ANCA_27PK27PB_T_processed.h5mu"
mudata = md.read_h5mu(path)

# Keep handles on both modalities.
mod_rna, mod_cite = mudata.mod["rna"], mudata.mod["cite"]

In [None]:
# Directory the R part wrote its CSV exports to (relative to the working directory).
harmony_r_path = "data/anca/exploratory/ANCA_27PK27PB_T_harmony_r"

# Load the three tables the same way: first column is the index.
metadata, harmony_embeddings, umap_embeddings = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        f"{harmony_r_path}/metadata.csv",
        f"{harmony_r_path}/harmony_embeddings.csv",
        f"{harmony_r_path}/umap_embeddings.csv",
    )
)

In [None]:
# Store the R embeddings on the RNA modality. Rows are selected by
# `obs_names`, so they follow the AnnData cell order.
for obsm_key, embedding in (
    ("X_harmony", harmony_embeddings),
    ("X_umap", umap_embeddings),
):
    mod_rna.obsm[obsm_key] = embedding.loc[mod_rna.obs_names].values

In [None]:
sc.pl.umap(mod_rna, color="cell_type_yu")

In [None]:
sc.pl.umap(
    mod_rna,
    color=["sample", "tissue", "patient"],
    # return_fig=True,
    # legend_loc="on data",
    # save="_leiden.png",
    wspace=0.8,
)

In [None]:
sc.pp.neighbors(mod_rna, use_rep="X_harmony", n_pcs=30, metric="cosine", n_neighbors=30)
sc.tl.umap(mod_rna, min_dist=0.3)
umap_adpated = mod_rna.obsm["X_umap"].copy()

In [None]:
sc.pp.neighbors(mod_rna, use_rep="X_harmony")
sc.tl.umap(mod_rna)
umap_sc = mod_rna.obsm["X_umap"].copy()

In [None]:
sc.pl.umap(mod_rna, color="cell_type_yu")

In [None]:
sc.pl.umap(
    mod_rna,
    color=["sample", "tissue", "patient"],
    # return_fig=True,
    # legend_loc="on data",
    # save="_leiden.png",
    wspace=0.8,
)

In [None]:
resolutions = [
    0.3,
    0.4,
    0.5,
    0.6,
    0.7,
    0.8,
    0.9,
    1.0,
    1.1,
    1.2,
    1.3,
    1.4,
    1.5,
    1.6,
    1.7,
    1.8,
    1.9,
]
for res in resolutions:
    sc.tl.leiden(mod_rna, key_added=f"leiden_{res}", resolution=res)

In [None]:
plot_leiden_results(mod_rna, rep_name="")

In [None]:
# Reassemble both modalities into a MuData container and save it.
modalities = {"rna": mod_rna, "cite": mod_cite}
mudata = md.MuData(modalities)
mudata.write("data/anca/exploratory/ANCA_27PK27PB_T_harmony_r.h5mu")

## Add scaled layer

In [None]:
mudata = md.read_h5mu("data/anca/exploratory/ANCA_27PK27PB_T_harmony_r.h5mu")
mudata.shape

In [None]:
mod_rna = mudata.mod["rna"]
mod_cite = mudata.mod["cite"]

In [None]:
# `scale` is not defined in this notebook; presumably it comes from one of
# the `utils` star imports. Called with its defaults on the RNA modality.
# NOTE(review): given the section title it presumably adds a scaled layer —
# confirm in the helper's source.
scale(mod_rna)

In [None]:
# Same helper on the CITE modality, pointed at its "clr" layer.
scale(mod_cite, layer="clr")

In [None]:
# Set .X of each modality to a copy of an existing layer
# ("log1p" for RNA, "clr" for CITE).
mod_rna.X = mod_rna.layers["log1p"].copy()
mod_cite.X = mod_cite.layers["clr"].copy()

In [None]:
# Rebuild the container from the updated modalities and write it to the same
# path it was read from in this section, replacing the earlier file.
mudata = md.MuData(dict(rna=mod_rna, cite=mod_cite))
mudata.write("data/anca/exploratory/ANCA_27PK27PB_T_harmony_r.h5mu")