# Leiden clustering

**Pinned Environment:** [`envs/sc-spatial.yaml`](../../envs/sc-spatial.yaml)

In [None]:
import os
import sys
from pathlib import Path
import scanpy as sc
import matplotlib.pyplot as plt
import session_info

In [None]:
# Use a 4 x 4 default figure size for the plots in this notebook.
plt.rcParams.update({"figure.figsize": (4, 4)})

## Import data

In [None]:
# Append the directory two levels above the current working directory to the
# import path (presumably so that `config.paths` resolves -- confirm the
# notebook's location relative to the project root).
project_root = Path.cwd().resolve().parents[1]
sys.path.append(str(project_root))

from config.paths import BASE_DIR

# Directories this notebook reads from and writes to.
input_dir = BASE_DIR / "data/h5ad/export_02"
output_dir = BASE_DIR / "data/h5ad/export_03"

# Create the output directory, including missing parents; no error if it exists.
output_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Load the AnnData object from the input directory.
adata = sc.read_h5ad(input_dir / "artis-naive-scvi.h5ad")

In [None]:
# Assign each sample to its experimental group.
sample_to_group = {
    "TIS09473_Control": "Control",
    "TIS09471_Control": "Control",
    "TIS09472_Control": "Control",
    "TIS09475_Trpv1+": "Trpv1-cre",
    "TIS09474_Trpv1+": "Trpv1-cre",
    "TIS09476_Trpv1+": "Trpv1-cre",
}
adata.obs["group"] = adata.obs["sample_id"].map(sample_to_group)

# `Series.map` leaves NaN for any sample_id missing from the dict, and
# `value_counts()` hides NaN by default, so fail loudly instead of silently
# carrying ungrouped cells forward.
ungrouped = adata.obs["group"].isna()
if ungrouped.any():
    missing_ids = sorted(adata.obs.loc[ungrouped, "sample_id"].astype(str).unique())
    raise ValueError(f"sample_id values without a group assignment: {missing_ids}")

# Number of cells per group.
adata.obs["group"].value_counts()

## Neighbors, UMAP

In [None]:
# Build the neighbor graph from the `X_scVI` representation, then compute the
# UMAP embedding; both calls use a fixed seed of 0.
sc.pp.neighbors(adata, use_rep="X_scVI", random_state=0)
sc.tl.umap(adata, random_state=0)

## Leiden clustering

In [None]:
# Cluster the full dataset and store the labels in `adata.obs["leiden"]`.
# The seed is passed explicitly, matching the neighbors/UMAP calls, rather
# than relying on the library default.
sc.tl.leiden(adata, resolution=1.2, key_added="leiden", random_state=0)

In [None]:
# Plot the UMAP embedding colored by Leiden cluster label.
sc.pl.umap(adata, color="leiden")

## Subcluster ISC and SMC

In [None]:
# Start the subcluster column as a copy of the Leiden labels.
# NOTE(review): this column is reassigned to "Other" before the subcluster
# labels are mapped back onto `adata`, so this copy does not survive to the
# export -- confirm whether this initialization is still needed.
adata.obs["subcluster_mapping"] = adata.obs["leiden"].copy()

In [None]:
# Copy out the two Leiden clusters that get subclustered below.
# NOTE(review): the cluster IDs are hard-coded -- confirm they still match the
# intended populations whenever the upstream clustering is re-run.
leiden_labels = adata.obs["leiden"]
bdata = adata[leiden_labels == "5"].copy()  # SMC
cdata = adata[leiden_labels == "4"].copy()  # ISC

In [None]:
# Recompute neighbors, UMAP, and Leiden clusters within each subset, using the
# `X_scVI` representation. Both subsets go through the same three steps, so
# they share one loop. Seeds are passed explicitly (0, as in the full-dataset
# neighbors/UMAP calls) instead of relying on library defaults.
for subset, subcluster_key in (
    (bdata, "bdata_subcluster"),
    (cdata, "cdata_subcluster"),
):
    sc.pp.neighbors(subset, use_rep="X_scVI", random_state=0)
    sc.tl.umap(subset, random_state=0)
    sc.tl.leiden(subset, resolution=0.2, key_added=subcluster_key, random_state=0)

Label subclusters:

In [None]:
# Smooth muscle cells: rename the Leiden subcluster IDs to readable labels.
bdata_cluster_names = {
    "0": "SMC_1",
    "1": "SMC_2",
}

# `Series.map` turns every label without an entry above into NaN, which
# would then be written into `adata` unnoticed. Check before mapping so that
# an unexpected subcluster (or a re-run of this cell after the labels were
# already applied) raises instead.
unnamed_smc = set(bdata.obs["bdata_subcluster"].astype(str).unique()) - set(bdata_cluster_names)
if unnamed_smc:
    raise ValueError(
        f"bdata_subcluster values without a name in bdata_cluster_names: {sorted(unnamed_smc)}"
    )

bdata.obs["bdata_subcluster"] = bdata.obs["bdata_subcluster"].map(bdata_cluster_names)
sc.pl.umap(bdata, color="bdata_subcluster")

In [None]:
# Intestinal stem cells and Paneth cells (crypt co-localization): rename the
# Leiden subcluster IDs to readable labels.
cdata_cluster_names = {
    "0": "Paneth",
    "1": "ISC",
}

# `Series.map` turns every label without an entry above into NaN, which
# would then be written into `adata` unnoticed. Check before mapping so that
# an unexpected subcluster (or a re-run of this cell after the labels were
# already applied) raises instead.
unnamed_crypt = set(cdata.obs["cdata_subcluster"].astype(str).unique()) - set(cdata_cluster_names)
if unnamed_crypt:
    raise ValueError(
        f"cdata_subcluster values without a name in cdata_cluster_names: {sorted(unnamed_crypt)}"
    )

cdata.obs["cdata_subcluster"] = cdata.obs["cdata_subcluster"].map(cdata_cluster_names)
# Plot the subcluster labels next to Lyz2 expression.
sc.pl.umap(cdata, color=["cdata_subcluster", "Lyz2"])

Map subclusters back onto original `adata`:

In [None]:
# Every cell starts as "Other"; cells belonging to a subset then receive that
# subset's subcluster label, matched on the obs index.
adata.obs["subcluster_mapping"] = "Other"

for subset, label_column in (
    (bdata, "bdata_subcluster"),
    (cdata, "cdata_subcluster"),
):
    adata.obs.loc[subset.obs.index, "subcluster_mapping"] = subset.obs[label_column]

In [None]:
# Plot the full-dataset UMAP colored by the combined subcluster labels.
sc.pl.umap(adata, color="subcluster_mapping")

## Export

In [None]:
# Destination path for the exported object, kept as a plain string.
filename = str(output_dir / "artis-naive-scvi-leiden.h5ad")

In [None]:
# Write the annotated object to `filename` with gzip compression.
adata.write_h5ad(filename, compression="gzip")