### HVG & PCA

In [None]:
import scanpy as sc
import scanpy.external as sce

# ------------------------------------------------
# Step: Flag highly variable genes (HVGs)
# ------------------------------------------------
# Mean-expression and dispersion cut-offs handed to scanpy's HVG selection;
# the result is read back below from adata.var['highly_variable'].
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Keep only genes that are flagged as highly variable AND are not flagged in
# the 'MT' column.
# NOTE(review): assumes adata.var['MT'] is a boolean mitochondrial-gene mask
# created in an earlier cell — confirm, since `~` needs a boolean column.
gene_mask = adata.var['highly_variable'] & ~adata.var['MT']
adata = adata[:, gene_mask].copy()  # .copy() turns the view into a real object

# ------------------------------------------------
# Step: Scale each gene (values capped via max_value=10)
# ------------------------------------------------
sc.pp.scale(adata, max_value=10)

# ------------------------------------------------
# Step: PCA and variance-ratio diagnostic plot
# ------------------------------------------------
sc.tl.pca(adata, svd_solver='randomized')

# Variance ratio of the first 50 components on a linear (non-log) axis,
# typically used to choose how many PCs to carry forward.
sc.pl.pca_variance_ratio(adata, log=False, n_pcs=50)

### Batch Correction

In [None]:
import bbknn

# ---------------------------------
# Step: Batch correction (Harmony)
# ---------------------------------
# Integrate across the 'batch' obs column, allowing up to 20 Harmony iterations.
# The neighbors step later in this notebook reads the 'X_pca_harmony'
# representation, which is presumably what this call writes into adata.obsm
# (scanpy's documented default output key) — verify if the basis is changed.
sce.pp.harmony_integrate(adata, key=['batch'], max_iter_harmony=20)

# Alternative integration method (graph-based, replaces the neighbors step):
# bbknn.bbknn(adata, batch_key='batch')

### Neighbors & Clustering

In [None]:
# ---------------------------------------------
# Step: Neighbor graph, Leiden clustering, UMAP
# ---------------------------------------------
# Build the k-nearest-neighbor graph (k=15) on the 'X_pca_harmony'
# representation rather than on the uncorrected PCA.
sc.pp.neighbors(
    adata,
    n_neighbors=15,
    use_rep='X_pca_harmony',
)

# Cluster on that graph; the plotting cell reads the labels from
# adata.obs['leiden'].
sc.tl.leiden(
    adata,
    resolution=0.5,
)

# 2-D embedding for visualization; the plotting cell reads it from
# adata.obsm['X_umap'].
sc.tl.umap(
    adata,
    min_dist=0.5,
    spread=1,
)

### Visualization

In [None]:
import matplotlib.pyplot as plt

# ------------
# User options
# ------------
group1 = "leiden"   # obs column for the left panel (labels drawn on the data)
group2 = "batch"    # obs column for the right panel (regular legend)
point_size = 10     # marker size passed to both panels

# Two side-by-side panels sharing the same coordinate ranges
fig, axes = plt.subplots(1, 2, figsize=(15, 7))

# ------------------------
# Step: UMAP Visualization
# ------------------------
# Compute global UMAP coordinate ranges (padded by 1 unit on each side) so
# both panels use identical axis limits.
all_x = adata.obsm['X_umap'][:, 0]
all_y = adata.obsm['X_umap'][:, 1]
x_range = [all_x.min() - 1, all_x.max() + 1]
y_range = [all_y.min() - 1, all_y.max() + 1]

# Build an explicit category -> color map for group1
adata.obs[group1] = adata.obs[group1].astype("category")
categories = adata.obs[group1].cat.categories

# Start with the 20-color palette; fall back to the 102-color one when there
# are more categories than colors.
palette = sc.pl.palettes.default_20
if len(categories) > len(palette):
    palette = sc.pl.palettes.default_102

# Cycle through the palette so that every category receives a color even if
# there are more categories than palette entries. A plain zip() would stop at
# the end of the palette and leave categories without a color, which makes
# the dict-based `palette=` lookup fail with a KeyError.
color_map = {
    category: palette[index % len(palette)]
    for index, category in enumerate(categories)
}

# --------------------------------------
# Plot: Left Panel (group1 distribution)
# --------------------------------------
sc.pl.umap(
    adata,
    color=group1,
    ax=axes[0],
    show=False,          # keep the figure open so the second panel can be drawn
    palette=color_map,
    size=point_size,
    legend_loc='on data',
    frameon=False,
)

# ---------------------------------------
# Plot: Right Panel (group2 distribution)
# ---------------------------------------
sc.pl.umap(
    adata,
    color=group2,
    ax=axes[1],
    show=False,
    size=point_size,
    legend_fontsize=12,
    frameon=False,
)

# Apply the shared axis limits and a title to each panel
for panel, panel_title in zip(axes, (group1, group2)):
    panel.set_xlim(x_range)
    panel.set_ylim(y_range)
    panel.set_title(panel_title, fontsize=22)

plt.tight_layout()
plt.show()