# Open-ST Adult Mouse Hippocampus

This notebook follows the same general processing pattern as `openST-mousehead.ipynb`, adapted for the adult mouse hippocampus dataset at:

- `/Users/chrislangseth/Downloads/GSM7990098_adult_mouse_hippocampus.h5ad`

It loads the `.h5ad`, prepares a counts layer, runs a standard Scanpy workflow, then fits SCVI and CellCharter.

In [1]:
import anndata as ad
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
import squidpy as sq
import cellcharter as cc
from anndata.io import read_elem
from lightning.pytorch import seed_everything

seed_everything(12345)
scvi.settings.seed = 12345

  from pkg_resources import get_distribution, DistributionNotFound
  from .autonotebook import tqdm as notebook_tqdm
Seed set to 12345
Seed set to 12345


In [3]:
adata = sc.read_h5ad('/Users/chrislangseth/Downloads/GSM7990098_adult_mouse_hippocampus.h5ad')

In [6]:
import sys
sys.path.append("/Users/chrislangseth/work/karolinska_institutet/projects/BaloMS/")
from utils import spatial_utils

In [7]:
spatial_utils.plot_spatial_compact_fast(
    adata,
    color="tile_id",
    groupby="tile_id",
    spot_size=1,
    cols=10,
    height=10,
    background="white",
    dpi=120,
)


KeyboardInterrupt: 

In [None]:
H5AD_PATH = "/Users/chrislangseth/Downloads/GSM7990098_adult_mouse_hippocampus.h5ad"

def read_h5ad_robust(path):
    try:
        return ad.read_h5ad(path)
    except Exception:
        with h5py.File(path, "r") as f:
            kwargs = {}
            for key in ["X", "obs", "var", "uns", "obsm", "varm", "obsp", "varp"]:
                if key in f:
                    kwargs[key] = read_elem(f[key])

            if "layers" in f:
                layers = {name: read_elem(f["layers"][name]) for name in f["layers"].keys()}
                kwargs["layers"] = layers

            return ad.AnnData(**kwargs)

adata = read_h5ad_robust(H5AD_PATH)
adata.var_names_make_unique()
adata.obs_names_make_unique()
adata.obs["sample_id"] = "adult_mouse_hippocampus"
adata.obs["sample_id"] = adata.obs["sample_id"].astype("category")

if "raw" in adata.layers:
    adata.layers["counts"] = adata.layers["raw"].copy()
elif adata.raw is not None:
    adata.layers["counts"] = adata.raw.X.copy()
else:
    adata.layers["counts"] = adata.X.copy()

NEIGHBOR_SPATIAL_KEY = "spatial"
adata.obsm["spatial_plot"] = np.asarray(adata.obsm["spatial"])[:, :2].copy()

print(f"Neighbor spatial key: {NEIGHBOR_SPATIAL_KEY}")
adata

In [None]:
adata.obs.head()

Unnamed: 0,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,n_reads,reads_per_counts,exact_entropy,theoretical_entropy,exact_compression,theoretical_compression,tile_id,sample_id
GCGATAGGGCGACGTGAAAAAATAA:L3_tile_2157,0,0.0,0.0,,0,,1.78,1.86,34,36,L3_tile_2157,adult_mouse_hippocampus
AAGCCGCGACAACAGAACAGGACGA:L3_tile_2157,0,0.0,0.0,,0,,1.55,2.0,40,40,L3_tile_2157,adult_mouse_hippocampus
CACGACGTTCAAACAGAGACTCAAC:L3_tile_2157,0,0.0,0.0,,0,,1.84,1.99,42,32,L3_tile_2157,adult_mouse_hippocampus
GCGGCACGCGTGAATATGGCTCTAA:L3_tile_2157,0,0.0,0.0,,0,,1.98,1.76,42,42,L3_tile_2157,adult_mouse_hippocampus
ACTACCAGGCGGGAGGACGTAACAG:L3_tile_2157,0,0.0,0.0,,0,,1.84,1.94,38,32,L3_tile_2157,adult_mouse_hippocampus


In [None]:
adata

AnnData object with n_obs × n_vars = 20025933 × 34079
    obs: 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'n_reads', 'reads_per_counts', 'exact_entropy', 'theoretical_entropy', 'exact_compression', 'theoretical_compression', 'tile_id', 'sample_id'
    uns: 'spatial'
    obsm: 'spatial', 'spatial_plot'
    layers: 'counts'

In [5]:
n_top_genes = min(2000, adata.n_vars)
sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes, flavor="seurat", subset=True)

adata

AnnData object with n_obs × n_vars = 20025933 × 2000
    obs: 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'n_reads', 'reads_per_counts', 'exact_entropy', 'theoretical_entropy', 'exact_compression', 'theoretical_compression', 'tile_id', 'sample_id'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'spatial', 'hvg'
    obsm: 'spatial', 'spatial_plot'
    layers: 'counts'

In [None]:
sc.tl.pca(adata, svd_solver="arpack")
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=min(30, adata.obsm["X_pca"].shape[1]))
sc.tl.umap(adata, min_dist=0.3)

In [None]:
with plt.rc_context({"figure.figsize": (7, 5)}):
    sc.pl.umap(adata, color=[c for c in ["tile_id"] if c in adata.obs.columns], wspace=0.4)

with plt.rc_context({"figure.figsize": (8, 8)}):
    sc.pl.embedding(adata, basis="spatial_plot", color=[c for c in ["tile_id"] if c in adata.obs.columns], s=2, frameon=False)

In [None]:
scvi.model.SCVI.setup_anndata(
    adata,
    layer="counts",
    batch_key="sample_id",
)

model = scvi.model.SCVI(adata)

In [None]:
model.train(early_stopping=True, enable_progress_bar=True, max_epochs=30)

In [None]:
adata.obsm["X_scVI"] = model.get_latent_representation(adata).astype(np.float32)
adata

In [None]:
sq.gr.spatial_neighbors(
    adata,
    library_key="sample_id",
    coord_type="generic",
    spatial_key=NEIGHBOR_SPATIAL_KEY,
    n_neighs=6,
    delaunay=False,
)

In [None]:
cc.gr.aggregate_neighbors(adata, n_layers=3, use_rep="X_scVI", out_key="X_cellcharter", sample_key="sample_id")

In [None]:
# GMM components to try
gmm_components = [5, 10, 15, 20]

In [None]:
from sklearn.mixture import GaussianMixture

X = adata.obsm["X_cellcharter"]
gmm_results = []

for k in gmm_components:
    print(k)
    gmm = GaussianMixture(
        n_components=k,
        covariance_type="full",
        random_state=0,
        n_init=3,
    )
    labels = gmm.fit_predict(X)
    key = f"CellCharter_{k}"
    adata.obs[key] = labels.astype(str)
    gmm_results.append({
        "k": k,
        "bic": gmm.bic(X),
        "aic": gmm.aic(X),
    })
    print(f"k={k}: {adata.obs[key].nunique()} clusters")

pd.DataFrame(gmm_results)

In [None]:
with plt.rc_context({"figure.figsize": (8, 8)}):
    sc.pl.embedding(adata, basis="spatial_plot", color="CellCharter_15", s=2, frameon=False)

In [None]:
OUTPUT_H5AD = "/Users/chrislangseth/Downloads/GSM7990098_adult_mouse_hippocampus.processed.h5ad"
adata.write_h5ad(OUTPUT_H5AD)
OUTPUT_H5AD