## Generate test data

In [1]:
import numpy as np
from anndata import AnnData
from mudata import MuData
import mudata as md
from pathlib import Path

# -------------------------------
# helper: read valid analyte classes
# -------------------------------
def load_valid_analyte_classes():
    """Read the set of valid analyte classes from ``valid_analyte_classes.txt``.

    The first candidate path that exists as a regular file is read. Every line
    is stripped of surrounding whitespace; blank lines and comment lines (first
    non-blank character ``#``) are skipped.

    Returns:
        set[str]: the distinct non-comment entries found in the file.

    Raises:
        FileNotFoundError: if none of the candidate paths is an existing file.
    """
    possible_paths = [
        "valid_analyte_classes.txt"
    ]
    for path in possible_paths:
        p = Path(path)
        if p.is_file():
            # Explicit encoding so the result does not depend on the locale.
            with p.open(encoding="utf-8") as f:
                # Strip first, then test for '#', so indented comment lines
                # are recognised as comments instead of leaking into the set.
                entries = (line.strip() for line in f)
                return {entry for entry in entries if entry and not entry.startswith("#")}
    raise FileNotFoundError("valid_analyte_classes.txt not found")

def pick_valid_class(valid_classes, guesses):
    """Return the valid analyte class that best matches one of ``guesses``.

    Guesses are tried in the order given. For each guess a case-insensitive
    exact match is preferred; failing that, the first valid class whose name
    contains the guess (ignoring case and underscores) is returned.

    Args:
        valid_classes: iterable of valid class names. Sets are scanned in
            sorted order so the result is reproducible between runs; ordered
            inputs (lists, tuples) are scanned in their own order.
        guesses: iterable of candidate names, most preferred first.

    Returns:
        str: the matching entry of ``valid_classes`` with its original casing.

    Raises:
        ValueError: if no guess matches any valid class.
    """
    # Set iteration order depends on string hashing and changes between
    # interpreter runs, so impose a stable order for unordered input.
    if isinstance(valid_classes, (set, frozenset)):
        candidates = sorted(valid_classes)
    else:
        candidates = list(valid_classes)

    valid_lower = {v.lower(): v for v in candidates}
    for guess in guesses:
        lowered = guess.lower()
        if lowered in valid_lower:
            return valid_lower[lowered]
        needle = lowered.replace("_", "")
        if not needle:
            # An empty needle is a substring of every name; skipping it avoids
            # returning an arbitrary class for a blank/underscore-only guess.
            continue
        for v in candidates:
            if needle in v.lower().replace("_", ""):
                return v
    raise ValueError(f"No valid analyte_class found in {sorted(candidates)}")

# -------------------------------
# create data
# -------------------------------
# Seed NumPy's global RNG. Every draw below consumes it in this exact order,
# so reordering the random calls would change the generated data.
np.random.seed(1)
n, d, k = 1000, 100, 10

# Latent factors z (n x k): column j is drawn with mean j and std 2*j.
z = np.random.normal(loc=np.arange(k), scale=2 * np.arange(k), size=(n, k))

# Modality A: project the factors through random loadings w (d x k).
w = np.random.normal(size=(d, k))
y = z.dot(w.T)

adata = AnnData(y)
adata.obs_names = [f"obs_{i}" for i in range(1, n + 1)]
adata.var_names = [f"var_{j}" for j in range(1, d + 1)]
# One random label per observation, stored under obsm["annotation"].
adata.obsm["annotation"] = np.random.choice(["cell_type_A", "cell_type_B"], size=n)

# Modality B: same factors and observation names, d2 features of its own.
d2 = 50
w2 = np.random.normal(size=(d2, k))
y2 = z.dot(w2.T)
adata2 = AnnData(y2)
adata2.obs_names = [f"obs_{i}" for i in range(1, n + 1)]
adata2.var_names = [f"var2_{j}" for j in range(1, d2 + 1)]

# Bundle both modalities into one MuData container.
mdata = MuData(dict(A=adata, B=adata2))

# -------------------------------
# fill required validation fields
# -------------------------------
# Single pass over the modalities: set the per-observation columns and the
# .uns entries on each one. The loop variable is deliberately not called
# `adata`, so the notebook-level `adata` (modality "A") is not rebound to the
# last modality as a side effect.
for mod in mdata.mod.values():
    mod.obs["original_obs_id"] = mod.obs_names.astype(str)
    mod.obs["object_type"] = "cell"
    mod.uns["protocol"] = "10.17504/protocols.io.bxyz1234"
    mod.uns["analyte_class"] = "RNA"  # or whatever class applies


# epic_type
# "annotations": at least one modality has a non-empty obsm["annotation"].
has_annotations = any(
    len(m.obsm["annotation"]) > 0
    for m in mdata.mod.values()
    if "annotation" in m.obsm
)
# "analyses": at least one modality has a non-empty X matrix.
has_analyses = any(
    getattr(m.X, "size", 0) > 0
    for m in mdata.mod.values()
    if m.X is not None
)
# Keep the labels whose flag is set, in the fixed order annotations, analyses.
epic_type = [
    label
    for label, present in (("annotations", has_annotations), ("analyses", has_analyses))
    if present
]
mdata.uns["epic_type"] = epic_type

# -------------------------------
# write and reload
# -------------------------------
path_h5mu = "example.h5mu"
mdata.write(path_h5mu)
mdata_r = md.read(path_h5mu, backed=True)

# Check the round trip against the reloaded object rather than the in-memory
# one; otherwise the success message says nothing about what is on disk.
assert set(mdata_r.mod) == set(mdata.mod), "modalities changed after reload"
print("Written and reloaded MuData successfully.")
# Convert to plain str so the output does not depend on how the reader
# materialises string arrays.
print("epic_type:", [str(v) for v in mdata_r.uns["epic_type"]])


Written and reloaded MuData successfully.
epic_type: ['annotations', 'analyses']


  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)


In [2]:
# Validate a MuData object
# Reload the file at path_h5mu (set in the cell above) and validate the
# resulting in-memory MuData object; validate_mudata is given the object,
# not the file path.
from mudata_validator import validate_mudata
import muon as mu
# NOTE: this rebinds `mdata` to the copy read back from disk.
mdata = mu.read_h5mu(path_h5mu)
validate_mudata(mdata)

  from .autonotebook import tqdm as notebook_tqdm
  if Version(scanpy.__version__) < Version("1.10"):
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
  if "protocol" not in adata.uns_keys() or adata.uns["protocol"] == None:


Validating overall MuData object...
Validating modality: A
The values in AnnData.obs.index will be used as the objects' unique identifiers. They look like:
Index(['obs_1', 'obs_2', 'obs_3', 'obs_4', 'obs_5'], dtype='object')
The HUGO symbol should be included as an annotation for genes and the Uniprot ID should be included as an annotation for proteins.


  if "analyte_class" not in adata.uns_keys():
  if "X_spatial" in adata.obsm_keys():
  if "protocol" not in adata.uns_keys() or adata.uns["protocol"] == None:


Standard plots are expected to be stored in .obsm['X_umap'], .obsm['X_harmony'], .obsm['X_tsne'] and .obsm['X_pca']
If this is spatial data, coordinates should go in .obsm['X_spatial']
Validating modality: B
The values in AnnData.obs.index will be used as the objects' unique identifiers. They look like:
Index(['obs_1', 'obs_2', 'obs_3', 'obs_4', 'obs_5'], dtype='object')
The HUGO symbol should be included as an annotation for genes and the Uniprot ID should be included as an annotation for proteins.
Standard plots are expected to be stored in .obsm['X_umap'], .obsm['X_harmony'], .obsm['X_tsne'] and .obsm['X_pca']
If this is spatial data, coordinates should go in .obsm['X_spatial']
Validation passed!


  if "analyte_class" not in adata.uns_keys():
  if "X_spatial" in adata.obsm_keys():
