This is a set of exercises to familiarize myself with the AnnData matrix storage format. It is designed primarily so that I can use it as a future reference. Apart from my own additions, the code used here is mainly sourced from the AnnData tutorial on Read the Docs and from other resources.

In [22]:
import pandas as pd 
from scipy.sparse import csr_matrix
import anndata as ad 
import numpy as np 
print(ad.__version__)

# Simulated count matrix. Storing it as a CSR matrix is optional, but it makes a
# sparse matrix (plenty of zeroes) take up less space.
n_obs, n_vars = 100, 2000
counts = csr_matrix(np.random.poisson(1, size=(n_obs, n_vars)), dtype=np.float32)

# Count the nonzero entries once and derive the total from the matrix itself,
# so the reported proportion stays correct if the dimensions above are changed.
nonzero_cells = counts.count_nonzero()
total_cells = counts.shape[0] * counts.shape[1]
print(
    f"Nonzero cells: {nonzero_cells}, "
    f"proportion {nonzero_cells / total_cells} of total"
)

# Wrap the count matrix in an AnnData object; it is passed as the X (data matrix) argument.
adata = ad.AnnData(X=counts)

0.10.4
Nonzero cells: 126140, proportion 0.6307 of total


In [23]:
# Report the dimensions of the object and of its data matrix.
print(f"{adata.n_obs} observations, {adata.n_vars} features, {adata.X.shape}")

# Build the names from positions rather than from the current names, so that
# re-running this cell does not stack prefixes (e.g. "Cell_Cell_0") and break
# the label-based lookup below.
adata.obs_names = [f"Cell_{i}" for i in range(adata.n_obs)]  # should be 100 total
adata.var_names = [f"Gene_{i}" for i in range(adata.n_vars)]  # should be 2000 total

# Subset by label: two observations by three variables.
adata[["Cell_9", "Cell_10"], ["Gene_0", "Gene_2", "Gene_10"]]


100 observations, 2000 features, (100, 2000)


View of AnnData object with n_obs × n_vars = 2 × 3

In [28]:
# Metadata is attached by adding extra columns to obs (and, likewise, to var).
type(adata.obs)  # not echoed: a notebook only displays a cell's last expression

# Assign every observation a randomly drawn cell-type label.
cell_type_labels = ["B", "T", "Monocyte"]
celltypes = np.random.choice(cell_type_labels, size=adata.n_obs)
adata.obs["cell_type"] = celltypes

adata.obs.head(10)  # first ten rows of obs (also not echoed mid-cell)

# The description of the overall object now lists the extra columns added to obs or var.
adata

AnnData object with n_obs × n_vars = 100 × 2000
    obs: 'cell_type'

In [32]:
# Boolean mask over the observations, true where the cell_type column is "T";
# indexing the object with it selects just those observations.
is_t_cell = adata.obs["cell_type"] == "T"
adata[is_t_cell]

View of AnnData object with n_obs × n_vars = 33 × 2000
    obs: 'cell_type'

In [33]:
import umap
from sklearn.preprocessing import StandardScaler

from scipy.sparse import issparse

# StandardScaler centres each feature by default and rejects sparse input
# ("Cannot center sparse matrices"), so convert X to a dense array first when
# it is stored sparsely.
dense_counts = adata.X.toarray() if issparse(adata.X) else adata.X

reducer = umap.UMAP()
scaled_data = StandardScaler().fit_transform(dense_counts)
# Fit UMAP on the standardized matrix and keep the resulting embedding.
embedding = reducer.fit_transform(scaled_data)


  from .autonotebook import tqdm as notebook_tqdm


'0.5.5'