In [1]:
import numpy as np
import scanpy as sc

In [2]:
# Load the quality-controlled dataset from the local .h5ad file.
# `backup_url` is the download source scanpy falls back to when the file is
# not already on disk (see the scanpy `read` docs); the progress bar below
# shows that download (~501 MB).
adata = sc.read(
    filename="s4d8_quality_control.h5ad",
    backup_url="https://figshare.com/ndownloader/files/40014331",
)

  0%|          | 0.00/501M [00:00<?, ?B/s]

An `AnnData` object (here, `adata`), the data container used by Scanpy, is organized as follows:

 - `adata.obs` (observations): Per-cell metadata; cells are the rows of `adata.X`.
 - `adata.var` (variables): Per-gene metadata; genes are the columns of `adata.X`.
 - `adata.X`: The main expression matrix (cells × genes).
 - `adata.layers`: Additional expression layers (e.g., raw counts, spliced/unspliced counts).
 - `adata.uns` (unstructured): Miscellaneous annotations (e.g., color schemes, PCA parameters).
 - `adata.obsm` / `adata.varm`: Multi-dimensional per-cell (`obsm`) or per-gene (`varm`) annotations, such as embeddings.

In [3]:
# Display the AnnData summary: its dimensions and the names of the
# populated obs / var / layers entries.
adata

AnnData object with n_obs × n_vars = 14814 × 20171
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_20_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'outlier', 'mt_outlier', 'scDblFinder_score', 'scDblFinder_class'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells'
    layers: 'counts', 'soupX_counts'

In [4]:
# Inspect the dataset shape, reported as (n_obs, n_vars), i.e. (cells, genes)
adata.shape

(14814, 20171)

- Shape: `(14814, 20171)`
  - 14814 cells (rows of `adata.X`; one row per cell in `adata.obs`)
  - 20171 genes (columns of `adata.X`; one row per gene in `adata.var`)

This tells you that the dataset contains exactly **14814** single cells and **20171** measured features (genes).

In [6]:
# List the names of the per-cell metadata columns stored in adata.obs
adata.obs_keys()

['n_genes_by_counts',
 'log1p_n_genes_by_counts',
 'total_counts',
 'log1p_total_counts',
 'pct_counts_in_top_20_genes',
 'total_counts_mt',
 'log1p_total_counts_mt',
 'pct_counts_mt',
 'total_counts_ribo',
 'log1p_total_counts_ribo',
 'pct_counts_ribo',
 'total_counts_hb',
 'log1p_total_counts_hb',
 'pct_counts_hb',
 'outlier',
 'mt_outlier',
 'scDblFinder_score',
 'scDblFinder_class']

In [7]:
# Preview the first rows of the per-gene metadata table (adata.var)
adata.var.head()

Unnamed: 0,gene_ids,feature_types,genome,mt,ribo,hb,n_cells_by_counts,mean_counts,log1p_mean_counts,pct_dropout_by_counts,total_counts,log1p_total_counts,n_cells
AL627309.1,ENSG00000238009,Gene Expression,GRCh38,False,False,False,30,0.001831,0.001829,99.822842,31.0,3.465736,27
AL627309.5,ENSG00000241860,Gene Expression,GRCh38,False,False,False,146,0.008976,0.008936,99.137829,152.0,5.030438,126
LINC01409,ENSG00000237491,Gene Expression,GRCh38,False,False,False,883,0.063009,0.061104,94.785638,1067.0,6.973543,758
LINC01128,ENSG00000228794,Gene Expression,GRCh38,False,False,False,533,0.03614,0.035503,96.852486,612.0,6.418365,437
LINC00115,ENSG00000225880,Gene Expression,GRCh38,False,False,False,72,0.00437,0.00436,99.57482,74.0,4.317488,64
