In [1]:
import scanpy as sc
from scipy import io
from scipy import sparse

In [2]:
!mkdir matrix_files # Make new directory/folder called "matrix_files"

In [3]:
# Load the .h5ad file into the variable `adata`; every later export cell reads from it.
adata = sc.read_h5ad(filename='human_immune_health_atlas_cd8t-gdt-mait.h5ad')

In [None]:
# Optional: if you want to export the data stored in adata.raw instead of adata.X,
# uncomment and run the next line before continuing.
# adata = adata.raw.to_adata()

In [4]:
# Write barcodes.tsv for Seurat (not gzipped yet): one observation name per line.
with open('matrix_files/barcodes.tsv', 'w') as barcode_file:
    barcode_file.writelines(barcode + '\n' for barcode in adata.obs_names)

In [5]:
# Write features.tsv for Seurat (not gzipped yet).
# Each row has three tab-separated columns: the variable name twice (used as both
# feature ID and feature name), followed by the feature type 'Gene Expression'.
with open('matrix_files/features.tsv', 'w') as feature_file:
    for gene in adata.var_names:
        row = '\t'.join((gene, gene, 'Gene Expression'))
        feature_file.write(row + '\n')

In [6]:
# Write matrix.mtx for Seurat (not gzipped yet); mmwrite appends the ".mtx" extension.
# The matrix is transposed so its rows follow features.tsv (adata.var_names) and its
# columns follow barcodes.tsv (adata.obs_names).
# Coerce to a sparse COO matrix first so mmwrite always emits the sparse "coordinate"
# layout used by 10x-style matrix.mtx files; a dense adata.X would otherwise be written
# in the dense "array" layout.
io.mmwrite('matrix_files/matrix', sparse.coo_matrix(adata.X.T))

In [7]:
!ls matrix_files/ # Check all 3 files are present (barcodes.tsv, features.tsv, and matrix.mtx)

barcodes.tsv features.tsv matrix.mtx


In [8]:
!gzip matrix_files/* # Zip all files

In [9]:
!ls matrix_files/ # Check all 3 files are gzipped (barcodes.tsv.gz, features.tsv.gz, and matrix.mtx.gz)

barcodes.tsv.gz features.tsv.gz matrix.mtx.gz


In [10]:
# Save the per-observation annotations (adata.obs) to metadata.csv in the working directory.
adata.obs.to_csv(path_or_buf='metadata.csv')

In [None]:
# From here on, switch to R (e.g. in the RStudio IDE) to convert the three files
# (barcodes.tsv.gz, features.tsv.gz, and matrix.mtx.gz) into a single .h5 file

In [None]:
# Run the next line only once, the first time you install DropletUtils:
# BiocManager::install("DropletUtils")
library(DropletUtils)
library(Seurat)

In [None]:
# Directory containing the 3 files (placeholder: replace with your own path).
matrix_dir <- '/Directory'

In [None]:
# Read the data into an expression matrix.
# Read10X looks for the three standard file names inside the given directory.
counts <- Read10X(matrix_dir)

In [None]:
# Base name of the output file; ".h5" is appended when the file is written,
# so the final file is called filtered_feature_bc_matrix.h5.
save_name <- "filtered_feature_bc_matrix"
# Directory the .h5 file is written to (placeholder: replace with your own path).
save_file_path <- '/Directory'

In [None]:
# Write the count matrix to a single 10x-style HDF5 file with write10xCounts.
write10xCounts(
  # Output location: <save_file_path>/<save_name>.h5
  path = file.path(save_file_path, paste0(save_name, ".h5")),
  x = counts,
  # The row names of the matrix are used as both gene IDs and gene symbols.
  gene.id = rownames(counts),
  gene.symbol = rownames(counts),
  # Adjust genome and version to your data (e.g. "mm10" for mouse).
  genome = "GRCh38",
  version = "3",
  type = "HDF5",
  # Replace the output file if it already exists.
  overwrite = TRUE
)