In [None]:
import pandas as pd
from scipy import io
import scanpy as sc

# Prerequisite: the Supplementary Files listed at
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE211644
# must already be downloaded into the directory named below.
base = "../GSE211644"

# Barcode and gene tables: tab-separated, no header row, so pandas labels
# the columns 0, 1, ... and only the first column is used further down.
barcodes = pd.read_csv(f"{base}/GSE211644_fresh_barcodes.tsv.gz", sep="\t", header=None)
genes = pd.read_csv(f"{base}/GSE211644_fresh_genes.tsv.gz", sep="\t", header=None)

# Count matrix: load the Matrix Market file, then transpose it so that rows
# are cells and columns are genes, and convert to CSR for compact storage.
mat = io.mmread(f"{base}/GSE211644_fresh_matrix.mtx.gz")
mat = mat.transpose().tocsr()

# Wrap the cell x gene matrix in an AnnData object and label both axes with
# the first column of the corresponding table, cast to plain strings.
adata = sc.AnnData(X=mat)
adata.var_names = genes.iloc[:, 0].astype(str).values  # one label per gene (column)
adata.obs_names = barcodes.iloc[:, 0].astype(str).values  # one label per cell (row)

In [55]:
# Quick sanity report: the overall matrix dimensions, followed by the number
# of row labels (cells) and column labels (genes) attached to `adata`.
for label, value in (
    ("Matrix shape:", adata.shape),
    ("Number of barcodes (cells):", len(adata.obs_names)),
    ("Number of genes:", len(adata.var_names)),
):
    print(label, value)

Matrix shape: (22164, 33538)
Number of barcodes (cells): 22164
Number of genes: 33538
