In [18]:
import numpy as np
from scipy import sparse
from scipy.stats import rankdata
import scanpy as sc

Load a test dataset

In [2]:
# Load scanpy's bundled PBMC 3k example dataset (fetched on first use, hence the progress bar).
adata = sc.datasets.pbmc3k()

  0%|          | 0.00/5.58M [00:00<?, ?B/s]

In [3]:
# Show the AnnData summary: matrix dimensions and available annotation fields.
adata

AnnData object with n_obs × n_vars = 2700 × 32738
    var: 'gene_ids'

In [13]:
# Parameters
layer = None  # key in adata.layers to rank; a falsy value selects adata.X
max_rank = 1500  # ranks larger than this are clipped down to this value
chunk_cells = 200  # number of cells (rows of X) processed per chunk
ties_method = "average"  # forwarded to scipy.stats.rankdata as `method`

In [19]:
# Pick the expression matrix to rank: the requested layer, or adata.X when no layer is set.
X = adata.layers[layer] if layer else adata.X
n_cells, n_genes = X.shape

# The chunk loop slices contiguous blocks of rows (cells), so the sparse input
# is held in CSR form, the layout with cheap row slicing. The original code
# converted to CSC, which favours column slicing and is the slow path for
# this access pattern.
is_sparse = sparse.issparse(X)
if is_sparse:
    # NOTE: the variable keeps its historical name `Xcsc` because the chunk
    # loop references it; the matrix it holds is CSR.
    Xcsc = X.tocsr()
else:
    Xarr = np.asarray(X)

In [6]:
# List the available layers; none are defined here, so `layer = None` falls back to adata.X.
adata.layers

Layers with keys: 

In [25]:
# COO triplet accumulators: rank values, gene (row) indices, cell (column) indices.
data, rows, cols = [], [], []

In [26]:
# Compute per-cell gene ranks chunk by chunk and collect them as COO triplets.
#
# Start from empty accumulators so this cell can be re-run safely: triplets
# left over from a previous run would otherwise be appended again, and
# duplicate COO entries are summed when the matrix is converted to CSR.
data, rows, cols = [], [], []

for cstart in range(0, n_cells, chunk_cells):
    cend = min(n_cells, cstart + chunk_cells)
    if is_sparse:
        chunk = Xcsc[cstart:cend, :].T.toarray()  # genes x cells_in_chunk
    else:
        chunk = Xarr[cstart:cend, :].T  # genes x cells_in_chunk

    # Work on a float copy so NaNs can be replaced without touching the input.
    chunk = chunk.astype(float)
    # NaN -> -inf, which becomes +inf after negation and therefore ranks last.
    chunk[np.isnan(chunk)] = -np.inf

    # Rank all cells of the chunk in one call (one column per cell). Negating
    # the values makes the largest value receive rank 1.
    ranks = rankdata(-chunk, method=ties_method, axis=0)
    # NOTE(review): the int32 cast truncates fractional ranks produced by
    # ties_method="average" (e.g. 4.5 -> 4) -- confirm this is intended.
    ranks_clipped = np.minimum(ranks, max_rank).astype(np.int32)

    # NOTE(review): ranks start at 1, so `> 0` keeps every gene of every cell
    # and the resulting matrix is fully dense. Presumably only ranks below
    # max_rank were meant to be stored; left unchanged because later cells
    # read the stored max_rank entries.
    # Transposing makes nonzero() walk cell by cell with genes ascending,
    # which is the same triplet order the per-column loop produced.
    cell_idx, gene_idx = np.nonzero(ranks_clipped.T > 0)
    data.extend(ranks_clipped[gene_idx, cell_idx].tolist())
    rows.extend(gene_idx.tolist())
    cols.extend((cstart + cell_idx).tolist())

In [42]:
# Debug check: number of cells in the last processed chunk (`chunk` is left over from the loop).
range(chunk.shape[1])

range(0, 100)

In [31]:
# Assemble the collected (rank, gene, cell) triplets into a genes x cells
# COO matrix and convert it to CSR.
triplet_values = np.array(data, dtype=np.int32)
triplet_coords = (
    np.array(rows, dtype=np.int32),
    np.array(cols, dtype=np.int32),
)
ranks_mat = sparse.coo_matrix(
    (triplet_values, triplet_coords),
    shape=(n_genes, n_cells),
).tocsr()

In [35]:
# Peek at a 4x4 window (genes 1-4, cells 1-4) of the genes x cells rank matrix.
print(ranks_mat[1:5, 1:5])

<Compressed Sparse Row sparse matrix of dtype 'int32'
	with 16 stored elements and shape (4, 4)>
  Coords	Values
  (0, 0)	1500
  (0, 1)	1500
  (0, 2)	1500
  (0, 3)	1500
  (1, 0)	1500
  (1, 1)	1500
  (1, 2)	1500
  (1, 3)	1500
  (2, 0)	1500
  (2, 1)	1500
  (2, 2)	1500
  (2, 3)	1500
  (3, 0)	1500
  (3, 1)	1500
  (3, 2)	1500
  (3, 3)	1500


In [40]:
# Ranks of every gene for the cell at column index 1, flattened to a dense vector.
a = ranks_mat[:, 1]
a_dense = np.ravel(a.toarray())

# Ascending sort: the smallest rank values come first, so the first ten
# positions are the ten best-ranked genes of this cell.
top_indices = a_dense.argsort()[:10]
top_values = a_dense[top_indices]

print("Top indices:", top_indices)
print("Top values:", top_values)

Top indices: [19154 24673 14247 30970 15745 23794   908 13302 11447 10709]
Top values: [1 2 3 4 4 6 7 7 9 9]
