In [1]:
from __future__ import annotations

from collections.abc import Sequence
from typing import Iterable

import polars as pl
from anndata import AnnData, read_h5ad
from scipy.sparse import issparse

from cellestial.util.errors import KeyNotFoundError

In [2]:
def anndata_features_frame(
    data: AnnData,
) -> pl.DataFrame:
    """Stub: ignores `data` and returns a new, empty polars DataFrame.

    Parameters
    ----------
    data
        AnnData object; not read by the current implementation.

    Returns
    -------
    pl.DataFrame
        A frame with no columns and no rows.
    """
    placeholder = pl.DataFrame()
    return placeholder


def anndata_observations_frame(
    data: AnnData,
    keys: str | Sequence[str] | None,
    *,
    index_name: str = "barcode",
    include_dimensions: bool = False,
) -> pl.DataFrame:
    """Build a polars DataFrame with one row per observation of `data`.

    The first column holds `data.obs_names`, followed by one column per
    column of `data.obs`. With `include_dimensions=True`, every entry of
    `data.obsm` is appended as one column named after its obsm key (polars
    chooses the dtype for the stored array).

    Parameters
    ----------
    data
        AnnData object whose `obs_names`, `obs` and (optionally) `obsm`
        are read.
    keys
        Accepted but not used by this function.
        NOTE(review): presumably meant for variable keys to append — confirm.
    index_name
        Name of the column that receives `data.obs_names`.
    include_dimensions
        Whether to append the entries of `data.obsm`.

    Returns
    -------
    pl.DataFrame
        The assembled observations frame.
    """
    # PART 1: INITIALIZE
    frame = pl.DataFrame()
    # PART 2: ADD obs_names
    frame = frame.with_columns(pl.Series(index_name, data.obs_names))
    # PART 3: ADD AnnData.obs
    for key in data.obs.columns:
        frame = frame.with_columns(pl.Series(key, data.obs[key]))
    # PART 4: ADD AnnData.obsm
    if include_dimensions:
        for name in data.obsm:
            # The key must be passed as the Series name: a Series built from
            # the array alone is named "", so every obsm entry would land in
            # the same column and overwrite the previous one.
            frame = frame.with_columns(pl.Series(name, data.obsm[name]))
    return frame

In [3]:
# Load the AnnData object used by the cells below (path is relative to the notebook's working directory).
data = read_h5ad("data/pbmc3k_pped.h5ad")

In [4]:
# Check that `data.obs.index` is identical to `data.obs_names`.
data.obs.index.identical(data.obs_names)

True

In [5]:
# Fetch the per-observation values for a single key as a 1-D array.
data.obs_vector("MT-ND5")

array([-0.14707292,  0.67480492,  0.4583892 , ..., -0.61634811,
        0.5861539 , -0.41527583], shape=(16680,))

In [6]:
# Build the observations frame with the helper defined earlier in this
# notebook instead of repeating its body inline.
frame = anndata_observations_frame(data, None, index_name="barcode")

In [7]:
# Display the assembled observations frame.
frame

barcode,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0"""
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10"""
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15"""
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17"""
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""6"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""6"""
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16"""
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5"""
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1"""


In [8]:
# Print every obsm key together with the type of the object stored under it.
# A bare `type(...)` expression inside a loop is evaluated and thrown away,
# so it has to go through print() to show up in the cell output.
for key in data.obsm:
    print(key, type(data.obsm[key]))

X_pca
X_tsne
X_umap


In [9]:
# `key` still holds the last value from the obsm loop in the previous cell,
# so this inspects the last obsm entry only.
type(data.obsm[key])

numpy.ndarray

In [10]:
def ab(
        a,
        b=1,
        **kwargs,
):
    """Scratch helper for inspecting how keyword arguments are collected.

    Prints whether a keyword named ``t`` was supplied, then prints ``a``,
    ``b`` and the dict of extra keyword arguments. Returns None.
    """
    has_t = "t" in kwargs.keys()
    print(has_t)
    collected = (a, b, kwargs)
    print(*collected)

In [11]:
# Call with only the required argument: `b` takes its default and no extra keywords are passed.
ab(1)

False
1 1 {}


In [12]:
from lets_plot import layer_tooltips


In [13]:
# Inspect the type of the object returned by calling layer_tooltips.
type(layer_tooltips("abc"))

lets_plot.plot.tooltip.layer_tooltips

In [14]:
# layer_tooltips can be used directly as the class in an isinstance check.
isinstance(layer_tooltips("abc"),layer_tooltips)

True

In [15]:
# Dump the instance attributes to see where the passed variable names are stored.
vars(layer_tooltips(["abc","acad"]))

{'_tooltip_formats': [],
 '_tooltip_lines': None,
 '_tooltip_anchor': None,
 '_tooltip_min_width': None,
 '_tooltip_color': None,
 '_tooltip_variables': ['abc', 'acad'],
 '_tooltip_title': None,
 '_disable_splitting': None,
 'kind': 'tooltips',
 '_FeatureSpec__props': {}}

In [16]:
# The variable names given to the constructor are kept under the private
# `_tooltip_variables` attribute.
vars(layer_tooltips(["abc"]))["_tooltip_variables"]

['abc']

In [17]:
import polars as pl

# Single-column frame whose column "a" is explicitly typed as Categorical.
f = pl.Series("a", ["abc", "def"], dtype=pl.Categorical).to_frame()

In [18]:
# Compare the column dtype against the pl.Categorical class itself.
f["a"].dtype == pl.Categorical

True

In [19]:
import cellestial as cl

In [20]:
# Rebuild `frame` through the packaged cellestial helper (axis=0) and display
# it; this rebinds the `frame` name used by the earlier cells.
frame = cl.frames.build_frame(data, axis=0)
frame

barcode,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0"""
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10"""
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15"""
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17"""
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""6"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""6"""
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16"""
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5"""
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1"""


In [21]:

def _add_anndata_variable_columns(
    data: AnnData, frame: pl.DataFrame, keys: str | Sequence[str]
) -> pl.DataFrame:
    """Add variable keys to the DataFrame.

    For every key that is not already a column of `frame`, the
    per-observation vector returned by `data.obs_vector(key)` is cast to
    float32 and appended as a new column named after the key.

    Parameters
    ----------
    data
        AnnData object providing `var_names` and `obs_vector`.
    frame
        Frame to extend; it is not modified in place.
    keys
        A single key or a sequence of keys. Keys already present in `frame`
        (and repeated keys) are skipped.

    Returns
    -------
    pl.DataFrame
        `frame` with the new columns appended, or `frame` itself when there
        is nothing to add.

    Raises
    ------
    KeyNotFoundError
        If a key is neither a column of `frame` nor in `data.var_names`.
    """
    if isinstance(keys, str):
        keys = [keys]

    # Track taken names in a set: `frame.columns` builds a new list on every
    # access, which makes a per-key membership test quadratic when this is
    # called with thousands of variable names.
    taken = set(frame.columns)
    new_columns = []
    for key in keys:
        if key in taken:
            continue
        if key not in data.var_names:
            msg = f"Key `{key}` not found in data."
            raise KeyNotFoundError(msg)
        column = data.obs_vector(key)
        new_columns.append(pl.Series(key, column.astype("float32")))
        taken.add(key)

    if not new_columns:
        return frame
    # Append everything in one call instead of rebuilding the frame per key.
    return frame.with_columns(new_columns)

In [22]:
# Container type returned by obs_vector for a variable key.
type(data.obs_vector("MT-ND5"))

numpy.ndarray

In [23]:
# Element dtype of that vector; the helper above casts it to float32 before adding it to the frame.
data.obs_vector("MT-ND5").dtype

dtype('float64')

In [24]:
# obs_vector also accepts an obs column key; for `leiden` the result has a categorical dtype.
data.obs_vector("leiden").dtype

CategoricalDtype(categories=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
                  '12', '13', '14', '15', '16', '17', '18', '19'],
, ordered=False, categories_dtype=str)

In [25]:
# Add every variable in `data.var_names` as a column. `frame` is not
# reassigned; the returned frame is only displayed.
_add_anndata_variable_columns(data,frame,keys=data.var_names)

barcode,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden,AL390719.2,C1QTNF12,AL162741.1,LINC01786,AL391244.2,TMEM52,AL589739.1,PLCH2,AL513320.1,CHD5,AL021155.5,VPS13D,AL031283.1,FAM131C,LINC01772,LINC01783,…,EFNB1,P2RY4,KIF4A,GJB1,ITGB1BP2,PHKA1,TSIX,FTX,AL353804.2,Z83843.1,ATP7A,LPAR4,KLHL4,AC234775.4,GPRASP1,AL035427.2,RAB40A,AL021308.1,RAB9B,AC004000.1,TENM1,SMIM10L2A,AL683813.1,LINC00894,GABRE,PNMA6F,PDZD4,AC244090.3,MTCP1,TMLHE-AS1,AC012078.2,PCDH11Y,PRKY,KDM5D,TTTY10,MT-ND2,MT-ND5
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,1.449522,-0.011401,2.668131,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,1.305765,1.535527,-0.106079,0.031961,-0.147073
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.086557,-0.011401,3.30876,-0.259284,-0.044446,-0.022394,-0.011197,2.459468,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,3.489016,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.072455,-0.106079,0.900468,0.674805
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.701318,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.27646,0.458389
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.705391,0.781341,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.24115,-0.011401,1.498937,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,-0.324958,-0.037698
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.466883,1.941121
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.454038,2.491669
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.535295,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.417411,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,0.865235,-0.316394,-0.106079,0.529579,0.067357
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.395541,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.010661,-0.106079,-0.736241,-0.616348
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,0.55639,0.586154


In [26]:
# Storage type of the expression matrix before converting it to sparse.
type(data.X)

numpy.ndarray

In [27]:
from scipy import sparse

In [28]:
# Replace X with a CSR sparse matrix (mutates `data` in place) so the cells
# below run the helpers against sparse input.
data.X = sparse.csr_matrix(data.X)

In [29]:
# Confirm that X is now stored as a CSR matrix.
type(data.X)

scipy.sparse._csr.csr_matrix

In [30]:
# Same call as before the conversion, now with a sparse X; the result is only displayed.
_add_anndata_variable_columns(data,frame,keys=data.var_names)

barcode,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden,AL390719.2,C1QTNF12,AL162741.1,LINC01786,AL391244.2,TMEM52,AL589739.1,PLCH2,AL513320.1,CHD5,AL021155.5,VPS13D,AL031283.1,FAM131C,LINC01772,LINC01783,…,EFNB1,P2RY4,KIF4A,GJB1,ITGB1BP2,PHKA1,TSIX,FTX,AL353804.2,Z83843.1,ATP7A,LPAR4,KLHL4,AC234775.4,GPRASP1,AL035427.2,RAB40A,AL021308.1,RAB9B,AC004000.1,TENM1,SMIM10L2A,AL683813.1,LINC00894,GABRE,PNMA6F,PDZD4,AC244090.3,MTCP1,TMLHE-AS1,AC012078.2,PCDH11Y,PRKY,KDM5D,TTTY10,MT-ND2,MT-ND5
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,1.449522,-0.011401,2.668131,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,1.305765,1.535527,-0.106079,0.031961,-0.147073
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.086557,-0.011401,3.30876,-0.259284,-0.044446,-0.022394,-0.011197,2.459468,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,3.489016,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.072455,-0.106079,0.900468,0.674805
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.701318,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.27646,0.458389
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.705391,0.781341,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.24115,-0.011401,1.498937,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,-0.324958,-0.037698
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.466883,1.941121
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.454038,2.491669
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.535295,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.417411,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,0.865235,-0.316394,-0.106079,0.529579,0.067357
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.395541,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.010661,-0.106079,-0.736241,-0.616348
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,0.55639,0.586154


In [31]:
from scipy.sparse import issparse

In [32]:


def _add_anndata_variable_columns2(
    data: AnnData, frame: pl.DataFrame, keys: str | Sequence[str]
) -> pl.DataFrame:
    """Add variable keys to the DataFrame.

    Variant that reads the columns of `data.X` directly. For every key that
    is not already a column of `frame`, the matching column of `data.X` is
    densified if X is sparse, flattened to 1-D, cast to float32 and appended
    as a new column named after the key.

    Parameters
    ----------
    data
        AnnData object providing `var_names` and `X`.
    frame
        Frame to extend; it is not modified in place.
    keys
        A single key or a sequence of keys. Keys already present in `frame`
        (and repeated keys) are skipped.

    Returns
    -------
    pl.DataFrame
        `frame` with the new columns appended, or `frame` itself when there
        is nothing to add.

    Raises
    ------
    KeyNotFoundError
        If a key is neither a column of `frame` nor in `data.var_names`.
    """
    if isinstance(keys, str):
        keys = [keys]

    # Loop-invariant: fetch the matrix and test for sparsity once.
    matrix = data.X
    matrix_is_sparse = issparse(matrix)

    # Track taken names in a set: `frame.columns` builds a new list on every
    # access, which makes a per-key membership test quadratic for long key
    # lists.
    taken = set(frame.columns)
    new_columns = []
    for key in keys:
        if key in taken:
            continue
        if key not in data.var_names:
            msg = f"Key `{key}` not found in data."
            raise KeyNotFoundError(msg)
        # get the index of the variable
        index = data.var_names.get_loc(key)
        column = matrix[:, index]
        if matrix_is_sparse:
            # a sparse column slice has to be densified before flattening
            column = column.toarray()
        # ravel() avoids an extra copy; astype() already returns a new array
        new_columns.append(pl.Series(key, column.ravel().astype("float32")))
        taken.add(key)

    if not new_columns:
        return frame
    # Append everything in one call instead of rebuilding the frame per key.
    return frame.with_columns(new_columns)

In [33]:
# Run the direct-X variant on the same sparse data for comparison with the
# obs_vector-based helper; the result is only displayed.
_add_anndata_variable_columns2(data,frame,keys=data.var_names)

barcode,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden,AL390719.2,C1QTNF12,AL162741.1,LINC01786,AL391244.2,TMEM52,AL589739.1,PLCH2,AL513320.1,CHD5,AL021155.5,VPS13D,AL031283.1,FAM131C,LINC01772,LINC01783,…,EFNB1,P2RY4,KIF4A,GJB1,ITGB1BP2,PHKA1,TSIX,FTX,AL353804.2,Z83843.1,ATP7A,LPAR4,KLHL4,AC234775.4,GPRASP1,AL035427.2,RAB40A,AL021308.1,RAB9B,AC004000.1,TENM1,SMIM10L2A,AL683813.1,LINC00894,GABRE,PNMA6F,PDZD4,AC244090.3,MTCP1,TMLHE-AS1,AC012078.2,PCDH11Y,PRKY,KDM5D,TTTY10,MT-ND2,MT-ND5
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,1.449522,-0.011401,2.668131,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,1.305765,1.535527,-0.106079,0.031961,-0.147073
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.086557,-0.011401,3.30876,-0.259284,-0.044446,-0.022394,-0.011197,2.459468,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,3.489016,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.072455,-0.106079,0.900468,0.674805
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,2.701318,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.27646,0.458389
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.705391,0.781341,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.24115,-0.011401,1.498937,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,-0.324958,-0.037698
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.466883,1.941121
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""6""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,1.454038,2.491669
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,1.535295,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.417411,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,0.865235,-0.316394,-0.106079,0.529579,0.067357
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,0.395541,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,1.010661,-0.106079,-0.736241,-0.616348
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1""",-0.015695,-0.051608,-0.032246,-0.031868,-0.052656,-0.044452,-0.012353,-0.115476,-0.028972,-0.019927,-0.361637,-0.321426,-0.049415,-0.01088,-0.092983,-0.018487,…,-0.112393,-0.012001,-0.166721,-0.011792,-0.048267,-0.036549,-0.036738,-0.500946,-0.011401,-0.205814,-0.259284,-0.044446,-0.022394,-0.011197,-0.151027,-0.03615,-0.018812,-0.013312,-0.053241,-0.03531,-0.133735,-0.041904,-0.080708,-0.11965,-0.01962,-0.029089,-0.108901,-0.0248,-0.041151,-0.023045,-0.011157,-0.022755,-0.35076,-0.316394,-0.106079,0.55639,0.586154
