In [None]:
from __future__ import annotations

from collections.abc import Sequence

s
import polars as pl
from anndata import AnnData, read_h5ad


In [26]:
data = read_h5ad("../../data/pbmc3k_pped.h5ad")

In [90]:
def _add_variable_columns(frame: pl.DataFrame, keys: str | Sequence[str]) -> pl.DataFrame:
    """Add variable keys to the DataFrame."""
    if isinstance(keys, str):
        keys = [keys]
    for key in keys:
        if key in frame.columns:
            continue
        elif key in data.var_names:
            # get the index of the gene
            index = data.var_names.get_indexer([key])
            # add the variable to the frame
            frame = frame.with_columns(
                pl.Series(key, data.X[:, index].flatten().astype("float32")),
            )
        else:
            msg = f"Key `{key}` not found in data."
            raise ValueError(msg)

    return frame

In [None]:
def anndata_observations_frame(
    data: AnnData,
    /,
    keys: str | Sequence[str] | None = None,
    *,
    observations_name="barcode",
    include_dimensions: bool = False,
) -> pl.DataFrame:
    """
    Build an Observations DataFrame from an AnnData object.

    Parameters
    ----------
    data : AnnData
        The AnnData object containing the observations.
    keys : str or Sequence[str] or None
        Variable keys to add to the DataFrame. If None, no additional keys are added.
    observations_name : str, optional
        The name of the observations column, by default "barcode".
    include_dimensions : bool, optional
        Whether to include dimensions from `obsm` in the DataFrame, by default False.

    Returns
    -------
    pl.DataFrame
        A DataFrame containing the observations, with optional variable keys and dimensions.
    """
    # Check if data is an AnnData object
    if not isinstance(data, AnnData):
        msg = "data must be an `AnnData` object"
        raise TypeError(msg)
    # PART 1: INITIALIZE
    frame = pl.DataFrame()
    # PART 3: ADD obs_names
    frame = frame.with_columns(pl.Series("obs_names", data.obs_names))
    # PART 2: ADD AnnData.obs
    for key in data.obs.columns:
        frame = frame.with_columns(pl.Series(key, data.obs[key]))
    # PART 4: ADD dimensions if needed
    if include_dimensions:
        for X in data.obsm:
            col_count = data.obsm[X].shape[1]  # Number of dimensions (columns)
            for col in range(col_count):
                frame = frame.with_columns(pl.Series(f"{X}_{col+1}", data.obsm[X][:, col]))

    # PART 5: ADD keys if provided
    if keys is not None:
        frame = _add_variable_columns(frame, keys)

    return frame


In [None]:
def anndata_variables_frame(
    data: AnnData,
    *,
    variables_name: str = "variable",
    include_dimensions: bool = False,
) -> pl.DataFrame:
    """
    Build a Variables DataFrame from an AnnData object.

    Parameters
    ----------
    data : AnnData
        The AnnData object containing the variables.

    Returns
    -------
    pl.DataFrame
        A DataFrame containing the variables.
    """
    # PART 1: INITIALIZE
    if not isinstance(data, AnnData):
        msg = "data must be an `AnnData` object"
        raise TypeError(msg)
    frame = pl.DataFrame()

    # PART 2: ADD var_names
    frame = frame.with_columns(pl.Series("variable", data.var_names))

    # PART 3: ADD AnnData.var
    for key in data.var.columns:
        frame = frame.with_columns(pl.Series(key, data.var[key]))

    # PART 4: ADD dimensions if needed
    if include_dimensions:
        for X in data.varm:
            col_count = data.varm[X].shape[1] # Number of dimensions (columns)
            for col in range(col_count):
                frame = frame.with_columns(pl.Series(f"{X}_{col+1}", data.varm[X][:, col]))

    return frame

In [93]:
data.var_names

Index(['AL390719.2', 'C1QTNF12', 'AL162741.1', 'LINC01786', 'AL391244.2',
       'TMEM52', 'AL589739.1', 'PLCH2', 'AL513320.1', 'CHD5',
       ...
       'AC244090.3', 'MTCP1', 'TMLHE-AS1', 'AC012078.2', 'PCDH11Y', 'PRKY',
       'KDM5D', 'TTTY10', 'MT-ND2', 'MT-ND5'],
      dtype='object', length=2000)

In [94]:
anndata_observations_frame(data,"PRKY")

obs_names,sample,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,total_counts_ribo,log1p_total_counts_ribo,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,n_genes,leiden,PRKY
str,cat,i32,f64,f32,f32,f64,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,cat,f32
"""AAACCCAAGGATGGCT-1""","""s1d1""",2103,7.651596,8663.0,9.066932,42.721921,59.667552,69.744892,79.348955,460.0,6.133398,5.309938,3650.0,8.202756,42.133209,17.0,2.890372,0.196237,2103,"""0""",1.305765
"""AAACCCAAGGCCTAGA-1""","""s1d1""",3916,8.273081,12853.0,9.461411,35.843772,44.26204,52.376877,62.763557,1790.0,7.49053,13.92671,1719.0,7.450079,13.37431,58.0,4.077538,0.451257,3916,"""10""",-0.35076
"""AAACCCAAGTGAGTGC-1""","""s1d1""",683,6.527958,1631.0,7.397562,56.284488,62.599632,70.386266,88.77989,581.0,6.36647,35.622318,63.0,4.158883,3.862661,13.0,2.639057,0.797057,683,"""15""",-0.35076
"""AAACCCACAAGAGGCT-1""","""s1d1""",4330,8.373554,17345.0,9.761117,27.66215,38.420294,48.901701,62.023638,780.0,6.660575,4.496973,3936.0,8.278174,22.692417,44.0,3.806663,0.253675,4330,"""17""",-0.35076
"""AAACCCACATCGTGGC-1""","""s1d1""",325,5.786897,555.0,6.320768,49.90991,59.459459,77.477477,100.0,159.0,5.075174,28.648647,26.0,3.295837,4.684685,26.0,3.295837,4.684685,325,"""8""",-0.35076
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TTTGTTGAGAGTCTGG-1""","""s1d3""",277,5.627621,615.0,6.423247,63.089431,71.219512,87.479675,100.0,282.0,5.645447,45.853661,35.0,3.583519,5.691057,20.0,3.044523,3.252032,277,"""8""",-0.35076
"""TTTGTTGCAGACAATA-1""","""s1d3""",3797,8.24223,13218.0,9.48941,30.753518,44.628537,53.691935,64.684521,864.0,6.76273,6.536541,3997.0,8.29355,30.239067,42.0,3.7612,0.317749,3797,"""16""",0.865235
"""TTTGTTGCATGTTACG-1""","""s1d3""",3089,8.035926,27280.0,10.213945,64.409824,71.671554,78.317449,85.054985,1025.0,6.933423,3.757332,3562.0,8.178358,13.057184,13145.0,9.483872,48.185482,3089,"""5""",-0.35076
"""TTTGTTGGTAGTCACT-1""","""s1d3""",379,5.940171,698.0,6.549651,52.86533,60.028653,74.355301,100.0,143.0,4.969813,20.487106,58.0,4.077538,8.309455,39.0,3.688879,5.587393,379,"""1""",-0.35076


In [98]:
anndata_variables_frame(data,include_dimensions=True)

variable,mt,ribo,hb,n_cells_by_counts,mean_counts,log1p_mean_counts,pct_dropout_by_counts,total_counts,log1p_total_counts,n_cells,highly_variable,means,dispersions,dispersions_norm,mean,std,PCs_1,PCs_2,PCs_3,PCs_4,PCs_5,PCs_6,PCs_7,PCs_8,PCs_9,PCs_10,PCs_11,PCs_12,PCs_13,PCs_14,PCs_15,PCs_16,PCs_17,PCs_18,PCs_19,PCs_20,PCs_21,PCs_22,PCs_23,PCs_24,PCs_25,PCs_26,PCs_27,PCs_28,PCs_29,PCs_30,PCs_31,PCs_32,PCs_33,PCs_34,PCs_35,PCs_36,PCs_37,PCs_38,PCs_39,PCs_40,PCs_41,PCs_42,PCs_43,PCs_44,PCs_45,PCs_46,PCs_47,PCs_48,PCs_49,PCs_50
str,bool,bool,bool,i64,f32,f32,f64,f32,f32,i64,bool,f64,f64,f32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""AL390719.2""",false,false,false,5,0.000292,0.000292,99.970803,5.0,1.791759,5,true,0.001001,1.840088,1.211725,0.00038,0.024219,0.000507,0.00007,0.000219,0.00003,0.000163,0.000078,-0.000552,-0.00048,-7.1845e-7,0.001257,-0.000835,-0.001095,-0.000722,0.000355,0.001387,-0.000444,0.000592,0.000521,-0.000477,-0.000384,0.000573,-0.002016,-0.000279,-0.000666,0.000539,0.000141,0.00033,-0.001908,0.0006,0.00089,0.000338,0.002042,-0.000708,0.000136,-0.002319,0.001995,-0.000187,-0.000273,-0.000444,-0.000566,-0.000658,0.00057,0.000103,0.000003,-0.000841,-0.001221,0.001091,-0.000526,-0.000717,0.001971
"""C1QTNF12""",false,false,false,62,0.003737,0.00373,99.637956,64.0,4.174387,62,true,0.008604,1.871498,1.27418,0.003594,0.069648,0.002157,0.00009,0.001072,0.002018,0.001577,-0.000074,-0.002174,0.005761,0.002667,-0.001618,0.002554,0.001043,0.002002,0.002322,-0.000932,-0.000108,-0.006791,-0.001877,0.001695,0.005245,-0.002132,0.001788,0.004756,-0.003123,0.004221,-0.00006,0.000963,-0.004629,0.006832,-0.004371,-0.000144,-0.006061,0.002932,-0.004847,0.002872,-0.008509,0.003131,-0.003947,0.003756,0.002759,-0.00394,0.00827,-0.005125,0.009589,0.010219,-0.00255,-0.001272,-0.001618,0.008488,0.004894
"""AL162741.1""",false,false,false,26,0.001518,0.001517,99.848175,26.0,3.295837,26,true,0.003878,1.944082,1.418505,0.001503,0.046608,0.000624,-0.000157,-0.000299,0.001803,0.000242,0.001054,0.000856,0.002438,0.000624,0.001913,0.000034,-0.000836,-0.000592,-0.003244,-0.002215,-0.002546,-0.001654,0.003159,-0.000388,0.001114,0.000115,0.001183,0.004532,-0.005745,-0.004119,-0.003631,0.001258,-0.003019,-0.002348,0.000854,0.001166,0.00168,0.006794,0.003127,0.002967,0.002509,-0.000432,0.001553,-0.003598,-0.006155,0.0005,0.002273,-0.001148,-0.001519,0.000738,-0.002293,-0.002336,-0.004694,0.004402,-0.003541
"""LINC01786""",false,false,false,23,0.001343,0.001342,99.865693,23.0,3.178054,23,true,0.004039,1.990555,1.510911,0.001524,0.047831,0.000686,-0.000398,-0.000138,0.000278,0.000569,0.000919,-0.000051,-0.00089,-0.000718,0.002334,-0.001839,0.001517,0.001815,-0.001537,-0.001807,-0.000427,0.000801,-0.000601,0.001574,-0.000991,-0.004927,-0.000891,-0.001228,0.003091,0.001604,0.0015,-0.001694,-0.001285,-0.001243,0.001403,0.003339,-0.001336,0.002543,-0.00127,0.000354,0.005011,0.000007,0.004297,0.003308,-0.000664,0.005573,-0.003334,-0.005405,0.001483,-0.004556,0.003451,-0.001502,-0.00602,0.00007,-0.003216
"""AL391244.2""",false,false,false,73,0.004438,0.004428,99.573723,76.0,4.343805,73,true,0.008619,1.8798,1.290687,0.00363,0.068932,0.000484,-0.001068,0.002193,0.002492,0.003235,0.001446,0.001347,0.003344,0.002216,0.002755,0.000024,-0.004278,-0.010278,-0.000011,0.001102,-0.003959,-0.001865,-0.003884,-0.003769,0.003923,0.002117,-0.002225,0.00564,-0.008406,0.003633,-0.003804,0.00759,-0.009032,-0.002789,-0.004343,0.002436,0.001075,-0.003592,0.003432,-0.001539,-0.001271,-0.004488,0.002004,0.011496,0.002047,-0.010374,0.00567,0.003475,-0.007139,0.002013,0.001362,0.002979,0.001949,-0.007422,-0.002366
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""PRKY""",false,false,false,2294,0.165139,0.15284,86.60438,2828.0,7.947679,2289,true,0.346883,1.815441,1.162716,0.16251,0.463308,0.007736,-0.026888,-0.017994,0.00831,0.030556,0.034987,-0.062071,-0.013758,0.003033,0.001976,-0.017966,0.017737,-0.056791,-0.02367,0.011339,-0.017298,-0.012666,-0.055533,-0.021517,0.04266,-0.02546,-0.032365,-0.007404,0.044908,-0.016968,0.007544,-0.072016,-0.032632,0.025854,0.042334,-0.054467,-0.031225,-0.079508,0.013216,-0.089356,-0.061124,0.000753,-0.021704,0.036659,-0.027166,-0.05235,-0.020908,0.06178,0.031703,0.087126,-0.062791,0.002076,0.003484,0.068329,-0.04102
"""KDM5D""",false,false,false,2030,0.140438,0.131412,88.145985,2405.0,7.785721,2028,true,0.283985,1.810605,1.153101,0.131121,0.414425,0.003528,-0.002136,0.000097,0.014622,0.002955,0.042118,-0.069233,0.010054,0.024767,0.009312,0.000501,0.003343,-0.019792,0.002186,0.010508,0.014847,0.007467,-0.010181,0.036239,0.017111,-0.019162,0.013276,0.014674,-0.018118,0.002217,-0.000007,-0.027243,0.002303,-0.014169,0.031988,0.004525,-0.008574,-0.035438,0.023459,0.036425,0.007055,-0.007149,0.102474,0.073169,-0.039281,0.039477,-0.000764,-0.010766,0.024605,-0.051385,0.08016,0.059791,0.052302,0.038893,0.034512
"""TTTY10""",false,false,false,265,0.016876,0.016735,98.452555,289.0,5.669881,265,true,0.033677,1.812069,1.156013,0.014633,0.137948,-0.003248,0.003903,0.008258,0.008701,-0.010011,0.011993,-0.022978,0.015721,0.027954,0.017358,-0.006907,-0.001465,-0.011102,0.000739,0.005671,-0.003988,-0.01131,-0.004085,0.002347,0.005591,-0.02207,-0.006165,0.012771,-0.00324,-0.004794,0.012172,-0.006271,0.004217,-0.00455,0.027321,-0.013401,-0.029942,0.013956,0.004138,-0.003487,0.005651,-0.005983,0.026074,-0.033445,0.010744,0.00947,0.002781,0.004452,-0.014523,-0.010696,-0.004137,0.021195,0.026951,-0.011609,-0.010438
"""MT-ND2""",true,false,false,16324,23.342249,3.192214,4.677372,399736.0,12.898562,15951,true,3.988345,4.379118,1.680991,3.313847,1.351333,0.063123,-0.064687,-0.000193,0.018888,0.084332,0.16023,-0.086556,-0.033752,0.014187,0.010519,-0.018926,0.030075,0.012551,-0.030484,-0.00237,0.039199,0.039248,0.136352,0.034783,-0.010586,0.13494,-0.033945,-0.090721,-0.008598,0.022876,-0.041794,0.009683,0.056117,-0.045846,-0.050559,-0.073577,-0.080295,0.023782,0.041736,0.050557,-0.01775,-0.018999,0.066855,-0.042569,0.036312,0.034829,-0.009177,-0.015544,-0.007084,-0.012532,0.010486,-0.012239,-0.023408,-0.008289,-0.009577
