### Set up OpenAI API key

In [1]:
import os

# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# saved inside the notebook; the original placeholder is kept as the fallback so
# pasting a key here still works.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "INSERT YOUR OPENAI KEY HERE")

### Define imports and functions

In [2]:
import json
import os
from concurrent.futures import ThreadPoolExecutor
import cellxgene_census
import tiledbsoma as soma
import pickle
import numpy as np
import openai
from cellxgene_census.experimental.pp import mean_variance, highly_variable_genes
import pandas as pd


def get_embedding(text, model="text-embedding-ada-002"):
    """Return the OpenAI embedding of ``text`` as a NumPy array.

    Newlines in ``text`` are replaced by spaces before the request is sent.
    The request is authenticated with the module-level ``OPENAI_API_KEY``.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(
        input=[single_line],
        model=model,
        api_key=OPENAI_API_KEY,
    )
    vector = response["data"][0]["embedding"]
    return np.array(vector)

def get_embedding_dissimilarity(tgt_emb, ref_emb):
    """Return the Euclidean (L2) distance between two embeddings.

    Both arrays are flattened to one dimension before being subtracted.
    """
    difference = tgt_emb.reshape(-1) - ref_emb.reshape(-1)
    return np.linalg.norm(difference)
    
def find_most_similar_key(key, embeddings):
    """Return the key of ``embeddings`` whose vector lies closest to ``key``.

    ``key`` is embedded with ``get_embedding`` and compared against every value
    of the ``embeddings`` mapping using ``get_embedding_dissimilarity``; the key
    with the smallest distance is returned.
    """
    target = get_embedding(key)
    distances = np.array(
        [get_embedding_dissimilarity(target, vector) for vector in embeddings.values()]
    )
    candidates = np.array(list(embeddings.keys()))
    return candidates[np.argmin(distances)]

def translate_user_input_to_args(query: str) -> list:
    """Translate a free-text cell query into a structured list of query terms.

    The query is sent to the ``gpt-4-0613`` chat model, which is forced to "call"
    ``translate_query_list_terms_to_database_terms`` so that its answer comes back
    as JSON arguments following the schema declared below.

    Args:
        query: Natural-language description of the cells to select.

    Returns:
        The ``query_list`` array decoded from the function-call arguments. Going
        by the few-shot examples, each element is either a condition
        (``category`` / ``operator`` / ``value``) or a connective (``join``).

    Errors raised by ``openai.ChatCompletion.create`` or ``json.loads`` are not
    caught here.
    """
    # TODO if required: Chain of thought prompting - second prompt gives it more examples and asks it to refine the solution
    # Few-shot examples, passed to the model as the function's description.
    # NOTE(review): example 1 uses the category "expression", which is not one of
    # the values allowed by the `category` enum below (closest: "expression_sum").
    description = (
    """Examples:
    1. "Find me cells that are glial cells or neurons with expressions that are greater than 1000 or find me cells from assays that are 10X"
    Output: [{"category": "cell_type", "operator": "in", "value": ["glial cells", "neuron"]},{"join": "and"},{"category": "expression", "operator": ">", "value": 1000},{"join": "or"},{"category": "assay", "operator": "==", "value": "10X"}]
    2. "Retrieve cells from males or females in the developmental stage of embryo."
    Output: [{"category": "sex", "operator": "in", "value": ["males", "females"]},{"join": "and"},{"category": "development_stage", "operator": "in", "value": ["embryo"]}]
    3. "Show cells with suspension types either liquid or solid that are not associated with any disease."
    Output: [{"category": "suspension_type", "operator": "in", "value": ["liquid", "solid"]},{"join": "and"},{"category": "disease", "operator": "==", "value": "healthy"}]
    """
    )
    # JSON schema of the single function the model must call: one required
    # `query_list` array whose items may carry operator/join/category/value.
    functions = [{'name': 'translate_query_list_terms_to_database_terms',
      'description': description,
      'parameters': {'type': 'object',
       'properties': {'query_list': {'type': 'array',
         'items': {'type': 'object',
          'properties': {'operator': {'type': 'string',
            'description': "Use 'in' or 'not in' when a category must map to multiple values. Otherwise, use '==' or '!='. The inequalities are used for continuous values.",
            'enum': ['in', 'not in', '==', '!=', '>', '<', '>=', '<=']},
           'join': {'type': 'string', 'enum': ['and', 'or']},
           'category': {'type': 'string',
            'enum': ['assay',
             'cell_type',
             'development_stage',
             'disease',
             'is_primary_data',
             'self_reported_ethnicity',
             'sex',
             'suspension_type',
             'tissue',
             'expression_sum',
             'num_genes_expressed',
             'n_measured_genes',
                    ]},
           'value': {"description": "If the previous operator is 'in', the value MUST be a list of strings (e.g. ['neurons'])."}}},
            'description': 'A list containing dictionary elements that represent different parts of a query. Each dictionary has a "category" representing the type of the element ("operator", "category", "value", "join") and a value representing the actual element\'s value.'}},
                    "required": ["query_list"]}}]
    messages = [{"role": "user", "content": query}]
    
    # `function_call={"name": ...}` forces the model to answer with a call to the
    # function above instead of free text.
    response = openai.ChatCompletion.create(
        model="gpt-4-0613",
        messages=messages,
        functions=functions,
        function_call={"name": "translate_query_list_terms_to_database_terms"},
        api_key=OPENAI_API_KEY,
    )
    response_message = response["choices"][0]["message"]
    # The arguments arrive as a JSON string; only the `query_list` array is returned.
    return json.loads(response_message["function_call"]["arguments"])['query_list']

class LLMAxisQuery(soma.AxisQuery):
    """A ``soma.AxisQuery`` whose ``value_filter`` is built from a natural-language query.

    The query text is translated into structured terms by
    ``translate_user_input_to_args``; free-text values are snapped to the closest
    known label in the module-level embedding tables ``d``; and the terms are then
    rendered into a single ``value_filter`` expression.
    """

    def __init__(self, query):
        """Build the value filter for ``query`` and initialise the base ``AxisQuery``.

        Args:
            query: Natural-language description of the cells to select.
        """
        import copy

        parsed_query = translate_user_input_to_args(query)
        pq = copy.deepcopy(parsed_query)

        # LLM-facing category names -> column names used in the value filter.
        column_names = {
            "num_genes_expressed": "nnz",
            "expression_sum": "raw_sum",
            "n_measured_genes": "n_measured_vars",
        }

        for node in pq:
            value = node.get("value")
            category = node.get("category")

            # A membership operator needs a list on its right-hand side. The model
            # sometimes returns a bare string, so wrap it (the original used `==`
            # here, which compared instead of assigning and had no effect).
            if isinstance(value, str) and node.get("operator") in ("in", "not in"):
                value = [value]
                node["value"] = value

            # Replace free-text values with the most similar known label. Categories
            # without an embedding table are passed through unchanged instead of
            # raising KeyError.
            if category in d:
                if isinstance(value, str):
                    node["value"] = str(find_most_similar_key(value, d[category]))
                elif isinstance(value, list):
                    node["value"] = [str(find_most_similar_key(x, d[category])) for x in value]

            if category in column_names:
                node["category"] = column_names[category]

        # Render every node as either a connective ("and"/"or") or a
        # "<column> <operator> <value>" clause. `!r` quotes strings as Python
        # literals, so labels containing an apostrophe (e.g. "10x 5' v1") stay
        # well-formed; lists and numbers render exactly as before.
        # NOTE(review): assumes value_filter accepts double-quoted string literals - confirm.
        parts = []
        for node in pq:
            if "join" in node:
                parts.append(f'{node["join"]}')
            else:
                parts.append(f'{node["category"]} {node["operator"]} {node["value"]!r}')

        candidate_query = " ".join(parts).strip()
        super().__init__(value_filter=candidate_query)

def get_cell_metadata(cell_query_str: str, organism: str = "homo_sapiens"):
    """Return the obs (cell metadata) rows matching a natural-language cell query.

    Args:
        cell_query_str: Text description of the cells to select.
        organism: Key of the organism under ``census["census_data"]``.

    Returns:
        The result of reading ``obs`` for the matching cells, converted with
        ``to_pandas()``.
    """
    # `axis_query` must be bound before the print: the message includes locals().
    axis_query = LLMAxisQuery(f"find me: {cell_query_str}")
    print(f"Getting cell metadata with the following parameters: {locals()}")

    experiment = census["census_data"][organism]
    matching_cells = experiment.axis_query(measurement_name="RNA", obs_query=axis_query)
    obs_table = matching_cells.obs().concat()
    return obs_table.to_pandas()

def get_anndata(cell_query_str: str, organism: str = "homo_sapiens"):
    """Return an AnnData object for the cells matching a natural-language query.

    Args:
        cell_query_str: Text description of the cells to select.
        organism: Key of the organism under ``census["census_data"]``.

    Returns:
        The result of ``to_anndata`` with ``X_name="raw"`` and the "raw" and
        "normalized" layers requested.
    """
    # `axis_query` must be bound before the print: the message includes locals().
    axis_query = LLMAxisQuery(f"find me: {cell_query_str}")
    print(f"Getting AnnData with the following parameters: {locals()}")    

    experiment = census["census_data"][organism]
    matching_cells = experiment.axis_query(measurement_name="RNA", obs_query=axis_query)
    return matching_cells.to_anndata(X_name="raw", X_layers=["raw", "normalized"])

def get_highly_variable_genes(cell_query_str: str, organism: str = "homo_sapiens", number_of_genes=2000, span=0.3, batch_key=None, return_expression_data = False):
    """Compute highly variable genes (HVGs) for the cells matching a query.

    Args:
        cell_query_str: Text description of the cells to select.
        organism: Key of the organism under ``census["census_data"]``.
        number_of_genes: Forwarded to ``highly_variable_genes`` as ``n_top_genes``.
        span: Forwarded to ``highly_variable_genes`` as ``span``.
        batch_key: Forwarded to ``highly_variable_genes`` as ``batch_key``.
        return_expression_data: When true, return expression data restricted to
            the HVGs instead of the HVG table.

    Returns:
        The rows of the HVG table flagged ``highly_variable``, or, when
        ``return_expression_data`` is true, the ``to_anndata`` result for the
        same cells with the var axis sliced to those rows' index values.
    """
    # `axis_query` must be bound before the print: the message includes locals().
    axis_query = LLMAxisQuery(f"find me: {cell_query_str}")
    print(f"Getting HVGs with the following parameters: {locals()}")    

    all_genes_query = census["census_data"][organism].axis_query(
        measurement_name="RNA", obs_query=axis_query
    )
    hvg_table = highly_variable_genes(
        all_genes_query,
        n_top_genes=number_of_genes,
        batch_key=batch_key,
        span=span,
    )
    selected = hvg_table[hvg_table.highly_variable]

    if not return_expression_data:
        return selected

    # Second query over the same cells, restricted to the selected genes.
    gene_query = soma.AxisQuery(coords=(selected.index.tolist(),))
    hvg_only_query = census["census_data"][organism].axis_query(
        measurement_name="RNA", obs_query=axis_query, var_query=gene_query
    )
    return hvg_only_query.to_anndata(X_name="raw", X_layers=["raw", "normalized"])
    
def calculate_mean_and_or_variance(cell_query_str: str, organism: str = "homo_sapiens", layer = "normalized", calculate_mean=False, calculate_variance=False):
    """Compute per-gene mean and/or variance across the cells matching a query.

    Args:
        cell_query_str: Text description of the cells to select.
        organism: Key of the organism under ``census["census_data"]``.
        layer: Forwarded to ``mean_variance`` as ``layer``.
        calculate_mean: Forwarded to ``mean_variance``.
        calculate_variance: Forwarded to ``mean_variance``.

    Returns:
        A DataFrame joining the var (gene) table, indexed by ``soma_joinid``,
        with the ``mean_variance`` result column-wise.
    """
    # `axis_query` must be bound before the print: the message includes locals().
    axis_query = LLMAxisQuery(f"find me: {cell_query_str}")
    print(f"Calculating mean and variance with the following parameters: {locals()}")    

    matching_cells = census["census_data"][organism].axis_query(
        measurement_name="RNA", obs_query=axis_query
    )
    stats = mean_variance(
        matching_cells,
        calculate_mean=calculate_mean,
        calculate_variance=calculate_variance,
        layer=layer,
    )

    gene_table = matching_cells.var().concat().to_pandas().set_index("soma_joinid")
    return pd.concat([gene_table, stats], axis=1)

def assistant(query: str):
    """Route a natural-language request to one of the Census helper functions.

    The request is sent to the ``gpt-4-0613`` chat model together with JSON-schema
    descriptions of ``get_anndata``, ``get_cell_metadata``,
    ``get_highly_variable_genes`` and ``calculate_mean_and_or_variance``. When the
    model answers with a call to one of them, that helper is invoked with the
    model-provided arguments.

    Args:
        query: The user's request in plain language.

    Returns:
        Whatever the selected helper returns. If the model does not choose a
        function (or names one that is not in the dispatch table), an apology is
        printed and ``None`` is returned.
    """
    # The organism parameter is identical for every helper, so declare it once.
    organism_property = {
        "type": "string",
        "enum": ["homo_sapiens","mus_musculus"],
        "description": "The organism in which cells are queried. Homo sapiens is human, Mus musculus is mouse."
    }

    get_anndata_function_definition = {
        "name": "get_anndata",
        "description": "Create and return an AnnData object containing expression data and metadata for requested cells.",
        "parameters": {"type": "object",
                       "properties": {
                           "cell_query_str": {
                               "type": "string",
                               "description": "a text description of which cells users would like to get an AnnData object for."
                           },
                           "organism": organism_property,
                       },
                       # cell_query_str has no default in get_anndata, so the model must supply it
                       # (consistent with the HVG and mean/variance schemas).
                       "required": ["cell_query_str"],
                      }
    }

    get_cell_metadata_function_definition = {
        "name": "get_cell_metadata",
        "description": "Create and return a Pandas DataFrame object containing metadata for requested cells.",
        "parameters": {"type": "object",
                       "properties": {
                           "cell_query_str": {
                               "type": "string",
                               "description": "a text description of which cells users would like to get the metadata for."
                           },
                           "organism": organism_property,
                       },
                       # cell_query_str has no default in get_cell_metadata either.
                       "required": ["cell_query_str"],
                      }
    }

    get_highly_variable_genes_function_definition = {
        "name": "get_highly_variable_genes",
        "description": "Calculate the highly variable genes (HVGs) for a particular set of cells and optionally return the corresponding expression data.",
        "parameters": {"type": "object",
                       "properties": {
                           "cell_query_str": {
                               "type": "string",
                               "description": "a text description of which cells users would like to get the highly variable genes (HVGs) for."
                           },
                           "number_of_genes": {
                               "type": "integer",
                               "description": "the number of top highly variable genes to return"
                           },
                           "organism": organism_property,
                           "span": {
                               "type": "number",
                               "description": "The fraction of cells used to estimate the loess variance model fit."
                           },
                           "batch_key": {
                               "type": "string",
                               "enum": [
                                   "assay",
                                   "cell_type",
                                   "disease",
                                   "self_reported_ethnicity",
                                   "sex",
                                   "suspension_type",
                                   "tissue",
                               ],
                               "description": "The cell metadata category that will be used to correct for batch effects."
                           },
                           "return_expression_data": {
                               "type": "boolean",
                               "description": "Users can request to get the cell expression data with genes sliced by the calculated HVGs."
                           }
                       },
                       "required": ["cell_query_str"],
                      }
    }

    calculate_mean_var_function_definition = {
        "name": "calculate_mean_and_or_variance",
        "description": "Calculate the mean and/or variance across the requested cells for every gene.",
        "parameters": {"type": "object",
                       "properties": {
                           "cell_query_str": {
                               "type": "string",
                               "description": "a text description of cells users would like to calculate mean and variance across."
                           },
                           "layer": {
                               "type": "string",
                               "description": "the data layer across which means and/or variances will be computed",
                               "enum": ["raw", "normalized"]
                           },
                           "organism": organism_property,
                           "calculate_mean": {
                               "type": "boolean",
                               "description": "Whether to calculate the mean or not."
                           },
                           "calculate_variance": {
                               "type": "boolean",
                               "description": "Whether to calculate the variance or not."
                           },
                       },
                       "required": ["cell_query_str"],
                      }
    }
    functions = [
        get_anndata_function_definition,
        get_cell_metadata_function_definition,
        get_highly_variable_genes_function_definition,
        calculate_mean_var_function_definition
    ]

    # Dispatch table: function name chosen by the model -> Python callable.
    functions_to_call = {
        "get_cell_metadata": get_cell_metadata,
        "get_anndata": get_anndata,
        "get_highly_variable_genes": get_highly_variable_genes,
        "calculate_mean_and_or_variance": calculate_mean_and_or_variance
    }

    messages = [{"role": "user", "content": query}]
    response = openai.ChatCompletion.create(
        model="gpt-4-0613",
        messages=messages,
        functions=functions,
        function_call="auto",
        api_key=OPENAI_API_KEY,
    )
    response_message = response["choices"][0]["message"]
    function_call = response_message.get("function_call")
    if function_call:
        # .get() so that a function name outside the dispatch table falls through to
        # the apology below instead of surfacing as a bare KeyError.
        handler = functions_to_call.get(function_call["name"])
        if handler is not None:
            function_args = json.loads(function_call["arguments"])
            return handler(**function_args)

    print("I'm sorry. I could not tell which Census function to call for you. Please modify your request and try again.")
        

# Open the Census, then load cached label embeddings if present; otherwise
# compute them with the OpenAI API and cache them on disk.
census = cellxgene_census.open_soma(census_version='latest')
df = census["census_info"]["summary_cell_counts"].read().concat().to_pandas()

embeddings = None
if os.path.exists('embeddings.pkl'):
    # NOTE: pickle.load can execute arbitrary code; only load a cache file that
    # this notebook wrote itself.
    with open('embeddings.pkl', 'rb') as cache_file:
        embeddings = pickle.load(cache_file)
    # The cache is positional (one vector per row of `df`). A cache of a different
    # length cannot belong to this table, so discard it and recompute. This is only
    # a sanity check: a same-length cache from another release would still be used.
    if len(embeddings) != len(df):
        embeddings = None

if embeddings is None:
    with ThreadPoolExecutor() as executor:
        embeddings = list(executor.map(get_embedding, df["label"].values))

    with open('embeddings.pkl', 'wb') as cache_file:
        pickle.dump(embeddings, cache_file)

# d[category][label] -> embedding of that label. Rows are matched to embeddings by
# position, which is how the list was built, rather than by DataFrame index label.
d = {}
for position, (category, label) in enumerate(zip(df["category"], df["label"])):
    d.setdefault(category, {})[label] = embeddings[position]

The "latest" release is currently 2023-10-09. Specify 'census_version="2023-10-09"' in future calls to open_soma() to ensure data consistency.


In [4]:
assistant("Get an AnnData object for cells from heart tissue in humans.")

Getting AnnData with the following parameters: {'cell_query_str': 'heart tissue', 'organism': 'homo_sapiens', 'axis_query': LLMAxisQuery(value_filter="tissue == 'heart'", coords=())}


AnnData object with n_obs × n_vars = 137624 × 60664
    obs: 'soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue', 'tissue_ontology_term_id', 'tissue_general', 'tissue_general_ontology_term_id', 'raw_sum', 'nnz', 'raw_mean_nnz', 'raw_variance_nnz', 'n_measured_vars'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length', 'nnz', 'n_measured_obs'
    layers: 'normalized'

In [5]:
assistant("Retrieve metadata for brain cells in mice.")

Getting cell metadata with the following parameters: {'cell_query_str': 'brain cells', 'organism': 'mus_musculus', 'axis_query': LLMAxisQuery(value_filter="cell_type in ['neural cell']", coords=())}


Unnamed: 0,soma_joinid,dataset_id,assay,assay_ontology_term_id,cell_type,cell_type_ontology_term_id,development_stage,development_stage_ontology_term_id,disease,disease_ontology_term_id,...,suspension_type,tissue,tissue_ontology_term_id,tissue_general,tissue_general_ontology_term_id,raw_sum,nnz,raw_mean_nnz,raw_variance_nnz,n_measured_vars
0,1650584,a13bda79-9134-46c9-9ed1-a2858be9aafe,10x 5' v1,EFO:0011025,neural cell,CL:0002319,Theiler stage 21,MmusDv:0000028,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,12426.0,3985,3.118193,37.148928,23830
1,1661161,a13bda79-9134-46c9-9ed1-a2858be9aafe,10x 5' v1,EFO:0011025,neural cell,CL:0002319,Theiler stage 20,MmusDv:0000027,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,34963.0,6723,5.200506,182.717602,23830
2,1661285,a13bda79-9134-46c9-9ed1-a2858be9aafe,10x 5' v1,EFO:0011025,neural cell,CL:0002319,Theiler stage 20,MmusDv:0000027,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,67083.0,9073,7.393696,491.732994,23830
3,1661502,a13bda79-9134-46c9-9ed1-a2858be9aafe,10x 5' v1,EFO:0011025,neural cell,CL:0002319,Theiler stage 20,MmusDv:0000027,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,10145.0,3920,2.588010,24.227006,23830
4,1662135,a13bda79-9134-46c9-9ed1-a2858be9aafe,10x 5' v1,EFO:0011025,neural cell,CL:0002319,Theiler stage 20,MmusDv:0000027,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,19261.0,4242,4.540547,137.591494,23830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1965,1733489,12967895-3d58-4e93-be2c-4e1bcf4388d5,10x 3' v2,EFO:0009899,neural cell,CL:0002319,Theiler stage 21,MmusDv:0000028,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,9386.0,3294,2.849423,80.399427,25263
1966,1733663,12967895-3d58-4e93-be2c-4e1bcf4388d5,10x 3' v2,EFO:0009899,neural cell,CL:0002319,Theiler stage 21,MmusDv:0000028,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,10853.0,3749,2.894905,66.252026,25263
1967,1733692,12967895-3d58-4e93-be2c-4e1bcf4388d5,10x 3' v2,EFO:0009899,neural cell,CL:0002319,Theiler stage 21,MmusDv:0000028,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,18677.0,4697,3.976368,87.927678,25263
1968,1733724,12967895-3d58-4e93-be2c-4e1bcf4388d5,10x 3' v2,EFO:0009899,neural cell,CL:0002319,Theiler stage 21,MmusDv:0000028,normal,PATO:0000461,...,cell,gonad,UBERON:0000991,reproductive system,UBERON:0000990,17896.0,4203,4.257911,133.659311,25263


In [6]:
assistant("Retrieve metadata for cells from brain tissue in mice.")

Getting cell metadata with the following parameters: {'cell_query_str': 'brain tissue', 'organism': 'mus_musculus', 'axis_query': LLMAxisQuery(value_filter="tissue in ['brain']", coords=())}


Unnamed: 0,soma_joinid,dataset_id,assay,assay_ontology_term_id,cell_type,cell_type_ontology_term_id,development_stage,development_stage_ontology_term_id,disease,disease_ontology_term_id,...,suspension_type,tissue,tissue_ontology_term_id,tissue_general,tissue_general_ontology_term_id,raw_sum,nnz,raw_mean_nnz,raw_variance_nnz,n_measured_vars
0,245979,f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c,Smart-seq2,EFO:0008931,oligodendrocyte,CL:0000128,18 month-old stage,MmusDv:0000089,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,46486688.0,2505,18557.560080,1.469779e+10,18025
1,246263,f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c,Smart-seq2,EFO:0008931,oligodendrocyte,CL:0000128,18 month-old stage,MmusDv:0000089,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,7268099.0,2481,2929.503829,1.937525e+08,18025
2,246404,f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c,Smart-seq2,EFO:0008931,"CD8-positive, alpha-beta T cell",CL:0000625,18 month-old stage,MmusDv:0000089,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,2488726.0,1806,1378.032115,1.233717e+07,18025
3,246673,f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c,Smart-seq2,EFO:0008931,oligodendrocyte,CL:0000128,18 month-old stage,MmusDv:0000089,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,28755250.0,1409,20408.268275,4.609669e+09,18025
4,246814,f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c,Smart-seq2,EFO:0008931,microglial cell,CL:0000129,18 month-old stage,MmusDv:0000089,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,64333.0,559,115.085868,2.433766e+06,18025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154335,4004545,c08f8441-4a10-4748-872a-e70c0bcccdba,Smart-seq2,EFO:0008931,microglial cell,CL:0000129,3 month-old stage,MmusDv:0000063,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,130900.0,1937,67.578730,7.574117e+04,20406
154336,4004546,c08f8441-4a10-4748-872a-e70c0bcccdba,Smart-seq2,EFO:0008931,microglial cell,CL:0000129,3 month-old stage,MmusDv:0000063,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,326156.0,2674,121.973074,2.654109e+05,20406
154337,4004547,c08f8441-4a10-4748-872a-e70c0bcccdba,Smart-seq2,EFO:0008931,microglial cell,CL:0000129,3 month-old stage,MmusDv:0000063,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,149182.0,1674,89.117085,1.007175e+05,20406
154338,4004548,c08f8441-4a10-4748-872a-e70c0bcccdba,Smart-seq2,EFO:0008931,microglial cell,CL:0000129,3 month-old stage,MmusDv:0000063,normal,PATO:0000461,...,cell,brain,UBERON:0000955,brain,UBERON:0000955,1175708.0,4252,276.507056,3.366829e+06,20406


In [7]:
assistant("Identify the top 50 highly variable genes for hepatocyte cells from liver in mice, correcting for assay batch effects.")

Getting HVGs with the following parameters: {'cell_query_str': 'hepatocyte cells from liver', 'organism': 'mus_musculus', 'number_of_genes': 50, 'span': 0.3, 'batch_key': 'assay', 'return_expression_data': False, 'axis_query': LLMAxisQuery(value_filter="cell_type in ['hepatocyte'] and tissue in ['liver']", coords=())}


Unnamed: 0_level_0,means,variances,highly_variable_nbatches,highly_variable_rank,variances_norm,highly_variable
soma_joinid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
609,28.499144,1595592.0,1,26.0,3.778819,True
744,179.020777,7828814.0,2,0.5,17.841199,True
1211,4.86287,2352.776,1,25.0,3.645885,True
1261,14.186507,360467.2,1,21.0,3.723037,True
1775,83.828893,1587870.0,1,3.0,5.974481,True
3203,3.770961,18069.57,1,24.0,3.613441,True
3449,6.766561,91568.93,1,9.0,6.266693,True
3831,260.241017,4850460.0,1,23.0,4.010626,True
3837,3757.684674,135118900.0,1,19.0,3.923221,True
4366,0.464271,23.45083,1,12.0,4.561114,True


In [8]:
assistant("Identify the top 50 highly variable genes for hepatocyte cells from liver in mice, correcting for suspension type batch effects, and return their expression data.")

Getting HVGs with the following parameters: {'cell_query_str': 'hepatocyte cells from liver', 'organism': 'mus_musculus', 'number_of_genes': 50, 'span': 0.3, 'batch_key': 'suspension_type', 'return_expression_data': True, 'axis_query': LLMAxisQuery(value_filter="cell_type == 'hepatocyte' and tissue == 'liver'", coords=())}


AnnData object with n_obs × n_vars = 12273 × 50
    obs: 'soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue', 'tissue_ontology_term_id', 'tissue_general', 'tissue_general_ontology_term_id', 'raw_sum', 'nnz', 'raw_mean_nnz', 'raw_variance_nnz', 'n_measured_vars'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length', 'nnz', 'n_measured_obs'
    layers: 'normalized'

In [9]:
assistant("Calculate the mean and variance for genes across plasma cells in lung in humans using normalized data.")

Calculating mean and variance with the following parameters: {'cell_query_str': 'plasma cells in lung', 'organism': 'homo_sapiens', 'layer': 'normalized', 'calculate_mean': True, 'calculate_variance': True, 'axis_query': LLMAxisQuery(value_filter="cell_type in ['plasma cell'] and tissue in ['lung']", coords=())}


Unnamed: 0_level_0,feature_id,feature_name,feature_length,nnz,n_measured_obs,mean,variance
soma_joinid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ENSG00000237613,FAM138A,1219,1847,15902086,0.000000e+00,0.000000e+00
1,ENSG00000186092,OR4F5,2618,488,18608838,0.000000e+00,0.000000e+00
2,ENSG00000238009,RP11-34P13.7,3726,386725,45017221,2.181791e-07,1.500172e-10
3,ENSG00000239945,RP11-34P13.8,1319,12959,34541400,3.721699e-09,1.206758e-12
4,ENSG00000239906,RP11-34P13.14,323,1514,25977748,9.632632e-10,8.084026e-14
...,...,...,...,...,...,...,...
60659,ENSG00000288719,RP4-669P10.21,4252,2826,1248980,0.000000e+00,0.000000e+00
60660,ENSG00000288720,RP11-852E15.3,7007,99,1248980,0.000000e+00,0.000000e+00
60661,ENSG00000288721,RP5-973N23.5,7765,0,0,0.000000e+00,0.000000e+00
60662,ENSG00000288723,RP11-553N16.6,1015,18,1248980,0.000000e+00,0.000000e+00
