## Load packages

In [2]:
import scanpy as sc
import pandas as pd
import numpy as np
import sys
import os
# Make the repository root importable so that `import src....` resolves.
# The root is taken to be ONE level above the kernel's current working
# directory; this assumes the kernel was started in the notebook's own folder.
project_root = os.path.abspath(os.pardir)

# Prepend only once so that re-running this cell does not grow sys.path.
if project_root not in sys.path:
    sys.path.insert(0, project_root)


In [5]:
from importlib import reload
import src.io.cellxgene_pp_utils as cxg_pp 
# Re-execute the helper module so that edits made to its source file after the
# first import are picked up without restarting the kernel.
reload(cxg_pp)

<module 'src.io.cellxgene_pp_utils' from '/mnt/lscratch/users/adhal/SingleCellUtils/src/io/cellxgene_pp_utils.py'>

## Execute different queries

In [7]:

# Create the query helper with its built-in default obs filter.
# NOTE(review): the cell output reports that the Census "stable" release is
# resolved at run time; if the helper exposes a census_version option, pinning
# it would keep results reproducible -- confirm against the helper's signature.
utils = cxg_pp.CellxgenePpUtils(organism='homo_sapiens')

# Example 1: Rely on the default filter
# (disease == 'normal' and is_primary_data == True).
query1 = utils.get_multiple_tissues(['lung', 'heart'])

# Example 2: Pass extra conditions in addition to the default filter.
query2 = utils.get_multiple_tissues(
    ['lung', 'heart'],
    additional_filters=["sex == 'female'", "development_stage == 'adult'"]
)

# Example 3: Supply a complete custom filter and switch the default off.
query3 = utils.get_multiple_tissues(
    ['lung'],
    custom_filter="disease == 'COVID-19' and is_primary_data == True",
    use_default_filter=False
)

# Example 4: Query by cell type with the default filter switched off,
# passing only a tissue condition as an additional filter.
query4 = utils.get_by_cell_type(
    'B cell',
    use_default_filter=False,
    additional_filters=["tissue == 'blood'"]
)

# Example 5: Free-form filter expression combining several obs columns;
# the default filter is switched off.
query5 = utils.get_custom_dataset_complex_filter(
    "cell_type in ['B cell', 'T cell'] and tissue == 'blood' and sex == 'male'",
    use_default_filter=False
)

# Example 6: Replace the default filter on `utils` for all subsequent queries.
# Order matters: this mutates `utils`, so any default-filter query issued on
# `utils` after this line (such as query6) uses the new default.
utils.update_default_filter("is_primary_data == True and development_stage == 'adult'")
query6 = utils.get_multiple_tissues(['brain'])  # Will use new default filter

# Example 7: Build a second helper whose default filter is set at construction
# time, leaving `utils` untouched.
utils_custom = cxg_pp.CellxgenePpUtils(
    organism='homo_sapiens',
    default_obs_filter="disease in ['normal', 'COVID-19'] and is_primary_data == True"
)
query7 = utils_custom.get_by_cell_type('T cell')

The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.
The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.


## Inspect query results as a pandas DataFrame

In [10]:
# Read the obs table selected by query5, concatenate the result, and convert
# it to a pandas DataFrame for inspection.
df_query5 = query5.obs().concat().to_pandas()

In [11]:
# Display the frame; the notebook repr shows only the first and last rows.
df_query5

Unnamed: 0,soma_joinid,dataset_id,assay,assay_ontology_term_id,cell_type,cell_type_ontology_term_id,development_stage,development_stage_ontology_term_id,disease,disease_ontology_term_id,...,tissue,tissue_ontology_term_id,tissue_type,tissue_general,tissue_general_ontology_term_id,raw_sum,nnz,raw_mean_nnz,raw_variance_nnz,n_measured_vars
0,559804,c874f155-9bf9-4928-b821-f52c876b3e48,10x 3' v3,EFO:0009922,B cell,CL:0000236,49-year-old stage,HsapDv:0000143,COVID-19,MONDO:0100096,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,9664.0,2934,3.293797,186.640555,33145
1,559829,c874f155-9bf9-4928-b821-f52c876b3e48,10x 3' v3,EFO:0009922,B cell,CL:0000236,49-year-old stage,HsapDv:0000143,COVID-19,MONDO:0100096,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,5582.0,2095,2.664439,90.601290,33145
2,559844,c874f155-9bf9-4928-b821-f52c876b3e48,10x 3' v3,EFO:0009922,B cell,CL:0000236,49-year-old stage,HsapDv:0000143,COVID-19,MONDO:0100096,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,4698.0,1670,2.813174,62.128047,33145
3,559862,c874f155-9bf9-4928-b821-f52c876b3e48,10x 3' v3,EFO:0009922,B cell,CL:0000236,49-year-old stage,HsapDv:0000143,COVID-19,MONDO:0100096,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,6832.0,2552,2.677116,110.666383,33145
4,559865,c874f155-9bf9-4928-b821-f52c876b3e48,10x 3' v3,EFO:0009922,B cell,CL:0000236,49-year-old stage,HsapDv:0000143,COVID-19,MONDO:0100096,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,4901.0,1903,2.575407,68.147702,33145
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469397,103537164,53d208b0-2cfd-4366-9866-c3c6114081bc,10x 3' v3,EFO:0009922,B cell,CL:0000236,33-year-old stage,HsapDv:0000127,normal,PATO:0000461,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,7282.0,2277,3.198068,676.033248,61759
469398,103537168,53d208b0-2cfd-4366-9866-c3c6114081bc,10x 3' v3,EFO:0009922,B cell,CL:0000236,33-year-old stage,HsapDv:0000127,normal,PATO:0000461,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,7691.0,2770,2.776534,37.014328,61759
469399,103537175,53d208b0-2cfd-4366-9866-c3c6114081bc,10x 3' v3,EFO:0009922,B cell,CL:0000236,33-year-old stage,HsapDv:0000127,normal,PATO:0000461,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,10897.0,3525,3.091348,51.982005,61759
469400,103537178,53d208b0-2cfd-4366-9866-c3c6114081bc,10x 3' v3,EFO:0009922,B cell,CL:0000236,33-year-old stage,HsapDv:0000127,normal,PATO:0000461,...,blood,UBERON:0000178,tissue,blood,UBERON:0000178,10264.0,3156,3.252218,44.415605,61759
