# Ingest CXG spatial h5ad file and create SOMA object on disk

In [1]:
import tiledbsoma
from tiledbsoma.experimental.ingest import from_cxg_spatial_h5ad

In [2]:
import shutil
from pathlib import Path

In [3]:
# All paths are relative to the notebook's working directory; `../../..` is
# expected to resolve to the repository root.
REPO_ROOT_DIR = Path("../../..")
BUILDER_DIR = REPO_ROOT_DIR / "tools/cellxgene_census_builder"
# Scratch area for this work: input h5ad files and the SOMA objects built from them.
WORKING_DIR = REPO_ROOT_DIR / "issues/census_1127_spatial-builder"
ANNDATA_DIR = WORKING_DIR / "source_h5ad"  # input: `<dataset_id>.h5ad` files
SOMA_DIR = WORKING_DIR / "spatial_soma"  # output: one SOMA object per dataset id

# NOTE(review): not referenced by the cells visible in this section -- confirm they are used further down.
MANIFEST_FILE_PATH = BUILDER_DIR / "spatial_dev_tools/manifest.csv"
BLOCKLIST_FILE_PATH = BUILDER_DIR / "spatial_dev_tools/blocklist.txt"

In [4]:
# Create the output directory for SOMA objects.
# `parents=True` also creates WORKING_DIR when it does not exist yet (e.g. on a
# fresh checkout); without it `mkdir` raises FileNotFoundError.
# `exist_ok=True` keeps the cell safe to re-run.
SOMA_DIR.mkdir(parents=True, exist_ok=True)

In [5]:
# Print the working directory: the relative paths defined above are resolved against it.
!pwd

/home/ubuntu/github/cellxgene-census/tools/cellxgene_census_builder/spatial_dev_tools


In [6]:
# NOTE(review): this cell repeats the `SOMA_DIR.mkdir(...)` call from an earlier cell.
# `exist_ok=True` makes the repeat a no-op, so the cell can likely be removed.
SOMA_DIR.mkdir(exist_ok=True)

In [7]:
# NOTE: Set `dataset_id` to the ID of a cellxgene spatial dataset whose h5ad file
# is available as `<ANNDATA_DIR>/<dataset_id>.h5ad`. Both paths below derive from it.
dataset_id = "3396c353-d720-4588-8724-75546e2f18cc"
my_h5ad_path = ANNDATA_DIR / f"{dataset_id}.h5ad"  # input h5ad file
my_soma_path = SOMA_DIR / dataset_id  # directory the SOMA object is written to

In [8]:
# Remove any SOMA object left at `my_soma_path` by a previous run so that the
# ingest starts from an empty target.
# `shutil.rmtree` replaces the `!rm -r $my_soma_path` shell escape: it needs no
# shell and is not subject to word-splitting or globbing on unusual paths.
if my_soma_path.is_dir():
    shutil.rmtree(my_soma_path)

In [9]:
# Convert the h5ad file into a SOMA experiment written at `my_soma_path`.
# The measurement is named "RNA" and the spatial scene is named after the dataset id.
# NOTE(review): `uns_keys=[]` presumably means that no `uns` entries are ingested --
# confirm against the tiledbsoma documentation.
from_cxg_spatial_h5ad(
    input_h5ad_path=my_h5ad_path,
    experiment_uri=str(my_soma_path),
    measurement_name="RNA",
    scene_name=dataset_id,
    uns_keys=[],
)


For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.

For creation, use `anndata.experimental.sparse_dataset(X)` instead.



'../../../issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc'

## Inspect the structure of the SOMA object on disk

In [10]:
# Open the SOMA object written by the ingest step and display its top-level members.
# NOTE(review): the handle is never closed in the cells visible here.
sp = tiledbsoma.open(str(my_soma_path))
sp

<Experiment 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc' (open for 'r') (4 items)
    'ms': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms' (unopened)
    'obs': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/obs' (unopened)
    'obs_scene': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/obs_scene' (unopened)
    'spatial': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial' (unopened)>

In [11]:
# Display the `ms` collection, which holds the experiment's measurements.
sp.ms

<Collection 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms' (open for 'r') (1 item)
    'RNA': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA' (unopened)>

In [12]:
# Display the members of the "RNA" measurement (the name passed as `measurement_name` at ingest).
sp.ms["RNA"]

<Measurement 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA' (open for 'r') (5 items)
    'X': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/X' (unopened)
    'obsm': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/obsm' (unopened)
    'uns': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/uns' (unopened)
    'var': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/var' (unopened)
    'var_scene': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/var_scene' (unop

In [13]:
# Display the handle of the measurement's `var_scene` member (no data is read yet).
sp.ms["RNA"].var_scene

<DataFrame 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/var_scene' (open for 'r')>

In [14]:
# Materialize the whole `var_scene` table as a pandas DataFrame for display.
# NOTE: `var_scene_df` is the SOMA DataFrame handle, not a pandas object.
var_scene_df = sp.ms["RNA"].var_scene
var_scene_df.read().concat().to_pandas()

Unnamed: 0,soma_joinid,scene_id,data
0,0,3396c353-d720-4588-8724-75546e2f18cc,True
1,3,3396c353-d720-4588-8724-75546e2f18cc,True
2,7,3396c353-d720-4588-8724-75546e2f18cc,True
3,12,3396c353-d720-4588-8724-75546e2f18cc,True
4,13,3396c353-d720-4588-8724-75546e2f18cc,True
...,...,...,...
22747,33148,3396c353-d720-4588-8724-75546e2f18cc,True
22748,33149,3396c353-d720-4588-8724-75546e2f18cc,True
22749,33151,3396c353-d720-4588-8724-75546e2f18cc,True
22750,33153,3396c353-d720-4588-8724-75546e2f18cc,True


In [15]:
# Display the handle of the experiment's `obs_scene` member (no data is read yet).
sp.obs_scene

<DataFrame 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/obs_scene' (open for 'r')>

In [16]:
# Materialize the whole `obs_scene` table as a pandas DataFrame for display.
# NOTE: `obs_scene_df` is the SOMA DataFrame handle, not a pandas object.
obs_scene_df = sp.obs_scene
obs_scene_df.read().concat().to_pandas()

Unnamed: 0,soma_joinid,scene_id,data
0,0,3396c353-d720-4588-8724-75546e2f18cc,True
1,1,3396c353-d720-4588-8724-75546e2f18cc,True
2,2,3396c353-d720-4588-8724-75546e2f18cc,True
3,3,3396c353-d720-4588-8724-75546e2f18cc,True
4,4,3396c353-d720-4588-8724-75546e2f18cc,True
...,...,...,...
4987,4987,3396c353-d720-4588-8724-75546e2f18cc,True
4988,4988,3396c353-d720-4588-8724-75546e2f18cc,True
4989,4989,3396c353-d720-4588-8724-75546e2f18cc,True
4990,4990,3396c353-d720-4588-8724-75546e2f18cc,True


In [17]:
# Display the `spatial` collection; its entries are keyed by scene name.
sp.spatial

<Collection 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial' (open for 'r') (1 item)
    '3396c353-d720-4588-8724-75546e2f18cc': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc' (unopened)>

In [18]:
# Look up the scene created at ingest; it was named after `dataset_id` via `scene_name`.
sp.spatial[dataset_id]

<Scene 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc' (open for 'r') (3 items)
    'img': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/img' (unopened)
    'obsl': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/obsl' (unopened)
    'varl': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/varl' (unopened)>

In [19]:
# Display the handle of the `hires` array stored under the scene's `img` member.
sp.spatial[dataset_id]["img"]["hires"]

<DenseNDArray 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/img/hires' (open for 'r')>

In [20]:
# Read the entire `hires` array into memory and show it as a NumPy array
# (NumPy abbreviates the printed values with `...`).
hires_ndarray = sp.spatial[dataset_id]["img"]["hires"].read()
hires_ndarray.to_numpy()

array([[[237, 237, 237, ..., 237, 237, 237],
        [237, 237, 237, ..., 237, 237, 237],
        [237, 237, 237, ..., 237, 237, 237],
        ...,
        [237, 237, 237, ..., 234, 234, 233],
        [237, 237, 237, ..., 234, 234, 233],
        [237, 237, 237, ..., 234, 234, 233]],

       [[241, 241, 241, ..., 240, 240, 240],
        [241, 241, 241, ..., 240, 240, 240],
        [241, 241, 241, ..., 240, 240, 240],
        ...,
        [241, 241, 241, ..., 237, 237, 237],
        [241, 241, 241, ..., 237, 237, 236],
        [241, 241, 241, ..., 237, 237, 236]],

       [[240, 240, 240, ..., 243, 243, 243],
        [240, 240, 240, ..., 243, 243, 243],
        [240, 240, 240, ..., 243, 243, 243],
        ...,
        [240, 240, 240, ..., 240, 240, 239],
        [240, 240, 240, ..., 240, 240, 238],
        [240, 240, 240, ..., 240, 240, 238]]], dtype=uint8)

In [21]:
# Display the scene's `obsl` collection.
sp.spatial[dataset_id]["obsl"]

<Collection 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/obsl' (open for 'r') (1 item)
    'loc': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/obsl/loc' (unopened)>

In [22]:
# Display the handle of the `loc` member of `obsl` (no data is read yet).
sp.spatial[dataset_id]["obsl"]["loc"]

<DataFrame 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/spatial/3396c353-d720-4588-8724-75546e2f18cc/obsl/loc' (open for 'r')>

In [23]:
# Materialize the whole `obsl/loc` table as a pandas DataFrame for display.
# NOTE: `obsl_loc_df` is the SOMA DataFrame handle, not a pandas object.
obsl_loc_df = sp.spatial[dataset_id]["obsl"]["loc"]
obsl_loc_df.read().concat().to_pandas()

Unnamed: 0,y,x,soma_joinid,in_tissue,array_row,array_col,sample,n_counts,log_counts,n_genes,...,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage,observation_joinid,_soma_geometry
0,206,1311,750,0,0,76,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,g~Lsd05afR,8.912657
1,206,1339,298,0,0,78,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,O+rFu9G&=6,8.912657
2,206,1366,113,0,0,80,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,s#*Dxj^y6e,8.912657
3,206,1393,1672,0,0,82,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,x1QHW<9wB@,8.912657
4,206,1503,725,0,0,90,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,pe`+KlB-s7,8.912657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,2044,1572,4750,0,77,95,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,#p{^w*9E%G,8.912657
4988,2044,1654,4138,0,77,101,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,K>@-#^U$@;,8.912657
4989,2044,1874,4809,0,77,117,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,O1^&ikNSeg,8.912657
4990,2044,1901,4110,0,77,119,WS_PLA_S9101770,,,,...,unknown,Visium Spatial Gene Expression,normal,Homo sapiens,unknown,decidua basalis,unknown,Carnegie stage 23,TR33s2qj7R,8.912657


In [None]:
# NOTE(review): unexecuted cell that repeats an expression already shown in an
# earlier cell; likely a leftover that can be removed.
sp.spatial[dataset_id]["obsl"]["loc"]

In [None]:
# NOTE(review): unexecuted cell that repeats an expression already shown in an
# earlier cell; likely a leftover that can be removed.
sp.spatial[dataset_id]

In [24]:
# Display the experiment again; members accessed in earlier cells now show up
# as opened objects in the repr instead of `(unopened)`.
sp

<Experiment 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc' (open for 'r') (4 items)
    'ms': Collection 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms' (open for 'r') (1 item)
        'RNA': Measurement 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA' (open for 'r') (5 items)
            'X': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/X' (unopened)
            'obsm': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-d720-4588-8724-75546e2f18cc/ms/RNA/obsm' (unopened)
            'uns': 'file:///home/ubuntu/github/cellxgene-census/issues/census_1127_spatial-builder/spatial_soma/3396c353-