1. Download data (img, shapes, etc)
```bash
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/st/INT1.h5ad .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/wsis/INT1.tif .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/tissue_seg/INT1_contours.geojson .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/tissue_seg/INT1_vis.jpg .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/cellvit_seg/INT1_cellvit_seg.geojson .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/cellvit_seg/INT1_cellvit_seg.parquet .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/pixel_size_vis/INT1_pixel_size_vis.png .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/patches/INT1.h5 .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/metadata/INT1.json .
scp etienne.doumazane@10.100.236.11:/data/etienne.doumazane/theremia/hest_data/spatial_plots/INT1_spatial_plots.png .
```

In [2]:
from geopandas import GeoDataFrame
from shapely import Point
import numpy as np
from spatialdata.models import ShapesModel, TableModel, Image2DModel
import anndata as ad
import zarr
import dask.array as da
from spatialdata import SpatialData, read_zarr




In [3]:
shapes = ShapesModel.parse("data/INT1_contours.geojson", radius=[10, 10])
print(shapes)

                                            geometry
0  POLYGON ((4422 13907, 4422 13917, 4422 13927, ...
1  POLYGON ((2136 1807, 2136 1817, 2136 1827, 212...


In [4]:
adata = ad.read_h5ad("data/INT1.h5ad")
print(adata)
print(TableModel.parse(adata))
table = TableModel.parse(adata)

AnnData object with n_obs × n_vars = 1084 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito'
    var: 'gene_ids', 'feature_types', 'genome', 'mito', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'
AnnData object with n_obs × n_vars = 1084 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito

In [21]:
name = "INT1"
uri = f"data/{name}.zarr"
store = zarr.open(uri, mode="r")
multiscale_data = [da.from_zarr(zarr_array_tuple[1]) for zarr_array_tuple in store.arrays()]

In [23]:
multiscale_data

[dask.array<from-zarr, shape=(19968, 19200, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(9984, 9600, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(4992, 4800, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(2496, 2400, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(1248, 1200, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(624, 600, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(312, 300, 3), dtype=uint8, chunksize=(256, 256, 3), chunktype=numpy.ndarray>,
 dask.array<from-zarr, shape=(156, 150, 3), dtype=uint8, chunksize=(156, 150, 3), chunktype=numpy.ndarray>]

In [34]:
scale_factors = [multiscale_data[i].shape[1] / multiscale_data[i+1].shape[1] for i in range(len(multiscale_data)-1)]
print(scale_factors)
scale_factors = [int(scale_factor) for scale_factor in scale_factors] ## I had to convert to int
print(scale_factors)


[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
[2, 2, 2, 2, 2, 2, 2]


In [35]:
image = Image2DModel.parse(multiscale_data[0], dims=("x", "y", "c"), scale_factors=scale_factors) # I had to use only the 1st multiscale data

[34mINFO    [0m Transposing `data` of type: [1m<[0m[1;95mclass[0m[39m [0m[32m'dask.array.core.Array'[0m[1m>[0m to [1m([0m[32m'c'[0m, [32m'y'[0m, [32m'x'[0m[1m)[0m.                           


In [44]:
elements = {"image": image,
    "tissue_contours": shapes,
    "visium_data": table}

In [46]:
sdata = SpatialData.from_elements_dict(elements)

In [47]:
path = f"data/{name}_sdata.zarr"
sdata.write(path, overwrite=True)

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside data/INT1_sdata.zarr[1m)[0m. Please see the documentation of `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to         
         understand the implications of working with SpatialData objects that are not self-contained.              
[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path: data/INT1_sdata.zarr                 


In [51]:
sdata = read_zarr(path)

In [52]:
print(sdata)

SpatialData object, with associated Zarr store: /Users/edmz/code/theremia/st_challenge/data/INT1_sdata.zarr
├── Images
│     └── 'image': DataTree[cyx] (3, 19200, 19968), (3, 9600, 9984), (3, 4800, 4992), (3, 2400, 2496), (3, 1200, 1248), (3, 600, 624), (3, 300, 312), (3, 150, 156)
├── Shapes
│     └── 'tissue_contours': GeoDataFrame shape: (2, 1) (2D shapes)
└── Tables
      └── 'visium_data': AnnData (1084, 36601)
with coordinate systems:
    ▸ 'global', with elements:
        image (Images), tissue_contours (Shapes)


In [5]:
adata

AnnData object with n_obs × n_vars = 1084 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito'
    var: 'gene_ids', 'feature_types', 'genome', 'mito', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'