# InSituPy demonstration
Example dataset: https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast

In [1]:
## The following code ensures that all functions and init files are reloaded before executions.
%load_ext autoreload
%autoreload 2

In [2]:
from xeniumdata import XeniumData
import xeniumdata
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from xeniumdata.utils.utils import decode_robust_series
from dask_image.imread import imread
from xeniumdata.images import deconvolve_he
import numpy as np
import scanpy as sc

## Load data

In [3]:
# input directories
data_dirs = [
    Path("C:/Users/ge37voy/data/2301_CRC/2301-01_20230504/01_xenium_data/output-XETG00050__0003621__Region_1__20230504__121954"),
    Path("C:/Users/ge37voy/data/datasets/Xenium_FFPE_Human_Breast_Cancer_Rep1_outs/output-XETG00000__slide_id__sample_id/"),
    Path("C:/Users/ge37voy/data/20230811__134602__2314_OTCs_Wollenberg/output-XETG00050__0003555__OTC0304-1__20230811__134819")
]

In [4]:
img_dirs = [elem.parent / "unregistered_images" for elem in data_dirs]

In [5]:
i = 1
data_dir = data_dirs[i]
img_dir = img_dirs[i]

In [6]:
xd = XeniumData(data_dir)

In [7]:
xd

[1m[31mXeniumData[0m
[1mSlide ID:[0m	slide_id
[1mRegion ID:[0m	sample_id
[1mData path:[0m	C:\Users\ge37voy\data\datasets\Xenium_FFPE_Human_Breast_Cancer_Rep1_outs
[1mData folder:[0m	output-XETG00000__slide_id__sample_id
[1mMetadata file:[0m	experiment_modified.xenium

## Read different data modalities

In [8]:
xd.read_all()

No folder named `annotations` found. Function `read_annotations()` was skipped.
Running read_boundaries()
Running read_images()
Running read_matrix()
Running read_transcripts()


### Show overview of data

In [9]:
xd

[1m[31mXeniumData[0m
[1mSlide ID:[0m	slide_id
[1mRegion ID:[0m	sample_id
[1mData path:[0m	C:\Users\ge37voy\data\datasets\Xenium_FFPE_Human_Breast_Cancer_Rep1_outs
[1mData folder:[0m	output-XETG00000__slide_id__sample_id
[1mMetadata file:[0m	experiment_modified.xenium
    ➤ [34m[1mimages[0m
       [1mnuclei:[0m	(25778, 35416)
       [1mCD20:[0m	(25778, 35416)
       [1mHER2:[0m	(25778, 35416)
       [1mHE:[0m	(25778, 35416, 3)
    ➤[32m[1m matrix[0m
       AnnData object with n_obs × n_vars = 167780 × 313
	       obs: 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area'
	       var: 'gene_ids', 'feature_types', 'genome'
	       obsm: 'spatial'
    ➤[96m[1m transcripts[0m
	   DataFrame with shape 42638083 x 8
    ➤ [95m[1mboundaries[0m
       [1mcells[0m
       [1mnuclei[0m

## View data interactively

In [10]:
xd.show()

Viewer(camera=Camera(center=(0.0, 2738.80625, 3762.84375), zoom=0.11358912648431078, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 5477.825, 0.2125), (0.0, 7525.9, 0.2125)), current_step=(12888, 17707), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x208899d13a0>, <Image layer 'CD20' at 0x20889a2b5e0>, <Image layer 'HER2' at 0x2088a100f70>, <Image layer 'HE' at 0x2099cfb3790>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='napari', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x0000020872DF4B80>], _persisted_mouse_event={}, _mouse_drag_gen={}, _mouse_w

## Crop data

In [11]:
xx = xd.crop(shape_layer="Shapes", inplace=False)

napari.Viewer: napari


  if not is_categorical_dtype(df_full[k]):


In [12]:
xx

[1m[31mXeniumData[0m
[1mSlide ID:[0m	slide_id
[1mRegion ID:[0m	sample_id
[1mData path:[0m	C:\Users\ge37voy\data\datasets\Xenium_FFPE_Human_Breast_Cancer_Rep1_outs
[1mData folder:[0m	output-XETG00000__slide_id__sample_id
[1mMetadata file:[0m	experiment_modified.xenium
    ➤ [34m[1mimages[0m
       [1mnuclei:[0m	(9340, 9191)
       [1mCD20:[0m	(9340, 9191)
       [1mHER2:[0m	(9340, 9191)
       [1mHE:[0m	(9340, 9191, 3)
    ➤[32m[1m matrix[0m
       AnnData object with n_obs × n_vars = 24279 × 313
	       obs: 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area'
	       var: 'gene_ids', 'feature_types', 'genome'
	       obsm: 'spatial'
    ➤[96m[1m transcripts[0m
	   DataFrame with shape 6038786 x 8
    ➤ [95m[1mboundaries[0m
       [1mcells[0m
       [1mnuclei[0m

## Perform preprocessing steps

In [13]:
xx.normalize()
xx.hvg()
xx.reduce_dimensions(umap=True, tsne=False)

Store raw counts in anndata.layers['counts']...
Normalization, log-transformation...
Calculate highly-variable genes across all samples using seurat flavor...


  disp_grouped = df.groupby('mean_bin')['dispersions']
  if not is_categorical_dtype(df_full[k]):


Dimensionality reduction...
Leiden clustering...


In [14]:
xx.show(cell_type_key="leiden")

Viewer(camera=Camera(center=(0.0, 992.26875, 976.4375), zoom=0.31350112446601314, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 1984.9850244253673, 0.2125), (0.0, 1953.0875, 0.2125)), current_step=(4669, 4595), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x209b5f4c640>, <Image layer 'CD20' at 0x209bdc2a9d0>, <Image layer 'HER2' at 0x2099eb26940>, <Image layer 'HE' at 0x209b3bcce80>, <Points layer '2' at 0x209aca3fc70>, <Points layer '7' at 0x209ac901910>, <Points layer '5' at 0x209bd927700>, <Points layer '4' at 0x209aa005df0>, <Points layer '6' at 0x209b7864b80>, <Points layer '8' at 0x209bc28e8b0>, <Points layer '11' at 0x209b605bb80>, <Points layer '3' at 0x209bd8a3fd0>, <Points layer '0' at 0x209bdc32340>, <Points l



## Save results

In [15]:
out_dir = data_dir.parent / "cropped_processed"

In [16]:
xx.save(out_dir)