# InSituPy demonstration
Example dataset: https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast

In [1]:
## The following code ensures that all functions and init files are reloaded before executions.
%load_ext autoreload
%autoreload 2

In [2]:
from insitupy import XeniumData
from pathlib import Path

## Load data

In [3]:
# input directories
data_dirs = [
    Path("C:/Users/ge37voy/data/2301_CRC/2301-01_20230504/01_xenium_data/output-XETG00050__0003621__Region_1__20230504__121954"),
    Path("C:/Users/ge37voy/data/datasets/Xenium_FFPE_Human_Breast_Cancer_Rep1_outs/output-XETG00000__slide_id__sample_id/"),
    Path("C:/Users/ge37voy/data/20230811__134602__2314_OTCs_Wollenberg/output-XETG00050__0003555__OTC0304-1__20230811__134819")
]

In [4]:
img_dirs = [elem.parent / "unregistered_images" for elem in data_dirs]

In [5]:
i = 0
data_dir = data_dirs[i]
img_dir = img_dirs[i]

In [6]:
data_dir

WindowsPath('C:/Users/ge37voy/data/2301_CRC/2301-01_20230504/01_xenium_data/output-XETG00050__0003621__Region_1__20230504__121954')

In [7]:
xd = XeniumData(data_dir)

In [8]:
xd

[1m[31mXeniumData[0m
[1mSlide ID:[0m	0003621
[1mRegion ID:[0m	Region_1
[1mData path:[0m	C:\Users\ge37voy\data\2301_CRC\2301-01_20230504\01_xenium_data
[1mData folder:[0m	output-XETG00050__0003621__Region_1__20230504__121954
[1mMetadata file:[0m	experiment_modified.xenium

## Read different data modalities

In [9]:
xd.read_annotations()
xd.read_images()
xd.read_matrix()

### Show overview of data

In [13]:
xd

[1m[31mXeniumData[0m
[1mSlide ID:[0m	0003621
[1mRegion ID:[0m	Region_1
[1mData path:[0m	C:\Users\ge37voy\data\2301_CRC\2301-01_20230504\01_xenium_data
[1mData folder:[0m	output-XETG00050__0003621__Region_1__20230504__121954
[1mMetadata file:[0m	experiment_modified.xenium
    ➤ [34m[1mimages[0m
       [1mnuclei:[0m	(27352, 34139)
       [1mHE:[0m	(27352, 34139, 3)
       [1mpanCK:[0m	(27352, 34139, 3)
    ➤[32m[1m matrix[0m
       AnnData object with n_obs × n_vars = 151305 × 280
	       obs: 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area'
	       var: 'gene_ids', 'feature_types', 'genome'
	       obsm: 'spatial'
    ➤ [36m[1mannotations[0m
       [1mtanja:[0m	41 annotations, 5 classes ('Tumor', 'Fat', 'Mucus', 'Immune cells', 'Peritumoral Zone') 

## View data interactively

In [16]:
xd.show()

Viewer(camera=Camera(center=(0.0, 2906.04375, 3627.1625), zoom=0.06652388246666312, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 5812.299999999999, 0.2125), (0.0, 7254.537499999999, 0.2125)), current_step=(13675, 17069), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x14284bf26d0>, <Image layer 'HE' at 0x14285b69eb0>, <Image layer 'panCK' at 0x14287c81fd0>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='napari', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x00000142F1EBA8B0>], _persisted_mouse_event={}, _mouse_drag_gen={}, _mouse_wheel_gen={}, keymap

### Crop data

In [13]:
xx = xd.crop(shape_layer="crop", inplace=False)

napari.Viewer: napari


  if not is_categorical_dtype(df_full[k]):


In [14]:
xx

[1m[31mXeniumData[0m
[1mSlide ID:[0m	0003621
[1mRegion ID:[0m	Region_1
[1mData path:[0m	C:\Users\ge37voy\data\2301_CRC\2301-01_20230504\01_xenium_data
[1mData folder:[0m	output-XETG00050__0003621__Region_1__20230504__121954
[1mMetadata file:[0m	experiment_modified.xenium
    ➤ [34m[1mimages[0m
       [1mnuclei:[0m	(3239, 4530)
       [1mHE:[0m	(3239, 4530, 3)
       [1mpanCK:[0m	(3239, 4530, 3)
    ➤[32m[1m matrix[0m
       AnnData object with n_obs × n_vars = 5463 × 280
	       obs: 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area'
	       var: 'gene_ids', 'feature_types', 'genome'
	       obsm: 'spatial'
    ➤ [36m[1mannotations[0m
       [1mtanja:[0m	2 annotations, 2 classes ('Tumor', 'Peritumoral Zone') 

In [17]:
xx.matrix.obs.columns

Index(['transcript_counts', 'control_probe_counts', 'control_codeword_counts',
       'unassigned_codeword_counts', 'total_counts', 'cell_area',
       'nucleus_area'],
      dtype='object')

In [47]:
xx.matrix.obs["total_counts"].astype("category")

aalncfpb-1     92
aalndmje-1    125
aalnkbfm-1    208
aalobdob-1     69
aalocmdk-1     33
             ... 
lagebbcc-1    124
lagegmnl-1     25
lagejmkl-1     48
lagepcdm-1    146
lagfcing-1    179
Name: total_counts, Length: 5463, dtype: category
Categories (470, int64): [0, 1, 2, 3, ..., 648, 672, 741, 755]

In [11]:
xx.show()

NameError: name 'xx' is not defined

## Perform preprocessing steps

In [48]:
xx.normalize()
xx.hvg()
xx.reduce_dimensions(umap=True, tsne=False)

Store raw counts in anndata.layers['counts']...
Normalization, log-transformation...
Calculate highly-variable genes across all samples using seurat flavor...


  disp_grouped = df.groupby('mean_bin')['dispersions']
  if not is_categorical_dtype(df_full[k]):


Dimensionality reduction...
Leiden clustering...


In [71]:
xx.show()



Viewer(camera=Camera(center=(0.0, 344.0375, 481.20625), zoom=1.2128338525422735, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 688.2875, 0.2125), (0.0, 962.625, 0.2125)), current_step=(1619, 2264), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x1a612167910>, <Image layer 'HE' at 0x1a612269940>, <Image layer 'panCK' at 0x1a61e6c3520>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='napari', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x000001A48CCD4A60>], _persisted_mouse_event={}, _mouse_drag_gen={}, _mouse_wheel_gen={}, keymap={})

In [57]:
xx.show(keys=["leiden", "ACTA2", "LYZ"])

Viewer(camera=Camera(center=(0.0, 344.0375, 481.20625), zoom=0.4608752110115569, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 688.4727430774632, 0.2125), (0.0, 962.7127100054628, 0.2125)), current_step=(1619, 2264), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x1a5f35fa910>, <Image layer 'HE' at 0x1a5ee1f4670>, <Image layer 'panCK' at 0x1a5f5863df0>, <Points layer 'leiden' at 0x1a5f7003700>, <Points layer 'ACTA2' at 0x1a5edb86d30>, <Points layer 'LYZ' at 0x1a5f72138e0>], help='use <5> for transform, use <2> for add points, use <3> for select points', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='napari', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_dou

In [72]:
screenshot = xx.viewer.screenshot()

## Save results

In [19]:
out_dir = data_dir.parent / "cropped_processed"

In [21]:
xx.save(out_dir, overwrite=True)