In [None]:
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "anywidget>=0.9.0",
#     "jupyter-scatter-scsketch>=0.21.0",
#     "llvmlite>=0.44.0",
#     "numpy>=1.26.0",
#     "pandas>=2.0.0",
#     "scanpy>=1.9.0",
#     "scipy>=1.11.0",
#     "ipywidgets>=8.0.0",
#     "matplotlib>=3.7.0",
#     "requests>=2.31.0",
#     "watchfiles>=0.20.0",
# ]
#
# [tool.uv.sources]
# scsketch = { path = ".", editable = true }
# ///

# scSketch

> **Quick Start**: `uvx scsketch demo` (no cloning required!)  
> Or from this repo: `uvx juv run demo.ipynb`

scSketch provides a custom UI for [Jupyter-Scatter](https://jupyter-scatter.dev) that implements [Directional Analysis from Colubri et al's Sciviewer](https://doi.org/10.1093/bioinformatics/btab689). Sciviewer's directional analysis helps you interpret patterns in embedding visualizations by identifying genes varying locally along any user-specified direction.

For this demo, we're using a single-cell RNA-seq dataset of human oral keratinocytes and the effects of human cytomegalovirus from Kowalik et al. (2025), clustered with Louvain algorithm and embedded with UMAP.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%env ANYWIDGET_HMR=1

env: ANYWIDGET_HMR=1


## Load Data

Load your single-cell data using scanpy and prepare it for visualization.

In [3]:
# Replace with your own h5ad file
data_file = "cmv.srt.soupx.filt.updated.h5ad"

In [4]:
import pandas as pd
import scanpy as sc

adata = sc.read(data_file)
adata

  from anndata import __version__ as anndata_version
  if Version(anndata.__version__) >= Version("0.11.0rc2"):
  if Version(anndata.__version__) >= Version("0.11.0rc2"):


Only considering the two last: ['.updated', '.h5ad'].
Only considering the two last: ['.updated', '.h5ad'].


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'cmv.srt.soupx.filt.updated.h5ad', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

## Prepare Data for scSketch

Extract UMAP coordinates, metadata, and gene expression data into a single DataFrame.

In [None]:
# UMAP coordinates
umap_df = pd.DataFrame(adata.obsm["X_umap"], columns=["x", "y"], index=adata.obs_names)

# Define metadata columns of interest
metadata_cols = [
    "dpi",
    "strain",
    "percent.cmv.log10",
    "seurat_clusters",
    "virus.presence",
    "Infection_localminima",
    "UL123_define_infection",
    "Infection_state",
    "Infection_state_bkgd",
]
metadata_df = adata.obs[metadata_cols]

# Extract gene expression
gene_exp_df = pd.DataFrame(
    adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X,
    columns=adata.var_names,
    index=adata.obs_names,
)

# Combine into single dataframe
df = pd.concat([umap_df, metadata_df, gene_exp_df], axis=1)
df = df.loc[:, ~df.columns.duplicated()]

# Define categorical columns
categorical_cols = [
    "dpi",
    "strain",
    "seurat_clusters",
    "virus.presence",
    "Infection_localminima",
    "UL123_define_infection",
    "Infection_state",
    "Infection_state_bkgd",
]

# Convert categorical columns to strings
for col in categorical_cols:
    df[col] = df[col].astype(str)

df.head()

## Launch scSketch

Create and display the scSketch widget.

In [None]:
from scsketch import ScSketch

sketch = ScSketch(
    data=df,
    categorical_columns=categorical_cols,
    color_by_default="seurat_clusters",
    height=720,
    background_color="#111111",
)

sketch.show()

## How to Use scSketch

1. **Select points**: Use the rectangle or lasso tool to select cells in the embedding
2. **Add selection**: Click the `+` button to save your selection
3. **Run analysis**: Click `Compute Directional Search` to identify genes varying along the selected direction
4. **Explore results**: Click on genes to see their Reactome pathways, and click pathways to view diagrams
5. **Color by genes**: Use the dropdown to color the embedding by specific genes or metadata

The directional analysis shows genes with their Pearson Correlation Coefficient (R) and p-value (p), representing which genes are most upregulated or downregulated along the selected direction.