In [None]:
# Load IPython's autoreload extension and select mode 2, which reloads
# imported modules before each cell runs so edits to source files on disk
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Set the ANYWIDGET_HMR environment variable to "1" for this kernel process.
# NOTE(review): presumably this turns on anywidget's hot module replacement
# for widget front-end code during development — confirm against anywidget docs.
%env ANYWIDGET_HMR=1

In [None]:
# Optional debugging hook: starts a debugpy listener and waits for a debugger
# client to attach before continuing.
# Do not run this cell when debugging from VS Code; run it only when the
# notebook is executed in JupyterLab.
import debugpy
# Listen for an incoming debugger connection on port 5678.
debugpy.listen(5678)
print("Waiting for debugger attach...")
# Execution pauses here until a client connects, so nothing below this line
# (or in later cells) runs in the meantime.
debugpy.wait_for_client()
print("Debugger attached.")

In [None]:
import urllib.request
import os.path

# Setup data
# Remote location of the PBMC 3K demo dataset and the local path it is cached at.
data_url = "https://www.dropbox.com/scl/fi/ih2laraxptbaeqfgqfax9/pbmc3k_20210420.h5ad?rlkey=fdtrijj9rh971uwvaa5x4qaq9&st=o0g4sf07&dl=1"
data_file = "data/pbmc3k_20210420.h5ad"

# Download only when no local copy exists, so re-running this cell is cheap.
if not os.path.exists(data_file):
    # makedirs(exist_ok=True) instead of mkdir: the directory may already exist
    # (e.g. created by an earlier, interrupted run) even though the file does not.
    os.makedirs(os.path.dirname(data_file) or ".", exist_ok=True)
    print("Downloading PBMC 3K demo...")
    # Download to a temporary sibling path and move it into place only after the
    # transfer finishes; otherwise an interrupted download would leave a partial
    # file at data_file that the exists() check above would treat as complete.
    partial_file = data_file + ".part"
    urllib.request.urlretrieve(data_url, partial_file)
    os.replace(partial_file, data_file)
    print("Download complete.")

In [None]:
import scanpy as sc

# Read the dataset at data_file (the path set in the data-setup cell) with scanpy.
adata = sc.read(data_file)
# Bare expression on the last line so the notebook displays the loaded object.
adata

In [None]:
import pandas as pd

# UMAP coordinates: one row per observation (indexed by adata.obs_names) with
# the embedding stored under adata.obsm["X_umap"] exposed as columns "x" and "y".
# Supplying exactly two column names assumes the embedding has two components.
umap_df = pd.DataFrame(adata.obsm["X_umap"], columns=["x", "y"], index=adata.obs_names)

# Define metadata columns of interest
metadata_cols = [
    "n_genes",
    "total_counts_mt",
]
metadata_df = adata.obs[metadata_cols]

# Extract gene expression as a dense observations-by-variables frame.
# adata.X is densified through .toarray() when it offers that method
# (presumably a sparse matrix); otherwise it is used as-is.
gene_exp_df = pd.DataFrame(
    adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X,
    columns=adata.var_names,
    index=adata.obs_names,
)

# Combine into single dataframe. All three frames share adata.obs_names as their
# index, so the column-wise concat lines rows up by observation.
df = pd.concat([umap_df, metadata_df, gene_exp_df], axis=1)
# Where a column name occurs more than once, keep only the first occurrence
# (coordinates and metadata come first in the concat, so they take precedence
# over an identically named expression column).
df = df.loc[:, ~df.columns.duplicated()]

# Define categorical columns
# NOTE(review): these are the same columns as metadata_cols and their names
# suggest numeric measurements — confirm they are meant to be treated as categories.
categorical_cols = [
    "n_genes",
    "total_counts_mt",
]

# Convert categorical columns to strings in a single astype call. astype returns
# a new frame, which avoids assigning column-by-column into the result of the
# .loc selection above (that pattern can raise SettingWithCopyWarning on pandas
# versions without copy-on-write when duplicate columns were dropped).
df = df.astype(dict.fromkeys(categorical_cols, str))

df.head()

In [None]:
from scsketch import ScSketch

# Build the ScSketch widget from the combined dataframe, telling it which
# columns were prepared as categorical (string-valued) in the previous cell.
sketch = ScSketch(
    data=df,
    categorical_columns=categorical_cols,
    # Default colouring is left disabled: no "seurat_clusters" column is added to
    # df in the cell above, so it would need to be included there before enabling.
    # color_by_default="seurat_clusters",
    height=720,
    background_color="#111111",
)

# Display the widget.
sketch.show()