In [3]:
%load_ext autoreload
%autoreload 2

import scanpy as sc
from pprint import pprint


from scale.config import load_config
from scale.utils import select_best_lambdas
from scale.training import train
from scale.clustering import calc_clusterings
from scale.search import calc_stability, calc_entropy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load and prepare data

In [4]:
path = "/epyc/projects/Behnam/xenium-cgn/notebooks/03_image_registration/data_level_1/adata/X1.h5ad"
adata = sc.read_h5ad(path)
adata

AnnData object with n_obs × n_vars = 91118 × 480
    obs: 'x', 'y', 'z', 'cluster', 'n_transcripts', 'density', 'elongation', 'area', 'avg_confidence', 'avg_assignment_confidence', 'max_cluster_frac', 'lifespan', 'x_centroid', 'y_centroid', 'cell_area', 'Slide_ID', 'batch', 'Patient_Sample_ID', 'Disease', 'n_genes', 'celltype_l1', 'celltype_l1_codes', 'celltype_l1_prob', 'Biopsy_ID', 'sample', 'label', 'nichepca_domain', 'nichepca_domain_tuned', 'nichepca_glom_no'
    var: 'gene_ids', 'feature_types'
    uns: 'celltype_l1_colors', 'log1p', 'neighbors', 'pca', 'spatial', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'spatial'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [5]:
# use one quarter to save time
xmin, ymin = adata.obsm["spatial"].min(axis=0)
xmax, ymax = adata.obsm["spatial"].max(axis=0)

adata = adata[
    (adata.obsm["spatial"][:, 0] > xmin)
    & (adata.obsm["spatial"][:, 0] < xmax / 2)
    & (adata.obsm["spatial"][:, 1] > ymin)
    & (adata.obsm["spatial"][:, 1] < ymax / 2)
]
adata

View of AnnData object with n_obs × n_vars = 6857 × 480
    obs: 'x', 'y', 'z', 'cluster', 'n_transcripts', 'density', 'elongation', 'area', 'avg_confidence', 'avg_assignment_confidence', 'max_cluster_frac', 'lifespan', 'x_centroid', 'y_centroid', 'cell_area', 'Slide_ID', 'batch', 'Patient_Sample_ID', 'Disease', 'n_genes', 'celltype_l1', 'celltype_l1_codes', 'celltype_l1_prob', 'Biopsy_ID', 'sample', 'label', 'nichepca_domain', 'nichepca_domain_tuned', 'nichepca_glom_no'
    var: 'gene_ids', 'feature_types'
    uns: 'celltype_l1_colors', 'log1p', 'neighbors', 'pca', 'spatial', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'spatial'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

## Setup config

In [6]:
cfg = load_config()
cfg.distance_set.stop = 25
cfg.resolution_set.step = 0.1
cfg.n_repeats = 3

pprint(cfg)

{'batch_col': None,
 'distance_set': {'start': 15, 'step': 5, 'stop': 25},
 'knn_set': {'start': 5, 'step': 5, 'stop': 40},
 'lambda_set': [1e-06,
                5e-06,
                1e-05,
                5e-05,
                0.0001,
                0.0005,
                0.001,
                0.005,
                0.01,
                0.05,
                0.1,
                0.5,
                1,
                5,
                10],
 'lr': 0.01,
 'max_epoch': 500,
 'n_heads': 5,
 'n_hidden': 15,
 'n_repeats': 3,
 'n_sample': None,
 'preprocess': False,
 'repeated_negative_sampling': False,
 'resolution_set': {'start': 0.01, 'step': 0.1, 'stop': 1.2},
 'spatial_graph_method': 'distance',
 'y_aggregated': False}


## Run scale

In [None]:
train(adata, cfg)
select_best_lambdas(adata)
calc_clusterings(adata)
calc_stability(adata, verbose=True)
top_results = calc_entropy(adata, n_levels=3, top_n=100)
top_results

Using device: cuda


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

In [None]:
adata