In [12]:
import scanpy as sc 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import sys
# Extend the module search path with a relative source directory so the
# `spaceoracle` imports below can resolve (presumably to an in-repo checkout;
# confirm). NOTE(review): the path is relative, so it depends on the directory
# the kernel was started from.
sys.path.append('../../../src')

from spaceoracle.tools.network import RegulatoryFactory
from spaceoracle.gene_factory import GeneFactory
from spaceoracle.astronomer import Astronaut

In [13]:
# Shared project root; every input and output path in this notebook is built from it.
base_dir = '/ix/djishnu/shared/djishnu_kor11/'

# Regulatory network wrapper, built from the pickled colinks file and keyed on
# the integer cell-type annotation column.
co_grn = RegulatoryFactory(
    annot='cell_type_int',
    colinks_path=f'{base_dir}training_data_2025/snrna_human_tonsil_colinks.pkl',
)

# Load the matching AnnData object; the bare name on the last line displays its summary.
adata = sc.read_h5ad(f'{base_dir}training_data_2025/snrna_human_tonsil.h5ad')
adata

AnnData object with n_obs × n_vars = 5778 × 3333
    obs: 'cell_type', 'author_cell_type', 'cell_type_int', 'banksy_celltypes'
    uns: 'cell_thresholds', 'cell_type_int_colors', 'received_ligands', 'received_ligands_tfl'
    obsm: 'spatial', 'spatial_unscaled'
    layers: 'imputed_count', 'normalized_count'

In [14]:
# Drop the two `received_ligands*` entries stored in adata.uns.
# pop(..., None) instead of `del` keeps this cell idempotent: re-running it
# after the keys are already gone no longer raises KeyError.
adata.uns.pop('received_ligands', None)
adata.uns.pop('received_ligands_tfl', None)

In [15]:
# Remove pre-processing from COMMOT: overwrite the stored thresholds with a
# cells x genes frame in which every entry is 1.
# The frame is built directly from the scalar rather than by creating an
# all-NaN object-dtype frame and calling fillna(1); that route depends on
# pandas' deprecated object-dtype downcasting and yields a version-dependent dtype.
adata.uns['cell_thresholds'] = pd.DataFrame(
    1,
    index=adata.obs.index,
    columns=adata.var_names,
)

In [16]:
# Load the per-cell embedding table (same file as before, now addressed via base_dir).
sp_maps_df = pd.read_parquet(base_dir + 'scGPT_outputs/tonsil_embeddings.parquet')

# reindex() silently inserts all-NaN rows for labels it cannot find, which would
# then be stored in adata.obsm unnoticed. Fail loudly if any cell lacks an embedding.
missing_cells = adata.obs.index.difference(sp_maps_df.index)
if len(missing_cells) > 0:
    raise KeyError(
        f'{len(missing_cells)} cells in adata.obs have no row in the embedding table, '
        f'e.g. {list(missing_cells[:5])}'
    )

# Align rows to adata's cell order and keep only the raw array.
sp_maps = sp_maps_df.reindex(adata.obs.index, axis=0).values
sp_maps.shape

(5778, 512)

In [17]:
# Name of the obsm slot that holds the embedding matrix.
feature_key = 'scGPT'
# Store the aligned matrix under that name so it can later be looked up by key.
adata.obsm[feature_key] = sp_maps

In [18]:
# Configure the Astronaut run on the loaded AnnData and regulatory network.
neil = Astronaut(
    # data inputs
    adata=adata,
    grn=co_grn,
    annot='cell_type_int',
    # neighbourhood settings
    radius=400,
    contact_distance=50,
    # optimisation settings
    max_epochs=100,
    learning_rate=0.005,
    batch_size=512,
    # spatial_dim=64 is deliberately left out (it was only used to create the spatial maps)
    # output location
    save_dir=f'{base_dir}scGPT_runs/tonsil',
)


In [6]:
# Anchors are part of our CNN model, so they are switched off for this run;
# the matrix stored under the 'scGPT' key is passed as the spatial maps instead.
neil.run(use_anchors=False, sp_maps_key='scGPT')

Using scGPT as spatial maps
Fitting TOP2A with 1038 modulators
	21 Transcription Factors
	958 Ligand-Receptor Pairs
	59 TranscriptionFactor-Ligand Pairs
0: 0.8952 | 0.8028
1: 0.9112 | 0.9129
2: 0.7566 | 0.7487
3: 0.9838 | 0.9780
4: 0.8242 | 0.8106
5: 0.6670 | 0.6541
6: 0.4955 | 0.4435
7: 0.9511 | 0.9481
8: 0.7477 | 0.7423
9: 0.9168 | 0.9168
10: 0.9977 | 0.9978
11: 0.9424 | 0.9620
12: 0.9905 | 0.9902
Using scGPT as spatial maps
Fitting CD6 with 1036 modulators
	22 Transcription Factors
	956 Ligand-Receptor Pairs
	58 TranscriptionFactor-Ligand Pairs
0: 0.9887 | 0.9876
1: 0.9252 | 0.9333
2: 0.7353 | 0.7216
3: 0.9882 | 0.9888
4: 0.7254 | 0.7351
5: 0.7390 | 0.7204
6: 0.4979 | 0.4717
7: 0.9389 | 0.9394
8: 0.6999 | 0.6818
9: 0.9880 | 0.9884
10: 0.9279 | 0.9453
11: 0.9992 | 0.9992
12: 0.8946 | 0.8932
Using scGPT as spatial maps
Fitting GALM with 1014 modulators
	14 Transcription Factors
	958 Ligand-Receptor Pairs
	42 TranscriptionFactor-Ligand Pairs
0: 0.8963 | 0.8472
1: 0.7966 | 0.7913
2: 0.4

KeyboardInterrupt: 