In [1]:
# Enable IPython's autoreload extension for this kernel session.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell executes, so edits to
# local source files are picked up without restarting the kernel.
%autoreload 2

In [2]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the subsampled mouse lymph Visium HD dataset from shared storage.
h5ad_path = '/ix/djishnu/shared/djishnu_kor11/training_data_2025/mouse_lymph1_visiumHD_subsampled.h5ad'
adata = sc.read_h5ad(h5ad_path)

# Bare expression so the notebook shows the AnnData summary as the cell output.
adata

AnnData object with n_obs × n_vars = 10000 × 2965
    obs: 'cell_type', 'cell_type_int', 'n_genes'
    var: 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'cell_thresholds', 'dendrogram_cell_type', 'hvg', 'received_ligands', 'received_ligands_tfl'
    obsm: 'X_pca', 'X_umap', 'spatial', 'spatial_unscaled', 'tangram_ct_pred'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count', 'raw_count'

In [5]:
# Materialise the 'normalized_count' layer as a DataFrame, then pull out the
# expression columns for the three genes of interest.
expression_layer = 'normalized_count'
gex_df = adata.to_df(layer=expression_layer)
ccr4, prdm1, gata3 = (gex_df[gene] for gene in ('Ccr4', 'Prdm1', 'Gata3'))

In [6]:
import sys
import warnings
warnings.filterwarnings('ignore')
sys.path.append('../../src')

In [8]:
from spaceoracle.tools.network import RegulatoryFactory
# Build the regulatory network helper from the precomputed co-links file for
# this dataset, using the integer cell-type column as the annotation key.
colinks_pkl = '/ix/djishnu/shared/djishnu_kor11/training_data_2025/mouse_lymph1_visiumHD_subsampled_colinks.pkl'
co_grn = RegulatoryFactory(colinks_path=colinks_pkl, annot='cell_type_int')

In [9]:
from spaceoracle.models.parallel_estimators import SpatialCellularProgramsEstimator

# Settings for a single-target estimator: model 'Cd74', group cells by the
# integer cell-type annotation, and reuse the regulatory network built above.
# NOTE(review): units of `radius` / `contact_distance` are not visible here;
# presumably the same units as adata.obsm['spatial'] -- confirm.
estimator_kwargs = dict(
    target_gene='Cd74',
    cluster_annot='cell_type_int',
    radius=400,
    contact_distance=50,
    grn=co_grn,
)
estimator = SpatialCellularProgramsEstimator(adata, **estimator_kwargs)

In [10]:
# Display the estimator's `lr` table. The captured output lists ligand/receptor
# pairs with their pathway, signaling type and the radius assigned to each pair.
estimator.lr

Unnamed: 0,ligand,receptor,pathway,signaling,radius,pairs
0,Tgfb1,Tgfbr1,TGFb,Secreted Signaling,400,Tgfb1$Tgfbr1
0,Tgfb1,Tgfbr2,TGFb,Secreted Signaling,400,Tgfb1$Tgfbr2
3,Tgfb1,Acvr1b,TGFb,Secreted Signaling,400,Tgfb1$Acvr1b
4,Tgfb1,Acvr1c,TGFb,Secreted Signaling,400,Tgfb1$Acvr1c
12,Bmp2,Acvr2a,BMP,Secreted Signaling,400,Bmp2$Acvr2a
...,...,...,...,...,...,...
2012,Thy1,Itgb3,THY1,Cell-Cell Contact,50,Thy1$Itgb3
2015,Itga4,Vcam1,VCAM,Cell-Cell Contact,50,Itga4$Vcam1
2015,Itgb1,Vcam1,VCAM,Cell-Cell Contact,50,Itgb1$Vcam1
2016,Itga9,Vcam1,VCAM,Cell-Cell Contact,50,Itga9$Vcam1


In [11]:
# Training configuration for the single-gene fit, kept in one place so the
# values are easy to compare against other runs.
fit_kwargs = dict(
    num_epochs=100,
    learning_rate=5e-3,
    score_threshold=0.2,
    l1_reg=1e-9,
    estimator='lasso',
)
estimator.fit(**fit_kwargs)

Fitting Cd74 with 520 modulators
	64 Transcription Factors
	323 Ligand-Receptor Pairs
	133 TranscriptionFactor-Ligand Pairs
0: 0.7292 | 0.7171
1: 0.9456 | 0.9419
2: 0.9632 | 0.9608
3: 0.9742 | 0.9742
4: 0.9456 | 0.9417
5: 0.9575 | 0.9542
6: 0.9355 | 0.9285
7: 0.9721 | 0.9705
8: 0.9766 | 0.9758


In [None]:
# Run the full SpaceTravLR pipeline on the loaded `adata`, using the same
# annotation column, regulatory network, radius and contact distance as the
# single-gene estimator above, and write results under `save_dir`.
base_dir = '/ix/djishnu/shared/djishnu_kor11/'
from spaceoracle import SpaceTravLR

# Name the output directory after the dataset that is actually loaded in this
# notebook. The previous value pointed at 'mouse_lymph3_visiumHD_subsampled'
# while both the .h5ad and the co-links file are 'mouse_lymph1_...', so results
# would have been written into (and could clobber) another dataset's run folder.
# NOTE(review): revert if writing to the lymph3 folder was intentional.
dataset_name = 'mouse_lymph1_visiumHD_subsampled'

star = SpaceTravLR(
    adata=adata,
    annot='cell_type_int',
    max_epochs=100,
    learning_rate=5e-3,
    spatial_dim=64,
    batch_size=512,
    grn=co_grn,
    radius=400,
    contact_distance=50,
    save_dir=base_dir + 'lasso_runs/' + dataset_name
)

star.run()