## Spatial pathway & GO term analysis

In [None]:
import scanpy as sc
import decoupler as dc
import scipy.sparse as sp

# Only needed for processing
import numpy as np
import pandas as pd

# Plotting options, change to your liking.
# All options are passed in ONE call on purpose: set_figure_params re-applies
# its defaults for every argument that is not passed, so splitting these over
# several calls would silently reset dpi and frameon to their default values.
sc.settings.set_figure_params(dpi=200, frameon=False, figsize=(4, 4))

In [None]:
from spatialdata_io import visium_hd
import spatialdata as sd
import matplotlib.pyplot as plt
import spatialdata_plot

In [None]:
import matplotlib as mpl
# Keep text as real text (not outlined paths) in exported SVG figures.
mpl.rc('svg', fonttype='none')

In [None]:
# Location of the SpatialData Zarr store used throughout this notebook.
SPATIALDATA_ZARR = "/data/User/revolvefire/projects/3.PPP_231118/231128_PPP/spatialdata"

# Read the store; the trailing bare name shows the object as the cell output.
sdata = sd.read_zarr(SPATIALDATA_ZARR)
sdata

In [None]:
from spatialdata import bounding_box_query, get_extent

# Make the variable names unique in every table attached to the SpatialData object.
for table in sdata.tables.values():
    table.var_names_make_unique()

# Side-by-side view of the two images before cropping.
axes = plt.subplots(1, 2, figsize=(10, 5))[1].flatten()
sdata.pl.render_images("slide1_full_image").pl.show(ax=axes[0], title="Full image")
sdata.pl.render_images("slide1_cytassist_image").pl.show(ax=axes[1], title="CytAssist image")

# Extent of the full image in the "global" coordinate system; indexed below as
# data_extent[axis][0] (minimum) and data_extent[axis][1] (maximum).
data_extent = get_extent(sdata["slide1_full_image"], coordinate_system="global")

# Crop the CytAssist image to the bounding box of the full image and register
# the crop on the SpatialData object under its own key.
queried_cytassist = bounding_box_query(
    sdata["slide1_cytassist_image"],
    min_coordinate=[data_extent["x"][0], data_extent["y"][0]],
    max_coordinate=[data_extent["x"][1], data_extent["y"][1]],
    axes=("x", "y"),
    target_coordinate_system="global",
)
sdata["queried_cytassist"] = queried_cytassist

# Same comparison, now using the cropped CytAssist image.
axes = plt.subplots(1, 2, figsize=(10, 5))[1].flatten()
sdata.pl.render_images("slide1_full_image").pl.show(ax=axes[0], title="Full image")
sdata.pl.render_images("queried_cytassist").pl.show(ax=axes[1], title="CytAssist image")



In [None]:
# Pick the element stored under 'square_008um' on the SpatialData object.
# NOTE(review): presumably the 8 µm binned expression table — confirm against the dataset.
adata = sdata['square_008um']

In [None]:
# Show the object's summary as the cell output.
adata

In [None]:
cell_df = pd.read_csv('250422.s1_cell_labels.csv')  # columns: 'cell', 'label'

# Cast the barcodes to str ONCE and reuse them for both the filter and the
# label lookup, so the two always agree on the key type (obs_names are strings).
barcodes = cell_df['cell'].astype(str)
keep_barcodes = barcodes.unique()

# Keep only observations listed in the CSV; copy so the subset is independent.
adata_subset = adata[adata.obs_names.isin(keep_barcodes)].copy()

# Attach the label of each kept barcode (a repeated barcode keeps its last label).
barcode2label = dict(zip(barcodes, cell_df['label']))
adata_subset.obs['label'] = adata_subset.obs_names.map(barcode2label)

print(adata_subset)
# dropna=False makes unmatched barcodes (NaN labels) visible in the counts.
adata_subset.obs['label'].value_counts(dropna=False).head()

In [None]:
# Continue the analysis on an independent copy of the labelled subset.
adata = adata_subset.copy()

In [None]:
adata.layers["counts"] = adata.X.copy()  # preserve counts
# Scale every observation to a total of 1e4, then apply log1p.
# The result is read back from adata.X afterwards (it is copied into a layer next).
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Keep a copy of the current adata.X under the 'log_norm' layer, because
# adata.X is overwritten later in this notebook.
adata.layers['log_norm'] = adata.X.copy()

In [None]:
import liana as li

In [None]:
# Explore candidate bandwidths over the spatial coordinates; only the first
# returned item (the plot) is kept and shown.
bandwidth_query = dict(
    coordinates=adata.obsm['spatial'],
    start=0,
    end=50,
    interval_n=10,
)
plot, _ = li.ut.query_bandwidth(**bandwidth_query)
plot

In [None]:
# Build the spatial neighbour weights on adata; the resulting matrix is read
# later from adata.obsp['spatial_connectivities'].
neighbor_params = {
    'bandwidth': 30,
    'cutoff': 0.1,
    'kernel': 'gaussian',
    'set_diag': True,
    'standardize': True,
}
li.ut.spatial_neighbors(adata, **neighbor_params)

In [None]:
# Show the object's summary again after the neighbour computation.
adata

In [None]:
# Plot the spatial weights of one spot in our object (row 0 of the
# connectivity matrix, stored as an obs column so it can be used for colouring).
# .toarray() is used instead of the `.A` shorthand, which is deprecated and
# no longer available on sparse containers in recent SciPy releases.
adata.obs['conn'] = adata.obsp['spatial_connectivities'][0].toarray().ravel()
sc.pl.spatial(adata, color='conn', spot_size=20, size=1.5, frameon=False)

In [None]:
import scipy.sparse as sp
# True -> adata.X is a SciPy sparse container and has to be converted
# (e.g. with .toarray()) before it can be used as a dense array.
sp.issparse(adata.X)

In [None]:
# Update X with spatially weighted gene expression.
# The product is taken while the connectivity matrix is still sparse:
# densifying it first would allocate a full (n_obs x n_obs) array. Only the
# (n_obs x n_vars) result is converted to a dense ndarray, which is the same
# type adata.X had after the previous dense-dot formulation.
weighted = adata.obsp['spatial_connectivities'] @ adata.X
adata.X = weighted.toarray() if sp.issparse(weighted) else np.asarray(weighted)

In [None]:
genes = ['CCL22', 'KRT5']

# Display options shared by both panels.
spatial_kwargs = dict(color=genes, size=1.5, spot_size=15, frameon=False)

# First the values stored in the 'log_norm' layer, then the current adata.X
# (the spatially weighted expression) for comparison.
sc.pl.spatial(adata, layer='log_norm', **spatial_kwargs)
sc.pl.spatial(adata, **spatial_kwargs)

In [None]:
# Fetch the human CollecTRI network (complexes kept unsplit).
net = dc.get_collectri(organism='human', split_complexes=False)
net

# Run ULM on adata.X (use_raw=False) with that network. The results are read
# afterwards from adata.obsm['ulm_estimate'] and adata.obsm['ulm_pvals'].
ulm_kwargs = dict(
    source='source',
    target='target',
    weight='weight',
    verbose=True,
    use_raw=False,
)
dc.run_ulm(mat=adata, net=net, **ulm_kwargs)

In [None]:
# Inspect the ULM estimates stored on the object.
adata.obsm['ulm_estimate']

In [None]:
# Copy the ULM results to CollecTRI-specific keys so they stay available
# under a name of their own.
for src_key, dst_key in (
    ('ulm_estimate', 'collectri_ulm_estimate'),
    ('ulm_pvals', 'collectri_ulm_pvals'),
):
    adata.obsm[dst_key] = adata.obsm[src_key].copy()
adata

In [None]:
# Fetch the human PROGENy network (requested with top=500) and display it.
progeny = dc.get_progeny(organism='human', top=500)
progeny

In [None]:
# Run MLM on adata.X (use_raw=False) with the PROGENy network.
mlm_kwargs = dict(
    source='source',
    target='target',
    weight='weight',
    verbose=True,
    use_raw=False,
)
dc.run_mlm(mat=adata, net=progeny, **mlm_kwargs)

# Store in new obsm keys
for src_key, dst_key in (
    ('mlm_estimate', 'progeny_mlm_estimate'),
    ('mlm_pvals', 'progeny_mlm_pvals'),
):
    adata.obsm[dst_key] = adata.obsm[src_key].copy()

In [None]:
# Inspect the stored PROGENy MLM estimates.
adata.obsm['progeny_mlm_estimate']

In [None]:
# Build an object from the 'progeny_mlm_estimate' obsm entry; it is used
# below for plotting (its .obs['label'] and .uns are accessed there).
acts = dc.get_acts(adata, obsm_key='progeny_mlm_estimate')
acts

In [None]:
# One entry per label, in display order, paired with its colour. Keeping both
# in a single mapping guarantees the two derived lists always line up.
label_palette = {
    'Pustular KC': '#83e377',
    'Terminally diff KC': '#16db93',
    'Differentiating KC': '#0db39e',
    'Basal KC': '#048ba8',
    'Neutrophilic pustule': '#e40066',
    'mLC': '#ffc052',
    'pDC': '#3a0ca3',
    'Lymphoid-CCL19+ niche': '#f2f230',
    'Fibroblast': '#4cc9f0',
    'Inflammatory Fib/Mac': '#4361ee',
    'Endo/Peri': '#f20089',
    'Eccrine gland coil': '#83ad6c',
    'Eccrine gland duct': '#506e40',
    'Mast cell': '#a06cd5',
    'Adipocyte': '#121e41',
}
label_order = list(label_palette)
label_colors = list(label_palette.values())

# Turn the labels into an ordered categorical with the order above.
# Labels that are not listed become NaN.
label_dtype = pd.CategoricalDtype(categories=label_order, ordered=True)
acts.obs['label'] = pd.Categorical(acts.obs['label'], dtype=label_dtype)

# Colours are stored next to the data under uns['label_colors'], in the same
# order as the categories.
acts.uns['label_colors'] = label_colors

In [None]:
# Spatial map of the JAK-STAT values next to the labels, on a diverging
# colour map centred at 0.
jak_stat_spatial = dict(
    color=['JAK-STAT', 'label'],
    cmap='RdBu_r',
    vcenter=0,
    size=1.5,
    spot_size=30,
    frameon=False,
)
sc.pl.spatial(acts, **jak_stat_spatial)

# Distribution of the same values per label.
sc.pl.violin(acts, keys='JAK-STAT', groupby='label', rotation=90)

In [None]:
# JAK-STAT spatial map on its own, written out through scanpy's `save` option.
jak_stat_export = dict(
    color=['JAK-STAT'],
    cmap='magma',
    vcenter=0,
    size=1.5,
    spot_size=20,
    frameon=False,
    save="250501_jak-stat_tissue1.svg",
)
sc.pl.spatial(acts, **jak_stat_export)


In [None]:
# Default figure size for the figures created from here on.
plt.rc('figure', figsize=[6, 3])

In [None]:
# Per-label violin plot of the JAK-STAT values, written out through `save`.
violin_export = dict(
    keys='JAK-STAT',
    groupby='label',
    rotation=90,
    save="250501_jak-stat_tissue1_violin.svg",
)
sc.pl.violin(acts, **violin_export)

In [None]:
# Fetch the 'MSigDB' resource; the result is used below as a table with
# 'collection', 'geneset' and 'genesymbol' columns.
msigdb = dc.get_resource('MSigDB')
msigdb

In [None]:
# Number of rows per collection, to see which collections are available.
msigdb['collection'].value_counts()

In [None]:
# Keep only the rows of the GO biological process collection.
is_gobp = msigdb['collection'] == 'go_biological_process'
msigdb = msigdb[is_gobp]
msigdb

In [None]:
# Remove duplicated (geneset, genesymbol) entries. An explicit copy is taken
# so the rename below writes to an independent frame instead of a slice of the
# previous one (avoids pandas' SettingWithCopyWarning).
msigdb = msigdb[~msigdb.duplicated(['geneset', 'genesymbol'])].copy()

# Rename: strip the leading 'GOBP_' prefix. Anchoring on the prefix leaves a
# name without it unchanged (instead of raising IndexError) and never cuts a
# name short if 'GOBP_' happens to occur again further on.
msigdb['geneset'] = msigdb['geneset'].str.replace(r'^GOBP_', '', regex=True)

msigdb

In [None]:
# Run ORA on adata.X (use_raw=False) with the filtered gene-set table.
ora_kwargs = dict(
    source='geneset',
    target='genesymbol',
    verbose=True,
    use_raw=False,
)
dc.run_ora(mat=adata, net=msigdb, **ora_kwargs)

# Store in a different key
for src_key, dst_key in (
    ('ora_estimate', 'msigdb_ora_estimate'),
    ('ora_pvals', 'msigdb_ora_pvals'),
):
    adata.obsm[dst_key] = adata.obsm[src_key].copy()

In [None]:
# Preview the first five columns of the stored ORA estimates.
adata.obsm['msigdb_ora_estimate'].iloc[:, 0:5]

In [None]:
acts = dc.get_acts(adata, obsm_key='msigdb_ora_estimate')

# Every non-finite entry (inf as well as NaN) is replaced by the largest
# finite value observed in the matrix.
finite_mask = np.isfinite(acts.X)
acts_v = acts.X.ravel()
max_e = np.nanmax(acts.X[finite_mask])
acts.X[~finite_mask] = max_e

acts

In [None]:
# Spatial map of one example gene set next to the labels.
example_kwargs = dict(
    color=['3_UTR_MEDIATED_MRNA_DESTABILIZATION', 'label'],
    cmap='RdBu_r',
    size=1.5,
    spot_size=20,
    frameon=False,
)
sc.pl.spatial(acts, **example_kwargs)

In [None]:
# Rank the gene sets for each label group against the rest; the result is
# used below as a table with 'group' and 'names' columns.
rank_kwargs = dict(
    groupby='label',
    reference='rest',
    method='t-test_overestim_var',
)
df = dc.rank_sources_groups(acts, **rank_kwargs)
df

In [None]:
n_top = 5

# Take the first n_top rows of every group (in the table's existing row
# order), then collect their names into a {group: [names, ...]} dictionary.
top_rows = df.groupby('group').head(n_top)
term_markers = top_rows.groupby('group')['names'].apply(list).to_dict()
term_markers

In [None]:
# Matrix plot of the selected terms per label (values scaled per variable,
# axes swapped), written out through scanpy's `save` option.
matrixplot_kwargs = dict(
    dendrogram=True,
    standard_scale='var',
    colorbar_title='Z-scaled scores',
    cmap='viridis',
    swap_axes=True,
    save="250501.GOBP_spatial.svg",
)
sc.pl.matrixplot(acts, term_markers, 'label', **matrixplot_kwargs)

In [None]:
# Raise both the on-screen and the saved-figure resolution for the final plots.
# NOTE(review): options not passed here (e.g. frameon) fall back to scanpy's
# defaults on this call — repeat them if a non-default value is wanted.
sc.set_figure_params(dpi=300, dpi_save=300)

In [None]:
# Gene sets to show, one panel each, followed by the label panel.
gobp_terms = [
    'POSITIVE_REGULATION_OF_INTERLEUKIN_8_PRODUCTION',
    'POSITIVE_REGULATION_OF_INTERLEUKIN_6_PRODUCTION',
    'IMMUNE_RESPONSE',
    'NEUTROPHIL_CHEMOTAXIS',
    'T_CELL_ACTIVATION',
    'LEUKOCYTE_CELL_CELL_ADHESION',
    'MYELOID_DENDRITIC_CELL_CHEMOTAXIS',
    'POSITIVE_REGULATION_OF_DENDRITIC_CELL_ANTIGEN_PROCESSING_AND_PRESENTATION',
]

# Two panels per row, written out through scanpy's `save` option.
panel_kwargs = dict(
    color=gobp_terms + ['label'],
    cmap='RdBu_r',
    ncols=2,
    size=1.5,
    spot_size=30,
    frameon=False,
    save="250501_gobp_spatial.svg",
)
sc.pl.spatial(acts, **panel_kwargs)