In [1]:
import torch
import numpy as np

from torch_geometric.nn import GATConv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed both RNGs so the synthetic graph is identical on every
# "Restart Kernel -> Run All".
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Node features: a (num_nodes, num_node_features) float32 matrix of uniform random values
num_nodes = 10
num_node_features = 5
x = torch.from_numpy(np.random.rand(num_nodes, num_node_features).astype(np.float32))

# Edge index: a (2, num_edges) integer tensor of random node ids in [0, num_nodes)
num_edges = 20
edge_index = torch.randint(0, num_nodes, (2, num_edges), dtype=torch.long)

In [4]:
# Graph attention layer with lazily inferred input sizes (-1), 2 output
# channels per head and 4 heads; the recorded output has 2 * 4 = 8 columns.
gat_net = GATConv(
    (-1, -1),
    2,
    heads=4,
)
gat_net(x, edge_index)

tensor([[-0.0634, -0.4959, -0.0615,  0.1039, -0.5780,  0.2957, -0.2445, -0.3484],
        [-0.1571, -0.4197, -0.0764, -0.1136, -0.2961,  0.3356, -0.3984, -0.2184],
        [-0.0241, -0.3007, -0.2063,  0.1972, -0.6057,  0.2394, -0.0591, -0.3631],
        [ 0.0854, -0.6489,  0.0236,  0.4758, -0.7820,  0.1971, -0.1411, -0.5144],
        [-0.0823, -0.5780,  0.0508, -0.2357, -0.0234,  0.2289, -0.4243,  0.2322],
        [-0.2902, -0.4341, -0.0320, -0.3044,  0.1074,  0.1974, -0.5429,  0.1893],
        [-0.1672, -0.2891, -0.1805, -0.1515, -0.2229,  0.2790, -0.2696, -0.0844],
        [ 0.0541, -0.3236, -0.1029, -0.2215, -0.1868,  0.3267, -0.1629,  0.0779],
        [ 0.0705, -0.5568, -0.0584,  0.1232, -0.6335,  0.3309, -0.1530, -0.2904],
        [-0.3174, -0.3844, -0.0611, -0.2754, -0.0155,  0.2762, -0.5620, -0.0594]],
       grad_fn=<AddBackward0>)

In [6]:
# With return_attention_weights=True the layer returns a tuple; count its elements.
gat_result = gat_net(x, edge_index, return_attention_weights=True)
len(gat_result)

2

In [8]:
# Number of entries in the second returned element (the attention information).
gat_result = gat_net(x, edge_index, return_attention_weights=True)
gat_attention = gat_result[1]
len(gat_attention)

2

In [9]:
# Run the layer once and unpack the result instead of doing two forward passes
# just to read two shapes.
# The second returned element is a pair: the edge index the attention was
# computed on and the attention coefficients per edge.
# NOTE(review): the recorded output has 27 attention edges for 20 input edges and
# 4 columns (matching heads=4) -- presumably due to GATConv's self-loop handling;
# confirm against the PyG documentation.
gat_out, (attn_edge_index, attn_weights) = gat_net(x, edge_index, return_attention_weights=True)
attn_edge_index.shape, attn_weights.shape

(torch.Size([2, 27]), torch.Size([27, 4]))

# Metrics

In [10]:
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score, silhouette_score, adjusted_rand_score, normalized_mutual_info_score, adjusted_mutual_info_score, calinski_harabasz_score, davies_bouldin_score

torch.Size([10, 5])

In [None]:
# Unsupervised clustering metrics: computed from the feature matrix and the
# predicted labels only (no ground truth needed).
# NOTE(review): `X` and `labels_pred` are not defined in the cells above; they must
# be defined before this cell is run.
# Fix: the score was stored as `silouette_avg` but printed as `silhouette_avg`,
# which raised a NameError. The name is now spelled consistently.
silhouette_avg = silhouette_score(X, labels_pred)
calinski_harabasz = calinski_harabasz_score(X, labels_pred)
davies_bouldin = davies_bouldin_score(X, labels_pred)

print(f'Silhouette Score: {silhouette_avg}')
print(f'Calinski-Harabasz Index: {calinski_harabasz}')
print(f'Davies-Bouldin Index: {davies_bouldin}')

In [None]:
# Supervised clustering metrics: each one compares the predicted labels
# against the ground-truth labels.
v_measure, ari, ami = (
    scorer(labels_true, labels_pred)
    for scorer in (v_measure_score, adjusted_rand_score, adjusted_mutual_info_score)
)

print(
    f'V-measure: {v_measure}',
    f'Adjusted Rand Index (ARI): {ari}',
    f'Adjusted Mutual Information (AMI): {ami}',
    sep='\n',
)

# SpatialGlue

In [1]:
import SpatialGlue

import os
import torch
import pandas as pd
import scanpy as sc

# Environment configuration. The SpatialGlue package runs on either CPU or GPU;
# GPU acceleration is highly recommended for improved efficiency.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Location of R, which the 'mclust' clustering algorithm requires. Uncomment the
# line below and replace the path with your local R installation path.
# os.environ['R_HOME'] = '/scbio4/tools/R/R-4.0.3_openblas/R-4.0.3'

ModuleNotFoundError: No module named 'torch'

In [None]:
# Read data
# Directory holding the downloaded dataset. Set the SPATIALGLUE_DATA_DIR environment
# variable to your download path; the original hard-coded location is kept as the
# default so existing setups keep working.
file_fold = os.environ.get(
    'SPATIALGLUE_DATA_DIR',
    '/home/yahui/anaconda3/work/SpatialGlue_revision/data/Dataset3_Mouse_Thymus1/',
)

# os.path.join works whether or not the directory ends with a path separator.
adata_omics1 = sc.read_h5ad(os.path.join(file_fold, 'adata_RNA.h5ad'))
adata_omics2 = sc.read_h5ad(os.path.join(file_fold, 'adata_protein.h5ad'))

# De-duplicate variable (feature) names in both modalities.
adata_omics1.var_names_make_unique()
adata_omics2.var_names_make_unique()

In [None]:
from SpatialGlue.preprocess import fix_seed

# Fix the random seed
random_seed = 2022
fix_seed(random_seed)

# Data type label passed on to graph construction and training
data_type = 'Stereo-CITE-seq'

## Preprocessing

In [None]:
# Preprocess both modalities and store a PCA representation of each under
# .obsm['feat']. The statements below are order-dependent; do not reorder them.
from SpatialGlue.preprocess import clr_normalize_each_cell, pca

# RNA
# Quality filtering of the RNA data: genes by min_cells, then cells by min_genes.
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.filter_cells(adata_omics1, min_genes=80)

# Filter protein features with their own min_cells threshold, then restrict the
# protein object to the cells (obs_names) that remain in the RNA object.
sc.pp.filter_genes(adata_omics2, min_cells=50)
adata_omics2 = adata_omics2[adata_omics1.obs_names].copy()

# Highly-variable-gene selection runs before normalization and log1p.
# NOTE(review): presumably because the "seurat_v3" flavor expects raw counts --
# confirm against the scanpy documentation.
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)

# PCA on the highly variable genes only. n_comps is taken from the protein
# feature count (n_vars - 1), the same value used for the protein PCA below.
adata_omics1_high =  adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = pca(adata_omics1_high, n_comps=adata_omics2.n_vars-1)

# Protein
# CLR-normalize the protein data, then reduce it with PCA.
adata_omics2 = clr_normalize_each_cell(adata_omics2)
adata_omics2.obsm['feat'] = pca(adata_omics2, n_comps=adata_omics2.n_vars-1)

In [None]:
# Construct the neighbor graph from both modalities
from SpatialGlue.preprocess import construct_neighbor_graph

data = construct_neighbor_graph(
    adata_omics1,
    adata_omics2,
    datatype=data_type,
)

## Training model

In [None]:
from SpatialGlue.SpatialGlue_pyG import Train_SpatialGlue

# Define the model on the selected device
model = Train_SpatialGlue(
    data,
    datatype=data_type,
    device=device,
)

# Train it; `output` is indexed by key in the next cell
output = model.train()

In [None]:
# Work on a copy of the RNA object and attach each model output to .obsm
# under the same key it has in `output`.
adata = adata_omics1.copy()
for obsm_key in (
    'emb_latent_omics1',
    'emb_latent_omics2',
    'SpatialGlue',
    'alpha',
    'alpha_omics1',
    'alpha_omics2',
):
    adata.obsm[obsm_key] = output[obsm_key]

## Cross-modality integrative analysis

In [None]:
from SpatialGlue.utils import clustering

# 'mclust' is the default clustering tool; 'leiden' and 'louvain' can be selected instead.
tool = 'mclust'
clustering(
    adata,
    key='SpatialGlue',
    add_key='SpatialGlue',
    n_clusters=8,
    method=tool,
    use_pca=True,
)

In [None]:
# Visualization: UMAP of the SpatialGlue embedding next to the spatial layout
import matplotlib.pyplot as plt

# Negate the second spatial coordinate in place.
# NOTE: this mutates adata, so re-running the cell negates the coordinate again.
adata.obsm['spatial'][:,1] = -1*adata.obsm['spatial'][:,1]

# Neighbor graph and UMAP computed on the SpatialGlue representation
sc.pp.neighbors(adata, use_rep='SpatialGlue', n_neighbors=30)
sc.tl.umap(adata)

fig, ax_list = plt.subplots(1, 2, figsize=(7, 3))
umap_ax, spatial_ax = ax_list
sc.pl.umap(
    adata, color='SpatialGlue', ax=umap_ax, title='SpatialGlue', s=20, show=False
)
sc.pl.embedding(
    adata, basis='spatial', color='SpatialGlue', ax=spatial_ax, title='SpatialGlue', s=20, show=False
)

plt.tight_layout(w_pad=0.3)
plt.show()

In [None]:
# Annotation: keep the numeric cluster ids in a separate column, then replace the
# category labels of 'SpatialGlue' with descriptive region names.
adata.obs['SpatialGlue_number'] = adata.obs['SpatialGlue'].copy()

cluster_annotations = {
    1: '5-Outer cortex region 3(DN T,DP T,cTEC)',
    2: '7-Subcapsular zone(DN T)',
    3: '4-Middle cortex region 2(DN T,DP T,cTEC)',
    4: '2-Corticomedullary Junction(CMJ)',
    5: '1-Medulla(SP T,mTEC,DC)',
    6: '6-Connective tissue capsule(fibroblast)',
    7: '8-Connective tissue capsule(fibroblast,RBC,myeloid)',
    8: '3-Inner cortex region 1(DN T,DP T,cTEC)',
}
# Fix: rename_categories(..., inplace=True) raises TypeError on pandas >= 2.0,
# where the `inplace` argument was removed. Reassigning the returned Series
# works on old and new pandas alike.
adata.obs['SpatialGlue'] = adata.obs['SpatialGlue'].cat.rename_categories(cluster_annotations)

In [None]:
# Explicit category order for the annotated labels
list_ = [
    '3-Inner cortex region 1(DN T,DP T,cTEC)',
    '2-Corticomedullary Junction(CMJ)',
    '4-Middle cortex region 2(DN T,DP T,cTEC)',
    '7-Subcapsular zone(DN T)',
    '5-Outer cortex region 3(DN T,DP T,cTEC)',
    '8-Connective tissue capsule(fibroblast,RBC,myeloid)',
    '1-Medulla(SP T,mTEC,DC)',
    '6-Connective tissue capsule(fibroblast)',
]
# Rebuild the column as an ordered categorical using that order
adata.obs['SpatialGlue'] = pd.Categorical(
    adata.obs['SpatialGlue'], categories=list_, ordered=True
)

In [None]:
# Plot again, now colored by the annotated cluster names
sc.pp.neighbors(adata, use_rep='SpatialGlue', n_neighbors=30)
sc.tl.umap(adata)

fig, ax_list = plt.subplots(1, 2, figsize=(9.5, 3))
umap_ax, spatial_ax = ax_list
sc.pl.umap(
    adata, color='SpatialGlue', ax=umap_ax, title='SpatialGlue', s=10, show=False
)
sc.pl.embedding(
    adata, basis='spatial', color='SpatialGlue', ax=spatial_ax, title='SpatialGlue', s=20, show=False
)

# Drop the legend from the UMAP panel only
umap_ax.get_legend().remove()

plt.tight_layout(w_pad=0.3)
plt.show()

In [None]:
# Renumber the numeric cluster ids so they follow the numeric prefixes of the
# annotation names (e.g. cluster 5 is annotated '1-Medulla...', so 5 becomes 1).
# list_SpatialGlue lists the original ids in the order that should become 1..8.
list_SpatialGlue = [5,4,8,3,1,6,2,7]
adata.obs['SpatialGlue_number']  = pd.Categorical(adata.obs['SpatialGlue_number'],
                      categories=list_SpatialGlue,
                      ordered=True)

# Derive the old -> new mapping from the list so the two cannot drift apart:
# {5: 1, 4: 2, 8: 3, 3: 4, 1: 5, 6: 6, 2: 7, 7: 8}
renumbering = {old_id: new_id for new_id, old_id in enumerate(list_SpatialGlue, start=1)}

# Fix: rename_categories(..., inplace=True) raises TypeError on pandas >= 2.0,
# where the `inplace` argument was removed. Reassign the returned Series instead.
adata.obs['SpatialGlue_number'] = adata.obs['SpatialGlue_number'].cat.rename_categories(renumbering)

In [None]:
# Plot the per-cluster distribution of the modality weights stored in
# adata.obsm['alpha'] (column 0 is plotted as RNA, column 1 as protein).
import pandas as pd
import seaborn as sns
plt.rcParams['figure.figsize'] = (5,3)

# Build the wide table in one step instead of filling an empty DataFrame.
modality_weights = pd.DataFrame({
    'RNA': adata.obsm['alpha'][:, 0],
    'protein': adata.obsm['alpha'][:, 1],
    'label': adata.obs['SpatialGlue_number'].values,
})
# Reshape to long form: one row per (cluster label, modality) with its weight.
df = modality_weights.set_index('label').stack().reset_index()
df.columns = ['label_SpatialGlue', 'Modality', 'Weight value']

# Fix: `show=False` is a scanpy plotting argument, not a seaborn one. Newer seaborn
# forwards unknown keyword arguments to matplotlib, where it raises, so it is dropped.
ax = sns.violinplot(data=df, x='label_SpatialGlue', y='Weight value', hue="Modality",
                split=True, inner="quart", linewidth=1)
ax.set_title('RNA vs protein')
ax.set_xlabel('SpatialGlue label')
ax.legend(bbox_to_anchor=(1.4, 1.01), loc='upper right')

plt.tight_layout(w_pad=0.05)
# Show the figure explicitly, as the other plotting cells do.
plt.show()