# Tutorial: Integrating mouse embryo slices from E9.5-E16.5
This tutorial demonstrates STIntg's ability to integrate eight mouse embryo slices sampled at the developmental stages E9.5, E10.5, E11.5, E12.5, E13.5, E14.5, E15.5, and E16.5, profiled by Stereo-seq. The raw data can be downloaded from https://db.cngb.org/stomics/mosta/.

## Preparation

In [None]:
# Install a blanket "ignore" filter so that every Python warning raised by the
# following cells is suppressed.
# NOTE(review): this also hides library warnings about unexpected input data;
# consider disabling it when adapting the tutorial to new data.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Location of R, exported through the R_USER environment variable before the
# remaining packages are imported (the original note says it is used for the
# mclust clustering).
# NOTE(review): none of the cells visible here call mclust -- the clustering
# cell uses Louvain -- so confirm whether this setting is still required.
import os
os.environ['R_USER'] =  '/usr/lib/R'

import anndata as ad
import numpy as np
import scanpy as sc
import torch
import matplotlib.pyplot as plt

import STIntg

# Use the first CUDA GPU when PyTorch reports one as available, otherwise the CPU.
# This device object is later handed to STIntg.train.
used_device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## Load Data

In [None]:
# Per-slice results, kept in the same order as `section_ids`.
Batch_list = []  # preprocessed AnnData object of every slice
adj_list = []    # the `adata.uns['adj']` entry of every slice

# Directory that holds the downloaded MOSTA .h5ad files (one file per section).
data_dir = "./tutorial_data/chen2022spatiotemporal"

section_ids = ['E9.5_E1S1', 'E10.5_E2S1', 'E11.5_E1S1', 'E12.5_E1S1', 'E13.5_E1S1', 'E14.5_E1S1', 'E15.5_E1S1', 'E16.5_E1S1']
for section_id in section_ids:
    print(section_id)
    # Pass directory and file name as separate components so os.path.join
    # actually builds the path (a single pre-concatenated argument is a no-op).
    adata = sc.read_h5ad(os.path.join(data_dir, section_id + ".MOSTA.h5ad"))
    # adata.X = adata.layers['count']
    # NOTE(review): the line above was already disabled in the original cell, so
    # whether X holds raw counts at this point should be confirmed.

    # make spot name unique across slices by appending the section id
    adata.obs_names = [f"{spot}_{section_id}" for spot in adata.obs_names]

    # Build the spatial graph of this slice; presumably this is what populates
    # adata.uns['adj'], which is collected at the end of the iteration.
    STIntg.Cal_Spatial_Net(adata, rad_cutoff=1.3)

    # Normalization
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    # NOTE(review): scanpy documents flavor="seurat_v3" as expecting raw count
    # data, yet it runs here on normalized, log-transformed values. The order is
    # kept so the tutorial's gene selection is unchanged -- confirm whether the
    # HVG step should move above the normalization.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=8000) #ensure enough common HVGs in the combined matrix
    # .copy() turns the column subset into an independent object; a bare slice is
    # only a view that keeps the full, unfiltered matrix of every slice alive.
    adata = adata[:, adata.var['highly_variable']].copy()

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)


## Concatenate the AnnData objects of all slices

In [None]:
# Stack the per-slice objects into one AnnData; the `slice_name` column of .obs
# records which section id (taken from `section_ids`) every spot came from.
adata_concat = ad.concat(Batch_list, keys=section_ids, label="slice_name")

# `batch_name` is the part of the section id before the first underscore
# (e.g. 'E9.5' for 'E9.5_E1S1'), stored as a categorical column.
stage_labels = adata_concat.obs["slice_name"].str.split('_').str[0]
adata_concat.obs["batch_name"] = stage_labels
adata_concat.obs["batch_name"] = adata_concat.obs["batch_name"].astype('category')
print('adata_concat.shape: ', adata_concat.shape)

# Combine the per-slice `adj` entries with STIntg.adj_concat and keep the result
# under uns['edgeList'] (presumably the graph consumed by STIntg.train -- verify).
combined_edges = STIntg.adj_concat(adj_list)
adata_concat.uns['edgeList'] = combined_edges

## Running STIntg

In [None]:
# Important parameter:
# "iter_comb" is used to specify the order of integration. Each entry is a pair
# of indices into `section_ids`.
last_idx = len(section_ids) - 1
# Pairs of adjacent sections (e.g., (0,1), (1,2), ...)
adjacent_comb = [(i, i + 1) for i in range(last_idx)]
# Every earlier section paired with the last section (e.g., (0,7), (1,7), ...)
last_comb = [(i, last_idx) for i in range(last_idx)]
# Combine and deduplicate the two lists. dict.fromkeys keeps the first occurrence
# of each pair in the order listed (adjacent pairs first, then pairs with the
# last section); list(set(...)) would instead return them in hash order.
iter_comb = list(dict.fromkeys(adjacent_comb + last_comb))

print(f'iter_comb: {iter_comb}')
# Spatial-graph settings forwarded to STIntg.train; the radius cutoff matches the
# value given to STIntg.Cal_Spatial_Net when the slices were loaded.
spatial_net_args = dict(rad_cutoff=1.3, model='Radius')

# Train on the concatenated slices. The call returns an AnnData object that
# replaces `adata_concat`. The meaning of each keyword is defined by STIntg.train;
# the values below are the settings used for this tutorial.
adata_concat = STIntg.train(
    adata_concat,
    iter_comb=iter_comb,
    spatial_net_args=spatial_net_args,
    knn_neigh=100,
    alpha=10,
    batch_data=True,
    pretrain_epochs=200,
    n_epochs=400,
    device=used_device,
    verbose=True,
)

## Clustering

In [None]:
# Build the neighbor graph from the representation stored under the 'STIntg' key
# (presumably the embedding written by STIntg.train -- verify).
sc.pp.neighbors(
    adata_concat,
    use_rep='STIntg',
)
# Louvain clustering on that graph with a fixed seed; labels are stored under the
# "louvain" key.
sc.tl.louvain(
    adata_concat,
    resolution=0.4,
    key_added="louvain",
    random_state=666,
)