In [None]:
## 01 Trajectory Inference
import scanpy as sc
import os
import pandas as pd
from cellrank.tl.kernels import CytoTRACEKernel,ConnectivityKernel
from cellrank.tl.estimators import GPCCA
import scanpy.external as sce
import palantir

import matplotlib
import matplotlib.pyplot as plt
import warnings
from numba.core.errors import NumbaDeprecationWarning

# Silence numba deprecation warnings and scanpy's "No data for colormapping"
# messages so the plotting output below stays readable.
warnings.filterwarnings(action="ignore", category=NumbaDeprecationWarning)
warnings.filterwarnings(
    action="ignore", module="scanpy", message="No data for colormapping"
)

# Load the AnnData object that the rest of this cell operates on.
adata=sc.read("/path/to/dsc.h5ad")

# Build a transition matrix with ConnectivityKernel and fit a GPCCA estimator on it.
ck = ConnectivityKernel(adata)
ck.compute_transition_matrix()
g_pv = GPCCA(ck)
# Schur decomposition with 20 components; the spectrum plot is shown for inspection.
g_pv.compute_schur(n_components=20)
g_pv.plot_spectrum(real_only=True)
# Compute 10 macrostates, passing the "subtype" annotation as cluster_key.
g_pv.compute_macrostates(n_states=10, cluster_key="subtype")
g_pv.plot_macrostates(discrete=True, legend_loc="right", size=100, basis="X_umap")
# DSC4 and DSC3 are declared terminal; DSC0 is declared initial.
g_pv.set_terminal_states_from_macrostates(["DSC4","DSC3"])
# NOTE(review): underscore-prefixed (private by convention) method; presumably it
# populates adata.obs["initial_states_probs"], which is read further down — confirm.
g_pv._set_initial_states_from_macrostates("DSC0")
g_pv.compute_absorption_probabilities()
g_pv.plot_absorption_probabilities(same_plot=False)

# --- Palantir pseudotime, seeded from the CellRank initial state ---------------
# Root cell = the cell with the highest initial-state probability.
# The original code read this from an undefined `trophoblast` object and then
# indexed with an undefined `root_cell`; both lookups belong to `adata`.
start_cell = adata.obs['initial_states_probs'].idxmax()

# NOTE(review): the next two lines are kept as written because the intended
# source of the terminal cells cannot be determined from this notebook.
# If `terminal_states_probabilities` is an obs column, the first idxmax already
# returns a single cell label and the second idxmax would fail on it.
# run_palantir below is handed `terminal_states`, and its branch_probs columns
# are later used as obs labels, so this presumably should end up as a
# collection of cell ids (one per terminal state) — confirm and fix.
end_cell=adata.obs.terminal_states_probabilities.idxmax(axis=0)
terminal_states=end_cell.idxmax(axis=0)

# Diffusion maps on the Harmony-corrected PCA, multiscale space, MAGIC imputation.
dm_res = palantir.utils.run_diffusion_maps(adata, n_components=5,pca_key='X_pca_harmony')
ms_data = palantir.utils.determine_multiscale_space(adata)
imputed_X = palantir.utils.run_magic_imputation(adata)

pr_res = palantir.core.run_palantir(
    adata, start_cell, num_waypoints=500, terminal_states=terminal_states)

# Rename the branch-probability columns (looked up as obs labels) to the
# 'subtype' annotation of the corresponding cells.
pr_res.branch_probs.columns = adata.obs.loc[pr_res.branch_probs.columns,'subtype']
adata.obs['palantir_pseudotime']=pr_res.pseudotime
sc.pl.umap(adata,color='palantir_pseudotime',cmap='viridis')


## plot trajectories on UMAP
# First call: trajectories restricted to the pseudotime interval (0, 0.9).
palantir.plot.plot_trajectories(adata, pseudotime_interval=(0, .9))
# Second call: same interval, with cell_color="branch_selection".
# NOTE(review): presumably requires a branch-selection step that is not visible
# in this cell — confirm it has been run before this line.
palantir.plot.plot_trajectories(adata, cell_color = "branch_selection", pseudotime_interval=(0, .9))


##stream plot
import math
from streamutils import *
# Gradient_calculator comes from the star import above (streamutils).
time_key="palantir_pseudotime"
a=Gradient_calculator(adata,obsm_key='X_umap',pseudotime_key=time_key)
plt.rcParams['savefig.bbox'] = 'tight'
a.calculate_p_mass(n_grid=20)
a.suggest_mass_thresholds()
a.calculate_mass_filter(min_mass=10,plot=True)
# Matplotlib renamed the bundled seaborn styles to "seaborn-v0_8*" (3.6) and
# later removed the old names, so plt.style.use("seaborn") fails on current
# releases. Prefer the new name and fall back to the old one on older installs.
_seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(_seaborn_style)
a.transfer_data_into_grid()
a.calculate_gradient()
a.plot_reference_flow_on_grid(density=5)
a.plot_dev_flow_on_grid()
a.visualize_results(plot_type="stream",s=11,cmap="rainbow")

In [None]:
## 02 Composition changes over time

# WOTKernel was used without ever being imported (NameError on a fresh kernel).
# The module path matches the CellRank 1.x layout this notebook already uses
# (`cellrank.tl.*`).
from cellrank.external.kernels import WOTKernel

sc.pp.neighbors(adata,n_neighbors=40, n_pcs=50, use_rep='X_pca')
# Time points are given in gestational weeks ('GW'); convert to days.
adata.obs['Days']=adata.obs['GW']*7.0
wk = WOTKernel(adata, time_key="Days")
wk.compute_initial_growth_rates(organism="human", key_added="growth_rate_init")
sc.pl.scatter(adata, color="growth_rate_init", legend_loc="right", basis="X_umap", s=10)
wk.compute_transition_matrix(growth_iters=3, growth_rate_key="growth_rate_init", last_time_point="connectivities")
groups=['DSC0','DSC1','DSC2','DSC3','DSC4']
# Flow out of DSC0 into the listed subtypes over time; show=False so the
# returned axes can be adjusted below.
ax = wk.plot_single_flow(cluster_key="subtype",cluster="DSC0",clusters=groups,time_key="Days",min_flow=0,
                         xticks_step_size=4,show=False,dpi=100)
# Re-apply the current tick positions/labels with vertical text.
locs, labels = plt.xticks()
ax.set_xticks(locs)
ax.set_xticklabels(labels, rotation=90)

In [None]:
## 03 Differential Expression Analysis 

# Path A vs Path B
import numpy as np  # used below; previously only imported in a later cell
import seaborn as sns

# Subtype -> path assignment. The original dict listed 'DSC3' twice, so the
# later entry silently won (DSC3 -> "B") and DSC4 was left unmapped (NaN).
# DSC4 is the only subtype without an entry, so it takes the second slot.
path_by_subtype = {'DSC0':'None', 'DSC1':"A", 'DSC3':"A", 'DSC2':"B", 'DSC4':"B"}
adata.obs['Path']=adata.obs['subtype'].astype('str').map(path_by_subtype)

# Rank ALL genes (no n_genes cap): the plot below needs negative scores for the
# "B" group and looks up a fixed list of genes, neither of which is possible
# when only the top 30 genes are kept.
sc.tl.rank_genes_groups(adata,groupby="Path",groups=["A","B"],
                        method="wilcoxon",reference="B")

# rank_genes_groups_df returns gene names in the 'names' column with a plain
# integer index; index by gene so the lookups below work.
df=sc.get.rank_genes_groups_df(adata,group=['A']).set_index('names')
mean_exp=pd.DataFrame(np.array(adata.X.mean(axis=0)).ravel(),index=adata.var_names,columns=['basemean'])
# reindex (rather than .loc) so genes missing from adata.var_names become NaN
# instead of raising.
df['basemean']=mean_exp['basemean'].reindex(df.index)
# Classify genes by test score: > 5 -> path A, < -5 -> path B, else background.
df['group']='BG'
df.loc[df['scores'].gt(5),'group']="A"
df.loc[df['scores'].lt(-5),'group']="B"

genestolabel=['IGFBP1','LGR4','TIMP3','SEMA3A','DOCK1','PRLR',"PRL",'DOCK4','SAT1',"ITGA1","ITGB1","THSD1",'LUM',
     "VIM","B2M",'CD63','TIMP1','TIMP2','CD81','IGFBP2','HLA-C','HLA-A','HLA-B','CD248','TPM2','PPIA','S100A11',
     'CD28','TPT1','SERPINF1','PPIB','C1R','OAZ1','CD59','IGFBP5','GJA1','IGFBP4']
# No trailing comma after the call: the original made `ax` a 1-tuple.
ax = sns.scatterplot(x="basemean",        # horizontal axis: mean expression
                     y='logfoldchanges',  # vertical axis: log fold change
                     data=df,
                     size = 1,hue='group',alpha=0.7,palette='coolwarm_r',linewidth=0,hue_order=["B","BG","A"])

plt.xlabel('Mean exp')
plt.ylabel('Log2FC(Path A vs B)')

def label_point(x, y, val, ax):
    """Write each value of `val` as text next to its (x, y) point on `ax`.

    `x`, `y` and `val` are pandas Series sharing an index; they are aligned
    column-wise and one text label is drawn per row, shifted 0.02 to the right.
    """
    a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for i, point in a.iterrows():
        ax.text(point['x']+.02, point['y'], str(point['val']),size=8)

# Label only genes that are actually in the result table; the label text is
# the gene name itself (there is no 'Gene name' column in `df`).
labelled=[g for g in genestolabel if g in df.index]
label_point(df.loc[labelled,'basemean'], df.loc[labelled,'logfoldchanges'],
            pd.Series(labelled,index=labelled), ax)
plt.ylim((-5,5))

## Between DSC3 subclusters

# Work on an explicit copy: sc.tl.louvain writes into .obs/.uns, which on a
# view triggers an implicit copy and a warning.
dsc3=adata[adata.obs.subtype=="DSC3",:].copy()
sc.tl.louvain(dsc3,resolution=0.1)
# Cell ids of the first two Louvain clusters found within DSC3.
dec31=dsc3.obs.loc[dsc3.obs.louvain=="0",:].index
dec32=dsc3.obs.loc[dsc3.obs.louvain=="1",:].index
# Start from the subtype labels and overwrite the DSC3 cells that fall into the
# two clusters; any other DSC3 cell keeps the plain "DSC3" label.
adata.obs['subclusters']=adata.obs['subtype'].astype('str')
adata.obs.loc[dec31,"subclusters"]="DSC3_1"
adata.obs.loc[dec32,"subclusters"]="DSC3_2"
# The original passed undefined names (`dec`, `dsc`) here. The 'subclusters'
# column is written to `adata` just above, so `adata` is the object to use.
sc.tl.rank_genes_groups(adata,groupby="subclusters",groups=["DSC3_1","DSC3_2"],
                        method="wilcoxon",reference="DSC3_1",n_genes=30)
sc.pl.embedding(adata,color=['TIMP1','SERPINF1','HSPB1','DCN','HLA-A','HLA-B','HLA-C','B2M','CD81','C1S','C1R','C3',
            'PPIA','UBB','UBC','APOE'],basis="X_umap",cmap='viridis_r',vmin=0.5,frameon=False,palette='Set1')
sc.pl.embedding(adata,color="subclusters",basis="X_umap",groups=['DSC3_1','DSC3_2'],
                palette='Set1')



In [None]:
## 04 Invasive EVTs counting on STOMICS

import squidpy as sq
import seaborn as sns
import pandas as pd
import numpy as np

# Build the spatial neighbour graph with 5 neighbours per cell, using the
# coordinates registered under spatial_key='shift_spatial'. set_diag=True is
# passed, so code that walks the graph must skip self-connections itself.
# NOTE(review): `adata` here is expected to be the spatial (STOMICS) dataset;
# no load step for it is visible in this cell — confirm where it comes from.
sq.gr.spatial_neighbors(adata, set_diag=True,coord_type=None,spatial_key='shift_spatial', 
                        delaunay=False,n_neighs=5)

# obs rows of all DSC cells, then the cell ids of DSC4, DSC3 and every other
# DSC subtype.
dsc=adata.obs.loc[adata.obs.celltype=='DSC',:]
DSC4=dsc.loc[dsc.subtype.isin(['DSC4'])].index
DSC3=dsc.loc[dsc.subtype.isin(['DSC3'])].index
otherDSC=dsc.loc[~dsc.subtype.isin(['DSC3','DSC4'])].index

## Searching spatial neighbors of DSCs
def neighbor_search(adata, cells, spatial_key='spatial_connectivities'):
    """Return the unique obs names adjacent to any of `cells` in a spatial graph.

    Parameters
    ----------
    adata
        Object exposing `.obs.index` (cell ids) and `.obsp[spatial_key]`
        (a sparse matrix whose rows expose `.indices`).
    cells
        Cell ids whose neighbours are collected; ids not present in
        `adata.obs.index` are ignored.
    spatial_key
        Key of the graph in `adata.obsp`.

    Returns
    -------
    list
        De-duplicated neighbour ids, in arbitrary order. A query cell never
        counts as its own neighbour, but it is returned if it neighbours
        another query cell.
    """
    obs_names = adata.obs.index
    graph = adata.obsp[spatial_key]
    found = set()
    # Walk the graph row of every query cell and collect the stored column ids.
    for row in np.flatnonzero(obs_names.isin(cells)):
        for col in graph[row].indices:
            if col != row:  # drop the self-connection
                found.add(obs_names[col])
    return list(found)

## locate neighboring cell ids
# EVT cells adjacent to each DSC group. The index sets are named DSC3 / DSC4 /
# otherDSC; the original passed `dsc3` / `dsc4`, which are either undefined or
# a leftover AnnData object from an earlier cell.
evt_ids = adata.obs.loc[adata.obs['subclass']=='EVT'].index
dsc3_neighbors = neighbor_search(adata,DSC3)
dsc3_evts = np.intersect1d(dsc3_neighbors,evt_ids)

dsc4_neighbors = neighbor_search(adata,DSC4)
dsc4_evts = np.intersect1d(dsc4_neighbors,evt_ids)

otherdsc_neighbors = neighbor_search(adata,otherDSC)
otherdsc_evts = np.intersect1d(otherdsc_neighbors,evt_ids)

# Keep only EVTs that neighbour exactly one of the three DSC groups.
groups1 = list(set(dsc3_evts) - set(dsc4_evts) - set(otherdsc_evts))
groups2 = list(set(dsc4_evts) - set(dsc3_evts) - set(otherdsc_evts))
groups3 = list(set(otherdsc_evts) - set(dsc3_evts) - set(dsc4_evts))

## iScore comparisons
# Copy the obs table so the 'group' column is not written into a slice of adata.
evt=adata.obs.loc[evt_ids].copy()
evt['group']='allEVTs'
# The lists are named groups1..3 (the original referenced undefined group1..3).
evt.loc[groups1,'group']='nearDSC3'
evt.loc[groups2,'group']='nearDSC4'
evt.loc[groups3,'group']='otherDSC'
# `color='group'` was dropped: `color` expects a colour, not a column name.
sns.violinplot(data=evt, x='group',y='iScore')

In [None]:
## 05 scRNA-seq Preprocessing in vitro decidualization

import anndata as ad
import os
import scanpy as sc
datadir="/path/to/cellranger/mAEA/"
# Sorted so the per-sample order (and therefore the "ID" labels assigned by
# ad.concat below) is the same on every run; os.listdir order is arbitrary.
files=sorted(os.listdir(datadir))
adata_list=[]

## merging adata
for i in files:
    # Path components must not start with "/": os.path.join discards everything
    # before an absolute component, so the original always resolved to
    # "/outs/filtered_feature_bc_matrix/" regardless of datadir and sample.
    tmp=sc.read_10x_mtx(os.path.join(datadir,i,"outs","filtered_feature_bc_matrix"))
    tmp.obs['dataset']=i
    tmp.var_names_make_unique("-")
    # Prefix barcodes with the sample name so they stay unique after merging
    # (plain Python, no dependency on pandas being imported in this cell).
    tmp.obs.index=[i+"_"+barcode for barcode in tmp.obs.index]
    adata_list.append(tmp)
adata=ad.concat(adata_list,label="ID")
adata.obs['ID']=adata.obs['ID'].astype('str')

## QC and integrating
sc.pl.highest_expr_genes(adata, n_top=20, )
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=0)
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],jitter=0.4, multi_panel=True)
sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
# Keep cells with < 5000 detected genes and < 20 % mitochondrial counts.
# Materialise the subset with .copy(): the in-place preprocessing below would
# otherwise run on a view and trigger an implicit copy plus a warning.
keep_cells = (adata.obs.n_genes_by_counts < 5000) & (adata.obs.pct_counts_mt < 20)
adata = adata[keep_cells, :].copy()

sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5,)
sc.pl.highly_variable_genes(adata,)
# Freeze the normalised, log-transformed matrix (all genes) before subsetting
# to highly variable genes and scaling.
adata.raw = adata
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log=True)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=50)
sc.tl.umap(adata,random_state=44)
sc.tl.louvain(adata)
sc.pl.umap(adata, color=['louvain','CNR1', 'ACTA2', 'IGFBP1','dataset'], use_raw=True,vmax='p99')
# Written from .raw, i.e. with the unscaled all-gene matrix as X.
adata.raw.to_adata().write("mAEA_processed.h5ad")

## Cell type identification

# Marker table with one score column per subtype; take the 50 top-scoring
# genes for DSC3 and DSC4.
dsc_markers=pd.read_table('/path/to/dsc_markers.tsv',sep="\t",index_col=0,header=0)
set_a=dsc_markers.sort_values('DSC3_scores',ascending=False).index[:50]
set_b=dsc_markers.sort_values('DSC4_scores',ascending=False).index[:50]
# Drop genes whose name contains RPS, RPL or MT- from both lists.
# The original built set_b from set_a (copy-paste slip), so the "DSC4" score
# was computed from the DSC3 gene list.
set_a=[i for i in set_a if 'RPS' not in i and 'RPL' not in i and 'MT-' not in i]
set_b=[i for i in set_b if 'RPS' not in i and 'RPL' not in i and 'MT-' not in i]
sc.tl.score_genes(adata,gene_list=set_a,score_name="DSC3_score",use_raw=True)
sc.tl.score_genes(adata,gene_list=set_b,score_name="DSC4_score",use_raw=True)
sc.pl.umap(adata, color=['louvain','DSC4_score'], use_raw=False,vmin=-0.4,vmax=0.4)



In [None]:
##06 DEG mAEA vs EtOH
import pandas as pd
# One sheet per cluster with the mAEA-vs-EtOH Wilcoxon results.
# NOTE(review): obs['clusters'] and obs['Condition'] are not created in the
# visible cells — confirm they are added before this cell runs.
# The context manager saves and closes the workbook, also on error;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter("/path/to/mAEA_vs_EtOH.xlsx") as writer:
    for i in adata.obs.clusters.unique():
        # Copy so rank_genes_groups writes its results into a real object, not a view.
        adata2=adata[adata.obs.clusters==i,:].copy()
        sc.tl.rank_genes_groups(adata2,groupby='Condition',groups=['mAEA','EtOH'],reference='EtOH',method='wilcoxon')
        # Keep the table in `df`; the original assigned to_excel()'s None to it.
        df=sc.get.rank_genes_groups_df(adata2,group='mAEA')
        df.to_excel(writer,
                    sheet_name="{group}".format(group='mAEA_vs_EtOH_'+str(i)),header=True,index=True)



In [None]:
## 07 GSEA mAEA vs EtOH

import pandas as pd
import gseapy as gp
import matplotlib.pyplot as plt
import os
import numpy as np
import scanpy as sc
import seaborn as sns
wdir='/path/to/gsea_ouptut/'
gene_set_names = gp.get_library_name(organism='Human')
print(gene_set_names)

gene_set='GO_Biological_Process_2025'
by='logfoldchanges'
# One DEG table per cluster. The original read the DSC3 file twice, so the run
# written to '/dsc4/' was a duplicate of the DSC3 analysis.
# NOTE(review): the DSC4 file name is inferred from the DSC3 one — confirm.
df1=pd.read_table("mAEA_vs_EtOH_DSC3.tsv",sep='\t',index_col=0,header=0)
df2=pd.read_table("mAEA_vs_EtOH_DSC4.tsv",sep='\t',index_col=0,header=0)

# DSC3: rank genes by log fold change (descending) and run preranked GSEA.
# sort_values returns a new frame, avoiding an in-place sort on a slice of df1.
gene_rank=df1.loc[:,['gene',by]].sort_values(by=[by], ascending=False)
res_dsc3=gp.prerank(rnk=gene_rank, gene_sets=gene_set,permutation_num=1000,outdir=wdir+'/dsc3/')
nes_dsc3=res_dsc3.res2d.sort_index(ascending=True)

# DSC4: same procedure on the second table.
gene_rank=df2.loc[:,['gene',by]].sort_values(by=[by], ascending=False)
res_dsc4=gp.prerank(rnk=gene_rank, gene_sets=gene_set,permutation_num=1000,outdir=wdir+'/dsc4/')
nes_dsc4=res_dsc4.res2d.sort_index(ascending=True)

# `res` / `nes` keep pointing at the last (DSC4) run, as before; the DSC3
# results are no longer overwritten and stay available under their own names.
res, nes = res_dsc4, nes_dsc4