In [None]:
## 01 Trajectory analysis with Palantir (see https://github.com/dpeerlab/Palantir/blob/master/notebooks/Palantir_sample_notebook.ipynb)
import scanpy as sc
import os
import pandas as pd
from cellrank.tl.kernels import CytoTRACEKernel,ConnectivityKernel
from cellrank.tl.estimators import GPCCA
import scanpy.external as sce
import palantir

import matplotlib
import matplotlib.pyplot as plt
import warnings
from numba.core.errors import NumbaDeprecationWarning

# Keep the cell output readable: drop numba deprecation notices and the
# "No data for colormapping" message coming from the scanpy module.
warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
warnings.filterwarnings("ignore", message="No data for colormapping", module="scanpy")

# AnnData object analysed in this cell.
adata = sc.read("/path/to/trophoblast.h5ad")

# Macrostates used as trajectory end points and as the starting point.
terminal_macrostates = ['eEVT', 'SCT_b', 'iEVT', "SCT_a"]
initial_macrostate = "VCT_div"

# Transition matrix from the connectivity kernel.
ck = ConnectivityKernel(adata)
ck.compute_transition_matrix()

# GPCCA estimator on top of that kernel: Schur decomposition, then macrostates
# computed with the "subtype" annotation as cluster key.
g_pv = GPCCA(ck)
g_pv.compute_schur(n_components=20)
g_pv.plot_spectrum(real_only=True)
g_pv.compute_macrostates(cluster_key="subtype", n_states=10)
g_pv.plot_macrostates(basis="X_umap", discrete=True, size=100, legend_loc="right")

# Declare terminal and initial states, then compute and plot the absorption
# probabilities (one panel per state because same_plot=False).
g_pv.set_terminal_states_from_macrostates(terminal_macrostates)
g_pv._set_initial_states_from_macrostates(initial_macrostate)  # private API of the estimator
g_pv.compute_absorption_probabilities()
g_pv.plot_absorption_probabilities(same_plot=False)

# Root cell for Palantir: the cell (obs index label) with the highest
# initial-state probability. The original referenced `trophoblast` and
# `root_cell`, neither of which is defined in this notebook; `adata` is the
# object loaded above.
start_cell=adata.obs.initial_states_probs.idxmax()

# NOTE(review): these two lines cannot work as written. `idxmax` on an obs
# column returns a single index label, and a label has no `.idxmax`, so the
# second line raises AttributeError. Presumably the goal is one representative
# cell per terminal state (the cell with the highest probability for that
# state), passed to Palantir as `terminal_states`. Confirm which obs/obsm field
# holds the per-state probabilities before rewriting.
end_cell=adata.obs.terminal_states_probabilities.idxmax(axis=0)
terminal_states=end_cell.idxmax(axis=0)

# Palantir preprocessing on the Harmony-corrected PCA: diffusion maps,
# multiscale space, and MAGIC imputation.
dm_res = palantir.utils.run_diffusion_maps(adata, n_components=20,pca_key='X_pca_harmony')
ms_data = palantir.utils.determine_multiscale_space(adata)
imputed_X = palantir.utils.run_magic_imputation(adata)

# Pseudotime and fate probabilities from the chosen start cell towards the
# selected terminal cells.
pr_res = palantir.core.run_palantir(
    adata, start_cell, num_waypoints=500, terminal_states=terminal_states)

# Relabel the fate-probability columns with the subtype of each terminal cell.
# NOTE(review): assumes the columns are cell identifiers present in adata.obs.
pr_res.branch_probs.columns = adata.obs.loc[pr_res.branch_probs.columns,'subtype']
adata.obs['palantir_pseudotime']=pr_res.pseudotime
sc.pl.umap(adata,color='palantir_pseudotime',cmap='viridis')

# Branch masks must exist before any trajectory plot: the trajectory plots read
# the per-branch cell selection created by select_branch_cells, so this step is
# moved ahead of plot_trajectories (it originally came after).
masks = palantir.presults.select_branch_cells(adata, q=.01, eps=.01)
palantir.plot.plot_branch_selection(adata)

## plot trajectories on UMAP
palantir.plot.plot_trajectories(adata, pseudotime_interval=(0, .9))
palantir.plot.plot_trajectories(adata, cell_color = "branch_selection", pseudotime_interval=(0, .9))

In [None]:
## 02 Composition analysis of STOMICS

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42


def _composition_by_distance(df, distance_col, bins, celltype_col="celltype"):
    """Bin cells by signed distance and compute cell-type proportions per bin.

    Adds (or overwrites) a ``distance_bin`` column on ``df``, as the original
    inline code did.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell; must contain ``distance_col`` and ``celltype_col``.
    distance_col : str
        Column holding the signed distance that is binned.
    bins : list of numbers
        Bin edges passed to ``pandas.cut``.
    celltype_col : str
        Column holding the cell-type label.

    Returns
    -------
    (composition, composition_pivot)
        Long table with ``count`` and ``proportion`` per (bin, cell type), and
        the wide bin x cell-type table of proportions with missing values as 0.
    """
    df['distance_bin'] = pd.cut(df[distance_col], bins)
    # observed=False keeps empty bins in the result, independent of the pandas
    # default for categorical groupers.
    composition = (
        df.groupby(['distance_bin', celltype_col], observed=False)
        .size()
        .reset_index(name='count')
    )
    # Empty bins give 0/0 = NaN here; they become 0 in the pivot below.
    composition['proportion'] = (
        composition.groupby('distance_bin', observed=False)['count']
        .transform(lambda x: x / x.sum())
    )
    composition_pivot = composition.pivot(
        index="distance_bin", columns=celltype_col, values="proportion"
    ).fillna(0)
    return composition, composition_pivot


def _plot_composition(composition_pivot, xlabel):
    """Draw a stacked bar chart of per-bin cell-type proportions and return its Axes."""
    # DataFrame.plot opens its own figure when figsize is given, so no separate
    # plt.figure() call is needed (the original left an empty figure behind).
    ax = composition_pivot.plot(kind="bar", stacked=True, colormap="tab20", figsize=(10, 5))
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Proportion of Cell Type")
    ax.tick_params(axis="x", labelrotation=45)
    return ax


df_bv=pd.read_table("/path/to/distance_to_bv.tsv",sep='\t',index_col=0,header=0)
df_interface=pd.read_table("/path/to/distance_to_interface.tsv",sep='\t',index_col=0,header=0)

## interface
bins=[-4000,-2000,-1500,-1000,-500,-250,0,250,500,1000,1500,2000,4000]
composition, composition_pivot = _composition_by_distance(df_interface, 'signed_distance_interface', bins)
_plot_composition(composition_pivot, "Distance from interface")

#BV
bins=[-1000,-100,0,50,100,150,200,300,500,1000]
composition, composition_pivot = _composition_by_distance(df_bv, 'signed_distance_bv', bins)
# The BV plot had no axis labels in the original; label it like the interface plot.
_plot_composition(composition_pivot, "Distance from BV")

In [None]:
## 03 Differentially expressed genes in EVT subtypes



def plot_scatter(data,x,y,group,degs,adjust=True,**kwargs):
    """Scatter two columns of ``data`` coloured by ``group`` and annotate selected rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose index contains the labels listed in ``degs``.
    x, y : str
        Column names plotted on the x- and y-axis.
    group : str
        Column passed to ``seaborn.scatterplot`` as ``hue``.
    degs : list of str
        Index labels of the rows to annotate. Every label must be present in
        ``data.index``; ``.loc`` raises ``KeyError`` otherwise.
    adjust : bool, default True
        If true, reposition the annotations with adjustText's ``adjust_text``
        to reduce overlap.
    **kwargs
        Forwarded unchanged to ``seaborn.scatterplot``.

    Returns
    -------
    The Axes object returned by ``seaborn.scatterplot``.
    """
    # Open a fresh 10x10 figure; seaborn then draws on its current axes.
    plt.figure(figsize=(10,10))
    ax=sns.scatterplot(data=data,x=x,y=y,hue=group,size =2, linewidth=0, **kwargs)
    labeled_x,labeled_y=data.loc[degs,x],data.loc[degs,y]
    # Iterate over values instead of `labeled_x[i]`: integer keys on a
    # label-indexed Series are positional only through a deprecated fallback.
    texts = [
        ax.text(x_val, y_val, label, fontsize='medium')
        for label, x_val, y_val in zip(labeled_x.index, labeled_x.to_numpy(), labeled_y.to_numpy())
    ]
    if adjust:
        # adjust_text was called without ever being imported in this notebook;
        # import it here so the helper does not rely on hidden kernel state.
        from adjustText import adjust_text
        adjust_text(texts, labeled_x.values, labeled_y.values, arrowprops=dict(arrowstyle='-', color='black'),force_text=(2,2),expand_text=(0.5,0.5))
    # NOTE(review): the axis labels are fixed and do not depend on `x`/`y`.
    ax.set_xlabel('scaled expression')                   # Set x-axis label
    ax.set_ylabel('Log2FC')
    return ax


# EVT subset; only its gene list (var_names) is used below.
evt=adata[adata.obs.subtype.isin(['pEVT','eEVT','iEVT']),:]

## Compute DEGs
# Each subtype is tested against iEVT. The keyword was misspelled `refenece`,
# which is not scanpy's `reference` argument, so the intended iEVT baseline was
# never applied.
sc.tl.rank_genes_groups(adata,groupby='subtype',method='wilcoxon',reference='iEVT')
deg1=sc.get.rank_genes_groups_df(adata,group=['pEVT'])
deg2=sc.get.rank_genes_groups_df(adata,group=['eEVT'])

# rank_genes_groups_df returns the gene in a `names` column with a plain integer
# index, so index by gene before aligning with the gene-indexed table `df`.
deg1_by_gene=deg1.set_index('names')
deg2_by_gene=deg2.set_index('names')

df=pd.DataFrame(index=evt.var_names)
df['padj_pEVT']=deg1_by_gene['pvals_adj'].reindex(df.index)
df['scores_PvsI']=deg1_by_gene['scores'].reindex(df.index)
df['padj_eEVT']=deg2_by_gene['pvals_adj'].reindex(df.index)
df['scores_EvsI']=deg2_by_gene['scores'].reindex(df.index)

genestolabel=['PGF','SEMA4B','ELMO1','PLXDC2','FN1',
      "SLIT2","BMP5",'TCF4',"GATA4","ZEB2",
     "DOCK5","DOCK1","MAP4K4","GRK5","ACTN1","ITGA1","VAV2",
     "ITGB1","ANXA1","IFI6","HLA-G","TIMP2","TIMP3",'GRB2','AOC1','B2M','PRG2','SERPINE1','SERPINE2','GJA1',
     "CD63",'CD59','FTL','KRT8','IGF2','SEMA5A','HTRA1','HES2']
# Only genes that have both scores can be positioned on the plot; labels that
# are missing from the table would otherwise raise KeyError in plot_scatter.
labelable=df[['scores_PvsI','scores_EvsI']].dropna().index
genestolabel=[g for g in genestolabel if g in labelable]

# Classify genes by their scores versus iEVT. The original read
# `score_pEVT`/`score_eEVT`, columns that are never created; the score columns
# defined above are `scores_PvsI` and `scores_EvsI`.
df['group']='genome'
df.loc[df['scores_PvsI'].gt(20),'group']='pEVT'
df.loc[df['scores_EvsI'].gt(20),'group']='eEVT'
df.loc[(df['scores_EvsI'].lt(-20))&(df['scores_PvsI'].lt(-20)),'group']='iEVT'

# 'scarlet' alone is not a matplotlib colour name; the xkcd colour needs the
# 'xkcd:' prefix.
palette=['springgreen','dodgerblue','xkcd:scarlet','lightgrey']
# Fix the hue order so each group always gets the same colour instead of
# depending on row order.
# NOTE(review): the pairing (pEVT, eEVT, iEVT, background) follows the order of
# the palette and of the assignments above -- confirm this is the intended mapping.
hue_order=['pEVT','eEVT','iEVT','genome']
plot_scatter(data=df,x="scores_PvsI",y='scores_EvsI',adjust=True,degs=genestolabel,group="group",palette=sns.color_palette(palette),hue_order=hue_order)
plt.ylim((-60,60))
plt.xlim((-20,40))


In [None]:
## 04 Scoring pseudovascularization of EVTs
import pandas as pd
import numpy as np
from pygam import s, LinearGAM
import matplotlib.pyplot as plt
import matplotlib
from scipy.stats import zscore

plt.rcParams['pdf.fonttype']=42

# NOTE(review): `adata` must be the STOMICS object at this point; it is not
# loaded in this cell -- confirm where it comes from.
# .copy() because score_genes writes a new obs column, which should not happen
# on a view of `adata`.
evt=adata[adata.obs.subclass=='EVT',:].copy() ## subset EVTs from STOMICS
distances=pd.read_table("/path/to/distance_bv.tsv",sep="\t",index_col=0,header=0)  ## distance computed on STOMICS
# Keep only EVT cells that have a distance entry, in evt order.
distances=distances.loc[evt.obs_names[evt.obs_names.isin(distances.index)]]
degs=pd.read_csv("/path/to/marker_mVEC.tsv",sep="\t",index_col=0) ## load mVEC markers from snRNA-seq 
degs = degs.sort_values(by="scores", ascending=False).head(100).index
degs=[i for i in degs if 'RPS' not in i and 'RPL' not in i and "MT-" not in i and i in evt.var_names] ## remove ribosomal and mitochondria genes

#genes=['PECAM1','MMP1','CDH5','CLEC14A','VWF','CD34','MCAM','THSD1']
# NOTE(review): `genes` was undefined here (its only definition is commented
# out above) while the filtered marker list `degs` was computed but never used;
# presumably `degs` is the intended gene set -- confirm.
genes=degs
sc.tl.score_genes(evt,genes,score_name='aEC_score')

# z-score over all EVT cells, then align by cell id: `distances` may contain
# fewer cells than `evt`, so a positional assignment would either fail or pair
# scores with the wrong cells.
aec_scaled=pd.Series(zscore(evt.obs['aEC_score'].to_numpy(),ddof=1),index=evt.obs_names)
distances['aEC_scaled_score']=aec_scaled.loc[distances.index].to_numpy()
bins = list(range(0, 301, 25))
distances['distance_bin'] = pd.cut(distances['signed_distance'], bins)
distances=distances.dropna()

# Boxplot of the scaled score per 25-unit distance bin, on its own figure.
fig_box, ax_box = plt.subplots()
sns.boxplot(data=distances,x='distance_bin',y='aEC_scaled_score',fliersize=0,ax=ax_box)

# Smooth trend of score vs. distance with a one-term spline GAM.
X,y=distances['signed_distance'],distances['aEC_scaled_score']
gam = LinearGAM(s(0, n_splines=10, lam=5)).fit(X, y)
# Evaluation grid shared by the fitted curve and its confidence band
# (the original referenced an undefined `XX` and a misspelled `mp`).
XX = np.linspace(0,300,50)
predictions = gam.predict(XX)
confint = gam.confidence_intervals(XX)

# Separate figure: the boxplot's x-axis is categorical, so a curve on a
# 0-300 scale cannot share those axes.
fig_gam, ax_gam = plt.subplots()
ax_gam.plot(XX, predictions, color='red')
ax_gam.fill_between(XX, confint[:, 0], confint[:, 1], alpha=0.3, label='95% CI')
ax_gam.set_ylim(-2,1.5)
ax_gam.legend()

In [None]:
## 05 Invasive score modeling

import pandas as pd
import numpy as np
# train_test_split lives in sklearn.model_selection; `from sklearn import
# train_test_split` raises ImportError.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV, Lasso
from sklearn.preprocessing import StandardScaler
import pickle
import scipy.stats as ss
## data preprocessing
df_interface=pd.read_table("/path/to/distance_interface.tsv",sep="\t",index_col=0,header=0)

# Dense cell x gene expression table for the EVT object defined in an earlier cell.
expr_df = pd.DataFrame(evt.X.toarray(),index=evt.obs_names,columns=evt.var_names)
genes=evt.var_names
# Cells of the selected cell types that lie at a positive signed distance from the interface.
cell_id=evt.obs.loc[evt.obs.celltype.isin(['EVT','iEVT','EVTpro']),:].index
masked=df_interface.loc[(df_interface['signed_distance_interface']>0),:].index
cell_id=np.intersect1d(cell_id,masked)

# 50/50 split with a fixed seed.
train_cells, test_cells = train_test_split(cell_id,test_size=0.5,random_state=42)
# .copy() so the column assignments below act on independent frames rather than
# on slices of expr_df.
train_df,test_df=expr_df.loc[train_cells].copy(),expr_df.loc[test_cells].copy()
# NOTE(review): this reads column 'distance_to_interface' while the filter above
# and the validation step use 'signed_distance_interface' -- confirm both columns
# exist in the table.
train_df['distance_to_interface']=df_interface.loc[train_cells,'distance_to_interface']
train_df=train_df.sort_values(by='distance_to_interface', ascending=True).reset_index(drop=True)
# Average consecutive groups of N distance-sorted cells.
N=10
train_df['group']=train_df.index//N
# `genes` is a pandas Index: `genes + [...]` would concatenate strings
# elementwise instead of extending the column list, so convert to a list first.
train_mean = train_df.groupby('group')[list(genes)+['distance_to_interface']].mean(numeric_only=True)
## restrict to genes highly expressed in EVTs
mks=pd.read_table("/path/to/EVT_marker.tsv",sep="\t",header=0)
candidates=mks.loc[(mks.scores.gt(2)),'names'].values
# Drop ribosomal (RPL*/RPS*) and mitochondrial (MT-*) genes and anything not in the expression matrix.
candidates=[i for i in candidates if not i.startswith(('RPL','MT-','RPS')) and i in genes]

## Feature selection by LASSO
# Standardise the candidate genes of the group-averaged training table and
# regress the averaged distance on them.
X = StandardScaler().fit_transform(train_mean[candidates])
y=train_mean['distance_to_interface'].values
lasso = Lasso(alpha=50).fit(X,y)
coef_lasso = pd.Series(lasso.coef_, index=candidates).sort_values(ascending=True)
# Number of genes retained by the L1 penalty (non-zero coefficients).
print((coef_lasso!=0).sum())
# Write mode is required for pickle.dump; the original opened the file with
# "rb", which fails (missing file, or a read-only handle).
with open("model.pkl", "wb") as f:
    pickle.dump(lasso,f)

## Model Validation 
# NOTE(review): a new StandardScaler is fitted on the held-out cells instead of
# reusing the scaler fitted on the training table -- confirm this is intended.
X_test=StandardScaler().fit_transform(test_df[candidates])
y_pred=lasso.predict(X_test)
# Rank correlation between measured distance and prediction on the held-out
# cells (the closing parenthesis of print() was missing, a SyntaxError).
print(ss.spearmanr(df_interface.loc[test_cells,'signed_distance_interface'].values,y_pred))

## Invasive Scoring by snRNA-seq 

adata=sc.read("/path/to/trophoblast.h5ad")
# .copy() because an obs column is added below; writing to a view of `adata`
# triggers an implicit copy with a warning.
snRNA_ad=adata[adata.obs.major_class=='EVT',:].copy()
snRNA_input = pd.DataFrame(snRNA_ad.X.toarray(),index=snRNA_ad.obs_names,columns=snRNA_ad.var_names)
# Candidate genes are standardised within the snRNA-seq data before prediction.
snRNA_input = StandardScaler().fit_transform(snRNA_input[candidates])
# `columns` must be list-like; a bare string raises TypeError.
pred=pd.DataFrame(lasso.predict(snRNA_input),index=snRNA_ad.obs_names,columns=['raw_pred'])
# iScore = z-scored raw prediction.
pred['iScore']=(pred['raw_pred'] - pred['raw_pred'].mean()) / pred['raw_pred'].std()
## visualization
snRNA_ad.obs['iScore']=pred['iScore']
sns.boxplot(data=snRNA_ad.obs, x='subtype',y='iScore', hue='subtype',fliersize=0)
# scanpy's plotting argument is `color`; `colour` is not recognised.
sc.pl.umap(snRNA_ad,color=['iScore'],cmap='viridis')

