In [None]:
import os

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import scvelo as scv
import seaborn as sns
from IPython.core.display import display, HTML
# Notebook display tweaks: taller scrollable output areas and a wider page container.
display(HTML("<style>div.output_scroll { height: 50em; }</style>"))
display(HTML("<style>.container { width:80% !important; }</style>"))

# pandas / numpy printing options (previously set twice with identical values).
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 200)
np.set_printoptions(suppress=True)

# scvelo settings; `scv` is the scvelo alias from the import block.
scv.settings.verbosity = 3
scv.settings.presenter_view = True
scv.set_figure_params('scanpy')

# scanpy settings are applied after the scvelo call, keeping the original order.
sc.set_figure_params(dpi=100, color_map = 'viridis_r')
sc.settings.verbosity = 1
# Font type 42 embeds TrueType fonts in PDFs so text stays editable in vector editors.
plt.rcParams['pdf.fonttype']=42

In [None]:
# Directory holding the processed .h5ad objects; relative paths used later resolve against it.
wdir="/path/to/h5ad/"
os.chdir(wdir)
adata=sc.read("scPlacenta.h5ad")

# Subsets selected by the `subclass` annotation. Slicing an AnnData returns a view;
# `tp` and `dsc` are written to later (neighbors, UMAP, new obs columns), so they are
# materialised with `.copy()` up front instead of relying on an implicit copy at first write.
tp=adata[adata.obs.subclass.isin(['VCT',"EVT","SCT"]),:].copy()
dsc=adata[adata.obs.subclass.isin(['DSC']),:].copy()
# `vEC` cells whose `Origin` is anything other than "Fetal".
endo=adata[(adata.obs.subclass=='vEC')&(adata.obs.Origin!="Fetal"),:].copy()



In [None]:
## Scrublet
# Folder containing one Cell Ranger output directory per sample.
cellranger_dir="/path/to/cellranger/"
files=os.listdir(cellranger_dir)

# Per-sample results, appended in the order of `files`.
# (These accumulators were never initialised before, and the path was built from the builtin `dir`.)
score_list=[]
dub_list=[]
id_list=[]
for i in files:
    filtered=os.path.join(cellranger_dir,i,"outs","filtered_feature_bc_matrix.h5")
    c_filtered=sc.read_10x_h5(filtered, gex_only=True, backup_url=None)
    # Expected doublet rate passed to Scrublet: 0.4% per 1,000 cells in the filtered matrix.
    exp=c_filtered.n_obs/1000*0.4/100

    print('Sample {} Number of cells in cell list: {}'.format(i, c_filtered.n_obs))
    print('Sample {} Number of genes in gene list: {}'.format(i, c_filtered.n_vars))
    sc.external.pp.scrublet(c_filtered,expected_doublet_rate=exp,log_transform=True,n_prin_comps=30)
    # The scrublet call writes `doublet_score` / `predicted_doublet` into `c_filtered.obs`.
    score_list.append(c_filtered.obs['doublet_score'])
    dub_list.append(c_filtered.obs['predicted_doublet'])
    id_list.append(c_filtered.obs.index)

In [None]:
## dotplot
# Genes shown on the dot plot, in display order.
target_gene = [
    'TP63', 'TENM3', 'HLA-G', 'AOC1', 'ERVW-1', 'CYP19A1', 'DKK1',
    'LUM', 'COL3A1', 'BMP5', 'ACTA2', 'AFF2',
    'POU5F1', 'PAEP', 'CDH5', 'PECAM1', 'IGHM', 'BCL11A',
    'CD3D', 'CD3G', 'GNLY', 'NKG7', 'CD14', 'MS4A7', 'HLA-DRA', 'CD74',
    'HBA1', 'HBB',
]
# Order in which the `subclass` groups are drawn.
# NOTE(review): other cells filter `subclass` on 'DSC' and 'vEC', which are not listed here - confirm
# these names match the categories actually stored in `adata.obs.subclass`.
categories_order = [
    'VCT', 'EVT', 'SCT', 'STM', 'FB', 'PV', 'Epi', 'Endo',
    'B', 'T', 'dNK', 'M', 'HB', 'cDC', 'Erythrocyte',
]
sc.pl.dotplot(
    adata,
    groupby='subclass',
    var_names=target_gene,
    standard_scale='var',
    cmap='coolwarm',
    categories_order=categories_order,
    dot_max=0.6,
    use_raw=False,
)

## piechart 1
def func(pct, allvals):
    """Format a pie-wedge percentage label for ``autopct``.

    Parameters
    ----------
    pct : float
        Percentage value of the wedge.
    allvals : object
        Unused. Kept so existing ``func(pct, values)`` call sites keep working;
        the absolute count previously derived from it was never used.

    Returns
    -------
    str
        ``pct`` with one decimal place followed by ``%`` (e.g. ``"12.3%"``).
    """
    return "{:.1f}%".format(pct)
# Count cells per `Origin` value among cells whose subclass is "FB".
x = adata.obs.loc[adata.obs.subclass == "FB", "Origin"].value_counts()
# Wedge labels are the Origin values; the percentage text comes from `func`.
plt.pie(
    x,
    labels=x.index,
    autopct=lambda pct: func(pct, x),
    colors=['dodgerblue', 'deeppink', 'grey'],
)

## piechart 2
# One pie per stage, showing the subclass composition of the cells in that stage.
stages=adata.obs.stage.unique()
# Subclass order (by overall abundance), shared by every pie so colours stay comparable.
order=adata.obs.subclass.value_counts().index.astype('str')
# One colour per subclass instead of a hard-coded count.
pal=sns.color_palette('tab20', len(order)).as_hex()
fig, axs = plt.subplots(ncols=len(stages), nrows=1, figsize=(25,5))
# With a single stage `plt.subplots` returns a bare Axes; normalise so `axs[col]` always works.
axs=np.atleast_1d(axs)
for col,j in enumerate(stages):
    ax=axs[col]
    counts=adata.obs.loc[adata.obs.stage==j,'subclass'].astype('str').value_counts()
    # `reindex` fills subclasses absent from this stage with 0 instead of raising KeyError.
    df=counts.reindex(order, fill_value=0)
    ax.pie(df,labels=df.index,colors=pal)
    ax.axis('off')

In [None]:
##motif activities
chromvar=sc.read("../multiome_placenta_processed/rds/20K_chromvar.h5ad")
# Select the cells of `adata` (in its order). Copy so the view is materialised before
# `obs`/`var` are replaced, and copy `adata.obs` so the two objects do not share one DataFrame.
chromvar=chromvar[adata.obs.index,:].copy()
chromvar.obs=adata.obs.copy()
scv.set_figure_params('scanpy')
sc.tl.rank_genes_groups(chromvar, groupby='subclass',method='wilcoxon')

# Motif annotation table; its third column becomes the index.
motif_ids=pd.read_csv(".chromvar.motif.names.txt",sep="\t",header=0,index_col=2)
# NOTE(review): `var` is replaced wholesale, so rows are matched to features by position -
# confirm the table is in the same order as the features of `chromvar`.
chromvar.var=motif_ids
# Keep the previous index as column 'MA', then index features by the `jaspar2020` column.
chromvar.var['MA']=chromvar.var.index
chromvar.var.index=chromvar.var.jaspar2020
categories_order=['VCT',"EVT","SCT","STM","FB","PV","Epi","Endo","B","T","dNK","M","HB","cDC","Erythrocyte"]
# 'RUNX2' was listed twice; the repeat is dropped so each motif is drawn once.
# NOTE(review): confirm the second entry was not meant to be a different factor.
TFs=['TP63','TEAD3',"TEAD1",'GATA3',"GCM1","TFAP2B","TFAP2A",
     "HAND2","HOXA10","EBF1","HNF1B","HOXB4","ETV1","IRF4","IKZF1","RUNX2","ETS1","SPI1","ELF1","ETV6",
     "GATA1::TAL1"]
sc.pl.matrixplot(chromvar, TFs, categories_order=categories_order,groupby='subclass',dendrogram=False,
              cmap="bwr",standard_scale='var')


In [None]:
## UMAP
# Neighbour graph on the existing PCA representation, then a 2-D UMAP of the `tp` subset.
sc.pp.neighbors(tp, n_neighbors=40, use_rep='X_pca',n_pcs=50)
sc.tl.umap(tp,n_components=2,min_dist=0.3, spread=1.0)
# One palette colour per distinct subtype.
n=len(tp.obs.subtype.unique())
sc.pl.embedding(tp,color='subtype',basis="X_umap",palette=sns.color_palette("tab20c",n),
                legend_loc='on data',legend_fontsize=8)

## DEGs EVT vs VCT
# The keyword is `reference`; `ref=` is not a parameter of `rank_genes_groups` and was silently
# swallowed by **kwds, so the comparison ran EVT vs all other cells instead of EVT vs VCT.
sc.tl.rank_genes_groups(tp,groupby='subclass',groups=['EVT'],reference='VCT',method='wilcoxon')
sc.get.rank_genes_groups_df(tp,group=['EVT']).to_csv("./evt_vs_vct.txt",sep="\t",index=False)

In [None]:
##Volcano plot1
from adjustText import adjust_text
# Keep the last five columns of each table; `.copy()` so columns can be added without
# writing into a slice of the frame that was read.
df1=pd.read_table("EvsI.tsv",sep="\t",header=0,index_col=0).iloc[:,-5:].copy()
df2=pd.read_table("PvsI.tsv",sep="\t",header=0,index_col=0).iloc[:,-5:].copy()
df1.columns=['pvals_adj','scores','lfc','pts1','pts2']
df2.columns=['pvals_adj','scores','lfc','pts1','pts2']
df1['Gene name']=df1.index
df2['Gene name']=df2.index
# Flip the sign of the PvsI scores.
df2.scores=df2.scores*-1
colors=["silver","dodgerblue",'firebrick','lime']
# Align the two score columns on the row index. `axis` must be passed by keyword:
# pandas 2 no longer accepts it positionally.
df=pd.concat((df1.scores,df2.scores),axis=1)
df.columns=['scores_EvsI','scores_PvsI']
# Group assignment; later rules override earlier ones for rows matching several.
# NOTE(review): the last rule uses -20 for EvsI but -5 for PvsI - confirm the asymmetry is intended.
df['group']="BG"
df['Gene name']=df.index
df.loc[df.scores_EvsI.ge(20),'group']='eEVT'
df.loc[df.scores_PvsI.ge(20),'group']='pEVT'
df.loc[(df.scores_EvsI.le(-20)&df.scores_PvsI.le(-5)),'group']='iEVT'
# Genes to annotate on the scatter plot.
degs=['PGF','SEMA4B','ELMO1','PLXDC2','FN1',
      "SLIT2","BMP5",'TCF4',"GATA4","ZEB2",
     "DOCK5","DOCK1","MAP4K4","GRK5","ACTN1","ITGA1","VAV2",
     "ITGB1","ANXA1","IFI6","HLA-G","TIMP2","TIMP3",'GRB2','AOC1','B2M','PRG2','SERPINE1','SERPINE2','GJA1',
     "CD63",'CD59','FTL','KRT8','IGF2','SEMA5A','HTRA1','HES2']
def plot_scatter(data,x,y,group,degs,adjust=True,xlabel='scaled expression',ylabel='Log2FC',**kwargs):
    """Scatter ``data[x]`` against ``data[y]`` coloured by ``group`` and label selected rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose index holds the labels listed in ``degs``.
    x, y : str
        Column names used for the horizontal and vertical axes.
    group : str
        Column name passed to seaborn as ``hue``.
    degs : sequence of str
        Index labels of the rows to annotate. A label missing from
        ``data.index`` raises ``KeyError`` (from ``DataFrame.loc``).
    adjust : bool, default True
        If true, run ``adjust_text`` to spread the labels and draw connector lines.
    xlabel, ylabel : str
        Axis labels. Defaults are the strings that used to be hard-coded.
    **kwargs
        Forwarded to ``seaborn.scatterplot``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes returned by ``seaborn.scatterplot``.
    """
    # Open a fresh 10x10 figure; the scatter is drawn on its current axes.
    plt.figure(figsize=(10,10))
    ax=sns.scatterplot(data=data,x=x,y=y,hue=group,size =2, linewidth=0, **kwargs)
    labeled_x,labeled_y=data.loc[degs,x],data.loc[degs,y]
    # Iterate over the selected rows directly instead of using integer keys on a
    # label-indexed Series (positional fallback is deprecated in pandas).
    texts = [ax.text(px, py, gene, fontsize='medium')
             for gene, px, py in zip(labeled_x.index, labeled_x.values, labeled_y.values)]
    # Skip label adjustment when there is nothing to place.
    if adjust and texts:
        adjust_text(texts, labeled_x.values, labeled_y.values, arrowprops=dict(arrowstyle='-', color='black'),force_text=(2,2),expand_text=(0.5,0.5))
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    return ax
# Scatter of PvsI scores (x) against EvsI scores (y), coloured by `group`, labelling the genes in `degs`.
plot_scatter(data=df,x="scores_PvsI",y='scores_EvsI',adjust=True,degs=degs,group="group",palette=sns.color_palette(colors))
# Limit the y-axis to [-10, 10]; points outside this range are not visible.
# NOTE(review): the group rules in this cell use |score| >= 20 on scores_EvsI - confirm this range is intended.
plt.ylim((-10,10))
plt.show()

In [None]:
## palantir
# Pipeline on `tp`: CellRank GPCCA macrostates -> root cell -> Palantir pseudotime -> stream plot.
from cellrank.tl.kernels import CytoTRACEKernel,ConnectivityKernel
from cellrank.tl.estimators import GPCCA
# Transition matrix from a ConnectivityKernel built on `tp`, analysed with GPCCA.
ck = ConnectivityKernel(tp)
ck.compute_transition_matrix()
g_pv = GPCCA(ck)
g_pv.compute_schur(n_components=20)
g_pv.plot_spectrum(real_only=True)
# Ten macrostates, computed with the `subtype` annotation as cluster key.
g_pv.compute_macrostates(n_states=10, cluster_key="subtype")
g_pv.plot_macrostates(discrete=True, legend_loc="right", size=100, basis="X_umap")
# Terminal and initial states are picked by macrostate name.
g_pv.set_terminal_states_from_macrostates(['eEVT','SCT_b','iEVT',"SCT_a"])
# NOTE(review): leading underscore - this is a private method and may change between CellRank versions.
g_pv._set_initial_states_from_macrostates("VCT_0")
g_pv.compute_absorption_probabilities()
g_pv.plot_absorption_probabilities(same_plot=False)

# Root cell = the cell with the largest value in `tp.obs.initial_states_probs`
# (presumably written by the initial-state step above - TODO confirm).
root_cell=tp.obs.initial_states_probs.argmax()
root_cell=tp.obs.index[root_cell]

import scanpy.external as sce
import palantir
sce.tl.palantir(tp, n_components=20, knn=20)
# Palantir run started from `root_cell`, using the 'X_palantir_multiscale' representation.
pr_res = sce.tl.palantir_results(
    tp,
    early_cell=root_cell,
    ms_data='X_palantir_multiscale',
    num_waypoints=500,
)
# Replace the branch-probability column names (looked up as cell ids in `tp.obs`) by those cells' subtype.
pr_res.branch_probs.columns = tp.obs.loc[pr_res.branch_probs.columns,'subtype']
tp.obs['palantir_pseudotime']=pr_res.pseudotime

##stream plot
import math
# NOTE(review): star import - `Gradient_calculator` presumably comes from `streamutils`; verify.
from streamutils import *
time_key="palantir_pseudotime"
# Gradient calculator over the UMAP embedding, driven by the Palantir pseudotime column.
a=Gradient_calculator(adata=tp,obsm_key='X_umap',pseudotime_key=time_key)
plt.rcParams['savefig.bbox'] = 'tight'
a.calculate_p_mass(n_grid=20)
a.suggest_mass_thresholds()
a.calculate_mass_filter(min_mass=10,plot=True)
# NOTE(review): the "seaborn" style name is deprecated in recent matplotlib releases - confirm the pinned version.
plt.style.use("seaborn")
a.transfer_data_into_grid()
a.calculate_gradient()
a.plot_reference_flow_on_grid(density=5)
a.plot_dev_flow_on_grid()
a.visualize_results(plot_type="stream",s=11,cmap="rainbow")

In [None]:
##TF dotplot
# The subset filter used 'EVT-pro' while the plotting order used 'EVT_pro', so the two could
# never agree. Both spellings are accepted here and the plotting order is built from the labels
# actually present, so the subset and `categories_order` always match.
# NOTE(review): once the stored spelling is confirmed, drop the other one.
evt_order=["eEVT",'iEVT','pEVT',"EVT_pro",'EVT-pro']
tp2=tp[tp.obs.new_subtype.isin(evt_order)]
evt_present=set(tp2.obs.new_subtype.astype('str'))
evt_categories_order=[label for label in evt_order if label in evt_present]
# "CREB5" was listed twice; the repeat is dropped so each gene is drawn once.
markers=["EPAS1","MBD2","GATA3",'STAT1','ZNF486',"FOXO1",
         "CREB5",'GCM1','MBNL2',"PPARG",'SMAD3',
         "SOX4","FOXJ3","TCF7L2","TEAD1","ZNF704","FOS","ASCL2",'FOXN3','GLIS3']
# Not used by the plot below; kept because it is a notebook-level name.
pal=sns.color_palette('Spectral', 20)
sc.pl.dotplot(tp2,var_names=markers,cmap='rainbow',groupby='new_subtype',dendrogram=False,standard_scale='var',
              categories_order=evt_categories_order,
              swap_axes=True)

In [None]:
# sampling cells for celloracle
from geosketch import gs

# Number of cells to draw.
N = 5000
# Sketch on the PCA representation of `tp`.
X_dimred = tp.obsm['X_pca']
# Row indices selected by geosketch, drawn without replacement.
sketch_index = gs(
    X_dimred,
    N,
    replace=False,
)
# PCA coordinates of the selected cells.
X_sketch = X_dimred[sketch_index]

In [None]:
from cellrank.tl.estimators import GPCCA
# `WOTKernel` was used below without being imported. The notebook already relies on the
# CellRank 1.x layout (`cellrank.tl`), where this kernel lives in `cellrank.external.kernels`.
from cellrank.external.kernels import WOTKernel
sc.pp.neighbors(dsc,n_neighbors=40, n_pcs=50, use_rep='X_pca')
# Time key for the kernel: the `GW` column multiplied by 7.
dsc.obs['Days']=dsc.obs['GW']*7.0
wk = WOTKernel(dsc, time_key="Days")
wk.compute_initial_growth_rates(organism="human", key_added="growth_rate_init")
scv.pl.scatter(dsc, c="growth_rate_init", legend_loc="right", basis="X_umap", s=10)
wk.compute_transition_matrix(growth_iters=3, growth_rate_key="growth_rate_init", last_time_point="connectivities")
# Flow out of cluster DSC0 towards the listed subtypes across the `Days` time points.
groups=['DSC0','DSC1','DSC2','DSC3','DSC4']
ax = wk.plot_single_flow(cluster_key="subtype",cluster="DSC0",clusters=groups,time_key="Days",min_flow=0,
                         xticks_step_size=4,show=False,dpi=100)
# Re-apply the current tick positions and labels with the labels rotated by 90 degrees.
locs, labels = plt.xticks()
ax.set_xticks(locs)
ax.set_xticklabels(labels, rotation=90)

In [None]:
## palantir
# Pipeline on `dsc`: CellRank GPCCA macrostates -> root cell -> Palantir pseudotime -> stream plot.
# This cell repeats the trophoblast (`tp`) pipeline with `dsc` substituted.
from cellrank.tl.kernels import CytoTRACEKernel,ConnectivityKernel
from cellrank.tl.estimators import GPCCA
# Transition matrix from a ConnectivityKernel built on `dsc`, analysed with GPCCA.
ck = ConnectivityKernel(dsc)
ck.compute_transition_matrix()
g_pv = GPCCA(ck)
g_pv.compute_schur(n_components=20)
g_pv.plot_spectrum(real_only=True)
# Ten macrostates, computed with the `subtype` annotation as cluster key.
g_pv.compute_macrostates(n_states=10, cluster_key="subtype")
g_pv.plot_macrostates(discrete=True, legend_loc="right", size=100, basis="X_umap")
# NOTE(review): these terminal/initial state names are identical to the ones used for `tp`
# (EVT/SCT/VCT labels), while other cells refer to `dsc` subtypes as DSC0-DSC4. This looks like
# a copy-paste leftover - replace with the DSC macrostate names before running.
g_pv.set_terminal_states_from_macrostates(['eEVT','SCT_b','iEVT',"SCT_a"])
# NOTE(review): leading underscore - this is a private method and may change between CellRank versions.
g_pv._set_initial_states_from_macrostates("VCT_0")
g_pv.compute_absorption_probabilities()
g_pv.plot_absorption_probabilities(same_plot=False)

# Root cell = the cell with the largest value in `dsc.obs.initial_states_probs`
# (presumably written by the initial-state step above - TODO confirm).
root_cell=dsc.obs.initial_states_probs.argmax()
root_cell=dsc.obs.index[root_cell]

import scanpy.external as sce
import palantir
sce.tl.palantir(dsc, n_components=20, knn=20)
# Palantir run started from `root_cell`, using the 'X_palantir_multiscale' representation.
pr_res = sce.tl.palantir_results(
    dsc,
    early_cell=root_cell,
    ms_data='X_palantir_multiscale',
    num_waypoints=500,
)
# Replace the branch-probability column names (looked up as cell ids in `dsc.obs`) by those cells' subtype.
pr_res.branch_probs.columns = dsc.obs.loc[pr_res.branch_probs.columns,'subtype']
dsc.obs['palantir_pseudotime']=pr_res.pseudotime

##stream plot
import math
# NOTE(review): star import - `Gradient_calculator` presumably comes from `streamutils`; verify.
from streamutils import *
time_key="palantir_pseudotime"
# Gradient calculator over the UMAP embedding, driven by the Palantir pseudotime column.
a=Gradient_calculator(adata=dsc,obsm_key='X_umap',pseudotime_key=time_key)
plt.rcParams['savefig.bbox'] = 'tight'
a.calculate_p_mass(n_grid=20)
a.suggest_mass_thresholds()
a.calculate_mass_filter(min_mass=10,plot=True)
# NOTE(review): the "seaborn" style name is deprecated in recent matplotlib releases - confirm the pinned version.
plt.style.use("seaborn")
a.transfer_data_into_grid()
a.calculate_gradient()
a.plot_reference_flow_on_grid(density=5)
a.plot_dev_flow_on_grid()
a.visualize_results(plot_type="stream",s=11,cmap="rainbow")

In [None]:
#Volcano plot2
import seaborn as sns
# Genes to annotate on the plot.
degs=['IGFBP1','LGR4','TIMP3','SEMA3A','DOCK1','PRLR',"PRL",'DOCK4','SAT1',"ITGA1","ITGB1","THSD1",'LUM',
     "VIM","B2M",'CD63','TIMP1','TIMP2','CD81','IGFBP2','HLA-C','HLA-A','HLA-B','CD248','TPM2','PPIA','S100A11',
     'CD28','TPT1','SERPINF1','PPIB','C1R','OAZ1','CD59','IGFBP5','GJA1','IGFBP4']
# NOTE(review): `df` is not built in this cell; it must already hold the columns
# 'basemean', 'lfc' and 'Group' used below - confirm which table is expected.
# The trailing comma after the call is removed: it turned `ax` into a 1-tuple instead of the Axes.
ax = sns.scatterplot(x="basemean",  # horizontal axis
                     y='lfc',       # vertical axis
                     data=df,
                     size = 1,hue='Group',alpha=0.7,palette='coolwarm_r',linewidth=0,
                     hue_order=["DSC2","BG","DSC3"])
# Set x-axis label
plt.xlabel('Mean DSC')
# Set y-axis label
plt.ylabel('Log2FC(Path A vs B)')
def label_point(x, y, val, ax):
    """Write one text label per point onto ``ax``.

    ``x``, ``y`` and ``val`` are combined column-wise (aligned on their index)
    and each resulting row is drawn with ``ax.text`` at ``(x + 0.02, y)``,
    using ``str(val)`` as the text and a font size of 8.
    """
    points = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, row in points.iterrows():
        x_pos, y_pos, text = row['x'], row['y'], row['val']
        # Small horizontal offset so the text does not sit on top of the marker.
        ax.text(x_pos + .02, y_pos, str(text), size=8)
# Annotate the genes in `degs` on the current axes.
# NOTE(review): the labels are positioned with column 'basemean_dec', whereas the scatter in this
# cell plots column 'basemean' - confirm both columns exist and that the mismatch is intended.
label_point(df.loc[degs,'basemean_dec'], df.loc[degs,'lfc'], df.loc[degs,'Gene name'], plt.gca())
# Limit the y-axis to [-5, 5]; points outside this range are not visible.
plt.ylim((-5,5))
plt.show()

In [None]:
## subcluster in DSC3
# Copy the DSC3 subset so the Louvain labels are written to a real object, not a view.
# NOTE(review): the subset is selected on `new_cluster`, while labels are later taken from
# `subtype` - confirm both columns carry the "DSC3" label.
dsc3=dsc[dsc.obs.new_cluster=="DSC3",:].copy()
sc.tl.louvain(dsc3,resolution=0.1)
# Cell ids of the first two Louvain clusters. They are taken from `dsc3` itself: the previous
# code referenced the undefined names `dec_sub` / `dec` and applied a mask built on the subset
# to the full `dsc.obs`, which cannot be aligned.
dec31=dsc3.obs.index[dsc3.obs.louvain=="0"]
dec32=dsc3.obs.index[dsc3.obs.louvain=="1"]
# Start from the subtype labels and overwrite the two DSC3 subclusters.
# Cells in any further Louvain cluster keep their subtype label.
dsc.obs['subclusters']=dsc.obs['subtype'].astype('str')
dsc.obs.loc[dec31,"subclusters"]="DSC3_1"
dsc.obs.loc[dec32,"subclusters"]="DSC3_2"
sc.tl.rank_genes_groups(dsc,groupby="subclusters",groups=["DSC3_1","DSC3_2"],
                        method="wilcoxon",reference="DSC3_1",n_genes=30)
sc.pl.embedding(dsc,color=['TIMP1','SERPINF1','HSPB1','DCN','HLA-A','HLA-B','HLA-C','B2M','CD81','C1S','C1R','C3',
            'PPIA','UBB','UBC','APOE'],basis="X_umap",cmap='viridis_r',vmin=0.5,frameon=False,palette='Set1')
sc.pl.embedding(dsc,color="subclusters",basis="X_umap",groups=['DSC3_1','DSC3_2'],
                palette='Set1')