## **Inferring neural progenitor states during indirect neurogenesis from single cell data - scFates** 

(Trevino et al. 2021 dataset)

In [None]:
pwd

### **Loading modules and settings**

In [None]:
# Notebook setup: imports plus scanpy / scFates / matplotlib display settings.
import sys
#!{sys.executable} -m pip -q install palantir fa2
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
from anndata import AnnData
import numpy as np
import pandas as pd
import scanpy as sc
import scFates as scf
import palantir
import matplotlib.pyplot as plt
# Verbosity level 3, with scanpy's log output sent to stdout so it shows in the cell output.
sc.settings.verbosity = 3
sc.settings.logfile = sys.stdout
## fix palantir breaking down some plots
import seaborn 
seaborn.reset_orig()
%matplotlib inline

# Apply scanpy's default figure parameters, then the scFates publication-ready preset.
sc.set_figure_params()
scf.set_figure_pubready()


from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt

In [None]:
from pathlib import Path

# Make sure the figure output directory exists (parents included); a no-op if it is already there.
figures_dir = Path("/home/jovyan/jm_jlab/data_indNeuro/0.Figures_scRNA-seq_processing/")
figures_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Output directory prefix (note the trailing slash) used when building figure file names.
figures_path = "/home/jovyan/jm_jlab/data_indNeuro/0.Figures_scRNA-seq_processing/"

### **Loading data with Seurat PCA coordinates and normalization**

In [None]:
# Load the exported count matrix.
adata = sc.read_h5ad("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/rna_counts.h5ad")

# Snapshot the untouched counts before any filtering/normalization. Layers are
# subset together with adata, so this stays aligned after filter_genes.
adata.layers['raw_counts'] = adata.X.copy()
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.normalize_per_cell(adata, key_n_counts='n_counts_all')

# flavor='seurat_v3' expects raw counts, but adata.X has just been size-normalized,
# so the selection is computed on the raw-count layer instead of X.
# 4000 genes: retain more genes before any further filtering.
sc.pp.highly_variable_genes(adata, n_top_genes=4000, flavor='seurat_v3', layer='raw_counts')
# Call the plot directly; wrapping it in print() only printed its return value.
sc.pl.highly_variable_genes(adata)

# Keep the pre-log matrix. NOTE: at this point X is already size-normalized, so
# adata.raw and the "raw_count" layer hold normalized (not log-transformed) values;
# the untouched counts are in layers['raw_counts'].
adata.raw = adata
adata.layers["raw_count"] = adata.raw.X.copy()

# Log transformation
sc.pp.log1p(adata)

# Use the externally computed cell embeddings as the PCA representation.
cellembeddings = pd.read_csv("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/cellembeddings.tsv", sep='\t')
cellembeddings.iloc[:,1] = cellembeddings.iloc[:,1]*-1 #for better geometry
# NOTE(review): rows are taken positionally; this assumes the TSV is in the same
# cell order as adata.obs_names -- confirm against the export script.
adata.obsm['X_pca'] = cellembeddings.to_numpy() #sharing embeddings

pca_projections = pd.DataFrame(adata.obsm["X_pca"],index=adata.obs_names)

In [None]:
# Highly-variable-gene plot restricted to genes whose 'means' value is below 50.
sc.set_figure_params(figsize=(4,4))

low_mean_genes = adata.var['means'] < 50
sc.pl.highly_variable_genes(adata[:, low_mean_genes])

In [None]:
# PCA embedding coloured by the 'Cluster.Name' annotation.
sc.set_figure_params(figsize=(4,4))

sc.pl.pca(
    adata,
    color='Cluster.Name',
)

In [None]:
# PCA embedding coloured by batch, written to disk through scanpy's `save` mechanism.
sc.set_figure_params(figsize=(4,4))

sc.pl.pca(
    adata,
    color='Batch',
    alpha=1,
    save="SF1_pca_batches.png",
)

In [None]:
# Number of cells per batch.
adata.obs['Batch'].value_counts()

**Coarse clustering**

In [None]:
# Neighbour graph on the first two PCs; it is also needed later for the
# force-directed graph computation.
sc.pp.neighbors(adata, n_neighbors=75, n_pcs=2, method='gauss')

# Low resolution on purpose: only a coarse clustering is wanted here.
sc.tl.leiden(adata, resolution=0.1)

# Fixed cluster colours, chosen to match the scFates milestones later.
coarse_cluster_colors = ['#279e68', '#d62728', '#ff7f0e']
adata.uns['leiden_colors'] = coarse_cluster_colors

pca_panels = ['leiden', 'EGR1', 'HOPX', 'EOMES']
sc.pl.pca(adata, color=pca_panels)

In [None]:
import seaborn as sns

In [None]:
# Display seaborn's "colorblind" palette to pick cluster colours from.
sns.color_palette("colorblind")

In [None]:
# Hex codes of the first four colours of the "colorblind" palette.
sns.color_palette("colorblind",4).as_hex()

In [None]:
# Re-colour the coarse clusters with entries 2, 3 and 1 (in that order) of the
# 4-colour "colorblind" palette, for match with scFates milestones later.
colorblind_hex = sns.color_palette("colorblind",4).as_hex()
adata.uns['leiden_colors'] = [colorblind_hex[idx] for idx in (2, 3, 1)]

sc.pl.pca(adata, color=['leiden', 'EGR1', 'HOPX', 'EOMES'])

In [None]:
# Save the coarse-clustering PCA panel.
sc.pl.pca(
    adata,
    color=['leiden'],
    save='SF1_CoarseClustering.png',
)

**DEG - Logistic regression**

In [None]:
# Logistic regression
sc.set_figure_params(figsize=(4, 4),dpi_save=300)

sc.tl.rank_genes_groups(adata, 'leiden', method='logreg', key_added = "logreg")

In [None]:
pwd

In [None]:
# Top 10 genes per cluster from the logistic-regression ranking, saved to disk.
sc.pl.rank_genes_groups(
    adata,
    key='logreg',
    n_genes=10,
    save="SF1_DEG_logistic.png",
)

**DEG - Wilcoxon**

In [None]:
# Same per-cluster ranking with the Wilcoxon method, stored under "wilcoxon",
# then plotted with independent y-axes.
sc.set_figure_params(figsize=(4, 4))

wilcoxon_key = "wilcoxon"
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon', key_added=wilcoxon_key)
sc.pl.rank_genes_groups(adata, n_genes=10, key=wilcoxon_key, sharey=False)

**Renaming leiden clustering**

In [None]:
# New labels for the leiden categories, applied in the existing category order.
# NOTE(review): this depends on the cluster numbering produced above and is not
# idempotent across reruns of this cell -- confirm the order if clustering changes.
new_cluster_names = ['oRG', 'vRG', 'IP']
adata.rename_categories('leiden', new_cluster_names)

In [None]:
# PCA coloured by the renamed clusters; legend in the right margin, no title.
sc.set_figure_params(figsize=(4,4))

sc.pl.pca(
    adata,
    color='leiden',
    legend_loc='right margin',
    title='',
    frameon=True,
)

### **Computing force-directed graph**

In [None]:
# First two PCA columns, stored separately so they can seed the graph layout.
pca_coords = adata.obsm["X_pca"]
adata.obsm["X_pca2d"] = pca_coords[:, :2]

sc.tl.draw_graph(adata, init_pos='X_pca2d')

# Colour by the Trevino et al 2021 clustering.
sc.pl.draw_graph(adata, color='Cluster.Name')

### **Run Palantir to obtain multiscale diffusion space**

In [None]:
# Diffusion maps on the first two PCA columns, then the multiscale space with
# n_eigs=4; the result is stored as the "X_palantir" representation.
first_two_pcs = pca_projections.iloc[:, 0:2]
dm_res = palantir.utils.run_diffusion_maps(first_two_pcs)
ms_data = palantir.utils.determine_multiscale_space(dm_res, n_eigs=4)
adata.obsm["X_palantir"] = ms_data.values

## **Tree learning with EPG** - Exploration

### **Selecting the number of nodes: 25**

In [None]:
# Learn a 25-node tree with the "epg" method on the palantir representation
# (CPU, fixed seed), then draw it.
sc.set_figure_params(figsize=(4,4))

scf.tl.tree(
    adata,
    method="epg",
    Nodes=25,
    use_rep="palantir",
    device="cpu",
    seed=1,
)
scf.pl.graph(adata)

#### Projecting onto the PCA dimensionality reduction

In [None]:
# Draw the learned graph on the PCA basis.
sc.set_figure_params(figsize=(4,4))

scf.pl.graph(
    adata,
    basis="pca",
)

### **Selecting a root using FOS expression and computing pseudotime**

In [None]:
# Show the AnnData summary (available obs/var/uns/obsm/layers keys) before rooting the tree.
adata

In [None]:
# Root selection driven by the FOS feature.
root_feature = "FOS"
scf.tl.root(adata, root_feature)

# Soft assignment; the two positional values are passed through unchanged
# (presumably sigma and lambda -- confirm against the scFates documentation).
scf.tl.convert_to_soft(adata, 1, 1000)

# Pseudotime over 100 mappings, 16 parallel jobs, fixed seed.
scf.tl.pseudotime(adata, n_jobs=16, n_map=100, seed=42)

In [None]:
# Trajectory plot with the default settings at a 4x4 figure size.
sc.set_figure_params(
    figsize=(4,4),
)

scf.pl.trajectory(adata)

In [None]:
# Force-directed layout coloured by segment and by milestone (before renaming).
sc.set_figure_params(figsize=(4,4))

tree_annotations = ["seg","milestones"]
sc.pl.draw_graph(adata, color=tree_annotations)

In [None]:
# Human-readable milestone names, given in the order scFates expects for the
# existing milestones.
# NOTE(review): the mapping is positional -- confirm it still matches if the tree is relearned.
milestone_names = ["transient","IPC","oRG","vRG"]
scf.tl.rename_milestones(adata, milestone_names)

In [None]:
# Same panels as before, now showing the renamed milestones.
sc.set_figure_params(figsize=(4,4))

tree_annotations = ["seg","milestones"]
sc.pl.draw_graph(adata, color=tree_annotations)

In [None]:
# Single-branch copies of the tree, each starting at the vRG milestone:
# one ending at oRG, one ending at IPC. `adata` itself is left untouched (copy=True).
root_to_oRG = scf.tl.subset_tree(
    adata,
    root_milestone="vRG",
    milestones=["oRG"],
    copy=True,
)
root_to_IPC = scf.tl.subset_tree(
    adata,
    root_milestone="vRG",
    milestones=["IPC"],
    copy=True,
)

### **Dendrogram representation**

In [None]:
# Compute the dendrogram representation of the tree; the plotting cells below use basis="dendro".
scf.tl.dendrogram(adata)

In [None]:
# Dendrogram coloured by milestone.
sc.set_figure_params(figsize=(4,4))

scf.pl.dendrogram(
    adata,
    color="milestones",
)

In [None]:
# Milestone dendrogram with the legend in the right margin and no frame.
scf.pl.dendrogram(
    adata,
    color="milestones",
    legend_loc="right margin",
    color_milestones=True,
    legend_fontoutline=True,
    frameon=False,
)

In [None]:
# Narrow, frameless dendrograms saved at 300 dpi: pseudotime, milestones, segments.
sc.set_figure_params(figsize=(1.5,4),frameon=False,dpi_save=300)

scf.pl.dendrogram(adata, color="t", show_info=False, cmap="viridis", save="_pseudotimeDendrogram")

# The two categorical versions share every option except the colour key and file suffix.
# NOTE(review): "_milestoensDendrogram" looks like a typo for "milestones"; kept
# as-is so the output file name does not change.
for color_key, file_suffix in (
    ("milestones", "_milestoensDendrogram"),
    ("seg", "_segmentsDendrogram"),
):
    scf.pl.dendrogram(
        adata,
        color=color_key,
        legend_loc="on data",
        color_milestones=True,
        legend_fontoutline=True,
        save=file_suffix,
    )

## Test and fit features associated with the tree

Let's find out which genes are significantly changing along the tree.

In [None]:
# Test which features change along the tree, using 16 parallel jobs.
# A_cut=0.8 is passed as the cutoff (presumably an amplitude threshold -- confirm
# against the scFates documentation).
scf.tl.test_association(adata,n_jobs=16, A_cut=0.8)

In [None]:
# Visual summary of the association test run in the previous cell.
scf.pl.test_association(adata)

In [None]:
# Names of the genes flagged as significant in adata.var['signi'] (n=233 when this was run).
is_significant = adata.var['signi'] == True
adata.var.index[is_significant]

In [None]:
# Fit the features along the tree with 16 parallel jobs.
# NOTE(review): a later cell reads adata.layers['fitted'], presumably written here -- confirm.
scf.tl.fit(adata,n_jobs=16)

In [None]:
# Segment ids drawn directly on the force-directed layout, to read off which id
# belongs to which branch. show=False leaves the figure to the notebook's
# automatic display.
sc.set_figure_params()

sc.pl.draw_graph(
    adata,
    color=["seg"],
    legend_loc="on data",
    show=False,
    legend_fontoutline=True,
)

In [None]:
# Map segment ids to named segments: '1' -> s_IPC, '3' -> s_vRG, anything else -> s_oRG.
seg_ids = adata.obs['seg']
named_segments = {'1': 's_IPC', '3': 's_vRG'}
adata.obs['segmentation'] = np.select(
    [seg_ids == seg_id for seg_id in named_segments],
    list(named_segments.values()),
    default='s_oRG',
)

In [None]:
!ls /home/jovyan/jm_jlab/data_indNeuro/scFates_output

### **Saving for later analysis**

#### Plotting specific markers

In [None]:
# Write the gene names, one per line, without index or header.
gene_list_path = "/home/jovyan/jm_jlab/data_indNeuro/scFates_test_gene_tree.tsv"
gene_names = pd.DataFrame(adata.var_names)
gene_names.to_csv(gene_list_path, sep='\t', index=False, header=None)

In [None]:
# KLF6 trend next to the dendrogram embedding, in a narrow frameless figure.
sc.set_figure_params(figsize=(.8,4),frameon=False)
scf.set_figure_pubready()

scf.pl.single_trend(
    adata,
    "KLF6",
    basis="dendro",
    wspace=-.25,
)

In [None]:
# KLF6 trend only (no embedding panel), rendered at 300 dpi and saved.
sc.set_figure_params(figsize=(5,3),frameon=False,dpi=300)
scf.set_figure_pubready()

scf.pl.single_trend(
    adata,
    "KLF6",
    basis="dendro",
    wspace=-.25,
    plot_emb=False,
    save="KLF6_t.png",
)

In [None]:
# One trend plot per marker gene, all with the same layout options.
sc.set_figure_params(figsize=(.8,4),frameon=False)
scf.set_figure_pubready()

for marker in ("EGR1", "HOPX", "EOMES", "GLI3"):
    scf.pl.single_trend(adata, marker, basis="dendro", wspace=-.25)

In [None]:
# Dimensions of the 'fitted' layer.
adata.layers['fitted'].shape

In [None]:
# Independent copy restricted to the genes flagged in adata.var['highly_variable'].
plot_fig1_data = adata[:,adata.var['highly_variable']].copy()

In [None]:
# Keep the 20 genes with the lowest 'highly_variable_rank' values.
ranked_var = plot_fig1_data.var.sort_values(by='highly_variable_rank')
top_ranked_genes = ranked_var.head(20).index
plot_fig1_data2 = plot_fig1_data[:, top_ranked_genes].copy()

In [None]:
# log1p of the raw-count layer as a cells x genes table, indexed by the 't'
# column of obs and sorted by that index.
log_counts = np.log1p(plot_fig1_data2.layers['raw_counts'].todense())
plot_fig1_df = pd.DataFrame(
    log_counts,
    columns=plot_fig1_data2.var_names,
    index=plot_fig1_data2.obs['t'],
)
plot_fig1_df = plot_fig1_df.sort_index()

In [None]:
# Genes as rows (clustered), cells as columns kept in the table's order (not clustered).
genes_by_cells = plot_fig1_df.transpose()
g = sns.clustermap(
    genes_by_cells,
    figsize=(5, 15),
    row_cluster=True,
    col_cluster=False,
    xticklabels=False,
    dendrogram_ratio=(.2, .3),
)
ax = g.ax_heatmap
ax.set(xlabel="pseudotime", ylabel="")
plt.savefig("/home/jovyan/jm_jlab/test.png",dpi=300)

## **Final plots**

In [None]:
# Echo the output directory the final figure will be written to.
figures_path

In [None]:
# Draft multi-panel layout on a 4x6 grid. Nothing is written to disk here (the
# savefig call at the bottom is left commented out).
# GridSpec and plt come from the import cell at the top of the notebook.
sc.set_figure_params(figsize=(20, 11),dpi_save=300)

fig = plt.figure()
gs = GridSpec(4, 6)

# Left block: PCA, dendrogram, large graph panel and two blank spacer panels.
ax0 = fig.add_subplot(gs[0, 0:1])
ax1 = fig.add_subplot(gs[1:3, 0:2])
ax2 = fig.add_subplot(gs[0, 1:2])
ax3 = fig.add_subplot(gs[3:4, 0:1])
ax33 = fig.add_subplot(gs[3:4, 1:2])

# Third column: one marker-expression panel per row.
ax20, ax21, ax22 = [fig.add_subplot(gs[row, 2:3]) for row in range(3)]

# Right block: one expression-trend panel per row.
ax5, ax6, ax7 = [fig.add_subplot(gs[row, 3:6]) for row in range(3)]

# PCA plot
sc.pl.pca(adata, color=["Age"], frameon=True, ax=ax0, show=False, title="", legend_loc=None)

scf.pl.dendrogram(adata, color="milestones", ax=ax2, legend_loc="on data", frameon=False,
                  show=False, color_milestones=True, legend_fontoutline=True, title="")

scf.pl.graph(adata, basis="draw_graph_fa", ax=ax1,
             color_cells=['t'], forks=False, tips=False, show=False, size_nodes=75)

# Spacer panels stay empty.
ax3.axis('off')
ax33.axis('off')

# Marker expression on the force-directed layout.
for gene, marker_ax in (("EGR1", ax20), ("HOPX", ax21), ("EOMES", ax22)):
    sc.pl.draw_graph(adata, color=gene, ax=marker_ax, show=False, title="")

# Expression trends; only the first panel is drawn with title=False.
for gene, trend_ax, show_title in (("EGR1", ax5, False), ("HOPX", ax6, True), ("EOMES", ax7, True)):
    scf.pl.single_trend(adata, gene, basis="dendro", ax_trend=trend_ax, wspace=-.25,
                        ylab=None, title=show_title, plot_emb=False, show=False)

# Clear the x-label on the two upper trend panels; the bottom one (ax7) keeps its label.
# (The original cleared ax6 twice, which had no additional effect.)
for trend_ax in (ax5, ax6):
    trend_ax.set_xlabel('')

#fig.savefig(figures_path+'Fig1.pdf',dpi=300)

In [None]:
# Final Figure 1 on a 3x6 grid, written to figures_path as Fig1.pdf.
# GridSpec and plt come from the import cell at the top of the notebook.
sc.set_figure_params(figsize=(20, 11),dpi_save=300)

fig = plt.figure()
gs = GridSpec(3, 6)

# Two left columns: blank top row, PCA in the middle row, graph and dendrogram at the bottom.
ax0 = fig.add_subplot(gs[0, 0:1])
ax1 = fig.add_subplot(gs[1:2, 0:1])
ax2 = fig.add_subplot(gs[0, 1:2])
ax3 = fig.add_subplot(gs[2:3, 0:1])
ax33 = fig.add_subplot(gs[2:3, 1:2])

# Columns 2-3: one expression-trend panel per row.
ax5, ax6, ax7 = [fig.add_subplot(gs[row, 2:4]) for row in range(3)]

# Space for summary strategy
ax0.axis('off')
ax2.axis('off')

# PCA plot
sc.pl.pca(adata, color=["Age"], frameon=True, ax=ax1, show=False, title="", legend_loc=None)

# Bottom left
scf.pl.graph(adata, basis="draw_graph_fa", ax=ax3, forks=False, tips=False, show=False)

scf.pl.dendrogram(adata, color="milestones", ax=ax33, legend_loc="on data", frameon=False,
                  show=False, color_milestones=False, legend_fontoutline=False, title="")

# Expression trends; only the first panel is drawn with title=False.
for gene, trend_ax, show_title in (("EGR1", ax5, False), ("HOPX", ax6, True), ("EOMES", ax7, True)):
    scf.pl.single_trend(adata, gene, basis="dendro", ax_trend=trend_ax, wspace=-.25,
                        ylab=None, title=show_title, plot_emb=False, show=False)

# Clear the x-label on the two upper trend panels; the bottom one (ax7) keeps its label.
# (The original cleared ax6 twice, which had no additional effect.)
for trend_ax in (ax5, ax6):
    trend_ax.set_xlabel('')

fig.tight_layout(pad=3.0)

fig.savefig(figures_path+'Fig1.pdf',dpi=300)