# *Polioudakis 2019 dataset:* Inferring neural progenitor states during indirect neurogenesis from single cell data - scFates

In [None]:
import os  # local import: this cell runs before the notebook's main import cell

# Display the kernel's current working directory. Written as plain Python so
# the cell does not rely on IPython automagic resolving a bare `pwd`.
os.getcwd()

### **Loading modules and settings**

In [None]:
import sys
# Optional one-off install of the extra dependencies, kept disabled:
#!{sys.executable} -m pip -q install palantir fa2
import warnings
# NOTE(review): blanket filter — hides every warning for the rest of the
# session, including any raised by the analysis libraries; consider narrowing.
warnings.filterwarnings("ignore")
from anndata import AnnData
import numpy as np
import pandas as pd
import scanpy as sc
import scFates as scf
import palantir
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

# Send scanpy's log output to stdout at verbosity level 3.
sc.settings.verbosity = 3
sc.settings.logfile = sys.stdout
## fix palantir breaking down some plots
# (seaborn is imported only to reset its styling back to the original defaults)
import seaborn 
seaborn.reset_orig()
%matplotlib inline

# Default scanpy figure parameters first, then scFates' publication-ready style.
sc.set_figure_params()
scf.set_figure_pubready()

### **Loading data with Seurat PCA coordinates and normalization**

In [None]:
# JM
# Root of the project data; every input of this cell is resolved against it.
DATA_DIR = "/home/jovyan/jm_jlab/data_indNeuro"

adata = sc.read_h5ad(f"{DATA_DIR}/intermediate_files/pol19_counts.h5ad")

# Per-cell annotations written by an earlier scFates clustering step.
milestones = pd.read_csv(
    f"{DATA_DIR}/scFates_output/polioudakis19_scFates_clustering.tsv",
    sep="\t",
    index_col=0,
)
# Align the annotations to the cells of `adata` before concatenating: a plain
# column-wise concat takes the union of both indices, so any extra row in the
# table would change the length of the frame and break the `.obs` assignment.
adata.obs = pd.concat([adata.obs, milestones.reindex(adata.obs.index)], axis=1)

cellembeddings = pd.read_csv(
    f"{DATA_DIR}/intermediate_files/integrated_polioudakis19_cellembeddings.tsv",
    sep="\t",
)
# Flip the sign of the first two components (for consistent better geometry).
cellembeddings.iloc[:, 0] = cellembeddings.iloc[:, 0] * -1
cellembeddings.iloc[:, 1] = cellembeddings.iloc[:, 1] * -1

# Keep only rows whose name mentions this dataset, then reduce each name to
# the part that follows the "pol19_" tag so it can be matched to `adata`.
cellembeddings = cellembeddings[cellembeddings.index.str.contains("pol19")]
cellembeddings.index = cellembeddings.index.str.split("pol19_").str[1]

# Restrict both objects to their shared cells; the embedding rows are then put
# in the same order as `adata`.
adata = adata[adata.obs.index.isin(cellembeddings.index), :].copy()
cellembeddings = cellembeddings[cellembeddings.index.isin(adata.obs.index)].copy()
cellembeddings = cellembeddings.reindex(adata.obs.index)

adata.obsm["X_pca"] = cellembeddings.to_numpy()  # sharing embeddings
pca_projections = pd.DataFrame(adata.obsm["X_pca"], index=adata.obs_names)

In [None]:
# Remove the pre-existing 't' column now (it will give problems later on).
# Re-assignment with errors="ignore" keeps the cell re-runnable: a second
# execution no longer raises KeyError once the column is already gone.
adata.obs = adata.obs.drop(columns=["t"], errors="ignore")

In [None]:
# Keep genes detected in at least 50 cells.
sc.pp.filter_genes(adata, min_cells=50)

# flavor='seurat_v3' is documented to expect count data, so the highly variable
# genes are selected BEFORE per-cell normalisation. (Running it afterwards only
# triggers a warning, which the notebook-wide warnings filter hides.)
# NOTE(review): assumes pol19_counts.h5ad holds unnormalised counts — confirm.
# NOTE(review): this changes the selected gene set relative to runs that
# normalised first; re-check gene counts quoted further down the notebook.
sc.pp.highly_variable_genes(adata, n_top_genes=4000, flavor='seurat_v3')

# Per-cell normalisation; the per-cell totals are stored in obs['n_counts_all'].
sc.pp.normalize_per_cell(adata, key_n_counts='n_counts_all')

In [None]:
# Restrict to the highly variable genes. Take a real copy: the slice alone is
# only a view, and the steps below modify the object in place.
# NOTE: this cell overwrites `adata`; re-running it would normalise and
# log-transform the already transformed matrix again.
adata = adata[:, adata.var.highly_variable].copy()

# Renormalized after filtering
sc.pp.normalize_per_cell(adata, key_n_counts='n_counts_all')

# Keep the data as it is before log transformation. At this point the matrix
# has already been normalised above, so `.raw` and the "raw_count" layer hold
# normalised, non-log values.
adata.raw = adata.copy()
adata.layers["raw_count"] = adata.raw.X.copy()

# Log transformation
sc.pp.log1p(adata)

In [None]:
# Overview figure: gestation week on the wide top panel, one marker gene per
# panel on the bottom row. `plt` and `GridSpec` come from the import cell.
sc.set_figure_params(figsize=(9, 9), dpi_save=300)

fig = plt.figure()
gs = GridSpec(2, 3)  # 2 rows, 3 columns

ax1 = fig.add_subplot(gs[0, 0:2])
ax2, ax3, ax4 = (fig.add_subplot(gs[1, col]) for col in range(3))

sc.pl.pca(adata, color=["Gestation_week"], frameon=True, ax=ax1, show=False, title="")
for gene, gene_ax in zip(("EGR1", "HOPX", "EOMES"), (ax2, ax3, ax4)):
    sc.pl.pca(adata, color=[gene], frameon=False, ax=gene_ax, show=False)

#fig.savefig('/home/jovyan/jm_jlab/data_indNeuro/tmp_figures/FigXt.pdf')

**Clustering from original publication**

In [None]:
# Small square panels for the following plots.
sc.set_figure_params(figsize=(4,4))

# PCA embedding coloured by the 'Cluster' annotation stored in adata.obs.
sc.pl.pca(adata, color=['Cluster'])

In [None]:
from pathlib import Path

# Make sure the temporary figure directory exists (parents included); nothing
# happens if it is already there.
tmp_figures_dir = Path("/home/jovyan/jm_jlab/data_indNeuro/tmp_figures/")
tmp_figures_dir.mkdir(parents=True, exist_ok=True)

### **Computing force-directed graph**

In [None]:
# Neighbourhood graph restricted to the first two components of the embedding.
# Also necessary for the later force-directed graph computation.
sc.pp.neighbors(
    adata,
    n_neighbors=50,
    n_pcs=2,
    method='gauss',
)

In [None]:
# Store the first two embedding components under their own key so that they
# can be handed to the layout as initial positions.
adata.obsm["X_pca2d"] = adata.obsm["X_pca"][:, 0:2]
sc.tl.draw_graph(adata, init_pos="X_pca2d")

# Layout coloured by the Polioudakis et al 2019 clustering
sc.pl.draw_graph(adata, color="Cluster")

### **Run Palantir to obtain multiscale diffusion space**

In [None]:
# Diffusion maps on the first two components, then the multiscale space built
# from 4 eigenvectors; the result is stored next to the other embeddings.
dm_res = palantir.utils.run_diffusion_maps(pca_projections.iloc[:, :2])
ms_data = palantir.utils.determine_multiscale_space(dm_res, n_eigs=4)
adata.obsm["X_palantir"] = ms_data.to_numpy()

## Tree learning

## **Tree learning with EPG** - Exploration

## **Tree learning with EPG** - Selection

In [None]:
sc.set_figure_params(figsize=(4, 4))

# Learn the tree with the "epg" method (50 nodes, fixed seed, CPU) on the
# palantir representation, then plot it.
scf.tl.tree(
    adata,
    method="epg",
    Nodes=50,
    use_rep="palantir",
    device="cpu",
    seed=1,
)
scf.pl.graph(adata)

#### Projecting into PCA dim red

In [None]:
# Display the AnnData summary to inspect what the tree step stored.
adata

In [None]:
sc.set_figure_params(figsize=(4,4))

# Same tree, drawn on the PCA embedding.
scf.pl.graph(adata, basis="pca")

### Selecting a root and computing pseudotime

Using FOS marker

In [None]:
# Display the AnnData summary again before choosing the root.
adata

In [None]:
# Show whether FOS is among the retained genes (an empty Index means it is not).
adata.var_names[adata.var_names.isin(["FOS"])]

In [None]:
# Pick the root of the tree using the FOS marker.
scf.tl.root(adata,"FOS")

# NOTE(review): the two positional numbers are presumably the sigma and lambda
# parameters of the soft assignment — confirm against the scFates signature.
scf.tl.convert_to_soft(adata,1,1000) #soft assignment

In [None]:
# Sanity check: dimensions of the 'X_R' matrix stored in adata.obsm.
adata.obsm['X_R'].shape

In [None]:
# Sanity check: dimensions of the shared PCA embedding.
adata.obsm['X_pca'].shape

In [None]:
# Sanity check: dimensions of the palantir multiscale space.
adata.obsm['X_palantir'].shape

In [None]:
# Re-plot the tree after rooting and soft assignment.
scf.pl.graph(adata)

In [None]:
# Pseudotime along the rooted tree: 100 mappings, 16 parallel jobs, fixed seed.
scf.tl.pseudotime(
    adata,
    n_map=100,
    n_jobs=16,
    seed=42,
)

In [None]:
sc.set_figure_params(figsize=(4,4))

# Trajectory plot of the fitted tree.
scf.pl.trajectory(adata)

In [None]:
sc.set_figure_params(figsize=(4,4))

# Force-directed layout coloured by segment and by milestone (default names).
sc.pl.draw_graph(adata,color=["seg","milestones"]) 

In [None]:
# Give the milestones biological names.
# NOTE(review): the names are passed as a positional list, so the mapping
# presumably follows the order of the existing milestone categories — verify
# against the plot above whenever the tree is re-fitted.
scf.tl.rename_milestones(adata,["transient","IPC","oRG","vRG"])

In [None]:
sc.set_figure_params(figsize=(4,4))

# Same plot as before, now showing the renamed milestones.
sc.pl.draw_graph(adata,color=["seg","milestones"])

**Highlight single-branch**

In [None]:
#Single branch
root_to_oRG=scf.tl.subset_tree(adata,root_milestone="vRG",milestones=["oRG"],copy=True)
#
root_to_IPC=scf.tl.subset_tree(adata,root_milestone="vRG",milestones=["IPC"],copy=True)

In [None]:
sc.set_figure_params(figsize=(4, 4))

# All cells in a pale colour as background, with the vRG -> oRG sub-tree drawn
# on top of the same axes.
ax = sc.pl.scatter(adata, basis="draw_graph_fa", color="whitesmoke", show=False)
scf.pl.graph(
    root_to_oRG,
    basis="draw_graph_fa",
    size_nodes=.1,
    ax=ax,
)  # , save="_tr21_oRGbranch.pdf")

In [None]:
sc.set_figure_params(figsize=(4, 4))

# All cells in a pale colour as background, with the vRG -> IPC sub-tree drawn
# on top of the same axes.
ax = sc.pl.scatter(adata, basis="draw_graph_fa", color="whitesmoke", show=False)
scf.pl.graph(
    root_to_IPC,
    basis="draw_graph_fa",
    size_nodes=.1,
    ax=ax,
)  # , save="_tr21_IPCbranch.pdf")

In [None]:
# 2x2 summary on the force-directed layout: tree graph, trajectory, segments
# and milestones.
sc.set_figure_params()
fig, axs = plt.subplots(2, 2, figsize=(8, 8))
axs = axs.ravel()

scf.pl.graph(adata, basis="draw_graph_fa", show=False, ax=axs[0])
scf.pl.trajectory(adata, basis="draw_graph_fa", show=False, ax=axs[1])
sc.pl.draw_graph(
    adata,
    color=["seg"],
    legend_loc="on data",
    legend_fontoutline=True,
    show=False,
    ax=axs[2],
)
scf.pl.milestones(adata, ax=axs[3], show=False, annotate=True)

### **Dendrogram representation**

In [None]:
# Compute the dendrogram representation of the tree.
scf.tl.dendrogram(adata)

# Dendrogram coloured by segment.
scf.pl.dendrogram(adata,color="seg")

In [None]:
# Dendrogram coloured by milestone.
scf.pl.dendrogram(adata,color="milestones")

In [None]:
import os  # local import keeps this cell runnable on its own

# Display the working directory before the next cell saves figures. Plain
# Python, so the cell does not rely on IPython automagic resolving `pwd`.
os.getcwd()

In [None]:
# Narrow, frameless panels for the saved dendrograms.
sc.set_figure_params(figsize=(1.5, 4), frameon=False, dpi_save=300)

# Pseudotime ('t') version.
scf.pl.dendrogram(adata, color="t", show_info=False, save="pol19_d1", cmap="viridis")

# Milestone and segment versions share the same on-data legend settings.
on_data_legend = dict(legend_loc="on data", color_milestones=True, legend_fontoutline=True)
scf.pl.dendrogram(adata, color="milestones", save="pol19_d2", **on_data_legend)
scf.pl.dendrogram(adata, color="seg", save="pol19_d3", **on_data_legend)

## Test and fit features associated with the tree

Let's find out which genes are significantly changing along the tree.

In [None]:
# Test genes for association with the tree.
# A_cut=0.5 is used to compare the same genes with Trevino 21; A_cut = 1 is too high.
scf.tl.test_association(adata,n_jobs=16, A_cut=0.5)

In [None]:
sc.set_figure_params()
scf.pl.test_association(adata)
#plt.savefig("figures/C.pdf",dpi=300)

In [None]:
# Names of the genes whose 'signi' flag is set (n=179 when this was recorded).
adata.var.loc[adata.var["signi"] == True].index

In [None]:
# Fit the features along the tree, using 16 parallel jobs.
scf.tl.fit(adata,n_jobs=16)

In [None]:
# PCA embedding coloured by original cluster, milestone and segment.
sc.pl.pca(adata, color = ['Cluster', 'milestones', 'seg'])

In [None]:
# Readable segment labels: segment '1' -> s_IPC, segment '3' -> s_vRG, every
# other segment -> s_oRG.
# NOTE(review): the ids '1' and '3' are hard-coded and presumably specific to
# this tree fit — re-check them if the tree is re-learned.
adata.obs['segmentation'] = np.select(
    condlist=[adata.obs['seg'] == '1', adata.obs['seg'] == '3'],
    choicelist=['s_IPC', 's_vRG'],
    default='s_oRG',
)

### **Saving for later analysis**

#### Plotting specific markers

In [None]:
sc.set_figure_params(figsize=(.8, 4), frameon=False)
scf.set_figure_pubready()

# One trend plot per marker on the dendrogram basis.
# (Optional save suffixes, in the same order: "_E1.pdf", "_E2.pdf", "_E3.pdf".)
for marker in ("EGR1", "HOPX", "EOMES"):
    scf.pl.single_trend(adata, marker, basis="dendro", wspace=-.25)

## **Final plot**

In [None]:
# Composite supplementary figure. `plt` and `GridSpec` come from the import cell.
sc.set_figure_params(figsize=(22, 14), dpi_save=300)

fig = plt.figure()
gs = GridSpec(5, 6)

# Left block: milestones panel on top, one gene-trend panel per row below it.
ax0 = fig.add_subplot(gs[0, 0:1])
ax1 = fig.add_subplot(gs[1:2, 0:2])
ax11 = fig.add_subplot(gs[2:3, 0:2])
ax111 = fig.add_subplot(gs[3:4, 0:2])

# Tree graph next to the milestones panel; ax3/ax33 sit on the same grid cells
# as ax111 and are switched off further down.
ax2 = fig.add_subplot(gs[0, 1:2])
ax3 = fig.add_subplot(gs[3:4, 0:1])
ax33 = fig.add_subplot(gs[3:4, 1:2])

# Remaining grid cells are created but left empty (switched off below).
ax20 = fig.add_subplot(gs[0, 2:3])
ax21 = fig.add_subplot(gs[1, 2:3])
ax22 = fig.add_subplot(gs[2, 2:3])

ax5 = fig.add_subplot(gs[0, 3:6])
ax6 = fig.add_subplot(gs[1, 3:6])
ax7 = fig.add_subplot(gs[2, 3:6])

# Milestones panel
scf.pl.milestones(adata, show=False, ax=ax0, annotate=True, title="")

# Tree on the force-directed layout, without fork/tip markers
scf.pl.graph(adata, basis="draw_graph_fa", ax=ax2, forks=False, tips=False, show=False)

# Marker trends along the dendrogram; only the first one is drawn untitled.
for gene, trend_ax, show_title in (
    ("EGR1", ax1, False),
    ("HOPX", ax11, True),
    ("EOMES", ax111, True),
):
    scf.pl.single_trend(
        adata,
        gene,
        basis="dendro",
        ax_trend=trend_ax,
        wspace=-.25,
        ylab=None,
        title=show_title,
        plot_emb=False,
        show=False,
    )

# Hide every placeholder axis.
for blank_ax in (ax3, ax33, ax20, ax21, ax22, ax5, ax6, ax7):
    blank_ax.axis('off')

fig.tight_layout()

fig.savefig('/home/jovyan/jm_jlab/SF_1.pdf', dpi=300)