# Pseudotime-informed NMF - <span style="color:red">ALL STAGES</span>

**CellOracle environment**

In [1]:
import scanpy as sc

import celloracle as co

import pandas as pd

import seaborn as sns

import numpy as np

import matplotlib.pyplot as plt

# visualization settings
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams['figure.figsize'] = (15,7)
plt.rcParams["savefig.dpi"] = 600

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


# Trevino et al. 2021 dataset

## Loading raw data

In [2]:
# Raw counts for the Trevino et al. dataset
adata = sc.read_h5ad("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/rna_counts.h5ad")

# scFates output; the printed AnnData summary below shows it contributes the
# 'milestones', 'segmentation' and 't' columns
milestones = pd.read_csv("/home/jovyan/jm_jlab/data_indNeuro/scFates_output/scFates_clustering.tsv", sep='\t', index_col=0)

# Align the scFates table to the cells of `adata` before concatenating, so the
# result always has exactly adata's rows in adata's order (a plain outer concat
# would add rows for any barcode present only in `milestones`).
adata.obs = pd.concat([adata.obs, milestones.reindex(adata.obs_names)], axis=1)

cellembeddings = pd.read_csv("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/cellembeddings.tsv", sep='\t')
cellembeddings.iloc[:,1] = cellembeddings.iloc[:,1]*-1 #for better geometry

# Match embedding rows to cells by barcode when the table is indexed by the same
# labels as adata (as is done for the Polioudakis dataset further down).
# Otherwise keep the rows in file order, which is what this cell did before.
if cellembeddings.index.is_unique and adata.obs_names.isin(cellembeddings.index).all():
    cellembeddings = cellembeddings.reindex(adata.obs_names)
else:
    # NOTE(review): embeddings are not indexed by adata's cell names here, so the
    # assignment below is positional - confirm the TSV rows are in adata order.
    assert len(cellembeddings) == adata.n_obs, "embedding rows do not match the number of cells"

adata.obsm['X_pca'] = cellembeddings.to_numpy() #sharing embeddings

## oRG

In [3]:
# Build the pseudotime-ordered expression matrix for the vRG -> oRG branch.
# Take an explicit copy: the scanpy preprocessing calls below modify the object
# in place, which should not be done on a view of `adata`.
branch_oRG = adata[adata.obs['segmentation'].isin(['s_vRG', 's_oRG']), :].copy()

sc.pp.filter_genes(branch_oRG, min_cells=25)

sc.pp.highly_variable_genes(branch_oRG, n_top_genes=4000, flavor='seurat_v3')

branch_oRG = branch_oRG[:, branch_oRG.var['highly_variable'].to_numpy(dtype=bool)].copy()

# Order the cells by pseudotime 't' once, with a stable sort, and reorder the
# whole AnnData so that X and obs stay in sync. Previously the matrix and the
# metadata were sorted separately and re-matched by joining on the float
# pseudotime value, which fails as soon as two cells share the same 't'.
pseudotime_order = np.argsort(branch_oRG.obs['t'].astype(np.float64).to_numpy(), kind='stable')
branch_oRG = branch_oRG[pseudotime_order, :].copy()

my_data = pd.DataFrame(branch_oRG.X.toarray(), index=branch_oRG.obs['t'], columns=branch_oRG.var_names)

for_NMF = sc.AnnData(my_data)

#Including metadata to final object
# Rows of `branch_oRG.obs` are already in the same order as `for_NMF`, so the
# metadata is attached positionally under for_NMF's own (pseudotime) row labels.
obs_sorted = branch_oRG.obs.copy()
obs_sorted.index = for_NMF.obs_names
for_NMF.obs = obs_sorted

for_NMF

AnnData object with n_obs × n_vars = 2413 × 4000
    obs: 'orig.ident', 'Sample.ID', 'Age', 'Batch', 'Cluster.Name', 'percent.mt', 'percent.zeros', 'nCount_RNA', 'nFeature_RNA', 'milestones', 'segmentation', 't'

In [4]:
# Save the Trevino vRG -> oRG matrix (4k variable genes) as .h5ad
for_NMF.write(
    "/home/jovyan/jm_jlab/data_indNeuro/2.NMF/branch_ALL_vRGtooRG_4k.h5ad"
)

## IPC

In [5]:
# Cells of the vRG -> IPC branch. Take an explicit copy: the scanpy
# preprocessing calls below modify the object in place, which should not be
# done on a view of `adata`.
branch_IPC = adata[adata.obs['segmentation'].isin(['s_vRG', 's_IPC']), :].copy()

sc.pp.filter_genes(branch_IPC, min_cells=25)

sc.pp.highly_variable_genes(branch_IPC, n_top_genes=4000, flavor='seurat_v3')

branch_IPC = branch_IPC[:, branch_IPC.var['highly_variable'].to_numpy(dtype=bool)].copy()

# Dense cells x genes table labelled by pseudotime 't'
# (toarray() yields a plain ndarray instead of the legacy np.matrix from todense()).
my_data = pd.DataFrame(branch_IPC.X.toarray(), index=branch_IPC.obs['t'], columns=branch_IPC.var_names)

# Ascending pseudotime; mergesort is stable, so cells with equal 't' keep a
# deterministic relative order, and re-running the cell gives the same table.
my_data = my_data.sort_index(ascending=True, kind='mergesort')

In [6]:
# Wrap the pseudotime-sorted expression table in an AnnData object and show it
for_NMF = sc.AnnData(X=my_data)

for_NMF

AnnData object with n_obs × n_vars = 2059 × 4000

In [7]:
#Including metadata to final object
# The expression matrix and its metadata are rebuilt here from `branch_IPC`
# using one stable pseudotime ordering, so every row of `for_NMF.X` is paired
# with the metadata of the same cell. Matching the two by joining on the float
# pseudotime value (the previous approach) fails when two cells share a 't'.
pseudotime_order = np.argsort(branch_IPC.obs['t'].astype(np.float64).to_numpy(), kind='stable')
branch_IPC_sorted = branch_IPC[pseudotime_order, :]

for_NMF = sc.AnnData(
    pd.DataFrame(
        branch_IPC_sorted.X.toarray(),
        index=branch_IPC_sorted.obs['t'],
        columns=branch_IPC_sorted.var_names,
    )
)

# Attach the metadata positionally under for_NMF's own (pseudotime) row labels
obs_sorted = branch_IPC_sorted.obs.copy()
obs_sorted.index = for_NMF.obs_names
for_NMF.obs = obs_sorted

In [8]:
# Save the Trevino vRG -> IPC matrix (4k variable genes) as .h5ad
for_NMF.write(
    "/home/jovyan/jm_jlab/data_indNeuro/2.NMF/branch_ALL_vRGtoIPC_4k.h5ad"
)

# Polioudakis et al. 2019 dataset

In [9]:
# Raw counts for the Polioudakis et al. dataset
adata = sc.read_h5ad("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/pol19_counts.h5ad")

# scFates output; the printed AnnData summaries below show it contributes the
# 'milestones', 'segmentation' and 't' columns
milestones = pd.read_csv("/home/jovyan/jm_jlab/data_indNeuro/scFates_output/polioudakis19_scFates_clustering.tsv", sep='\t', index_col=0)

# Align the scFates table to the cells of `adata` before concatenating, so the
# result always has exactly adata's rows in adata's order (a plain outer concat
# would add rows for any barcode present only in `milestones`).
adata.obs = pd.concat([adata.obs, milestones.reindex(adata.obs_names)], axis=1)


#Including embeddings
cellembeddings = pd.read_csv("/home/jovyan/jm_jlab/data_indNeuro/intermediate_files/integrated_polioudakis19_cellembeddings.tsv", sep='\t')

# Flip the sign of the first two components #for consistent better geometry
cellembeddings.iloc[:, :2] = cellembeddings.iloc[:, :2] * -1

# Keep only the rows whose label contains "pol19" and strip the "pol19_" prefix
# so that the labels can be compared with adata's cell names.
cellembeddings = cellembeddings[cellembeddings.index.str.contains("pol19")]

cellembeddings.index = cellembeddings.index.str.split("pol19_").str[1]

# Restrict adata to cells that have an embedding, then put the embedding rows
# in adata's cell order (reindex also drops embeddings of cells not in adata).
adata = adata[adata.obs.index.isin(cellembeddings.index),:].copy()

cellembeddings = cellembeddings.reindex(adata.obs.index)

adata.obsm['X_pca'] = cellembeddings.to_numpy() #sharing embeddings

## oRG

In [10]:
# Build the pseudotime-ordered expression matrix for the vRG -> oRG branch.
# Take an explicit copy: the scanpy preprocessing calls below modify the object
# in place, which should not be done on a view of `adata`.
branch_oRG = adata[adata.obs['segmentation'].isin(['s_vRG', 's_oRG']), :].copy()

sc.pp.filter_genes(branch_oRG, min_cells=25)

sc.pp.highly_variable_genes(branch_oRG, n_top_genes=4000, flavor='seurat_v3')
branch_oRG = branch_oRG[:, branch_oRG.var['highly_variable'].to_numpy(dtype=bool)].copy()

# Order the cells by pseudotime 't' once, with a stable sort, and reorder the
# whole AnnData so that X and obs stay in sync. Previously the matrix and the
# metadata were sorted separately and re-matched by joining on the float
# pseudotime value, which fails as soon as two cells share the same 't'.
pseudotime_order = np.argsort(branch_oRG.obs['t'].astype(np.float64).to_numpy(), kind='stable')
branch_oRG = branch_oRG[pseudotime_order, :].copy()

my_data = pd.DataFrame(branch_oRG.X.toarray(), index=branch_oRG.obs['t'], columns=branch_oRG.var_names)

for_NMF = sc.AnnData(my_data)

#Including metadata to final object
# Rows of `branch_oRG.obs` are already in the same order as `for_NMF`, so the
# metadata is attached positionally under for_NMF's own (pseudotime) row labels.
obs_sorted = branch_oRG.obs.copy()
obs_sorted.index = for_NMF.obs_names
for_NMF.obs = obs_sorted

for_NMF

AnnData object with n_obs × n_vars = 1405 × 4000
    obs: 'orig.ident', 'Cluster', 'Subcluster', 'Donor', 'Layer', 'Gestation_week', 'Library', 'percent.mt', 'percent.zeros', 'nCount_RNA', 'nFeature_RNA', 'milestones', 'segmentation', 't'

In [11]:
# Save the Polioudakis vRG -> oRG matrix (4k variable genes) as .h5ad
for_NMF.write(
    "/home/jovyan/jm_jlab/data_indNeuro/2.NMF/polioudakis19_vRGtooRG_4k.h5ad"
)

In [12]:
# Build the pseudotime-ordered expression matrix for the vRG -> IPC branch.
# Take an explicit copy: the scanpy preprocessing calls below modify the object
# in place, which should not be done on a view of `adata`.
branch_IPC = adata[adata.obs['segmentation'].isin(['s_vRG', 's_IPC']), :].copy()

sc.pp.filter_genes(branch_IPC, min_cells=25)

sc.pp.highly_variable_genes(branch_IPC, n_top_genes=4000, flavor='seurat_v3')
branch_IPC = branch_IPC[:, branch_IPC.var['highly_variable'].to_numpy(dtype=bool)].copy()

# Order the cells by pseudotime 't' once, with a stable sort, and reorder the
# whole AnnData so that X and obs stay in sync. Previously the matrix and the
# metadata were sorted separately and re-matched by joining on the float
# pseudotime value, which fails as soon as two cells share the same 't'.
pseudotime_order = np.argsort(branch_IPC.obs['t'].astype(np.float64).to_numpy(), kind='stable')
branch_IPC = branch_IPC[pseudotime_order, :].copy()

my_data = pd.DataFrame(branch_IPC.X.toarray(), index=branch_IPC.obs['t'], columns=branch_IPC.var_names)

for_NMF = sc.AnnData(my_data)

#Including metadata to final object
# Rows of `branch_IPC.obs` are already in the same order as `for_NMF`, so the
# metadata is attached positionally under for_NMF's own (pseudotime) row labels.
obs_sorted = branch_IPC.obs.copy()
obs_sorted.index = for_NMF.obs_names
for_NMF.obs = obs_sorted

for_NMF

AnnData object with n_obs × n_vars = 2610 × 4000
    obs: 'orig.ident', 'Cluster', 'Subcluster', 'Donor', 'Layer', 'Gestation_week', 'Library', 'percent.mt', 'percent.zeros', 'nCount_RNA', 'nFeature_RNA', 'milestones', 'segmentation', 't'

In [13]:
# Save the Polioudakis vRG -> IPC matrix (4k variable genes) as .h5ad
for_NMF.write(
    "/home/jovyan/jm_jlab/data_indNeuro/2.NMF/polioudakis19_vRGtoIPC_4k.h5ad"
)