# Load packages

In [None]:
import os
import sys
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics.pairwise import cosine_similarity

# Set up the Synapse client
from synapseclient import Synapse
syn = Synapse()
syn.login()  # Assuming you're already logged in or have set up your credentials

# Set up work path: every relative path used below ('files/...') is resolved
# against this directory. `os` must be imported for the chdir call to work.
data_dir='Pseudotime_trajectories'
os.chdir(data_dir)

# Initial setting for plot size
FIGSIZE=(6, 6)
rcParams['figure.figsize']=FIGSIZE

sc.settings.verbosity = 1
# set number of cores to use
sc.settings.n_jobs = 25
sc.settings.set_figure_params( dpi=300, fontsize=6)
sc.logging.print_header()

# Load python functions

## Initial settings: color, order, trait

In [None]:
from initial_settings import *

## Similarity of cell type

In [None]:
import similarity_cell_type

# Load R libraries

In [None]:
# monocle3 conda environment
# Load libraries
library(Seurat)
library(Signac)
library(ggplot2)
library(dplyr)
library(zellkonverter)
set.seed(222)
library(RColorBrewer)
library(viridis)
library(monocle3)
library(tidyselect)
library(grid)
library(mgcv)
library(colorspace)
library(ggrepel)
library(igraph)
library(pbapply)
library(devtools)
library(parallel)
library(evobiR)
library(tidyr)
library(cluster)
library(grDevices)
library(repr)
library(zoo)
library(ggnewscale)
library(VennDiagram)
library(ggridges)

In [None]:
# R (Py39_R43_Ju10) for pseudotime UMAT visualization
library(monocle3)
library(ggplot2)
library(cowplot)

# Load R functions

## Initial settings: color, order, trait

In [None]:
source("initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
source("trajectory_reconstruction.r")

## Load defined functions

In [None]:
source("functions.r")

# Load data

In [None]:
from pathlib import Path

# Load the integrated AnnData object. Use the local copy when it is present;
# otherwise fall back to downloading it from Synapse.
integrated_path = Path('integrated_adata.h5ad')
if not integrated_path.exists():
    # Alternative download from synapse.
    # The setup cell already changed the working directory to `data_dir`, so
    # the file is downloaded into the current directory rather than into a
    # nested `data_dir/data_dir` folder.
    syn62289304 = syn.get(entity="syn62289304", downloadLocation='.')
    # syn.get() returns an entity object; the downloaded file is at `.path`.
    integrated_path = Path(syn62289304.path)

# Always bind the result to `adata`, the name the following cells use.
adata = sc.read(integrated_path)

## Extract cells from EN

In [None]:
# Boolean mask over cells whose unified cell type is 'EN'.
en_mask = adata.obs['cell_type_uni'] == 'EN'
ad_en = adata[en_mask, :]
ad_en

In [None]:
# Persist the EN subset; this file is the input (-l) of the embedding script run further below.
ad_en.write('files/integrated_EN.h5ad')

## Similarity between two cohorts

In [None]:
# subclass-level comparison; the figure is written to the PDF path given last
similarity_cell_type.similarity_cell_type_all(
    ad_en,
    'subclass_uni',
    'Integrated_subclass_uni',
    10,
    'files/figures//en_similarity_two_cohorts_subclass_uni_all.pdf',
)


In [None]:
# subtype-level comparison; same call as for subclass with subtype columns
similarity_cell_type.similarity_cell_type_all(
    ad_en,
    'subtype_uni',
    'Integrated_subtype_uni',
    20,
    'files/figures//en_similarity_two_cohorts_subtype_uni_all.pdf',
)


# Basic UMAP and UMAT analysis

In [None]:
%%sh
# Run the embedding calculation script on the EN subset written above.
# The option meanings are defined by lineage_analysis_embeddings_calculation.py
# (not visible here). -o is presumably an output prefix: the next cell reads
# files/integrated_EN_con_clustering.h5ad -- TODO confirm against the script.

python lineage_analysis_embeddings_calculation.py \
-l files/integrated_EN.h5ad \
-r 123456 \
-v scanpy@6000 \
-p half \
-n 100 \
-o files/integrated_EN_con \
-k no

## UMAT visualization

In [None]:
# Read the clustered EN object and display its summary.
adata_6k = sc.read(
    'files/integrated_EN_con_clustering.h5ad'
)
adata_6k

### subclass_uni

In [None]:
# Build a plotting column that keeps the subclass label only for selected cells
# and is missing (pd.NA) everywhere else.
spec_subclasses = [
    'EN_L6B', 'EN_L6_CT', 'EN_L5_6_NP', 'EN_L5_ET',
    'EN_L6_IT_1', 'EN_L6_IT_2',
    'EN_L3_5_IT_1', 'EN_L3_5_IT_2', 'EN_L3_5_IT_3',
    'EN_L2_3_IT',
]
obs = adata_6k.obs
# Cells with numerical_age >= 20 that belong to one of the listed subclasses ...
age_ge_20_spec = (obs['numerical_age'] >= 20) & (obs['subclass_uni'].isin(spec_subclasses))
# ... plus every PN_dev cell regardless of age.
is_pn_dev = obs['subclass_uni'].isin(['PN_dev'])
condition = age_ge_20_spec | is_pn_dev

adata_6k.obs['subclass_uni_plot'] = pd.NA
adata_6k.obs.loc[condition, 'subclass_uni_plot'] = adata_6k.obs.loc[condition, 'subclass_uni']

In [None]:
# Draw the UMAT embedding coloured by the masked subclass column, save it as a
# PDF through the returned Figure, then close the figure.
subclass_plot_kwargs = dict(
    basis='umat',
    color='subclass_uni_plot',
    add_outline=True,
    cmap='vlag',
    # legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=2,
    frameon=False,
    palette=colors_subclass,
    outline_width=(0.5, 0.3),
    vmin=0,
    vmax=1,
    size=1,
    return_fig=True,
)
with rc_context({'figure.figsize': (6, 6)}):
    p_subclass_uni = sc.pl.embedding(adata_6k, **subclass_plot_kwargs)
p_subclass_uni.savefig(
    'files/figures/en_data_umat_subclass_uni_plot.pdf',
    transparent=True,
    format='pdf',
)
plt.close(p_subclass_uni)

### stage_id

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype whose category order comes from adata_6k.uns['stage_order'].
cat_dtype = CategoricalDtype(categories=adata_6k.uns['stage_order'], ordered=True)
# Copy of stage_id cast to that ordered dtype.
adata_6k.obs['stage_id_ord'] = adata_6k.obs['stage_id'].astype(cat_dtype)
adata_6k

In [None]:
# Draw the UMAT embedding coloured by stage_id and save it as a PDF.
# NOTE(review): the ordered column `stage_id_ord` built in the preceding cell is
# not used here -- confirm whether color='stage_id_ord' was intended.
with rc_context({'figure.figsize': (6, 6)}):
    p_stage_id = sc.pl.embedding(adata_6k, basis = 'umat', color='stage_id', add_outline=True,cmap='vlag',#legend_loc='on data',
               legend_fontsize=10, legend_fontoutline=2,frameon=False,palette=colors_stage_id,
               outline_width=(0.5, 0.3), vmin=0, vmax=1,size=8,
               return_fig=True)
# Save through the returned Figure instead of plt.savefig(), which writes
# whatever pyplot currently considers the "current figure".
p_stage_id.savefig('files/figures/en_data_umat_stage_id_plot.pdf', transparent=True, format='pdf')
# Release the figure, as the subclass_uni plotting cell does.
plt.close(p_stage_id)

In [None]:
# Keep a copy of the current X matrix in the 'ds_norm_cts' layer, then display the object.
adata_6k.layers['ds_norm_cts'] = adata_6k.X.copy()
adata_6k

In [None]:
# Overwrites the file this object was read from, now including the added obs columns and layer.
adata_6k.write('files/integrated_EN_con_clustering.h5ad')

# Pseudotime inference

In [None]:
%%sh
# Run pseudotime_infer.r on the clustered object saved above. The second
# argument is presumably an output prefix: the following R cell reads
# files/integrated_EN_con_beforeConstruction_{cds,d}.RDS -- TODO confirm.
Rscript pseudotime_infer.r \
files/integrated_EN_con_clustering.h5ad \
files/integrated_EN_con

## Data visualization

In [None]:
# Load the objects saved before trajectory construction: `cds` is passed to
# plot_cells()/colData() below, and `d` is the table (columns UMAP_1/UMAP_2,
# rownames used as labels) plotted in the node-label cells below.
cds <- readRDS("files/integrated_EN_con_beforeConstruction_cds.RDS")
d <- readRDS("files/integrated_EN_con_beforeConstruction_d.RDS")
print(cds)
print(head(d))
# dim(d) gives the number of rows that the chunked label plots iterate over.
print(dim(d))

### subclass_uni

In [None]:
# Cells coloured by subclass_uni (default colours).
plot_cells(
    cds,
    color_cells_by = "subclass_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)

# Cells coloured by stage_id using the cols_stage_id colour values.
plot_cells(
    cds,
    color_cells_by = "stage_id",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
) +
    scale_color_manual(values = cols_stage_id)

### subclass

In [None]:
# Cells coloured by the "subclass" column using the cols_subclass_uni colour values.
plot_cells(
    cds,
    color_cells_by = "subclass",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 6
) +
    scale_color_manual(values = cols_subclass_uni)


### subtype_uni

In [None]:
# Cells coloured by subtype_uni (default colours).
plot_cells(
    cds,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)


### stage_id

In [None]:
# Cells coloured by stage_id (default colours).
plot_cells(
    cds,
    color_cells_by = "stage_id",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
)

## Principal graphs

### unit: 1000

In [None]:
# Label the rows of `d` in consecutive chunks of 1000, one plot per chunk.
# The number of chunks is derived from nrow(d) instead of a hard-coded loop
# bound and row count (3576): the previous bound ran one chunk too far, giving
# a descending range (4001:3576) that indexed non-existent rows.
chunk_size <- 1000
n_rows <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_rows / chunk_size))) {
    first_row <- (chunk_idx - 1) * chunk_size + 1
    last_row <- min(chunk_idx * chunk_size, n_rows)
    chunk <- d[first_row:last_row, , drop = FALSE]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                          size = 1, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 500

In [None]:
# Label the rows of `d` in consecutive chunks of 500, one plot per chunk.
# The number of chunks is derived from nrow(d) instead of a hard-coded loop
# bound and row count (3576): the previous bound ran one chunk too far, giving
# a descending range (4001:3576) that indexed non-existent rows.
chunk_size <- 500
n_rows <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_rows / chunk_size))) {
    first_row <- (chunk_idx - 1) * chunk_size + 1
    last_row <- min(chunk_idx * chunk_size, n_rows)
    chunk <- d[first_row:last_row, , drop = FALSE]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                          size = 1.5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 200

In [None]:
# Label the rows of `d` in consecutive chunks of 200, one plot per chunk.
# The number of chunks is derived from nrow(d) instead of a hard-coded loop
# bound and row count (3576): the previous bound ran one chunk too far, giving
# a descending range (3601:3576) that indexed non-existent rows.
chunk_size <- 200
n_rows <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_rows / chunk_size))) {
    first_row <- (chunk_idx - 1) * chunk_size + 1
    last_row <- min(chunk_idx * chunk_size, n_rows)
    chunk <- d[first_row:last_row, , drop = FALSE]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                          size = 1.5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 100

In [None]:
# Label the rows of `d` in consecutive chunks of 100, one plot per chunk.
# The number of chunks is derived from nrow(d) instead of a hard-coded loop
# bound and row count (3576): the previous bound ran one chunk too far, giving
# a descending range (3601:3576) that indexed non-existent rows.
chunk_size <- 100
n_rows <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_rows / chunk_size))) {
    first_row <- (chunk_idx - 1) * chunk_size + 1
    last_row <- min(chunk_idx * chunk_size, n_rows)
    chunk <- d[first_row:last_row, , drop = FALSE]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                          size = 4, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 50

In [None]:
# Label the rows of `d` in consecutive chunks of 50, one plot per chunk.
# The number of chunks is derived from nrow(d) instead of a hard-coded loop
# bound and row count (3576): the previous bound ran two chunks too far, giving
# descending ranges (3601:3576, 3651:3576) that indexed non-existent rows.
chunk_size <- 50
n_rows <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_rows / chunk_size))) {
    first_row <- (chunk_idx - 1) * chunk_size + 1
    last_row <- min(chunk_idx * chunk_size, n_rows)
    chunk <- d[first_row:last_row, , drop = FALSE]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                          size = 4, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

## Trajectories

### Start point

In [None]:
# Highlight row 3172 of `d` (the value later used as `start`) on the full point cloud.
start_row <- d[3172, ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(data = start_row, aes(x = UMAP_1, y = UMAP_2), label = rownames(start_row),
                    size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)

### L6B

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L6B) and draw
# the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_SCUBE1", "EN_L6B")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(262),
    included = c(3314)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### L6_CT

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L6_CT) and draw
# the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_SORCS1", "EN_L6_CT_HSPA1A", "EN_L6_CT")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(2638),
    connected = c(3438, 3434)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### L5_6_NP

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L5_6_NP) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_HTR2C", "EN_L5_6_NP")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(554),
    connected = c(3438, 905)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### L6_IT_1

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L6_IT_1) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L5-6_THEMIS_dev-1", "L5-6_THEMIS_dev-2", "L5-6_THEMIS_CNR1", "EN_L6_IT_1_HSPA1A", "EN_L6_IT_1")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(548),
    connected = c(3067, 3102),
    connected = c(3119, 470)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}


### L6_IT_2

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L6_IT_2) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L5-6_THEMIS_dev-1", "L5-6_THEMIS_dev-2", "L5-6_THEMIS_NTNG2", "EN_L6_IT_2_HSPA1A", "EN_L6_IT_2")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}
 

In [None]:
# terminal: row 2080 of `d` drawn in red over all points
terminal_row <- d[2080, ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(data = terminal_row, aes(x = UMAP_1, y = UMAP_2), label = rownames(terminal_row),
                    size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)


### L3_5_IT

In [None]:
# Union of the label sets used by the three L3_5_IT_* sections below: the
# shared developmental labels followed by the labels specific to each branch.
shared_dev_subtypes <- c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2")
subtypes_to_keep <- c(
    shared_dev_subtypes,
    "L4_RORB_MME", "EN_L3_5_IT_1_CUX2", "EN_L3_5_IT_1_PLSCR4",
    "L4_RORB_MET", "EN_L3_5_IT_2_MET", "EN_L3_5_IT_2_HSPA1A", "EN_L3_5_IT_2_DACH1",
    "L4_RORB_LRRK1", "EN_L3_5_IT_3_HSPA1A", "EN_L3_5_IT_3"
)
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node pair: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    connected = c(3172, 3293),
    connected = c(3337, 3421)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}



### L3_5_IT_1

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L3_5_IT_1) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_MME", "EN_L3_5_IT_1_CUX2", "EN_L3_5_IT_1_PLSCR4")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(2209),
    connected = c(2996, 3126),
    connected = c(3126, 3556)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}


### L3_5_IT_2

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L3_5_IT_2) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_MET", "EN_L3_5_IT_2_MET", "EN_L3_5_IT_2_HSPA1A", "EN_L3_5_IT_2_DACH1")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(2106),
    connected = c(3551, 3466),
    connected = c(3053, 3268)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}


### L3_5_IT_3

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L3_5_IT_3) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_LRRK1", "EN_L3_5_IT_3_HSPA1A", "EN_L3_5_IT_3")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# terminal: row 2814 of `d` drawn in red over all points
terminal_row <- d[2814, ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(data = terminal_row, aes(x = UMAP_1, y = UMAP_2), label = rownames(terminal_row),
                    size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)


### L2_3_IT

In [None]:
# Subset cds to the subtype_uni labels listed for this section (L2_3_IT) and
# draw the subset coloured by subtype_uni, stage_id and Age.
subtypes_to_keep <- c(
    "PN_dev",
    "L2-3_CUX2_dev-fetal", "L2-3_CUX2_dev-4", "L2-3_CUX2_dev-6", "L2-3_CUX2_dev-2",
    "L2-3_CUX2_dev-3", "L2-3_CUX2_dev-5", "L2-3_CUX2_dev-1",
    "L2_CUX2_LAMP5_dev", "L2_CUX2_LAMP5", "L3_CUX2_PRSS12",
    "EN_L2_3_IT_NTNG1", "EN_L2_3_IT_PDGFD"
)
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_column in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset,
                     color_cells_by = color_column,
                     label_leaves = FALSE,
                     label_branch_points = FALSE,
                     label_principal_points = FALSE,
                     label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# One plot per node set: the labelled rows of `d` are drawn in red over all points.
node_sets <- list(
    terminal = c(256),
    connected = c(3172, 3366),
    connected = c(3366, 3357),
    connected = c(3188, 3080),
    connected = c(3248, 3112)
)
for (nodes in node_sets) {
    labelled <- d[nodes, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
          geom_point(size = 0.01) +
          geom_text_repel(data = labelled, aes(x = UMAP_1, y = UMAP_2), label = rownames(labelled),
                          size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}


## Reconstruction

In [None]:
# connect nodes
# Each entry is one pair passed to connect_nodes(), applied in the listed order.
node_links <- list(
    c("Y_3438", "Y_3434"),  # L6_CT
    c("Y_3438", "Y_905"),   # L5_6_NP
    c("Y_3067", "Y_3102"),  # deep_IT (L6_IT)
    c("Y_3119", "Y_470"),   # L6_IT_1 (commented-out alternative: "Y_3020" - "Y_470")
    c("Y_3172", "Y_3293"),  # L3_5_IT
    c("Y_3337", "Y_3421"),  # L3_5_IT
    c("Y_2996", "Y_3126"),  # L3_5_IT_1
    c("Y_3126", "Y_3556"),  # L3_5_IT_1
    c("Y_3551", "Y_3466"),  # L3_5_IT_2
    c("Y_3053", "Y_3268"),  # L3_5_IT_2
    c("Y_3172", "Y_3366"),  # L2_3_IT
    c("Y_3366", "Y_3357"),  # L2_3_IT
    c("Y_3188", "Y_3080"),  # L2_3_IT
    c("Y_3248", "Y_3112")   # L2_3_IT
)
for (link in node_links) {
    cds <- connect_nodes(cds, link[1], link[2])
}

# select individual lineages
# All lineages share the same start value; each spec carries the lineage name,
# its end value, the nodes passed as include_nodes, and the clusters passed as
# sel_clusters.
start <- 3172
lineage_specs <- list(
    list(name = "L6B", end = 262,
         nodes = c("Y_3314"),
         clusters = c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_SCUBE1", "EN_L6B")),
    list(name = "L6_CT", end = 2638,
         nodes = c("Y_3314", "Y_3434"),
         clusters = c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_SORCS1", "EN_L6_CT_HSPA1A", "EN_L6_CT")),
    list(name = "L5_6_NP", end = 554,
         nodes = c("Y_3314", "Y_905"),
         clusters = c("PN_dev", "L5-6_TLE4_dev", "L5-6_TLE4_HTR2C", "EN_L5_6_NP")),
    list(name = "L6_IT_1", end = 548,
         nodes = c("Y_3314", "Y_3102", "Y_470"),
         clusters = c("PN_dev", "L5-6_THEMIS_dev-1", "L5-6_THEMIS_dev-2", "L5-6_THEMIS_CNR1", "EN_L6_IT_1_HSPA1A", "EN_L6_IT_1")),
    list(name = "L6_IT_2", end = 2080,
         nodes = c("Y_3314", "Y_3102"),
         clusters = c("PN_dev", "L5-6_THEMIS_dev-1", "L5-6_THEMIS_dev-2", "L5-6_THEMIS_NTNG2", "EN_L6_IT_2_HSPA1A", "EN_L6_IT_2")),
    list(name = "L3_5_IT_1", end = 2209,
         nodes = c("Y_3421", "Y_3556"),
         clusters = c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_MME", "EN_L3_5_IT_1_CUX2", "EN_L3_5_IT_1_PLSCR4")),
    list(name = "L3_5_IT_2", end = 2106,
         nodes = c("Y_3421", "Y_3268"),
         clusters = c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_MET", "EN_L3_5_IT_2_MET", "EN_L3_5_IT_2_HSPA1A", "EN_L3_5_IT_2_DACH1")),
    list(name = "L3_5_IT_3", end = 2814,
         nodes = c("Y_3421"),
         clusters = c("PN_dev", "L4_RORB_dev-fetal", "L4_RORB_dev-1", "L4_RORB_dev-2", "L4_RORB_LRRK1", "EN_L3_5_IT_3_HSPA1A", "EN_L3_5_IT_3")),
    list(name = "L2_3_IT", end = 256,
         nodes = c("Y_3366", "Y_3080", "Y_3248"),
         clusters = c("PN_dev", "L2-3_CUX2_dev-fetal", "L2-3_CUX2_dev-4", "L2-3_CUX2_dev-6", "L2-3_CUX2_dev-2", "L2-3_CUX2_dev-3", "L2-3_CUX2_dev-5", "L2-3_CUX2_dev-1", "L2_CUX2_LAMP5_dev", "L2_CUX2_LAMP5", "L3_CUX2_PRSS12", "EN_L2_3_IT_NTNG1", "EN_L2_3_IT_PDGFD"))
)

# For every lineage: isolate its graph between start and end, then isolate the
# lineage for the selected clusters. cds is updated cumulatively, so the order
# of lineage_specs is preserved from the original step-by-step code.
for (spec in lineage_specs) {
    lineage <- spec$name
    print(lineage)
    end <- spec$end
    inc.node <- spec$nodes
    cds <- isolate_graph(cds, start, end, lineage, include_nodes = inc.node)
    sel.cluster <- spec$clusters
    cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 25, N = 10)
}

cds <- combine_lineages(cds, start)

In [None]:
# Save cds after combine_lineages(); this file is the input of identify_traDEGs.r further below.
saveRDS(cds, file = "files/integrated_EN_con_afterCombination_cds.RDS")

## Figure plots

### Pseudotime

In [None]:
# Theme that removes axes, panel border and grid lines; shared by both plots.
blank_panel_theme <- theme(axis.text = element_blank(),
                           axis.ticks = element_blank(),
                           axis.title = element_blank(),
                           panel.border = element_blank(),
                           panel.grid.major = element_blank(),
                           panel.grid.minor = element_blank())

# Pseudotime-coloured plot with its legend kept (used only to extract the legend).
p_pseudotime_legend <- plot_cells(cds,
                                  color_cells_by = "pseudotime",
                                  trajectory_graph_color = "grey",
                                  trajectory_graph_segment_size = 1,
                                  label_leaves = FALSE,
                                  label_branch_points = FALSE,
                                  label_principal_points = FALSE,
                                  label_groups_by_cluster = FALSE,
                                  label_cell_groups = FALSE,
                                  cell_size = 0.5,
                                  cell_stroke = 0.3) +
    blank_panel_theme

# Same plot without the legend; this is the version written to PNG.
p_pseudotime <- p_pseudotime_legend + theme(legend.position="none")
ggsave("files/figures/en_umat_pseudotime.png", plot = p_pseudotime, width = 6, height = 6, dpi = 150)

# Draw the extracted legend on its own into a PDF.
legend <- cowplot::get_legend(p_pseudotime_legend)
pdf("files/figures/en_umat_pseudotime_legend_only.pdf")
grid::grid.draw(legend)
dev.off()

# Maturation rate of branches and lineages

In [None]:
# Write the per-lineage regression plots to a PDF. The path now carries the
# "files/" prefix used by every other figure in this notebook (the branch-level
# PDF in the next cell goes to files/figures/ as well).
pdf("files/figures/en_mature_ratio_regression_lineage.pdf", width = 4.5, height = 4.5)
regress_pt_along_age_trajectory(cds, "EN")
dev.off()

In [None]:
# Write the per-branch regression plots to a PDF.
branches <- c("Deep_nonIT", "Deep_IT", "Upper_IT")
pdf("files/figures/en_mature_ratio_regression_branch.pdf",
    width = 4.5,
    height = 4.5)
regress_pt_along_age_branch(cds, "EN", branches)
dev.off()

# Identify DEGs along trajectory

In [None]:
%%sh
# Run identify_traDEGs.r once per lineage on the combined cds saved above.
# Positional arguments are defined by identify_traDEGs.r (not visible here);
# 3172 matches the start value used for the lineages in this notebook, and the
# last argument is presumably an output prefix -- TODO confirm against the script.

for sub_lineage in L6B L6_CT L5_6_NP L6_IT_1 L6_IT_2 L2_3_IT L3_5_IT_1 L3_5_IT_2 L3_5_IT_3
do
    Rscript identify_traDEGs.r \
        files/integrated_EN_con_afterCombination_cds.RDS \
        ${sub_lineage} \
        3172 \
        0.05 \
        10000 \
        40 \
        files/traDEGs/EN
done # for sub_lineage end

# Compress data along trajectories

In [None]:
# Compress the lineages in cds (same start value, 3172, as used when isolating
# them), then print the object and the length of its @expression slot.
cds <- compress_lineages(
    cds,
    3172,
    N = 500,
    cores = 40
)
print(cds)
print(length(cds@expression))

In [None]:
# Save the cds returned by compress_lineages().
saveRDS(cds, file = "files/integrated_EN_con_afterCombination_cds_compressed.RDS")