# Load packages

In [None]:
# Standard library
import os
import sys

# Third-party
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics.pairwise import cosine_similarity
from synapseclient import Synapse

# Set up the Synapse client
syn = Synapse()
syn.login()  # Assuming you're already logged in or have set up your credentials

# Set up work path. Every relative path used later in this notebook resolves against data_dir.
# NOTE: os.chdir makes this cell non-idempotent; restart the kernel before re-running it.
data_dir='Pseudotime_trajectories'
os.chdir(data_dir)

# Initial setting for plot size
# NOTE(review): sc.settings.set_figure_params() below appears to reset
# rcParams['figure.figsize'] to scanpy's default, so FIGSIZE may not persist — confirm.
# The embedding plots in this notebook set their size explicitly via rc_context.
FIGSIZE=(6, 6)
rcParams['figure.figsize']=FIGSIZE

sc.settings.verbosity = 1
# set number of cores to use
sc.settings.n_jobs = 25
sc.settings.set_figure_params( dpi=300, fontsize=6)
sc.logging.print_header()

# Load python functions

## Initial settings: color, order, trait

In [None]:
from initial_settings import *

## Similarity of cell type

In [None]:
import similarity_cell_type

# Load R libraries

In [None]:
# monocle3 conda environment
# Load libraries
library(Seurat)
library(Signac)
library(ggplot2)
library(dplyr)
library(zellkonverter)
set.seed(222)
library(RColorBrewer)
library(viridis)
library(monocle3)
library(tidyselect)
library(grid)
library(mgcv)
library(colorspace)
library(ggrepel)
library(igraph)
library(pbapply)
library(devtools)
library(parallel)
library(evobiR)
library(tidyr)
library(cluster)
library(grDevices)
library(repr)
library(zoo)
library(ggnewscale)
library(VennDiagram)
library(ggridges)

In [None]:
# R (Py39_R43_Ju10) for pseudotime UMAT visualization
library(monocle3)
library(ggplot2)
library(cowplot)

# Load R functions

## Initial settings: color, order, trait

In [None]:
source("initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
source("trajectory_reconstruction.r")

### Load defined functions

In [None]:
source("functions.r")

# Load data

In [None]:
# Load the integrated AnnData object from the working directory.
adata = sc.read('integrated_adata.h5ad')

# Alternative download from synapse
# syn.get returns an entity object rather than a filename; the downloaded file
# location is exposed on its `.path` attribute, which is what sc.read expects.
# NOTE(review): the working directory is already data_dir at this point, so
# downloadLocation=data_dir resolves to a nested data_dir folder — confirm this is intended.
syn62289304 = syn.get(entity="syn62289304", downloadLocation=data_dir)
ad = sc.read(syn62289304.path)

## Extract cells from IN

In [None]:
# Keep only the observations whose cell_type_uni label is 'IN'.
is_in_cell = adata.obs['cell_type_uni'] == 'IN'
ad_in = adata[is_in_cell, :]
ad_in

In [None]:
# Persist the IN subset; this file is the input of the embedding script run further down.
ad_in.write(
    'files/integrated_IN.h5ad'
)

## Similarity between two cohorts

In [None]:
# subclass: compare 'subclass_uni' against 'Integrated_subclass_uni' and save the figure as a PDF.
similarity_cell_type.similarity_cell_type_all(
    ad_in,
    'subclass_uni',
    'Integrated_subclass_uni',
    10,
    'files/figures//in_similarity_two_cohorts_subclass_uni_all.pdf',
)


In [None]:
# subtype: same comparison at 'subtype_uni' resolution.
similarity_cell_type.similarity_cell_type_all(
    ad_in,
    'subtype_uni',
    'Integrated_subtype_uni',
    20,
    'files/figures//in_similarity_two_cohorts_subtype_uni_all.pdf',
)


# Basic UMAP and UMAT analysis

In [None]:
%%sh
# Compute the embeddings for the IN subset written earlier in this notebook.
# The result is read back below as "${output_prefix}_clustering.h5ad".
input_h5ad=files/integrated_IN.h5ad
output_prefix=files/integrated_IN_con

python lineage_analysis_embeddings_calculation.py \
    -l "${input_h5ad}" \
    -r 123456 \
    -v scanpy@6000 \
    -p half \
    -n 100 \
    -o "${output_prefix}" \
    -k no

## UMAT visualization

In [None]:
# Load the clustering output of the embedding script and display its summary.
adata_6k = sc.read(
    'files/integrated_IN_con_clustering.h5ad'
)
adata_6k

### subclass_uni

In [None]:
# Build 'subclass_uni_plot': the subclass label is kept only for
#   * cells of the listed subclasses with numerical_age >= 20, and
#   * all MGE_dev / CGE_dev cells;
# every other cell is left as NA.
spec_subclasses = [
    'IN_SST', 'IN_PVALB', 'IN_PVALB_CHC', 'IN_VIP',
    'IN_ADARB2', 'IN_LAMP5_RELN', 'IN_LAMP5_LHX6',
]
adata_6k.obs['subclass_uni_plot'] = pd.NA

listed_and_aged = (adata_6k.obs['numerical_age'] >= 20) & (adata_6k.obs['subclass_uni'].isin(spec_subclasses))
developing = adata_6k.obs['subclass_uni'].isin(['MGE_dev', 'CGE_dev'])
condition = listed_and_aged | developing

adata_6k.obs.loc[condition, 'subclass_uni_plot'] = adata_6k.obs.loc[condition, 'subclass_uni']

In [None]:
# UMAT embedding colored by the filtered subclass labels, saved as a transparent PDF.
# (legend_loc='on data' is deliberately not passed.)
subclass_plot_kwargs = dict(
    basis='umat',
    color='subclass_uni_plot',
    add_outline=True,
    cmap='vlag',
    legend_fontsize=10,
    legend_fontoutline=2,
    frameon=False,
    palette=colors_subclass,
    outline_width=(0.5, 0.3),
    vmin=0,
    vmax=1,
    size=1,
    return_fig=True,
)
with rc_context({'figure.figsize': (6, 6)}):
    p_subclass_uni = sc.pl.embedding(adata_6k, **subclass_plot_kwargs)

p_subclass_uni.savefig(
    'files/figures/in_data_umat_subclass_uni_plot.pdf',
    transparent=True,
    format='pdf',
)
# Release the figure once it is on disk.
plt.close(p_subclass_uni)

### stage id

In [None]:
# Add 'stage_id_ord': a copy of 'stage_id' cast to an ordered categorical whose
# category order is taken from adata_6k.uns['stage_order'].
# NOTE(review): the UMAT plot in the following cell colors by 'stage_id', not
# 'stage_id_ord' — confirm which column is intended there.
cat_dtype = pd.CategoricalDtype(categories=adata_6k.uns['stage_order'], ordered=True)
adata_6k.obs['stage_id_ord'] = adata_6k.obs['stage_id'].astype(cat_dtype)
adata_6k

In [None]:
# UMAT embedding colored by stage, saved as a transparent PDF.
# (legend_loc='on data' is deliberately not passed.)
stage_plot_kwargs = dict(
    basis='umat',
    color='stage_id',
    add_outline=True,
    cmap='vlag',
    legend_fontsize=10,
    legend_fontoutline=2,
    frameon=False,
    palette=colors_stage_id,
    outline_width=(0.5, 0.3),
    vmin=0,
    vmax=1,
    size=8,
    return_fig=True,
)
with rc_context({'figure.figsize': (6, 6)}):
    p_stage_id = sc.pl.embedding(adata_6k, **stage_plot_kwargs)

p_stage_id.savefig(
    'files/figures/in_data_umat_stage_id_plot.pdf',
    transparent=True,
    format='pdf',
)
# Release the figure once it is on disk.
plt.close(p_stage_id)

In [None]:
# Write the annotated object back to the same path it was loaded from (the file is overwritten).
adata_6k.write(
    'files/integrated_IN_con_clustering.h5ad'
)

# Pseudotime inference

In [None]:
%%sh
# Run the pseudotime script on the clustering output.
# The "*_beforeConstruction_*.RDS" files loaded below share the second argument as their path prefix.
clustering_h5ad=files/integrated_IN_con_clustering.h5ad
output_prefix=files/integrated_IN_con

Rscript pseudotime_infer.r \
    "${clustering_h5ad}" \
    "${output_prefix}"

## Data visualization

In [None]:
# Load the cell_data_set and the table `d` saved before graph reconstruction.
# `d` is the UMAP_1/UMAP_2 table plotted in the cells below.
cds <- readRDS(file = "files/integrated_IN_con_beforeConstruction_cds.RDS")
d <- readRDS(file = "files/integrated_IN_con_beforeConstruction_d.RDS")

# Quick sanity check of both objects.
print(cds)
print(head(d))
print(dim(d))

### subclass_uni

In [None]:
# All cells colored by the subclass_uni column; every graph label is switched off.
plot_cells(
    cds,
    color_cells_by = "subclass_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)


### subclass

In [None]:
# All cells colored by the subclass column, using the cols_subclass_uni palette.
# NOTE(review): the palette name refers to subclass_uni while the coloring column is
# subclass — confirm the palette covers these levels.
subclass_palette <- scale_color_manual(values = cols_subclass_uni)
plot_cells(
    cds,
    color_cells_by = "subclass",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 6
) + subclass_palette


### subtype_uni

In [None]:
# All cells colored by the subtype_uni column; every graph label is switched off.
plot_cells(
    cds,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)


### stage_id

In [None]:
# All cells colored by the stage_id column; every graph label is switched off.
plot_cells(
    cds,
    color_cells_by = "stage_id",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
)

## Principal graphs

### unit: 2000

In [None]:
# Label the rows of `d` (by row name) in consecutive chunks of 2000, one plot per chunk.
# The chunk count is derived from nrow(d). The previous fixed bound ran one extra
# iteration whose row range was descending (6001:4623) and indexed rows beyond the table.
# Assumes the former hard-coded 4623 was nrow(d) — TODO confirm.
chunk_size <- 2000
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk <- d[(chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes), ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 1, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 1000

In [None]:
# Label the rows of `d` (by row name) in consecutive chunks of 1000, one plot per chunk.
# The chunk count is derived from nrow(d). The previous fixed bound ran one extra
# iteration whose row range was descending (5001:4623) and indexed rows beyond the table.
# Assumes the former hard-coded 4623 was nrow(d) — TODO confirm.
chunk_size <- 1000
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk <- d[(chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes), ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 500

In [None]:
# Label the rows of `d` (by row name) in consecutive chunks of 500, one plot per chunk.
# The chunk count is derived from nrow(d). The previous fixed bound ran one extra
# iteration whose row range was descending (5001:4623) and indexed rows beyond the table.
# Assumes the former hard-coded 4623 was nrow(d) — TODO confirm.
chunk_size <- 500
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk <- d[(chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes), ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.005) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 200

In [None]:
# Label the rows of `d` (by row name) in consecutive chunks of 200, one plot per chunk.
# The chunk count is derived from nrow(d). The previous fixed bound ran one extra
# iteration whose row range was descending (4801:4623) and indexed rows beyond the table.
# Assumes the former hard-coded 4623 was nrow(d) — TODO confirm.
chunk_size <- 200
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk <- d[(chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes), ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 50

In [None]:
# Label the rows of `d` (by row name) in consecutive chunks of 50, one plot per chunk.
# The chunk count is derived from nrow(d). The previous fixed bound ran one extra
# iteration whose row range was descending (4651:4623) and indexed rows beyond the table.
# Assumes the former hard-coded 4623 was nrow(d) — TODO confirm.
chunk_size <- 50
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk <- d[(chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes), ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

## Trajectories

### Start point

In [None]:
# Highlight row 4319 of `d`, the node used as `start` during lineage isolation.
start_row <- d[c(4319), ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(data = start_row, aes(x = UMAP_1, y = UMAP_2), label = rownames(start_row),
                    size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)


### SST

In [None]:
# SST: restrict to the listed subclass_uni labels and draw one plot per annotation column.
subclasses_to_keep <- c("MGE_dev", "SST", "IN_SST")
cells_to_keep <- which(colData(cds)$subclass_uni %in% subclasses_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age", "numerical_age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

In [None]:
# SST: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(2170),        # terminal
    c(4395, 4469),  # connection
    c(4184, 1607)   # connection
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


### PVALB

In [None]:
# PVALB: restrict to the listed subclass_uni labels and draw one plot per annotation column.
subclasses_to_keep <- c("MGE_dev", "PV", "IN_PVALB")
cells_to_keep <- which(colData(cds)$subclass_uni %in% subclasses_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age", "numerical_age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}



In [None]:
# PVALB: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(18),          # terminal
    c(4232, 1243),  # connected
    c(4456)         # included
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### PVALB_CHC

In [None]:
# PVALB_CHC: restrict to the listed subclass_uni labels and draw one plot per annotation column.
subclasses_to_keep <- c("MGE_dev", "PV_SCUBE3", "IN_PVALB_CHC")
cells_to_keep <- which(colData(cds)$subclass_uni %in% subclasses_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# PVALB_CHC: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(3238),        # terminal
    c(4408, 4177)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


### VIP

In [None]:
# VIP: restrict to the listed subclass_uni labels and draw one plot per annotation column.
subclasses_to_keep <- c("CGE_dev", "VIP", "IN_VIP")
cells_to_keep <- which(colData(cds)$subclass_uni %in% subclasses_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

#### VIP_TRPC6

In [None]:
# VIP_TRPC6: restrict to the listed subtype_uni labels.
subtypes_to_keep <- c("CGE_dev", "VIP_dev", "VIP_HS3ST3A1", "VIP_ABI3BP", "VIP_CHRM2",
                      "IN_VIP_TRPC6", "VIP_DPP6", "VIP_CRH", "IN_VIP_SCML4")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]

# Subtype view with group labels ...
plot_cells(cds_subset, color_cells_by = "subtype_uni",
           label_leaves = FALSE, label_branch_points = FALSE,
           label_principal_points = FALSE, label_groups_by_cluster = FALSE,
           group_label_size = 3)
# ... and the same view without group labels, using explicit cell size and stroke.
plot_cells(cds_subset, color_cells_by = "subtype_uni",
           label_leaves = FALSE, label_branch_points = FALSE,
           label_principal_points = FALSE, label_groups_by_cluster = FALSE,
           label_cell_groups = FALSE, cell_size = 0.5, cell_stroke = 0.3)
# Remaining annotation columns.
for (color_by in c("stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# VIP_TRPC6: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(161),         # terminal
    c(1456, 1363),  # connected
    c(1246, 2657),  # connected
    c(4399)         # included
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


#### VIP_BCL11B

In [None]:
# VIP_BCL11B: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "VIP_dev", "VIP_ADAMTSL1", "VIP_PCDH20", "VIP_KIRREL3", "IN_VIP_BCL11B")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# VIP_BCL11B: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(2003),        # terminal
    c(1456, 4327),  # connected
    c(1350, 1364)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


### ADARB2

In [None]:
# ADARB2: restrict to the listed subclass_uni labels and draw one plot per annotation column.
subclasses_to_keep <- c("CGE_dev", "IN_ADARB2", "ID2")
cells_to_keep <- which(colData(cds)$subclass_uni %in% subclasses_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}

#### ADARB2_SV2C

In [None]:
# ADARB2_SV2C, viewed together with the ID2_CSMD1 / IN_ADARB2_SYT10 subtypes.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "LAMP5_NDNF", "IN_ADARB2_SV2C", "ID2_CSMD1", "IN_ADARB2_SYT10")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}
# Stage view without group labels, using explicit cell size and stroke.
plot_cells(cds_subset, color_cells_by = "stage_id",
           label_leaves = FALSE, label_branch_points = FALSE,
           label_principal_points = FALSE, label_groups_by_cluster = FALSE,
           label_cell_groups = FALSE, cell_size = 0.5, cell_stroke = 0.3)


In [None]:
# ADARB2_SV2C: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "LAMP5_NDNF", "IN_ADARB2_SV2C")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}
# Stage view without group labels, using explicit cell size and stroke.
plot_cells(cds_subset, color_cells_by = "stage_id",
           label_leaves = FALSE, label_branch_points = FALSE,
           label_principal_points = FALSE, label_groups_by_cluster = FALSE,
           label_cell_groups = FALSE, cell_size = 0.5, cell_stroke = 0.3)


In [None]:
# ADARB2_SV2C: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(187),         # terminal
    c(4227, 4464)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

#### ADARB2_COL12A1

In [None]:
# ADARB2_COL12A1: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "CCK_SORCS1", "IN_ADARB2_COL12A1")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# ADARB2_COL12A1: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(3241),        # terminal
    c(4227, 632)    # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

#### ADARB2_SYT10

In [None]:
# ADARB2_SYT10: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "ID2_CSMD1", "IN_ADARB2_SYT10")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}
# Stage view without group labels, using explicit cell size and stroke.
plot_cells(cds_subset, color_cells_by = "stage_id",
           label_leaves = FALSE, label_branch_points = FALSE,
           label_principal_points = FALSE, label_groups_by_cluster = FALSE,
           label_cell_groups = FALSE, cell_size = 0.5, cell_stroke = 0.3)



In [None]:
# ADARB2_SYT10: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(990),         # terminal
    c(4227, 4538),  # connected
    c(1901, 83),    # connected
    c(83, 1583)     # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

#### ADARB2_RAB37

In [None]:
# ADARB2_RAB37: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "CCK_RELN", "CCK_SYT6", "IN_ADARB2_RAB37")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# ADARB2_RAB37: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(3709),        # terminal
    c(4227, 1450)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


### LAMP5_RELN

In [None]:
# LAMP5_RELN: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "ID2_dev", "LAMP5_CCK", "IN_LAMP5_RELN_HSPA1A", "IN_LAMP5_RELN")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# LAMP5_RELN: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(2993),        # terminal
    c(4227, 4160)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### LAMP5_LHX6

In [None]:
# LAMP5_LHX6: restrict to the listed subtype_uni labels and draw one plot per annotation column.
subtypes_to_keep <- c("CGE_dev", "LAMP5_NOS1", "IN_LAMP5_LHX6_HSPA1A", "IN_LAMP5_LHX6")
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]
for (color_by in c("subtype_uni", "stage_id", "Age")) {
    print(plot_cells(cds_subset, color_cells_by = color_by,
                     label_leaves = FALSE, label_branch_points = FALSE,
                     label_principal_points = FALSE, label_groups_by_cluster = FALSE,
                     group_label_size = 3))
}


In [None]:
# LAMP5_LHX6: highlight the selected rows of `d`, one plot per node set.
node_sets <- list(
    c(1205),        # terminal
    c(4313, 4222)   # connected
)
for (node_idx in node_sets) {
    picked <- d[node_idx, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.01) +
            geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                            size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}


## Reconstruction

In [None]:
# connect nodes
# Add the manually chosen edges to the graph, one connect_nodes() call per pair, applied
# in the order listed. The pairs are the "connection"/"connected" node pairs highlighted
# in the per-lineage plots above. PVALB and PVALB_CHC each get one edge: the earlier
# "no need to connect" comments contradicted the calls beneath them and have been dropped.
extra_edges <- list(
    # SST
    c("Y_4395", "Y_4469"),
    c("Y_4184", "Y_1607"),
    # PVALB
    c("Y_4232", "Y_1243"),
    # PVALB_CHC
    c("Y_4408", "Y_4177"),
    # VIP_TRPC6
    c("Y_1456", "Y_1363"),
    c("Y_1246", "Y_2657"),
    # VIP_BCL11B
    c("Y_1456", "Y_4327"),
    c("Y_1350", "Y_1364"),
    # ADARB2_RAB37
    c("Y_4227", "Y_1450"),
    # ADARB2_COL12A1
    c("Y_4227", "Y_632"),
    # ADARB2_SYT10
    c("Y_4227", "Y_4538"),
    c("Y_1901", "Y_83"),
    c("Y_83", "Y_1583"),
    # ADARB2_SV2C
    c("Y_4227", "Y_4464"),
    # LAMP5_RELN
    c("Y_4227", "Y_4160"),
    # LAMP5_LHX6
    c("Y_4313", "Y_4222")
)
for (edge in extra_edges) {
    cds <- connect_nodes(cds, edge[1], edge[2])
}

# select individual lineages
# Every lineage shares the same start node and is processed in the listed order:
# isolate_graph() first, then isolate_lineage(). A spec carries either
#   * `subclasses` — sel.cluster is derived from the subtype_uni values of cells whose
#     subclass_uni is in that set, or
#   * `subtypes`   — sel.cluster is that explicit vector.
# `include` is optional; when absent, isolate_graph() is called without include_nodes.
start <- 4319

lineage_specs <- list(
    list(name = "SST", end = 2170, include = c("Y_1607"),
         subclasses = c("MGE_dev", "SST", "IN_SST")),
    list(name = "PVALB", end = 18, include = c("Y_4456"),
         subclasses = c("MGE_dev", "PV", "IN_PVALB")),
    list(name = "PVALB_CHC", end = 3238,
         subclasses = c("MGE_dev", "PV_SCUBE3", "IN_PVALB_CHC")),
    list(name = "VIP_TRPC6", end = 161, include = c("Y_4399", "Y_2657"),
         subtypes = c("CGE_dev", "VIP_dev", "VIP_HS3ST3A1", "VIP_ABI3BP", "VIP_CHRM2", "IN_VIP_TRPC6", "VIP_DPP6", "VIP_CRH", "IN_VIP_SCML4")),
    list(name = "VIP_BCL11B", end = 2003, include = c("Y_4399", "Y_1364"),
         subtypes = c("CGE_dev", "VIP_dev", "VIP_ADAMTSL1", "VIP_PCDH20", "VIP_KIRREL3", "IN_VIP_BCL11B")),
    list(name = "ADARB2_RAB37", end = 3709, include = c("Y_4227"),
         subtypes = c("CGE_dev", "ID2_dev", "CCK_RELN", "CCK_SYT6", "IN_ADARB2_RAB37")),
    list(name = "ADARB2_COL12A1", end = 3241, include = c("Y_632"),
         subtypes = c("CGE_dev", "ID2_dev", "CCK_SORCS1", "IN_ADARB2_COL12A1")),
    list(name = "ADARB2_SYT10", end = 990, include = c("Y_4538", "Y_83", "Y_1583"),
         subtypes = c("CGE_dev", "ID2_dev", "ID2_CSMD1", "IN_ADARB2_SYT10")),
    list(name = "ADARB2_SV2C", end = 187, include = c("Y_4227"),
         subtypes = c("CGE_dev", "ID2_dev", "LAMP5_NDNF", "IN_ADARB2_SV2C")),
    list(name = "LAMP5_RELN", end = 2993, include = c("Y_4227"),
         subtypes = c("CGE_dev", "ID2_dev", "LAMP5_CCK", "IN_LAMP5_RELN_HSPA1A", "IN_LAMP5_RELN")),
    list(name = "LAMP5_LHX6", end = 1205,
         subtypes = c("CGE_dev", "LAMP5_NOS1", "IN_LAMP5_LHX6_HSPA1A", "IN_LAMP5_LHX6"))
)

for (spec in lineage_specs) {
    lineage <- spec[["name"]]
    print(lineage)
    end <- spec[["end"]]

    if (is.null(spec[["include"]])) {
        cds <- isolate_graph(cds, start, end, lineage)
    } else {
        inc.node <- spec[["include"]]
        cds <- isolate_graph(cds, start, end, lineage, include_nodes = inc.node)
    }

    if (is.null(spec[["subtypes"]])) {
        # Resolved against the current cds, i.e. after isolate_graph() for this lineage.
        sel.cluster <- as.character(unique(colData(cds)[colData(cds)$subclass_uni %in% spec[["subclasses"]], ]$subtype_uni))
    } else {
        sel.cluster <- spec[["subtypes"]]
    }
    cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 20, N = 10)
}


cds <- combine_lineages(cds, start)
cds

In [None]:
# Save the cds after lineage combination; the trajectory-DEG script below takes this file as input.
saveRDS(
    cds,
    file = "files/integrated_IN_con_afterCombination_cds.RDS"
)


## Figure plots

### Pseudotime

In [None]:
# Pseudotime figure, exported in two parts: the panel without legend (PNG) and the legend alone (PDF).

# Theme shared by both variants: no axis text/ticks/titles, no border, no grid.
bare_panel <- theme(axis.text = element_blank(),
                    axis.ticks = element_blank(),
                    axis.title = element_blank(),
                    panel.border = element_blank(),
                    panel.grid.major = element_blank(),
                    panel.grid.minor = element_blank())

# Builds the pseudotime-colored plot; called once per variant, as before.
draw_pseudotime <- function() {
    plot_cells(cds, color_cells_by = "pseudotime",
               trajectory_graph_color = "grey",
               trajectory_graph_segment_size = 1,
               label_leaves = FALSE, label_branch_points = FALSE,
               label_principal_points = FALSE, label_groups_by_cluster = FALSE,
               label_cell_groups = FALSE, cell_size = 0.5, cell_stroke = 0.3)
}

# Panel without legend.
p_pseudotime <- draw_pseudotime() + bare_panel + theme(legend.position = "none")
ggsave("files/figures/in_umat_pseudotime.png", plot = p_pseudotime, width = 6, height = 6, dpi = 150)


# Same plot with its legend kept, used only to extract the legend.
p_pseudotime_legend <- draw_pseudotime() + bare_panel
legend <- cowplot::get_legend(p_pseudotime_legend)
pdf("files/figures/in_umat_pseudotime_legend_only.pdf")
grid::grid.draw(legend)
dev.off()

# Maturation rate of branches and lineages

In [None]:
# Open a 4.5 x 4.5 PDF device, run the lineage-level regression helper for "IN" while the
# device is open, then close the device so the file is written.
# NOTE(review): presumably the helper draws or returns the plot(s) captured by this device —
# confirm in functions.r / trajectory_reconstruction.r.
pdf("files/figures/in_mature_ratio_regression_lineage.pdf", width = 4.5, height = 4.5)
regress_pt_along_age_trajectory(cds, "IN")
dev.off()

In [None]:
# Open a 4.5 x 4.5 PDF device, run the branch-level regression helper for "IN" with the
# "MGE" and "CGE" branches while the device is open, then close the device so the file is written.
# NOTE(review): presumably the helper draws or returns the plot(s) captured by this device —
# confirm in functions.r / trajectory_reconstruction.r.
pdf("files/figures/in_mature_ratio_regression_branch.pdf", width = 4.5, height = 4.5)
regress_pt_along_age_branch(cds, "IN", c("MGE", "CGE"))
dev.off()

# Identify DEGs along trajectory

In [None]:
%%sh

# Run identify_traDEGs.r once per reconstructed lineage on the combined cds.
# 4319 matches the start node used during lineage isolation. The meaning of the
# remaining positional values (0.05, 10000, 40) is defined by identify_traDEGs.r.
cds_rds=files/integrated_IN_con_afterCombination_cds.RDS
out_path=files/traDEGs/IN

for sub_lineage in SST PVALB PVALB_CHC VIP_TRPC6 VIP_BCL11B ADARB2_RAB37 ADARB2_COL12A1 ADARB2_SYT10 ADARB2_SV2C LAMP5_RELN LAMP5_LHX6
do
    Rscript identify_traDEGs.r \
        "${cds_rds}" \
        "${sub_lineage}" \
        4319 \
        0.05 \
        10000 \
        40 \
        "${out_path}"
done # for sub_lineage end

# Compress data along trajectories

In [None]:
# Compress the data along the trajectories; 4319 is the same start node used during lineage isolation.
cds <- compress_lineages(
    cds,
    4319,
    N = 500,
    cores = 40
)
# Inspect the result and the number of entries in the `expression` slot.
print(cds)
print(length(cds@expression))

In [None]:
# Save the compressed cds to its own file.
saveRDS(
    cds,
    file = "files/integrated_IN_con_afterCombination_cds_compressed.RDS"
)