# Load packages

In [None]:
import sys
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics.pairwise import cosine_similarity

# Initial settings for plot size
from matplotlib import rcParams
FIGSIZE = (6, 6)

sc.settings.verbosity = 1
# set number of cores to use
sc.settings.n_jobs = 25
# set_figure_params() re-applies scanpy's rcParams defaults (which include
# 'figure.figsize'), so it must run BEFORE the notebook-wide figure-size
# override below; otherwise FIGSIZE is silently replaced by scanpy's default.
sc.settings.set_figure_params(dpi=300, fontsize=6)
rcParams['figure.figsize'] = FIGSIZE
sc.logging.print_header()

# Load python functions

## Initial settings: color, order, trait

In [None]:
# Make the project-local modules under ./scripts importable (path is relative
# to the notebook's working directory).
sys.path.append('scripts')
# Wildcard import: every public name of initial_settings lands in the notebook
# namespace. NOTE(review): later cells use `colors_stage_id`, which is not
# defined in this notebook and presumably comes from here — confirm.
from initial_settings import *

## Similarity of cell type

In [None]:
import similarity_cell_type

# Load R libraries

In [None]:
# monocle3 conda environment
# Load libraries
library(Seurat)
library(Signac)
library(ggplot2)
library(dplyr)
library(zellkonverter)
# Fix the R random-number seed for this session.
set.seed(222)
library(RColorBrewer)
library(viridis)
library(monocle3)
library(tidyselect)
library(grid)
library(mgcv)
library(colorspace)
library(ggrepel)
library(igraph)
library(pbapply)
library(devtools)
library(parallel)
library(evobiR)
library(tidyr)
library(cluster)
library(grDevices)
library(repr)
library(zoo)
library(ggnewscale)
library(VennDiagram)
library(ggridges)

In [None]:
# R (Py39_R43_Ju10) for pseudotime UMAT visualization
library(monocle3)
library(ggplot2)
library(cowplot)

# Load R functions

## Initial settings: color, order, trait

In [None]:
source("scripts/initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
source("scripts/trajectory_reconstruction.r")

## Load defined functions

In [None]:
source("scripts/functions.r")

# Load data

In [None]:
# Read the integrated AnnData object from disk; the trailing comment on the
# next line records the shape the authors observed when displaying it.
adata = sc.read('files/integrated_adata.h5ad')
adata # 1,454,617 × 16,416

## Extract astrocyte cells

In [None]:
# Boolean mask over cells whose 'cell_type_uni' label equals 'Astro',
# then slice the AnnData object by that mask (all genes kept).
is_astro = adata.obs['cell_type_uni'] == 'Astro'
ad_ast = adata[is_astro, :]
ad_ast # 173,622 × 16,416

In [None]:
# Persist the astrocyte subset; the shell cell further down passes this file
# to the embeddings script via its -l option.
ad_ast.write('files/integrated_AST.h5ad')

### Cell type similarity between two cohorts

In [None]:
# Positional arguments are passed exactly as the project helper expects them;
# the last one is the PDF the helper is asked to write.
similarity_cell_type.similarity_cell_type_all(
    ad_ast,
    'subtype_uni',
    'Integrated_subtype_uni',
    10,
    'files/figures//ast_similarity_two_cohorts_subtype_uni_all.pdf',
)

# Basic UMAP and UMAT analysis

In [None]:
%%sh

# Fail fast with a clear message when dataPATH is unset or empty; without this
# guard every path below would silently resolve against the filesystem root.
: "${dataPATH:?dataPATH must be set to the project root directory}"

# Paths are quoted so a project root containing spaces still works.
python "${dataPATH}/scripts/lineage_analysis_embeddings_calculation.py" \
-l "${dataPATH}/files/integrated_AST.h5ad" \
-r 123456 \
-v scanpy@6000 \
-p half \
-n 100 \
-o "${dataPATH}/files/integrated_AST_con" \
-k no

## UMAT visualization

In [None]:
# Load the clustered astrocyte object used for the UMAT plots below.
# NOTE(review): presumably produced by the embeddings script in the previous
# shell cell — the file name differs from that cell's -o prefix, confirm.
adata_6k = sc.read('files/integrated_AST_clustering.h5ad')
adata_6k # 173,622 × 16,352

### stage_id

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype whose levels follow the order stored in
# adata_6k.uns['stage_order'].
cat_dtype = CategoricalDtype(categories=adata_6k.uns['stage_order'], ordered=True)
# Derive the ordered copy of 'stage_id' in a single step.
adata_6k.obs['stage_id_ord'] = adata_6k.obs['stage_id'].astype(cat_dtype)
adata_6k

In [None]:
# Plot options for the stage_id UMAT figure, gathered in one place.
# (legend_loc='on data' was left disabled by the authors.)
stage_plot_kwargs = dict(
    basis='umat',
    color='stage_id',
    add_outline=True,
    cmap='vlag',
    legend_fontsize=10,
    legend_fontoutline=2,
    frameon=False,
    palette=colors_stage_id,
    outline_width=(0.5, 0.3),
    vmin=0,
    vmax=1,
    size=8,
    return_fig=True,
)
# The 6x6 figure size only applies while the figure is being created.
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(adata_6k, **stage_plot_kwargs)
# Saves the current (just created) figure.
plt.savefig('files/figures/ast_data_umat_stage_id.pdf', transparent=True, format='pdf')

# Pseudotime inference

In [None]:
%%sh

# Fail fast with a clear message when dataPATH is unset or empty; without this
# guard every path below would silently resolve against the filesystem root.
: "${dataPATH:?dataPATH must be set to the project root directory}"

# Arguments: the clustered .h5ad as input, then an output path prefix
# (the *_beforeConstruction_*.RDS files read later share this prefix).
Rscript "${dataPATH}/scripts/pseudotime_infer.r" \
"${dataPATH}/files/integrated_AST_clustering.h5ad" \
"${dataPATH}/files/integrated_AST"

## Data visualization

In [None]:
# Load the two objects saved before trajectory reconstruction.
# NOTE(review): `cds` is used below with monocle3's plot_cells(); `d` is used as
# a table with UMAP_1/UMAP_2 columns and node names as row names — presumably
# the principal-graph node coordinates, confirm in pseudotime_infer.r.
cds <- readRDS("files/integrated_AST_beforeConstruction_cds.RDS")
d <- readRDS("files/integrated_AST_beforeConstruction_d.RDS")
# Quick inspection of what was loaded.
print(cds)
print(head(d))
print(dim(d))

### subtype_uni

In [None]:
# Cells coloured by subtype, with all graph annotations switched off.
plot_cells(
    cds,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)


### stage_id

In [None]:
# Cells coloured by developmental stage, with all graph annotations switched off.
plot_cells(
    cds,
    color_cells_by = "stage_id",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
)

## Principal graphs

### unit: 500

In [None]:
# Label the rows of `d` in consecutive chunks of 500, one plot per chunk, so
# the node names stay legible.
chunk_size <- 500
n_nodes <- 943  # last row of `d` to label (hard-coded by the authors; presumably nrow(d) — confirm)
# Iterate only over chunks that start inside 1..n_nodes. The previous fixed
# upper bound ran one chunk too far (1001:943); R's `a:b` with a > b counts
# DOWN, so that extra pass selected rows outside the intended range.
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk_rows <- (chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) + geom_point(size = 0.005) + geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 200

In [None]:
# Label the rows of `d` in consecutive chunks of 200, one plot per chunk, so
# the node names stay legible.
chunk_size <- 200
n_nodes <- 943  # last row of `d` to label (hard-coded by the authors; presumably nrow(d) — confirm)
# Iterate only over chunks that start inside 1..n_nodes. The previous fixed
# upper bound ran one chunk too far (1001:943); R's `a:b` with a > b counts
# DOWN, so that extra pass selected rows outside the intended range.
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk_rows <- (chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) + geom_point(size = 0.005) + geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 100

In [None]:
# Label the rows of `d` in consecutive chunks of 100, one plot per chunk, so
# the node names stay legible.
chunk_size <- 100
n_nodes <- 943  # last row of `d` to label (hard-coded by the authors; presumably nrow(d) — confirm)
# Iterate only over chunks that start inside 1..n_nodes. The previous fixed
# upper bound ran one chunk too far (1001:943); R's `a:b` with a > b counts
# DOWN, so that extra pass selected rows outside the intended range.
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk_rows <- (chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) + geom_point(size = 0.005) + geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

### unit: 50

In [None]:
# Label the rows of `d` in consecutive chunks of 50, one plot per chunk, so
# the node names stay legible.
chunk_size <- 50
n_nodes <- 943  # last row of `d` to label (hard-coded by the authors; presumably nrow(d) — confirm)
# Iterate only over chunks that start inside 1..n_nodes. The previous fixed
# upper bound ran one chunk too far (951:943); R's `a:b` with a > b counts
# DOWN, so that extra pass selected rows outside the intended range.
for (i in seq_len(ceiling(n_nodes / chunk_size)) - 1) {
    chunk_rows <- (chunk_size * i + 1):min(chunk_size * (i + 1), n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) + geom_point(size = 0.005) + geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1))
}

## Trajectories

### Start point

In [None]:
# Highlight row 861 of `d` (the node used as `start` during reconstruction).
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[861, ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[861, ]),
        size = 5,
        hjust = 2,
        color = "red",
        max.overlaps = Inf,
        segment.size = 0.1
    )


### PA

In [None]:
# Subtypes belonging to the PA lineage.
subtypes_to_keep <- c(
    "Astro_dev-3", "Astro_dev-2", "Astro_dev-1",
    "Astro_SLC1A2_dev", "Astro_SLC1A2", "Astro_WIF1", "Astro_PLSCR1"
)
# Column indices of the cells whose subtype is in that list.
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]

# Same subset coloured three ways, with graph annotations switched off.
plot_cells(
    cds_subset,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset,
    color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset,
    color_cells_by = "Age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
# stage_id again, without group labels and with larger points.
plot_cells(
    cds_subset,
    color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5, cell_stroke = 0.3
)


In [None]:
# terminal
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[653, ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[653, ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )

# included
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[224, ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[224, ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )


### FA

In [None]:
# Subtypes belonging to the FA lineage.
subtypes_to_keep <- c(
    "Astro_dev-3", "Astro_dev-2", "Astro_dev-5", "Astro_dev-4",
    "Astro_GFAP", "Astro_GRIA1", "Astro_ADAMTSL3"
)
# Column indices of the cells whose subtype is in that list.
cells_to_keep <- which(colData(cds)$subtype_uni %in% subtypes_to_keep)
cds_subset <- cds[, cells_to_keep]

# Same subset coloured three ways, with graph annotations switched off.
plot_cells(
    cds_subset,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset,
    color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset,
    color_cells_by = "Age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
# stage_id again, without group labels and with larger points.
plot_cells(
    cds_subset,
    color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5, cell_stroke = 0.3
)


In [None]:
# terminal
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[70, ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[70, ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )

# connected
# One plot per node pair that is joined with connect_nodes() in the reconstruction cell.
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[c(839, 356), ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[c(839, 356), ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[c(344, 123), ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[c(344, 123), ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[c(123, 449), ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[c(123, 449), ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = d[c(430, 620), ],
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(d[c(430, 620), ]),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )

## Reconstruction

In [None]:
# Reconstruction of the PA and FA lineages on the principal graph.
# NOTE(review): connect_nodes(), isolate_graph(), isolate_lineage() and
# combine_lineages() are not defined in this notebook (presumably they come from
# scripts/trajectory_reconstruction.r). Every call reassigns `cds`, so this cell
# is order-dependent and re-running it on an already modified `cds` may not be
# idempotent — reload the *_beforeConstruction_cds.RDS first.

# connect nodes
# PA: no need to connect

# FA
# The four node pairs below are the ones plotted under "connected" in the FA section.
cds <- connect_nodes(cds, "Y_839", "Y_356")
cds <- connect_nodes(cds, "Y_344", "Y_123")
cds <- connect_nodes(cds, "Y_123", "Y_449")
cds <- connect_nodes(cds, "Y_430", "Y_620")


# select individual lineages
lineage <- "PA"
print(lineage)
# Start/end are given as bare node numbers, while include_nodes uses "Y_<n>" names.
start <- 861
end <- 653
inc.node <- c("Y_224")
cds <- isolate_graph(cds, start, end, lineage, include_nodes = inc.node)
# Same subtype list as used for the PA subset plots.
sel.cluster <- c("Astro_dev-3", "Astro_dev-2", "Astro_dev-1", "Astro_SLC1A2_dev", "Astro_SLC1A2", "Astro_WIF1", "Astro_PLSCR1")
cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 20, N = 10)

lineage <- "FA"
print(lineage)
# `start` is reused from the PA block; only the end node changes, and no
# include_nodes argument is passed for FA.
end <- 70
cds <- isolate_graph(cds, start, end, lineage)
# Same subtype list as used for the FA subset plots.
sel.cluster <- c("Astro_dev-3", "Astro_dev-2", "Astro_dev-5", "Astro_dev-4", "Astro_GFAP", "Astro_GRIA1", "Astro_ADAMTSL3")
cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 20, N = 10)

# Merge the isolated lineages, passing the shared start node.
cds <- combine_lineages(cds, start)


In [None]:
# Persist the reconstructed object; the traDEG shell cell later passes this
# file to identify_traDEGs.r.
saveRDS(cds, file = "files/integrated_AST_afterCombination_cds.RDS")

## Figure plots

### Pseudotime

In [None]:
# Pseudotime on the embedding with a grey trajectory graph; axes, grid, border
# and legend are all removed for the figure panel.
p_pseudotime <- plot_cells(
    cds,
    color_cells_by = "pseudotime",
    trajectory_graph_color = "grey",
    trajectory_graph_segment_size = 1,
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5,
    cell_stroke = 0.3
) +
    theme(
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none"
    )
ggsave("files/figures/ast_umat_pseudotime.png", plot = p_pseudotime, width = 6, height = 6, dpi = 150)

In [None]:
# Same pseudotime plot, but with the legend kept so it can be extracted and
# saved on its own.
p_pseudotime_legend <- plot_cells(
    cds,
    color_cells_by = "pseudotime",
    trajectory_graph_color = "grey",
    trajectory_graph_segment_size = 1,
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5,
    cell_stroke = 0.3
) +
    theme(
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
    )
# Pull the legend out of the plot and draw it alone into a PDF.
legend <- cowplot::get_legend(p_pseudotime_legend)
pdf("files/figures/ast_umat_pseudotime_legend_only.pdf")
grid::grid.draw(legend)
dev.off()

### Trajectory marker genes

In [None]:
# Shared recipe for the marker-gene panels: expression of one gene on the
# embedding (norm_method = "log", "Spectral" distiller palette, grey trajectory
# graph) with title, axes, grid, border and legend removed.
#   cds  : the cell data set passed on to plot_cells()
#   gene : gene name, passed as plot_cells(genes = c(gene))
# Returns the ggplot object.
plot_ast_marker_umat <- function(cds, gene) {
    plot_cells(
        cds,
        genes = c(gene),
        trajectory_graph_color = "grey",
        trajectory_graph_segment_size = 1,
        norm_method = "log",
        label_roots = FALSE,
        label_leaves = FALSE,
        label_branch_points = FALSE,
        label_principal_points = FALSE,
        label_groups_by_cluster = FALSE,
        label_cell_groups = FALSE,
        show_trajectory_graph = TRUE,
        cell_size = 0.5,
        cell_stroke = 0.3
    ) +
        scale_color_distiller(palette = "Spectral") +
        theme(
            title = element_blank(),
            axis.text = element_blank(),
            axis.ticks = element_blank(),
            axis.title = element_blank(),
            panel.border = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.position = "none"
        )
}

p_gfap <- plot_ast_marker_umat(cds, "GFAP") # AST-WM
print(p_gfap)
ggsave("files/figures/ast_umat_expr_gfap.png", plot = p_gfap, width = 6, height = 6, dpi = 150)

p_slc1a2 <- plot_ast_marker_umat(cds, "SLC1A2") # AST-GM
ggsave("files/figures/ast_umat_expr_slc1a2.png", plot = p_slc1a2, width = 6, height = 6, dpi = 150)


p_mbp <- plot_ast_marker_umat(cds, "MBP") # MBP
ggsave("files/figures/ast_umat_expr_mbp.png", plot = p_mbp, width = 6, height = 6, dpi = 150)


In [None]:
# Marker-gene plot WITH its legend (no theme stripping), used only as the
# source from which the colour legend is extracted.
#   cds  : the cell data set passed on to plot_cells()
#   gene : gene name, passed as plot_cells(genes = c(gene))
plot_ast_marker_with_legend <- function(cds, gene) {
    plot_cells(
        cds,
        genes = c(gene),
        norm_method = "log",
        label_roots = FALSE,
        label_leaves = FALSE,
        label_branch_points = FALSE,
        label_principal_points = FALSE,
        label_groups_by_cluster = FALSE,
        label_cell_groups = FALSE,
        show_trajectory_graph = TRUE,
        cell_size = 0.5,
        cell_stroke = 0.3
    ) +
        scale_color_distiller(palette = "Spectral")
}

# Extract the legend of `plot` and draw it alone into the PDF at `file`.
# on.exit() guarantees the PDF device is closed even if drawing fails, so a
# failed run no longer leaves a dangling graphics device behind.
# Returns the extracted legend invisibly.
save_legend_only_pdf <- function(plot, file) {
    legend_grob <- cowplot::get_legend(plot)
    pdf(file)
    on.exit(dev.off(), add = TRUE)
    grid::grid.draw(legend_grob)
    invisible(legend_grob)
}

p_gfap_legends <- plot_ast_marker_with_legend(cds, "GFAP")
# Extract legend
legend <- save_legend_only_pdf(p_gfap_legends, "files/figures/ast_umat_expr_gfap_legend_only.pdf")

p_slc1a2_legends <- plot_ast_marker_with_legend(cds, "SLC1A2")
# Extract legend
legend <- save_legend_only_pdf(p_slc1a2_legends, "files/figures/ast_umat_expr_slc1a2_legend_only.pdf")

p_mbp_legends <- plot_ast_marker_with_legend(cds, "MBP")
# Extract legend
legend <- save_legend_only_pdf(p_mbp_legends, "files/figures/ast_umat_expr_mbp_legend_only.pdf")

# Maturation rate of branches and lineages

In [None]:
# Open a 4.5 x 4.5 inch PDF device, run the regression helper, then close the device.
pdf("files/figures/ast_mature_ratio_regression_lineage.pdf", width = 4.5, height = 4.5)
# NOTE(review): regress_pt_along_age_trajectory() is not defined in this
# notebook (presumably it comes from the sourced project scripts). If it raises
# an error, dev.off() below is never reached and the PDF device stays open.
regress_pt_along_age_trajectory(cds, "AST")
dev.off()

# Identify DEGs along trajectory

In [None]:
%%sh

# Fail fast with a clear message when dataPATH is unset or empty; without this
# guard every path below would silently resolve against the filesystem root.
: "${dataPATH:?dataPATH must be set to the project root directory}"

# Run the trajectory-DEG script once per lineage on the combined cds object.
# 861 is the same node number used as `start` in the reconstruction cell; the
# meaning of the remaining positional values (0.05, 10000, 40) is defined by
# identify_traDEGs.r — NOTE(review): confirm there before changing them.
for sub_lineage in PA FA
do
    Rscript "${dataPATH}/scripts/identify_traDEGs.r" \
        "${dataPATH}/files/integrated_AST_afterCombination_cds.RDS" \
        "${sub_lineage}" \
        861 \
        0.05 \
        10000 \
        40 \
        "${dataPATH}/files/traDEGs/AST"
done # for sub_lineage end

# Compress data along trajectories

In [None]:
# Compress the data along the trajectories; 861 is the same node number used as
# `start` in the reconstruction cell.
# NOTE(review): compress_lineages() is not defined in this notebook (presumably
# from the sourced project scripts) — confirm the meaning of N and cores there.
cds <- compress_lineages(cds, 861, N = 500, cores = 40)
print(cds)
# NOTE(review): assumes the returned object has an `expression` slot — confirm
# against the class definition in the project scripts.
print(length(cds@expression))

In [None]:
# Persist the compressed object under a separate file name, so the
# uncompressed *_afterCombination_cds.RDS is not overwritten.
saveRDS(cds, file = "files/integrated_AST_afterCombination_cds_compressed.RDS")