# Load packages

In [None]:
# Notebook-wide imports. `os` is required by the os.chdir() call below.
import os
import sys
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics.pairwise import cosine_similarity
from synapseclient import Synapse

# Set up the Synapse client
syn = Synapse()
syn.login()  # Assuming you're already logged in or have set up your credentials

# Set up work path.
# All later relative paths ('files/...', 'integrated_adata.h5ad') are resolved
# against this directory. The guard keeps the cell re-runnable: once the kernel
# is already inside `data_dir`, a second chdir to the same relative name would fail.
data_dir='Pseudotime_trajectories'
if os.path.basename(os.getcwd()) != data_dir:
    os.chdir(data_dir)

# Initial setting for plot size
FIGSIZE=(6, 6)
rcParams['figure.figsize']=FIGSIZE

sc.settings.verbosity = 1
# set number of cores to use
sc.settings.n_jobs = 25
sc.settings.set_figure_params(dpi=300, fontsize=6)
# Print the versions of scanpy and its dependencies for provenance.
sc.logging.print_header()

# Load python functions

## Initial settings: color, order, trait

In [None]:
# Pull every public name from the project-local `initial_settings` module into
# the notebook namespace. Later cells use `colors_stage_id` and `colors_subclass`
# without defining them, so they presumably come from this star import.
# NOTE(review): a star import hides where names originate; consider importing
# the needed names explicitly once the module's contents are confirmed.
from initial_settings import *

## Similarity of cell type

In [None]:
# Project-local module; its `similarity_cell_type_all` function is called in the
# "Similarity between two cohorts" section below.
import similarity_cell_type

# Load R libraries

In [None]:
# monocle3 conda environment
# Load libraries
# Attach every R package used by the R cells of this notebook. The attach order
# is left as-is because later packages can mask functions from earlier ones.
library(Seurat)
library(Signac)
library(ggplot2)
library(dplyr)
library(zellkonverter)
# Fix the R random-number seed for this session.
set.seed(222)
library(RColorBrewer)
library(viridis)
library(monocle3)
library(tidyselect)
library(grid)
library(mgcv)
library(colorspace)
library(ggrepel)
library(igraph)
library(pbapply)
library(devtools)
library(parallel)
library(evobiR)
library(tidyr)
library(cluster)
library(grDevices)
library(repr)
library(zoo)
library(ggnewscale)
library(VennDiagram)
library(ggridges)

In [None]:
# R (Py39_R43_Ju10) for pseudotime UMAT visualization
# Smaller library set for the alternative kernel named above; monocle3 and
# ggplot2 are also attached by the preceding library cell.
library(monocle3)
library(ggplot2)
library(cowplot)

# Load R functions

## Initial settings: color, order, trait

In [None]:
# Run the project-local R settings script in the current session
# (per the section header: colors, ordering and trait settings).
source("initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
# Run the project-local trajectory script. The helpers used in the
# "Reconstruction" section (connect_nodes, isolate_graph, isolate_lineage,
# combine_lineages) are presumably defined here or in functions.r — TODO confirm.
source("trajectory_reconstruction.r")

## Load defined functions

In [None]:
# Run the project-local script of additional helper functions.
source("functions.r")

# Load data

In [None]:
# Primary route: read the integrated AnnData object from the working directory.
adata = sc.read('integrated_adata.h5ad')

# Alternative download from synapse
# NOTE(review): `data_dir` is a relative path; if the setup cell has already
# changed into that directory, the download lands in a nested folder of the
# same name — confirm the intended download location.
syn62289304 = syn.get(entity="syn62289304", downloadLocation=data_dir)
# syn.get() returns a Synapse entity, not a file name; the downloaded file's
# location is exposed on its `.path` attribute, which is what sc.read() needs.
ad = sc.read(syn62289304.path)

## Extract cells from OPC and Oligo

In [None]:
# Keep only the cells annotated as OPC or Oligo in `cell_type_uni`;
# every variable (column) of `adata` is retained.
oligo_lineage_types = ['OPC', 'Oligo']
is_oligo_lineage = adata.obs['cell_type_uni'].isin(oligo_lineage_types)
ad_oligo = adata[is_oligo_lineage, :]
ad_oligo

In [None]:
# Persist the OPC/Oligo subset; this file is the `-l` input of the
# embeddings-calculation script run in the "Basic UMAP and UMAT analysis" section.
# NOTE(review): assumes the `files/` directory already exists — confirm.
ad_oligo.write('files/integrated_OLIGO.h5ad')

## Similarity between two cohorts

In [None]:
# subclass
# Compare the `subclass_uni` and `Integrated_subclass_uni` annotations on the
# OPC/Oligo subset and save the figure as a PDF. The meaning of the numeric
# argument (5) is defined in similarity_cell_type and is not visible here — TODO confirm.
similarity_cell_type.similarity_cell_type_all(ad_oligo, 'subclass_uni', 'Integrated_subclass_uni', 5, 'files/figures//oligo_similarity_two_cohorts_subclass_uni_all.pdf')


In [None]:
# subtype
# Same comparison as for the subclass level, using the `subtype_uni` and
# `Integrated_subtype_uni` annotations. The meaning of the numeric argument (12)
# is defined in similarity_cell_type and is not visible here — TODO confirm.
similarity_cell_type.similarity_cell_type_all(ad_oligo, 'subtype_uni', 'Integrated_subtype_uni', 12, 'files/figures//oligo_similarity_two_cohorts_subtype_uni_all.pdf')


# Basic UMAP and UMAT analysis

In [None]:
%%sh
# Run the project-local embeddings script on the OPC/Oligo subset written above.
# The next section reads files/integrated_OLIGO_con_clustering.h5ad, which is
# presumably produced from the `-o files/integrated_OLIGO_con` prefix — TODO confirm.
# The meaning of the remaining flags (-r, -v, -p, -n, -k) is defined by the
# script and is not visible in this notebook.
python lineage_analysis_embeddings_calculation.py \
-l files/integrated_OLIGO.h5ad \
-r 123456 \
-v scanpy@6000 \
-p half \
-n 100 \
-o files/integrated_OLIGO_con \
-k no

## UMAT visualization

In [None]:
# Load the AnnData object that carries the 'umat' embedding plotted below.
# NOTE(review): the `_6k` suffix presumably refers to the `scanpy@6000` setting
# passed to the embeddings script — confirm.
adata_6k = sc.read('files/integrated_OLIGO_con_clustering.h5ad')
adata_6k

### stage_id

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype whose category order comes from adata_6k.uns['stage_order'].
cat_dtype = CategoricalDtype(categories=adata_6k.uns['stage_order'], ordered=True)
# Store an ordered copy of `stage_id` next to the original column.
adata_6k.obs['stage_id_ord'] = adata_6k.obs['stage_id'].astype(cat_dtype)
adata_6k

In [None]:
# Draw the 'umat' embedding colored by `stage_id` and save it as a PDF.
# `return_fig=True` makes scanpy hand back the Figure, so it is captured and
# saved explicitly instead of relying on pyplot's implicit "current figure".
with rc_context({'figure.figsize': (6, 6)}):
    fig = sc.pl.embedding(
        adata_6k,
        basis='umat',
        color='stage_id',
        add_outline=True,
        cmap='vlag',  # legend_loc='on data',
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        palette=colors_stage_id,
        outline_width=(0.5, 0.3),
        vmin=0,
        vmax=1,
        size=8,
        return_fig=True,
    )
fig.savefig('files/figures/oligo_data_umat_stage_id.pdf', transparent=True, format='pdf')

### subclass_uni

In [None]:
# Draw the 'umat' embedding colored by `subclass_uni` and save it as a PDF.
# `return_fig=True` makes scanpy hand back the Figure, so it is captured and
# saved explicitly instead of relying on pyplot's implicit "current figure".
with rc_context({'figure.figsize': (6, 6)}):
    fig = sc.pl.embedding(
        adata_6k,
        basis='umat',
        color='subclass_uni',
        add_outline=True,
        cmap='vlag',  # legend_loc='on data',
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        palette=colors_subclass,
        outline_width=(0.5, 0.3),
        vmin=0,
        vmax=1,
        size=8,
        return_fig=True,
    )
fig.savefig('files/figures/oligo_data_umat_subclass_uni.pdf', transparent=True, format='pdf')

In [None]:
# Write the object back to the same path it was read from, so the file now also
# carries the `stage_id_ord` column added above. This file is the first argument
# of pseudotime_infer.r in the next section.
adata_6k.write('files/integrated_OLIGO_con_clustering.h5ad')

# Pseudotime inference

In [None]:
%%sh

# Run the project-local pseudotime script with two positional arguments: the
# clustering .h5ad file and a path prefix. The *_beforeConstruction_cds.RDS and
# *_beforeConstruction_d.RDS files read in the next section share this prefix and
# are presumably written by this script — TODO confirm.
Rscript pseudotime_infer.r \
files/integrated_OLIGO_con_clustering.h5ad \
files/integrated_OLIGO_con

## Data visualization

In [None]:
# Load the two "beforeConstruction" objects for the OLIGO lineage analysis.
# `cds` is passed to monocle3::plot_cells() below. `d` is a table with UMAP_1 and
# UMAP_2 columns whose rows are indexed by number in the node-labelling plots;
# presumably one row per principal-graph node — TODO confirm.
cds <- readRDS("files/integrated_OLIGO_con_beforeConstruction_cds.RDS")
d <- readRDS("files/integrated_OLIGO_con_beforeConstruction_d.RDS")
print(cds)
print(head(d))
# The row count printed here is what the hard-coded 1379 in later cells appears to refer to.
print(dim(d))

### subclass_uni

In [None]:
# Embedding colored by `subclass_uni`; all graph/cluster labels are switched off.
plot_cells(
    cds,
    color_cells_by = "subclass_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)

### subtype_uni

In [None]:
# Embedding colored by `subtype_uni`; all graph/cluster labels are switched off.
plot_cells(
    cds,
    color_cells_by = "subtype_uni",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    group_label_size = 3
)


### stage_id

In [None]:
# Embedding colored by `stage_id`; all graph/cluster labels are switched off.
plot_cells(
    cds,
    color_cells_by = "stage_id",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
)

### Age

In [None]:
# Embedding colored by `Age`; all graph/cluster labels are switched off.
plot_cells(
    cds,
    color_cells_by = "Age",
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE
)

## Principal graph visualization

### unit: 500

In [None]:
# unit: 500
# Label the rows of `d` in consecutive chunks of 500, one plot per chunk, so the
# row names stay readable. The number of chunks is derived from nrow(d) rather
# than hard-coded: a start index beyond the last row would make `a:b` count
# DOWN and select out-of-range rows.
# NOTE(review): the original used 1379 as the last row; assumes nrow(d) == 1379.
chunk_size <- 500
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size))) {
    chunk_rows <- ((i - 1) * chunk_size + 1):min(i * chunk_size, n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.005) +
            geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 200

In [None]:
# unit: 200
# Label the rows of `d` in consecutive chunks of 200, one plot per chunk, so the
# row names stay readable. The number of chunks is derived from nrow(d) rather
# than hard-coded: a start index beyond the last row would make `a:b` count
# DOWN and select out-of-range rows.
# NOTE(review): the original used 1379 as the last row; assumes nrow(d) == 1379.
chunk_size <- 200
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size))) {
    chunk_rows <- ((i - 1) * chunk_size + 1):min(i * chunk_size, n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.005) +
            geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 100

In [None]:
# unit: 100
# Label the rows of `d` in consecutive chunks of 100, one plot per chunk, so the
# row names stay readable. The number of chunks is derived from nrow(d) rather
# than hard-coded: a start index beyond the last row would make `a:b` count
# DOWN and select out-of-range rows.
# NOTE(review): the original used 1379 as the last row; assumes nrow(d) == 1379.
chunk_size <- 100
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size))) {
    chunk_rows <- ((i - 1) * chunk_size + 1):min(i * chunk_size, n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.005) +
            geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

### unit: 50

In [None]:
# unit: 50
# Label the rows of `d` in consecutive chunks of 50, one plot per chunk, so the
# row names stay readable. The number of chunks is derived from nrow(d) rather
# than hard-coded: a start index beyond the last row would make `a:b` count
# DOWN and select out-of-range rows.
# NOTE(review): the original used 1379 as the last row; assumes nrow(d) == 1379.
chunk_size <- 50
n_nodes <- nrow(d)
for (i in seq_len(ceiling(n_nodes / chunk_size))) {
    chunk_rows <- ((i - 1) * chunk_size + 1):min(i * chunk_size, n_nodes)
    node_chunk <- d[chunk_rows, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 0.005) +
            geom_text_repel(data = node_chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(node_chunk), size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

## Trajectory

### Start point

In [None]:
# Highlight row 1298 of `d` (the value later used as `start`) on top of all rows.
start_node <- d[1298, ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = start_node,
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(start_node),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )


### Oligo

In [None]:
# terminal
# Row 43 of `d` is the value later used as `end`.
terminal_node <- d[43, ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = terminal_node,
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(terminal_node),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )

# connected
# Node pairs that are joined in the reconstruction step.
# (A previously considered pair, kept for reference: d[c(915, 1311), ].)
first_pair <- d[c(1, 1311), ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = first_pair,
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(first_pair),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )
second_pair <- d[c(476, 1376), ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 0.01) +
    geom_text_repel(
        data = second_pair,
        aes(x = UMAP_1, y = UMAP_2),
        label = rownames(second_pair),
        size = 5, hjust = 2, color = "red",
        max.overlaps = Inf, segment.size = 0.1
    )


In [None]:
# Restrict `cds` to cells whose `subclass_uni` is "OPC", then show that subset
# colored by four metadata columns in turn.
subclasses_to_keep <- c("OPC")
is_kept <- colData(cds)$subclass_uni %in% subclasses_to_keep
cells_to_keep <- which(is_kept)
cds_subset <- cds[, cells_to_keep]
plot_cells(
    cds_subset, color_cells_by = "subtype_uni",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "Age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "numerical_age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)

In [None]:
# Restrict `cds` to cells whose `subclass_uni` is "Oligo", then show that subset
# colored by four metadata columns in turn.
subclasses_to_keep <- c("Oligo")
is_kept <- colData(cds)$subclass_uni %in% subclasses_to_keep
cells_to_keep <- which(is_kept)
cds_subset <- cds[, cells_to_keep]
plot_cells(
    cds_subset, color_cells_by = "subtype_uni",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "stage_id",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "Age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)
plot_cells(
    cds_subset, color_cells_by = "numerical_age",
    label_leaves = FALSE, label_branch_points = FALSE,
    label_principal_points = FALSE, label_groups_by_cluster = FALSE,
    group_label_size = 3
)

## Reconstruction

In [None]:
# Reconstruct the Oligo lineage on `cds`. Every step reassigns `cds`, so the
# statements must run in this order. The helpers are not defined in this notebook;
# presumably they come from the sourced trajectory_reconstruction.r — TODO confirm.

# connect nodes
# The two node pairs joined here are the "connected" pairs plotted in the
# Trajectory section (rows 1/1311 and 476/1376 of `d`).
cds <- connect_nodes(cds, "Y_1", "Y_1311")
cds <- connect_nodes(cds, "Y_476", "Y_1376")


lineage <- "Oligo"
print(lineage)
# Start and end are the row numbers highlighted in the "Start point" and
# "terminal" plots above.
start <- 1298
end <- 43
# Nodes passed as `include_nodes`; these are the second node of each pair connected above.
inc.node <- c("Y_1311", "Y_1376")
cds <- isolate_graph(cds, start, end, lineage, include_nodes = inc.node)
# Collect the `subtype_uni` values of all cells whose subclass is OPC or Oligo ...
sel.cluster <- as.character(unique(colData(cds)[colData(cds)$subclass_uni %in% c("OPC", "Oligo"), ]$subtype_uni))
# ... then drop the OPC_MBP and OPC_GPR17 subtypes from the selection.
sel.cluster <- sel.cluster[!sel.cluster %in% c("OPC_MBP", "OPC_GPR17")]
# NOTE(review): the meaning of `cl = 20` and `N = 10` is defined by isolate_lineage
# and is not visible here — confirm before changing.
cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 20, N = 10)

cds <- combine_lineages(cds, start)

In [None]:
# Checkpoint the reconstructed object; this file is the first argument passed to
# identify_traDEGs.r in the last section.
saveRDS(cds, file = "files/integrated_OLIGO_con_afterCombination_cds.RDS")


## Figure plots

In [None]:
# Two PNGs of the embedding colored by pseudotime: one without a legend and one
# with it. Both use the same stripped-down theme (no axis text, ticks, titles,
# border or grid lines), defined once here.
blank_axes_theme <- theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
)

# Version without legend.
p_pseudotime <- plot_cells(
    cds,
    color_cells_by = "pseudotime",
    trajectory_graph_color = "grey",
    trajectory_graph_segment_size = 1,
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5,
    cell_stroke = 0.3
) +
    blank_axes_theme +
    theme(legend.position = "none")
ggsave("files/figures/oligo_umat_pseudotime.png", plot = p_pseudotime, width = 6, height = 6, dpi = 150)


# Version with the default legend kept.
p_pseudotime_w_legend <- plot_cells(
    cds,
    color_cells_by = "pseudotime",
    trajectory_graph_color = "grey",
    trajectory_graph_segment_size = 1,
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5,
    cell_stroke = 0.3
) +
    blank_axes_theme
ggsave("files/figures/oligo_umat_pseudotime_legend.png", plot = p_pseudotime_w_legend, width = 6, height = 6, dpi = 150)

# Maturation rate

In [None]:
# Open a 4.5 x 4.5 inch PDF device, run the regression helper, then close the device.
# NOTE(review): the second argument is "OLIGO" while the lineage was created as
# "Oligo" in the reconstruction step; confirm whether this argument is a lineage
# name (case would matter) or only a label.
# NOTE(review): if the helper raises an error, dev.off() is never reached and the
# PDF device stays open.
pdf("files/figures/oligo_mature_ratio_regression_lineage.pdf", width = 4.5, height = 4.5)
regress_pt_along_age_trajectory(cds, "OLIGO")
dev.off()

# Compress data along trajectories

In [None]:
# List the names stored in the `lineages` slot of `cds`.
print(names(cds@lineages))

In [None]:
# Compress the data along the trajectory. 1298 is the same value assigned to
# `start` in the reconstruction step.
# NOTE(review): the meaning of `N = 500` and `cores = 40` is defined by
# compress_lineages and is not visible here — confirm.
cds <- compress_lineages(cds, 1298, N = 500, cores = 40)
print(cds)
# Number of entries in the `expression` slot after compression.
print(length(cds@expression))

In [None]:
# Checkpoint the compressed object under a separate file name, leaving the
# uncompressed "afterCombination" checkpoint untouched.
saveRDS(cds, file = "files/integrated_OLIGO_con_afterCombination_cds_compressed.RDS")

# Identify traDEGs

In [None]:
%%sh

# Run the project-local traDEG script with seven positional arguments. From the
# values: the RDS object, the lineage name ("Oligo"), the start node (1298, as in
# the reconstruction step), and an output location (files/traDEGs/OLIGO).
# The meaning of 0.05, 10000 and 40 is defined by the script — TODO confirm.
# NOTE(review): the input is the uncompressed "afterCombination" RDS, not the
# "_compressed" file saved in the previous section — confirm this is intended.
Rscript identify_traDEGs.r \
files/integrated_OLIGO_con_afterCombination_cds.RDS \
Oligo \
1298 \
0.05 \
10000 \
40 \
files/traDEGs/OLIGO