# Load packages

In [None]:
import sys
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics.pairwise import cosine_similarity

# Initial settings for plot size
from matplotlib import rcParams
FIGSIZE = (6, 6)

sc.settings.verbosity = 1
# set number of cores to use
sc.settings.n_jobs = 25
sc.settings.set_figure_params(dpi=300, fontsize=6)
# set_figure_params() applies scanpy's own rcParams, including its default
# 'figure.figsize'. The notebook-wide figure size is therefore assigned AFTER
# that call; assigning it before (as was done previously) is silently overwritten.
rcParams['figure.figsize'] = FIGSIZE
sc.logging.print_header()

# Load python functions

## Initial settings: color, order, trait

In [None]:
# Make the project-local modules in scripts/ importable. Guarded so that
# re-running this cell does not keep appending duplicate entries to sys.path.
if 'scripts' not in sys.path:
    sys.path.append('scripts')
# Star import: every public name of initial_settings lands in the notebook
# namespace (presumably including colors_stage_id, which the UMAT plot uses
# without defining it — TODO confirm).
from initial_settings import *

## Similarity of cell type

In [None]:
import similarity_cell_type

# Load R libraries

In [None]:
# monocle3 conda environment
# Load libraries
library(Seurat)
library(Signac)
library(ggplot2)
library(dplyr)
library(zellkonverter)
set.seed(222)
library(RColorBrewer)
library(viridis)
library(monocle3)
library(tidyselect)
library(grid)
library(mgcv)
library(colorspace)
library(ggrepel)
library(igraph)
library(pbapply)
library(devtools)
library(parallel)
library(evobiR)
library(tidyr)
library(cluster)
library(grDevices)
library(repr)
library(zoo)
library(ggnewscale)
library(VennDiagram)
library(ggridges)

In [None]:
# R (Py39_R43_Ju10) for pseudotime UMAT visualization
library(monocle3)
library(ggplot2)
library(cowplot)

# Load R functions

## Initial settings: color, order, trait

In [None]:
source("scripts/initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
source("scripts/trajectory_reconstruction.r")

## Load defined functions

In [None]:
source("scripts/functions.r")

# Load data

In [None]:
# Read the integrated AnnData object from disk.
integrated_h5ad_path = 'files/integrated_adata.h5ad'
adata = sc.read(integrated_h5ad_path)
# Show the object summary; shape recorded by the author: 1,454,617 × 16,416
adata

## Extract cells from Micro

In [None]:
# A cell is kept when either annotation column labels it 'Micro'.
micro_mask = (adata.obs['subclass'] == 'Micro') | (adata.obs['major_clust'] == 'Micro')
ad_micro = adata[micro_mask, :]
# Show the subset summary; shape recorded by the author: 69,860 × 16,416
ad_micro

In [None]:
# Persist the Micro subset to disk. The embeddings script in the
# "Basic UMAP and UMAT analysis" section is given a file of this name via -l.
ad_micro.write('files/integrated_MICRO.h5ad')

## Similarity between two cohorts

In [None]:
# Compare the two label sets on the Micro subset and write the figure to PDF.
# Arguments are positional; the meaning of the numeric argument (6) is defined
# by scripts/similarity_cell_type.py — TODO confirm.
similarity_cell_type.similarity_cell_type_all(
    ad_micro,
    'subtype_uni',
    'Integrated_subtype_uni',
    6,
    'files/figures/micro_similarity_two_cohorts_subtype_uni_all.pdf',
)


# Basic UMAP and UMAT analysis

In [None]:
%%sh

# Run the project's embedding-calculation script on the Micro subset.
# dataPATH is not defined in any visible cell of this notebook. Fall back to
# the current directory (the Python/R cells already use paths relative to it)
# so an unset variable no longer expands to "/scripts/..." and "/files/...".
dataPATH="${dataPATH:-.}"

python "${dataPATH}/scripts/lineage_analysis_embeddings_calculation.py" \
-l "${dataPATH}/files/integrated_MICRO.h5ad" \
-r 123456 \
-v scanpy@6000 \
-p half \
-n 100 \
-o "${dataPATH}/files/integrated_MICRO" \
-k no

## UMAT visualization

In [None]:
# Load the clustering object (presumably produced by the embeddings step
# above, whose output prefix is files/integrated_MICRO — TODO confirm).
clustering_h5ad_path = 'files/integrated_MICRO_clustering.h5ad'
adata_6k = sc.read(clustering_h5ad_path)
# Show the object summary; shape recorded by the author: 69,860 × 16,126
adata_6k

### stage_id

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype whose category order comes from uns['stage_order'].
cat_dtype = CategoricalDtype(categories=adata_6k.uns['stage_order'], ordered=True)
# Store an ordered copy of 'stage_id' next to the original column.
adata_6k.obs['stage_id_ord'] = adata_6k.obs['stage_id'].astype(cat_dtype)
adata_6k

In [None]:
# Plot the 'umat' embedding coloured by stage_id and save it as a PDF.
# The figure returned by scanpy (return_fig=True) is kept and saved explicitly;
# plt.savefig() would instead write whichever figure happens to be "current".
with rc_context({'figure.figsize': (6, 6)}):
    fig = sc.pl.embedding(
        adata_6k, basis='umat', color='stage_id', add_outline=True, cmap='vlag',
        # legend_loc='on data',
        legend_fontsize=10, legend_fontoutline=2, frameon=False, palette=colors_stage_id,
        outline_width=(0.5, 0.3), vmin=0, vmax=1, size=8,
        return_fig=True,
    )
fig.savefig('files/figures/micro_data_umat_stage_id.pdf', transparent=True, format='pdf')

In [None]:
# Write the object (now carrying obs['stage_id_ord']) back to the same path it
# was loaded from, overwriting that file. The pseudotime script in the next
# section is given a file of this name as its first argument.
adata_6k.write('files/integrated_MICRO_clustering.h5ad')

# Pseudotime inference

In [None]:
%%sh

# Run the project's pseudotime script: first argument is the clustering
# .h5ad file, second is the output prefix.
# dataPATH is not defined in any visible cell of this notebook. Fall back to
# the current directory (the Python/R cells already use paths relative to it)
# so an unset variable no longer expands to "/scripts/..." and "/files/...".
dataPATH="${dataPATH:-.}"

Rscript "${dataPATH}/scripts/pseudotime_infer.r" \
"${dataPATH}/files/integrated_MICRO_clustering.h5ad" \
"${dataPATH}/files/integrated_MICRO"

## Data visualization

In [None]:
# Load the two objects saved before trajectory construction.
cds <- readRDS("files/integrated_MICRO_beforeConstruction_cds.RDS")
d <- readRDS("files/integrated_MICRO_beforeConstruction_d.RDS")

# Quick inspection: the cds summary, the first rows of d, and d's dimensions.
for (inspected in list(cds, head(d), dim(d))) {
    print(inspected)
}

In [None]:
# Draw the cells coloured by one metadata column, with leaf, branch-point,
# principal-point and per-cluster group labelling switched off.
# Extra arguments are forwarded to plot_cells().
plot_cells_by <- function(colour_column, ...) {
    plot_cells(
        cds,
        color_cells_by = colour_column,
        label_leaves = FALSE,
        label_branch_points = FALSE,
        label_principal_points = FALSE,
        label_groups_by_cluster = FALSE,
        ...
    )
}

plot_cells_by("subtype_uni", group_label_size = 3)
plot_cells_by("stage_id")
plot_cells_by("Age")

## Principal graph visualization

### unit: 50

In [None]:
# Plot all rows of d and label them in red, `nodes_per_plot` rows per figure,
# so that the row names stay readable.
#
# The previous version hard-coded 145 rows and 4 iterations. On the 4th
# iteration the range became 151:145, which in R counts DOWN and selected rows
# 151..145 instead of nothing. The number of chunks is now derived from
# nrow(d), so every row is labelled exactly once and no reversed range occurs.
nodes_per_plot <- 50
n_nodes <- nrow(d)
for (chunk_idx in seq_len(ceiling(n_nodes / nodes_per_plot))) {
    first_row <- (chunk_idx - 1) * nodes_per_plot + 1
    last_row <- min(chunk_idx * nodes_per_plot, n_nodes)
    chunk <- d[first_row:last_row, ]
    print(
        ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
            geom_point(size = 1.5) +
            geom_text_repel(data = chunk, aes(x = UMAP_1, y = UMAP_2), label = rownames(chunk),
                            size = 2, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
    )
}

## Trajectories

### Start point

In [None]:
# Label row 24 of d (the start point used for the reconstruction) in red.
start_node <- d[c(24), ]
ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(size = 1.5) +
    geom_text_repel(data = start_node, aes(x = UMAP_1, y = UMAP_2), label = rownames(start_node),
                    size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)


### Micro

In [None]:
# Scatter of all rows of d with the requested rows labelled in red.
highlight_nodes <- function(node_rows) {
    picked <- d[node_rows, ]
    ggplot(data = d, aes(x = UMAP_1, y = UMAP_2)) +
        geom_point(size = 1.5) +
        geom_text_repel(data = picked, aes(x = UMAP_1, y = UMAP_2), label = rownames(picked),
                        size = 5, hjust = 2, color = "red", max.overlaps = Inf, segment.size = 0.1)
}

# terminal
highlight_nodes(c(110))

# connected
highlight_nodes(c(62, 119))

## Reconstruction

In [None]:
# Reconstruct the Micro lineage on the principal graph. The helpers called here
# (connect_nodes, isolate_graph, isolate_lineage, combine_lineages) are not
# defined in this notebook — presumably they come from
# scripts/trajectory_reconstruction.r; verify there for exact semantics.

# connect nodes
# (Y_62 and Y_119 are the two rows labelled in the "connected" plot above)
cds <- connect_nodes(cds, "Y_62", "Y_119")

# select individual lineages
lineage <- "Micro"
print(lineage)
# start / end are the rows of d labelled in the "Start point" and "terminal" plots
start <- 24
end <- 110 
cds <- isolate_graph(cds, start, end, lineage)
# labels handed to isolate_lineage via sel_clusters
sel.cluster = c("Micro", "Micro_out")
# NOTE(review): cl = 20 and N = 10 are passed through unchanged; their meaning
# (e.g. worker count / iteration count) is not visible here — confirm.
cds <- isolate_lineage(cds, lineage, sel_clusters = sel.cluster, cl = 20, N = 10)

cds = combine_lineages(cds, start)
cds

In [None]:
# Checkpoint the cds object after lineage combination; the traDEG script at
# the end of the notebook is given a file of this name.
saveRDS(cds, file = "files/integrated_MICRO_afterCombination_cds.RDS")


## Figure plots

### Pseudotime

In [None]:
# Cosmetic settings for the figure: hide axes, border, grid lines and legend.
pseudotime_theme <- theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
)

# Cells coloured by pseudotime with a grey trajectory graph and no labels.
p_pseudotime <- plot_cells(
    cds,
    color_cells_by = "pseudotime",
    trajectory_graph_color = "grey",
    trajectory_graph_segment_size = 1,
    label_leaves = FALSE,
    label_branch_points = FALSE,
    label_principal_points = FALSE,
    label_groups_by_cluster = FALSE,
    label_cell_groups = FALSE,
    cell_size = 0.5,
    cell_stroke = 0.3
) + pseudotime_theme

ggsave("files/figures/micro_umat_pseudotime.png", plot = p_pseudotime, width = 6, height = 6, dpi = 150)

In [None]:
# Same pseudotime plot as the saved PNG, but with the legend kept, so that the
# legend can be extracted and written to its own PDF.
p_pseudotime_legend <- plot_cells(cds, color_cells_by = "pseudotime",
                                    trajectory_graph_color = "grey", 
                                    trajectory_graph_segment_size = 1, 
                                    label_leaves = F, label_branch_points = F, label_principal_points = F, label_groups_by_cluster = F, label_cell_groups = F, cell_size = 0.5, cell_stroke = 0.3) + 
        theme(axis.text = element_blank(), 
          axis.ticks = element_blank(),
          axis.title = element_blank(), 
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
legend <- cowplot::get_legend(p_pseudotime_legend)
pdf("files/figures/micro_umat_pseudotime_legend_only.pdf")
# Close the PDF device even if drawing fails; a device left open would capture
# every later plot of the session instead of showing it in the notebook.
tryCatch(
    grid::grid.draw(legend),
    finally = dev.off()
)

# Maturation rate of lineage

In [None]:
# Open a 4.5 x 4.5 inch PDF device, run the regression helper while it is
# open, then close the device. regress_pt_along_age_trajectory is not defined
# in this notebook (presumably sourced from scripts/) — verify what it draws.
# NOTE(review): the label passed here is "MICRO", whereas the lineage was
# isolated under the name "Micro" — confirm the helper expects this spelling.
# NOTE(review): if the helper errors, dev.off() is never reached and the PDF
# device stays open, capturing later plots; close it manually in that case.
pdf("files/figures/micro_mature_ratio_regression_lineage.pdf", width = 4.5, height = 4.5)
regress_pt_along_age_trajectory(cds, "MICRO")
dev.off()

# Compress data along trajectories

In [None]:
# Compress the data along the trajectory. compress_lineages is not defined in
# this notebook (presumably sourced from scripts/). The literal 24 equals the
# start value used in the Reconstruction section.
# NOTE(review): this overwrites cds in place, so re-running the cell applies
# the compression to an already-compressed object — reload cds first if needed.
# NOTE(review): cores = 40 is hard-coded; confirm the machine has that many.
cds <- compress_lineages(cds, 24, N = 500, cores = 40)
print(cds)
# length of the object's expression slot after compression
print(length(cds@expression))

In [None]:
# Checkpoint the compressed cds object.
# NOTE(review): the traDEG step below is given the un-compressed
# ..._afterCombination_cds.RDS file, not this one — confirm that is intended.
saveRDS(cds, file = "files/integrated_MICRO_afterCombination_cds_compressed.RDS")

# Identify traDEGs

In [None]:
%%sh

# Run the project's traDEG identification script on the combined cds object.
# dataPATH is not defined in any visible cell of this notebook. Fall back to
# the current directory (the Python/R cells already use paths relative to it)
# so an unset variable no longer expands to "/scripts/..." and "/files/...".
dataPATH="${dataPATH:-.}"
# sub_lineage is not defined in any visible cell either. Unquoted and empty it
# would vanish from the command line and shift every later positional
# argument (24 would be read as the lineage), so stop with a clear error.
: "${sub_lineage:?sub_lineage must be set before running this cell}"

Rscript "${dataPATH}/scripts/identify_traDEGs.r" \
"${dataPATH}/files/integrated_MICRO_afterCombination_cds.RDS" \
"${sub_lineage}" \
24 \
0.05 \
10000 \
40 \
"${dataPATH}/files/traDEGs/MICRO"