# Load python packages

In [None]:
# sc
import scdrs
import pegasus as pg
import scanpy as sc
import anndata as ad

# plotting
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import seaborn as sns

# data
import numpy as np
import pandas as pd
import os
import csv
import glob
import re
import pynndescent
from scipy.stats import zscore
from joblib import dump
from joblib import load

# path
import sys

# Load python functions

## Initial settings: color, order, traits

In [None]:
# Make the project-local 'scripts' directory importable so the two helper
# modules below can be found.
sys.path.append('scripts')
# NOTE(review): wildcard imports hide which names each module contributes, and
# `functions` can silently shadow names from `initial_settings` (import order
# matters). Helpers used later in this notebook, e.g. scDRS_calculate_simple,
# are presumably defined in one of these modules -- confirm and consider
# importing them explicitly.
from initial_settings import *
from functions import *

# Load R libraries

In [None]:
library(monocle3)
library(data.table)
library(ggplot2)
library(RColorBrewer)
library(splines)
require(speedglm)
library(pheatmap)

# Load R functions

## Initial settings: color, order, traits

In [None]:
source("scripts/initial_settings.r")

## Load trajectory reconstruction and downstream analysis based on Kriegstein scripts

In [None]:
source("scripts/trajectory_reconstruction.r")

## Load defined functions

In [None]:
source("scripts/functions.r")

# Single-cell level

In [None]:
# Run the project helper on the integrated AnnData file and the gene-set file.
# The result is used below as a dict (iterated with .items()) whose DataFrame
# values are exported one CSV per key.
dict_df_score = scDRS_calculate_simple(
    file_h5ad='files/integrated_adata.h5ad',
    file_gs='files/all_ms_geneset_newOrd_agingsubset.gs',
)

In [None]:
# Persist the complete result dictionary (compression level 3) so it can be
# reloaded with joblib.load instead of being recomputed.
# Create the output directory first: dump() opens the target path directly and
# fails with FileNotFoundError when 'files/scdrs' does not exist yet.
os.makedirs('files/scdrs', exist_ok=True)
dump(dict_df_score, 'files/scdrs/scDRS_sc.joblib', compress=3)

In [None]:
# Show which keys are present in the result dictionary, one per line.
for trait_name in dict_df_score.keys():
    print(trait_name)

## Export each trait to csv file

In [None]:
# Write every DataFrame-valued entry to its own CSV file named after its key;
# entries that are not DataFrames are skipped.
for trait_name, score_table in dict_df_score.items():
    if not isinstance(score_table, pd.DataFrame):
        continue
    score_table.to_csv(f'files/scdrs/scDRS_sc_{trait_name}.csv')

# Compressed scDRS

## Excitatory neuron

### Load data

In [None]:
# Load the serialized excitatory-neuron object; later cells read its @lineages
# and @pseudotime slots.
cds_EN <- readRDS("files/integrated_EN_afterCombination_cds_compressed.RDS")
# Bare expression: display the loaded object as the cell output.
cds_EN

### All traits

In [None]:
# Run the project helper compress_scdrs_traits() on the EN object for every
# trait named in trait_info.
# NOTE(review): the meaning of the positional value 3172 and of `window` / `N`
# is defined inside compress_scdrs_traits (not visible in this notebook) --
# confirm before changing them.
scdrs_EN <- compress_scdrs_traits(
    cds_EN,
    names(trait_info),
    3172,
    cell_type = "EN",
    window = FALSE,  # spelled out: `F` is an ordinary variable that can be reassigned
    N = 500,
    cores = 20
)


In [None]:
# For each selected trait, draw one spline-smoothed scDRS-vs-pseudotime trend per
# EN lineage and save it to files/figures/en_scdrs_zscore_trendline_<trait>.pdf.
# NOTE(review): "alcohilism_2019" is spelled "alcoholism_2019" in the
# inhibitory-neuron cell of this notebook -- confirm the correct key against
# names(trait_info).
traits_EN <- c("sz3", "bip2", "mdd_ipsych", "asd", "adhd_ipsych", "insomn2", "eduAttainment", "intel", "alcohilism_2019", "tourette", "obesity", "dm2")

# Fail early with a readable message: an unknown trait key would otherwise only
# surface as a ggplot error while the PDF device is already open.
unknown_traits_EN <- setdiff(traits_EN, names(scdrs_EN))
if (length(unknown_traits_EN) > 0) {
    stop("Traits not found in scdrs_EN: ", paste(unknown_traits_EN, collapse = ", "))
}

for (sub_trait in traits_EN) {
    # Long-format table: one row per pseudotime point of every lineage.
    trait_res_ggplot <- do.call(rbind, lapply(names(cds_EN@lineages), function(sub_lin) {
        pseudotime <- as.numeric(unlist(cds_EN@pseudotime[[sub_lin]]))
        data.frame(Pseudotime = pseudotime,
                   scDRS = scdrs_EN[[sub_trait]][[sub_lin]],
                   # label length follows the data instead of a hard-coded 500
                   Lineage = rep(sub_lin, length(pseudotime)))
    }))

    # cols_traj is not defined in this notebook (presumably from the sourced settings script).
    # NOTE(review): limits set on the scale drop observations outside [-0.5, 1.5]
    # before the spline is fitted; use coord_cartesian(ylim = ...) instead if only
    # a visual zoom is intended.
    p <- ggplot(trait_res_ggplot, aes(x = Pseudotime, y = scDRS, color = Lineage)) +
        scale_color_manual(values = cols_traj) +
        # linear model on a natural cubic spline basis with 3 degrees of freedom
        geom_smooth(method = "lm", formula = y ~ ns(x, df = 3), se = TRUE) +
        geom_hline(yintercept = 0, linetype = "44", color = "black", size = 1) +
        scale_y_continuous(limits = c(-0.5, 1.5)) +
        labs(title = trait_info[sub_trait]) +
        # The complete theme must come first: adding theme_classic() after theme()
        # would replace the title styling set below.
        theme_classic() +
        theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

    pdf(paste0("files/figures/en_scdrs_zscore_trendline_", sub_trait, ".pdf"), width = 5, height = 5)
    # Close the device even if rendering fails, so no PDF handle is left open.
    tryCatch(print(p), finally = dev.off())
}

#### Heatmap

In [None]:
# Obtain the fitted scDRS values for the EN object from the compressed-score RDS
# and draw them as a heatmap PDF via the project helpers.
scdrs_fitted_EN <- obtain_fitted_scdrs(cds_EN, "files/scdrs/scDRS_compressed_zscore_EN.RDS")

# Largest absolute fitted value; used to make the colour range symmetric around 0.
abs_max_EN <- max(abs(unlist(scdrs_fitted_EN)))

# Branch label vector passed to the helper.
# NOTE(review): the block sizes are multiples of n_metacell = 500 (3, 2 and 4
# blocks) -- presumably one block per lineage; verify against names(cds_EN@lineages).
branches_EN <- rep(c("Deep-non-IT", "Deep-IT", "Upper-IT"), times = c(1500, 1000, 2000))

plot_heatmap_fitted_scdrs(
    scdrs_fitted_EN,
    lineages = names(cds_EN@lineages),
    branches = branches_EN,
    n_metacell = 500,
    out_file = "files/figures/en_scdrs_zscore_heatmap.pdf",
    figure_width = 5,
    figure_height = 5,
    vmin = -abs_max_EN,
    vmax = abs_max_EN
)


## Astrocyte

### Load data

In [None]:
# Load the serialized astrocyte object; later cells read its @lineages and
# @pseudotime slots.
cds_AST <- readRDS("files/integrated_AST_afterCombination_cds_compressed.RDS")
# Bare expression: display the loaded object as the cell output.
cds_AST

### All traits

In [None]:
# Run the project helper compress_scdrs_traits() on the AST object for every
# trait named in trait_info.
# NOTE(review): the meaning of the positional value 861 and of `window` / `N`
# is defined inside compress_scdrs_traits (not visible in this notebook) --
# confirm before changing them.
scdrs_AST <- compress_scdrs_traits(
    cds_AST,
    names(trait_info),
    861,
    cell_type = "AST",
    window = FALSE,  # spelled out: `F` is an ordinary variable that can be reassigned
    N = 500,
    cores = 20
)

In [None]:
# Reload the compressed AST scDRS results from disk; this replaces the value
# assigned by the compress_scdrs_traits() call in the previous cell.
# NOTE(review): presumably this RDS is written by compress_scdrs_traits -- confirm.
scdrs_AST <- readRDS("files/scdrs/scDRS_compressed_zscore_AST.RDS")

In [None]:
# For each selected trait, draw one spline-smoothed scDRS-vs-pseudotime trend per
# AST lineage and save it to files/figures/ast_scdrs_zscore_trendline_<trait>.pdf.
traits_AST <- c("eduAttainment", "migraines_2021", "dm2")

# Fail early with a readable message: an unknown trait key would otherwise only
# surface as a ggplot error while the PDF device is already open.
unknown_traits_AST <- setdiff(traits_AST, names(scdrs_AST))
if (length(unknown_traits_AST) > 0) {
    stop("Traits not found in scdrs_AST: ", paste(unknown_traits_AST, collapse = ", "))
}

for (sub_trait in traits_AST) {
    # Long-format table: one row per pseudotime point of every lineage.
    trait_res_ggplot <- do.call(rbind, lapply(names(cds_AST@lineages), function(sub_lin) {
        pseudotime <- as.numeric(unlist(cds_AST@pseudotime[[sub_lin]]))
        data.frame(Pseudotime = pseudotime,
                   scDRS = scdrs_AST[[sub_trait]][[sub_lin]],
                   # label length follows the data instead of a hard-coded 500
                   Lineage = rep(sub_lin, length(pseudotime)))
    }))

    # cols_traj is not defined in this notebook (presumably from the sourced settings script).
    # NOTE(review): limits set on the scale drop observations outside [-0.5, 1.5]
    # before the spline is fitted; use coord_cartesian(ylim = ...) instead if only
    # a visual zoom is intended.
    p <- ggplot(trait_res_ggplot, aes(x = Pseudotime, y = scDRS, color = Lineage)) +
        scale_color_manual(values = cols_traj) +
        # linear model on a natural cubic spline basis with 3 degrees of freedom
        geom_smooth(method = "lm", formula = y ~ ns(x, df = 3), se = TRUE) +
        geom_hline(yintercept = 0, linetype = "44", color = "black", size = 1) +
        scale_y_continuous(limits = c(-0.5, 1.5)) +
        labs(title = trait_info[sub_trait]) +
        # The complete theme must come first: adding theme_classic() after theme()
        # would replace the title styling set below.
        theme_classic() +
        theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

    pdf(paste0("files/figures/ast_scdrs_zscore_trendline_", sub_trait, ".pdf"), width = 5, height = 5)
    # Close the device even if rendering fails, so no PDF handle is left open.
    tryCatch(print(p), finally = dev.off())
}

#### Heatmap

In [None]:
# Show the lineage names stored on the AST object; the same vector is passed as
# `lineages` to the heatmap call in the next cell.
names(cds_AST@lineages)

In [None]:
# Obtain the fitted scDRS values for the AST object from the compressed-score RDS
# and draw them as a heatmap PDF via the project helpers.
scdrs_fitted_AST <- obtain_fitted_scdrs(cds_AST, "files/scdrs/scDRS_compressed_zscore_AST.RDS")

# Largest absolute fitted value; used to make the colour range symmetric around 0.
abs_max_AST <- max(abs(unlist(scdrs_fitted_AST)))

# Branch label vector passed to the helper.
# NOTE(review): two blocks of n_metacell = 500 labels -- presumably one block per
# lineage; verify against names(cds_AST@lineages).
branches_AST <- rep(c("PA", "FA"), times = c(500, 500))

plot_heatmap_fitted_scdrs(
    scdrs_fitted_AST,
    lineages = names(cds_AST@lineages),
    branches = branches_AST,
    n_metacell = 500,
    out_file = "files/figures/ast_scdrs_zscore_heatmap.pdf",
    figure_width = 5,
    figure_height = 5,
    vmin = -abs_max_AST,
    vmax = abs_max_AST
)

## Inhibitory neuron

### Load data

In [None]:
# Load the serialized inhibitory-neuron object; later cells read its @lineages
# and @pseudotime slots.
cds_IN <- readRDS("files/integrated_IN_afterCombination_cds_compressed.RDS")
# Bare expression: display the loaded object as the cell output.
cds_IN

### All traits

In [None]:
# Run the project helper compress_scdrs_traits() on the IN object for every
# trait named in trait_info.
# NOTE(review): the meaning of the positional value 4319 and of `window` / `N`
# is defined inside compress_scdrs_traits (not visible in this notebook) --
# confirm before changing them.
scdrs_IN <- compress_scdrs_traits(
    cds_IN,
    names(trait_info),
    4319,
    cell_type = "IN",
    window = FALSE,  # spelled out: `F` is an ordinary variable that can be reassigned
    N = 500,
    cores = 40
)

In [None]:
# Show the names of the entries in the IN object's @pseudotime slot; the plotting
# loop below indexes this slot by lineage name.
names(cds_IN@pseudotime)

In [None]:
# Reload the compressed IN scDRS results from disk; this replaces the value
# assigned by the earlier compress_scdrs_traits() call.
# (Fixed: the opening parenthesis of readRDS( was missing, so the cell could not parse.)
scdrs_IN <- readRDS("files/scdrs/scDRS_compressed_zscore_IN.RDS")

In [None]:
# For each selected trait, draw one spline-smoothed scDRS-vs-pseudotime trend per
# IN lineage and save it to files/figures/in_scdrs_zscore_trendline_<trait>.pdf.
# NOTE(review): "alcoholism_2019" is spelled "alcohilism_2019" in the
# excitatory-neuron and oligodendrocyte cells of this notebook -- confirm the
# correct key against names(trait_info).
traits_IN <- c("sz3", "bip2", "mdd_ipsych", "asd", "adhd_ipsych", "insomn2", "eduAttainment", "intel", "alcoholism_2019", "ocd", "tourette", "obesity", "dm2")

# Fail early with a readable message: an unknown trait key would otherwise only
# surface as a ggplot error while the PDF device is already open.
unknown_traits_IN <- setdiff(traits_IN, names(scdrs_IN))
if (length(unknown_traits_IN) > 0) {
    stop("Traits not found in scdrs_IN: ", paste(unknown_traits_IN, collapse = ", "))
}

for (sub_trait in traits_IN) {
    # Progress output: name of the trait currently being plotted.
    print(sub_trait)

    # Long-format table: one row per pseudotime point of every lineage.
    trait_res_ggplot <- do.call(rbind, lapply(names(cds_IN@lineages), function(sub_lin) {
        pseudotime <- as.numeric(unlist(cds_IN@pseudotime[[sub_lin]]))
        data.frame(Pseudotime = pseudotime,
                   scDRS = scdrs_IN[[sub_trait]][[sub_lin]],
                   # label length follows the data instead of a hard-coded 500
                   Lineage = rep(sub_lin, length(pseudotime)))
    }))

    # cols_traj is not defined in this notebook (presumably from the sourced settings script).
    # NOTE(review): limits set on the scale drop observations outside [-0.5, 1.5]
    # before the spline is fitted; use coord_cartesian(ylim = ...) instead if only
    # a visual zoom is intended.
    p <- ggplot(trait_res_ggplot, aes(x = Pseudotime, y = scDRS, color = Lineage)) +
        scale_color_manual(values = cols_traj) +
        # linear model on a natural cubic spline basis with 3 degrees of freedom
        geom_smooth(method = "lm", formula = y ~ ns(x, df = 3), se = TRUE) +
        geom_hline(yintercept = 0, linetype = "44", color = "black", size = 1) +
        scale_y_continuous(limits = c(-0.5, 1.5)) +
        labs(title = trait_info[sub_trait]) +
        # The complete theme must come first: adding theme_classic() after theme()
        # would replace the title styling set below.
        theme_classic() +
        theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

    pdf(paste0("files/figures/in_scdrs_zscore_trendline_", sub_trait, ".pdf"), width = 5, height = 5)
    # Close the device even if rendering fails, so no PDF handle is left open.
    tryCatch(print(p), finally = dev.off())
}

#### Heatmap

In [None]:
# Show the lineage names stored on the IN object; the same vector is passed as
# `lineages` to the heatmap call in the next cell.
names(cds_IN@lineages)

In [None]:
# Obtain the fitted scDRS values for the IN object from the compressed-score RDS
# and draw them as a heatmap PDF via the project helpers.
scdrs_fitted_IN <- obtain_fitted_scdrs(cds_IN, "files/scdrs/scDRS_compressed_zscore_IN.RDS")

# Largest absolute fitted value; used to make the colour range symmetric around 0.
abs_max_IN <- max(abs(unlist(scdrs_fitted_IN)))

# Branch label vector passed to the helper.
# NOTE(review): the block sizes are multiples of n_metacell = 500 (3 and 8
# blocks) -- presumably one block per lineage; verify against names(cds_IN@lineages).
branches_IN <- rep(c("MGE", "CGE"), times = c(1500, 4000))

plot_heatmap_fitted_scdrs(
    scdrs_fitted_IN,
    lineages = names(cds_IN@lineages),
    branches = branches_IN,
    n_metacell = 500,
    out_file = "files/figures/in_scdrs_zscore_heatmap.pdf",
    figure_width = 5,
    figure_height = 5,
    vmin = -abs_max_IN,
    vmax = abs_max_IN
)

## Microglia

### Load data

In [None]:
# Load the serialized microglia object; later cells read its @lineages and
# @pseudotime slots.
cds_MICRO <- readRDS("files/integrated_MICRO_afterCombination_cds_compressed.RDS")
# Bare expression: display the loaded object as the cell output.
cds_MICRO

### All traits

In [None]:
# Run the project helper compress_scdrs_traits() on the MICRO object for every
# trait named in trait_info.
# NOTE(review): the meaning of the positional value 24 and of `window` / `N`
# is defined inside compress_scdrs_traits (not visible in this notebook) --
# confirm before changing them.
scdrs_MICRO <- compress_scdrs_traits(
    cds_MICRO,
    names(trait_info),
    24,
    cell_type = "MICRO",
    window = FALSE,  # spelled out: `F` is an ordinary variable that can be reassigned
    N = 500,
    cores = 20
)

In [None]:
# Reload the compressed MICRO scDRS results from disk; this replaces the value
# assigned by the compress_scdrs_traits() call in the previous cell.
# NOTE(review): presumably this RDS is written by compress_scdrs_traits -- confirm.
scdrs_MICRO <- readRDS("files/scdrs/scDRS_compressed_zscore_MICRO.RDS")

In [None]:
# For every trait present in scdrs_MICRO, draw one spline-smoothed
# scDRS-vs-pseudotime trend per MICRO lineage and display it inline
# (unlike the other cell types, nothing is written to PDF here).
for (sub_trait in names(scdrs_MICRO)) {
    # Long-format table: one row per pseudotime point of every lineage.
    trait_res_ggplot <- do.call(rbind, lapply(names(cds_MICRO@lineages), function(sub_lin) {
        pseudotime <- as.numeric(unlist(cds_MICRO@pseudotime[[sub_lin]]))
        data.frame(xValue = pseudotime,
                   yValue = scdrs_MICRO[[sub_trait]][[sub_lin]],
                   # label length follows the data instead of a hard-coded 500
                   group = rep(sub_lin, length(pseudotime)))
    }))

    # cols_traj is not defined in this notebook (presumably from the sourced settings script).
    # NOTE(review): limits set on the scale drop observations outside [-2, 2.2]
    # before the spline is fitted; use coord_cartesian(ylim = ...) instead if only
    # a visual zoom is intended.
    p <- ggplot(trait_res_ggplot, aes(x = xValue, y = yValue, color = group)) +
        scale_color_manual(values = cols_traj) +
        # linear model on a natural cubic spline basis with 3 degrees of freedom
        geom_smooth(method = "lm", formula = y ~ ns(x, df = 3), se = TRUE) +
        geom_hline(yintercept = 0, linetype = "44", color = "black", size = 1) +
        scale_y_continuous(limits = c(-2, 2.2)) +
        labs(title = paste("MICRO", trait_info[sub_trait], sep = "_")) +
        # Fixed: theme_classic() used to sit on its own line without a joining `+`,
        # so it was evaluated and discarded. It is applied before theme() because a
        # complete theme added afterwards would replace the title styling.
        theme_classic() +
        theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
    print(p)
}

#### Heatmap

In [None]:
# Obtain the fitted scDRS values for the MICRO object from the compressed-score
# RDS and draw them as a heatmap PDF via the project helpers.
scdrs_fitted_MICRO <- obtain_fitted_scdrs(cds_MICRO, "files/scdrs/scDRS_compressed_zscore_MICRO.RDS")

# Largest absolute fitted value; used to make the colour range symmetric around 0.
abs_max_MICRO <- max(abs(unlist(scdrs_fitted_MICRO)))

# Branch label vector passed to the helper: a single block of 500 labels.
branches_MICRO <- rep("Micro", times = 500)

plot_heatmap_fitted_scdrs(
    scdrs_fitted_MICRO,
    lineages = names(cds_MICRO@lineages),
    branches = branches_MICRO,
    n_metacell = 500,
    out_file = "files/figures/micro_scdrs_zscore_heatmap.pdf",
    figure_width = 5,
    figure_height = 5,
    vmin = -abs_max_MICRO,
    vmax = abs_max_MICRO
)

## Oligodendrocyte

### Load data

In [None]:
# Load the serialized oligodendrocyte object; later cells read its @lineages and
# @pseudotime slots.
cds_OLIGO <- readRDS("files/integrated_OLIGO_afterCombination_cds_compressed.RDS")
# Bare expression: display the loaded object as the cell output.
cds_OLIGO

### All traits

In [None]:
# Run the project helper compress_scdrs_traits() on the OLIGO object for every
# trait named in trait_info.
# NOTE(review): the meaning of the positional value 1298 and of `window` / `N`
# is defined inside compress_scdrs_traits (not visible in this notebook) --
# confirm before changing them.
scdrs_OLIGO <- compress_scdrs_traits(
    cds_OLIGO,
    names(trait_info),
    1298,
    cell_type = "OLIGO",
    window = FALSE,  # spelled out: `F` is an ordinary variable that can be reassigned
    N = 500,
    cores = 20
)

In [None]:
# For each selected trait, draw one spline-smoothed scDRS-vs-pseudotime trend per
# OLIGO lineage and save it to files/figures/oligo_scdrs_zscore_trendline_<trait>.pdf.
# NOTE(review): "alcohilism_2019" is spelled "alcoholism_2019" in the
# inhibitory-neuron cell of this notebook -- confirm the correct key against
# names(trait_info).
traits_OLIGO <- c("sz3", "bip2", "mdd_ipsych", "asd", "adhd_ipsych", "insomn2", "eduAttainment", "intel", "alcohilism_2019", "tourette", "obesity", "dm2")

# Fail early with a readable message: an unknown trait key would otherwise only
# surface as a ggplot error while the PDF device is already open.
unknown_traits_OLIGO <- setdiff(traits_OLIGO, names(scdrs_OLIGO))
if (length(unknown_traits_OLIGO) > 0) {
    stop("Traits not found in scdrs_OLIGO: ", paste(unknown_traits_OLIGO, collapse = ", "))
}

for (sub_trait in traits_OLIGO) {
    # Long-format table: one row per pseudotime point of every lineage.
    trait_res_ggplot <- do.call(rbind, lapply(names(cds_OLIGO@lineages), function(sub_lin) {
        pseudotime <- as.numeric(unlist(cds_OLIGO@pseudotime[[sub_lin]]))
        data.frame(Pseudotime = pseudotime,
                   scDRS = scdrs_OLIGO[[sub_trait]][[sub_lin]],
                   # label length follows the data instead of a hard-coded 500
                   Lineage = rep(sub_lin, length(pseudotime)))
    }))

    # cols_traj is not defined in this notebook (presumably from the sourced settings script).
    # NOTE(review): limits set on the scale drop observations outside [-0.5, 1.5]
    # before the spline is fitted; use coord_cartesian(ylim = ...) instead if only
    # a visual zoom is intended.
    p <- ggplot(trait_res_ggplot, aes(x = Pseudotime, y = scDRS, color = Lineage)) +
        scale_color_manual(values = cols_traj) +
        # linear model on a natural cubic spline basis with 3 degrees of freedom
        geom_smooth(method = "lm", formula = y ~ ns(x, df = 3), se = TRUE) +
        geom_hline(yintercept = 0, linetype = "44", color = "black", size = 1) +
        scale_y_continuous(limits = c(-0.5, 1.5)) +
        labs(title = trait_info[sub_trait]) +
        # The complete theme must come first: adding theme_classic() after theme()
        # would replace the title styling set below.
        theme_classic() +
        theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

    pdf(paste0("files/figures/oligo_scdrs_zscore_trendline_", sub_trait, ".pdf"), width = 5, height = 5)
    # Close the device even if rendering fails, so no PDF handle is left open.
    tryCatch(print(p), finally = dev.off())
}

#### Heatmap

In [None]:
# Obtain the fitted scDRS values for the OLIGO object from the compressed-score
# RDS and draw them as a heatmap PDF via the project helpers.
scdrs_fitted_OLIGO <- obtain_fitted_scdrs(cds_OLIGO, "files/scdrs/scDRS_compressed_zscore_OLIGO.RDS")

# Largest absolute fitted value; used to make the colour range symmetric around 0.
abs_max_OLIGO <- max(abs(unlist(scdrs_fitted_OLIGO)))

# Branch label vector passed to the helper: a single block of 500 labels.
branches_OLIGO <- rep("Oligo", times = 500)

plot_heatmap_fitted_scdrs(
    scdrs_fitted_OLIGO,
    lineages = names(cds_OLIGO@lineages),
    branches = branches_OLIGO,
    n_metacell = 500,
    out_file = "files/figures/oligo_scdrs_zscore_heatmap.pdf",
    figure_width = 5,
    figure_height = 5,
    vmin = -abs_max_OLIGO,
    vmax = abs_max_OLIGO
)