# Load R libraries

In [None]:
# Load required libraries
library(STRINGdb)
library(stringr)
library(scales)
library(dplyr)
library(tidyr)
library(igraph)
library(decoupleR) # For TF list

library(RColorBrewer)
library(pheatmap)

library(readxl)
library(tidyverse)
library(reshape2)
library(ggplot2)
library(ggforce)
library(janitor)
library(purrr)
library(patchwork)

library(ggraph)
library(tidygraph)

# Load R functions

In [None]:
source("functions.r")

# Data path

In [None]:
# Directory holding the per-module traDEG gene lists (the *_cluster_*.csv
# files read when building `modules`).
dataPATH_traDEGs <- "files/tables/"
# Directory for trajectory data; not referenced elsewhere in the visible cells.
dataPATH_trajectories <- "data/inhouse/"

# Load data

## Load TF-target prior network

In [None]:
# Fetch the human TF-target prior network through decoupleR, keeping
# complexes unsplit. Its `source` column is used below as the TF list.
net <- decoupleR::get_collectri(
    organism = "human",
    split_complexes = FALSE
)

# Display the network table.
net

In [None]:
# Distinct regulators in the prior network; used later as the row names of the
# TF presence table and passed to the PPI plotting helpers.
tf_list <- unique(net[["source"]])
# Display how many TFs there are.
length(tf_list)

## traDEG modules

In [None]:
# Module name -> CSV file (inside dataPATH_traDEGs) holding that module's
# genes. The file names are not derivable from the module names (spaces and
# dashes differ), so the mapping is spelled out explicitly.
module_files <- c(
    AST_dev = "AST_cluster_dev.csv",
    AST_dev_mat = "AST_cluster_dev-mat.csv",
    AST_PA_spec_whole = "AST_cluster_PA-spec whole.csv",
    AST_mat_aging = "AST_cluster_mat-aging.csv",
    AST_FA_spec_mat_aging = "AST_cluster_FA-spec mat-aging.csv",
    AST_aging = "AST_cluster_aging.csv",
    MICRO_dev = "MICRO_cluster_dev.csv",
    MICRO_dev_mat = "MICRO_cluster_dev-mat.csv",
    MICRO_mat_aging = "MICRO_cluster_mat-aging.csv",
    MICRO_aging = "MICRO_cluster_aging.csv",
    OLIGO_dev = "OLIGO_cluster_dev.csv",
    OLIGO_mat_aging = "OLIGO_cluster_mat-aging.csv",
    EN_dev = "EN_cluster_dev.csv",
    EN_Deep_layer_spec_dev_mat = "EN_cluster_Deep-layer-spec dev-mat.csv",
    EN_Upper_layer_spec_whole = "EN_cluster_Upper-layer-spec whole.csv",
    EN_Deep_layer_spec_mat = "EN_cluster_Deep-layer-spec mat.csv",
    EN_L5_6_NP_spec_mat_aging = "EN_cluster_L5_6_NP-spec mat-aging.csv",
    EN_Deep_non_IT_spec_mat_aging = "EN_cluster_Deep-non-IT-spec mat-aging.csv",
    EN_Upper_layer_spec_mat_aging = "EN_cluster_Upper-layer-spec mat-aging.csv",
    EN_mat_aging = "EN_cluster_mat-aging.csv",
    IN_CGE_spec_dev = "IN_cluster_CGE-spec dev.csv",
    IN_dev = "IN_cluster_dev.csv",
    IN_MGE_spec_dev = "IN_cluster_MGE-spec dev.csv",
    IN_sporadic_mat_aging = "IN_cluster_sporadic mat-aging.csv",
    IN_PVALB_CHC_spec_mat_aging = "IN_cluster_PVALB_CHC-spec mat-aging.csv",
    IN_LAMP5_spec_mat_aging = "IN_cluster_LAMP5-spec mat-aging.csv",
    IN_ADARB2_spec_mat_aging = "IN_cluster_ADARB2-spec mat-aging.csv",
    IN_mat_aging = "IN_cluster_mat-aging.csv"
)

# Named list of character vectors: one gene vector per module, taken from the
# first column (V1) of each headerless CSV. lapply() over the named vector
# keeps the module names and their order.
modules <- lapply(module_files, function(file_name) {
    # header = FALSE spelled out: `F` is an ordinary variable that can be
    # reassigned, `FALSE` cannot.
    gene_table <- read.csv(file.path(dataPATH_traDEGs, file_name), header = FALSE)
    as.character(gene_table$V1)
})

## Initialize STRINGdb

In [None]:
# Create the STRINGdb handle: STRING release 12.0, species 9606 (NCBI taxonomy
# ID for human), interactions kept at a score threshold of 900.
string_db <- STRINGdb$new(
    version = "12.0",
    species = 9606,
    score_threshold = 900
)
# Display the handle.
string_db

# TF inference and PPI network construction

In [None]:
# Initialize the TF x module presence table with zeros. The loop below sets an
# entry to 0.5 when the TF is a vertex of the module's full PPI network and
# overwrites it with 1 when the TF is also a vertex of the "sel_5" network.
tf_presence_3_5_all_ratio_0.02 <- as.data.frame(matrix(0, nrow = length(tf_list), ncol = length(modules), dimnames = list(tf_list, names(modules))))

# Common prefix of every file written by this cell.
ppi_out_prefix <- "data/inhouse/string-db/traDEG_modules/r_PPI_ext_TF_3_5_all_ratio_0.02_"

for(sub_celltype in c("AST", "MICRO", "OLIGO", "EN", "IN")){
    # Load data
    cds <- readRDS(paste0("files/integrated_", sub_celltype, "_con_afterCombination_cds_compressed.RDS"))
    # Lineage names depend only on the cell-type object, so compute them once
    # per cell type instead of once per module.
    lineages <- names(cds@lineages)

    # Focus on modules associated with specific cell type
    for(sub_module in names(modules)[startsWith(names(modules), sub_celltype)]){
        genes <- modules[[sub_module]]

        # Fifth positional argument of identify_pt_tfs_ratio (defined in
        # functions.r, meaning not visible here): 3 for modules with fewer
        # than 50 genes, 5 otherwise.
        tf_cutoff <- if(length(genes) < 50) 3 else 5
        pt_tfs <- identify_pt_tfs_ratio(net, cds, genes, lineages, tf_cutoff, 0.02)
        print(paste0("Length of inferred TFs for ", sub_module, " : ", length(pt_tfs)))
        print(pt_tfs)

        # Module genes plus inferred TFs that are not already module genes.
        # Guard on the *external* TFs: data.frame(gene = character(0),
        # extTF = TRUE) errors ("differing number of rows"), which the old
        # length(pt_tfs) > 0 check hit whenever every inferred TF was already
        # in the module.
        df_celltype <- data.frame(gene = genes, extTF = FALSE)
        ext_tfs <- setdiff(pt_tfs, genes)
        if(length(ext_tfs) > 0) df_celltype <- rbind(df_celltype, data.frame(gene = ext_tfs, extTF = TRUE))
        print(paste0("Number of genes to infer PPI: ", nrow(df_celltype)))

        # PPI networks
        pdf(paste0(ppi_out_prefix, "Gene_", sub_module, ".pdf"), width = 6, height = 6)
        # `finally` closes the PDF device even if the helper fails, so a
        # failure does not leave a dangling device behind.
        p_all <- tryCatch(
            ppi_per_celltype_tf_gene(df_celltype, "", sub_module, tf_list),
            finally = dev.off()
        )
        saveRDS(object = p_all, file = paste0(ppi_out_prefix, sub_module, ".rds"))
        if(!is.null(p_all)) tf_presence_3_5_all_ratio_0.02[rownames(tf_presence_3_5_all_ratio_0.02) %in% V(p_all)$gene, sub_module] <- 0.5

        # PPI networks not less than 5 nodes
        # Page size scales with the square root of the node count.
        # NOTE(review): the original read `nrow(g$data)`, but no `g` is defined
        # in this notebook, so it either failed or used a stale global. The
        # vertex count of the full network built just above is used instead;
        # confirm this is the intended size driver.
        node_count <- if(is.null(p_all)) 0 else length(V(p_all))
        size_factor <- sqrt(node_count) / 10   # tweak the divisor to control scaling
        pdf_size <- max(1.6, min(5.2, 4.8 * size_factor))  # clamp to 1.6-5.2 inches

        pdf(paste0(ppi_out_prefix, "Gene_sel_5_", sub_module, ".pdf"), width = pdf_size, height = pdf_size)
        p <- tryCatch(
            ppi_per_celltype_tf_gene_sel_5(df_celltype, "", sub_module, tf_list),
            finally = dev.off()
        )
        saveRDS(object = p, file = paste0(ppi_out_prefix, "Gene_sel_5_", sub_module, ".rds"))
        if(!is.null(p)) tf_presence_3_5_all_ratio_0.02[rownames(tf_presence_3_5_all_ratio_0.02) %in% V(p)$gene, sub_module] <- 1
    }
}

# Display the filled presence table.
tf_presence_3_5_all_ratio_0.02

# Key TFs' presence in networks (Comp >= 5 & Degree >= 3)

In [None]:
# Summarize key-TF presence across all modules at once. plotKeyTFsPresence is
# defined in functions.r; its result exposes a `dot_plot` element that is
# saved to PDF in the next cell.
res_all <- plotKeyTFsPresence(
    names(modules),
    n_header = 1000
)
# Display the result.
res_all

In [None]:
# Write the all-modules key-TF dot plot to PDF.
pdf("data/inhouse/string-db/traDEG_modules/r_PPI_ext_TF_3_5_all_ratio_0.02_keyTFs_all_together.pdf", width = 6.5, height = 5.5)
# Print explicitly: a bare `res_all$dot_plot` is only drawn through top-level
# auto-printing, so the PDF would be empty if this code were sourced or moved
# into a function or loop.
print(res_all$dot_plot)
dev.off()

## Check activities in the developing human neocortex

In [None]:
# Load data from Excel
# Path to the workbook whose sheets "Supplementary Table 15a", "15b" and "15c"
# are read with read_excel() in the next cell.
file_path <- "data/public/Kriegstein_2025_Nature/Supplementary Table 15.xlsx"

In [None]:
# Read one sheet of the workbook at `file_path`, skipping its first row, and
# normalize the column names with janitor::clean_names().
read_supp_sheet <- function(sheet_name) {
    clean_names(read_excel(file_path, sheet = sheet_name, skip = 1))
}

# The three tables passed to longify_norm() further down as "expr_act",
# "auc_r_act" and "auc_g_act".
expr_activator <- read_supp_sheet("Supplementary Table 15a")
auc_region_activator <- read_supp_sheet("Supplementary Table 15b")
auc_gene_activator <- read_supp_sheet("Supplementary Table 15c")

In [None]:
# Collect, from the saved "sel_5" PPI graphs of every module whose name ends
# in "dev", the TF vertices with degree >= 3:
#   tfs_of_interest       running union over all such modules
#   tfs_of_interest_list  per-module vectors, sorted by decreasing degree
tfs_of_interest <- c()
tfs_of_interest_list <- list()
module_names <- names(modules)
is_dev_module <- endsWith(module_names, "dev")

for(sub_celltype in c("EN", "IN", "AST", "MICRO", "OLIGO")){
    # Focus on modules associated with specific cell type
    dev_modules <- module_names[startsWith(module_names, sub_celltype) & is_dev_module]

    for(sub_module in dev_modules){
        p <- readRDS(paste0("data/inhouse/string-db/traDEG_modules/r_PPI_ext_TF_3_5_all_ratio_0.02_Gene_sel_5_", sub_module, ".rds"))
        # A NULL graph was saved when no network was produced for the module.
        if(is.null(p)) next

        vertex_tbl <- tibble(gene = V(p)$gene,
                             degree = V(p)$degree,
                             is_TF = V(p)$is_TF)
        tf_high_deg_sorted <- vertex_tbl %>%
            filter(is_TF, degree >= 3) %>%
            arrange(desc(degree))

        tfs_of_interest_list[[sub_module]] <- tf_high_deg_sorted$gene
        tfs_of_interest <- union(tfs_of_interest, tf_high_deg_sorted$gene)
    }
}

# Keep only the TFs that also appear in the expression table.
tfs_of_interest_fnl <- intersect(tfs_of_interest, expr_activator$transcription_factor)

In [None]:
# Cell-type order handed to longify_norm() for all three tables.
celltypes_ordered <- c(
  'rg_v_rg', 'rg_t_rg', 'rg_o_rg', 'ipc_en', 'en_newborn',
  'en_it_immature', 'en_l2_3_it', 'en_l4_it', 'en_l5_it',
  'en_non_it_immature', 'en_l6_it', 'en_l5_et', 'en_l5_6_np', 'en_l6_ct',
  'en_l6b', 'in_d_lge_immature', 'in_cge_immature', 'in_cge_vip',
  'in_cge_sncg', 'in_mix_lamp5', 'in_mge_immature', 'in_mge_sst',
  'in_mge_pv', 'ipc_glia', 'astrocyte_immature', 'astrocyte_protoplasmic',
  'astrocyte_fibrous', 'opc', 'oligodendrocyte_immature', 'oligodendrocyte'
)

# Filter and reshape all datasets with normalization per TF
expr_act <- longify_norm(expr_activator, "expr_act", tfs_of_interest_fnl, celltypes_ordered)
auc_r_act <- longify_norm(auc_region_activator, "auc_r_act", tfs_of_interest_fnl, celltypes_ordered)
auc_g_act <- longify_norm(auc_gene_activator, "auc_g_act", tfs_of_interest_fnl, celltypes_ordered)

# Merge all into one table keyed by (TF, celltype)
merged_df <- purrr::reduce(list(expr_act, auc_r_act, auc_g_act),
                    full_join, by = c("transcription_factor", "celltype")) %>%
  rename(TF = transcription_factor)

# Create matrix coordinates: the first TF of interest ends up in the top row,
# and columns follow the integer codes of the celltype factor.
# NOTE(review): assumes longify_norm() returns `celltype` as a factor (the
# as.numeric()/levels() calls below rely on it) - confirm in functions.r.
tile_data <- merged_df %>%
  mutate(row = as.numeric(factor(TF, levels = rev(tfs_of_interest_fnl))),
         col = as.numeric(celltype))

# Generate square polygons (one per TF x celltype cell, filled by expr_act)
squares <- pmap_dfr(tile_data, function(TF, celltype, expr_act, auc_r_act, auc_g_act, row, col) {
  gen_square(col, row) %>%
    mutate(fill = expr_act, TF = TF, celltype = celltype, group = paste0(TF, celltype))
})

# Compute circle center (currently the cell center; `offset` is unused)
offset <- 0

circle_data <- tile_data %>%
  mutate(
    x_circ = col,
    y_circ = row
  )

# x-axis ticks: factor code i <-> level i. The previous
# `breaks = unique(tile_data$col)` was in order of appearance while the labels
# were in level order, so labels could be attached to the wrong columns (or
# differ in length) whenever the rows were not already sorted by celltype.
celltype_levels <- levels(tile_data$celltype)

# Plot
p <- ggplot() +
  geom_polygon(data = squares, aes(x = x, y = y, group = group, fill = fill), color = "gray70") +
  geom_point(data = circle_data, aes(x = x_circ, y = y_circ, size = auc_r_act, fill = auc_g_act), shape = 21, color = "black", stroke = 0.3) +
  scale_fill_gradientn(colors = rev(colorRampPalette(RColorBrewer::brewer.pal(11, "RdYlBu"))(100)), name = "TF Expression / AUC (gene)") +
  scale_size_continuous(range = c(0.5, 4), name = "AUC (region)") +
  scale_x_continuous(breaks = seq_along(celltype_levels), labels = celltype_levels, expand = c(0, 0)) +
  # row and TF are both taken in order of appearance here, so they stay paired.
  scale_y_continuous(breaks = unique(tile_data$row), labels = unique(tile_data$TF), expand = c(0, 0)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 10),
        axis.title = element_blank())


print(p)


## Top 3 according to degree for each module

In [None]:
# Write one key-TF dot plot PDF per cell type, restricted to that cell type's
# modules (module names start with the cell-type prefix).
for(sub_celltype in c("AST", "MICRO", "OLIGO", "EN", "IN")){
    modules_sel <- names(modules)[startsWith(names(modules), sub_celltype)]
    res_sel <- plotKeyTFsPresence(modules_sel)

    pdf(paste0("data/inhouse/string-db/traDEG_modules/r_PPI_ext_TF_3_5_all_ratio_0.02_keyTFs_", sub_celltype, ".pdf"), width = 6, height = 4)
    # Inside a loop R does not auto-print values, so a bare
    # `res_sel$dot_plot` draws nothing and the PDF comes out empty; print it
    # explicitly. `finally` closes the device even if drawing fails.
    tryCatch(
        print(res_sel$dot_plot),
        finally = dev.off()
    )
}