In [None]:
# importing libraries
library(ggplot2)
library(dplyr)
library(stringr)

In [None]:
# Look-up tables for the in vivo datasets.
# `datasets` holds the short ids used as csv filename prefixes;
# `source_values` maps each display label (name) to its short id (value).
datasets <- c(
  "han11.1", "han11.2", "han12", "han13",
  "polio", "liu",
  "herr22", "herr24", "herr34", "herr2d",
  "cou13", "cou17", "cou19"
)

source_values <- setNames(
  c(
    "han11.1", "han11.2", "han12", "han13",
    "cou13", "cou17", "cou19",
    "polio", "liu",
    "herr22", "herr24", "herr34", "herr2d"
  ),
  c(
    "Han_GW11.1", "Han_GW11.2", "Han_GW12", "Han_GW13",
    "Couturier_GW13", "Couturier_GW17", "Couturier_GW19",
    "Polioudakis_GW17-18", "Liu_GW17-19",
    "Herring_GW22", "Herring_GW24", "Herring_GW34", "Herring_Day2"
  )
)

In [None]:
# Load each in vivo csv file, calculate the cell-type proportions, and combine
# everything into one dataframe (`vivo`).
#
# Each dataset is summarised on its own and the per-dataset tables are bound
# once at the end, instead of growing the dataframe with rbind() in a loop.
vivo_tables <- lapply(datasets, function(dataset) {
  # Resolve the display label first so an unknown dataset id fails with a
  # clear message rather than an opaque column-assignment error.
  source_name <- names(source_values)[source_values == dataset]
  if (length(source_name) == 0) {
    stop(paste("No source value found for dataset:", dataset))
  }

  # Construct the filename for this dataset
  csv_file <- paste0(dataset, "_sctype25_7.csv")

  # Sum the cells per type, then express each type as a percentage of the
  # dataset total; tag every row with the dataset's display label.
  read.csv(csv_file, stringsAsFactors = FALSE) %>%
    group_by(type) %>%
    summarize(total_cells = sum(ncells), .groups = "drop") %>%
    mutate(
      percentage = (total_cells / sum(total_cells)) * 100,
      source = source_name
    )
})

# Combine into one dataframe
vivo <- bind_rows(vivo_tables)
  

In [None]:
# Load each in vivo interneuron csv file, calculate the proportions, and
# combine them into one dataframe (`vivo_gb`).
#
# Datasets without an interneuron csv are skipped with a warning. The
# per-dataset tables are collected in a list and bound once at the end rather
# than growing the dataframe with rbind() inside a loop.
vivo_gb_tables <- lapply(datasets, function(dataset) {
  # Construct the filename for this dataset
  csv_file <- paste0(dataset, "_sctype_INs_25_7.csv")

  # A missing file is not an error: signal it as a real warning (not print())
  # and contribute nothing for this dataset.
  if (!file.exists(csv_file)) {
    warning("File ", csv_file, " does not exist. Skipping...", call. = FALSE)
    return(NULL)
  }

  # If no display label is found for the dataset id, it's a problem
  source_name <- names(source_values)[source_values == dataset]
  if (length(source_name) == 0) {
    stop(paste("No source value found for dataset:", dataset))
  }

  # Sum the cells per type, then express each type as a percentage of the
  # dataset total; tag every row with the dataset's display label.
  read.csv(csv_file, stringsAsFactors = FALSE) %>%
    group_by(type) %>%
    summarize(total_cells = sum(ncells), .groups = "drop") %>%
    mutate(
      percentage = (total_cells / sum(total_cells)) * 100,
      source = source_name
    )
})

# Combine into one dataframe (NULL entries from skipped datasets are dropped)
vivo_gb <- bind_rows(vivo_gb_tables)


In [None]:
# Load the in vitro csv files and calculate the proportions.

# Sum `ncells` per source and cell type, then express each cell type as a
# percentage of its source's total. The result is explicitly ungrouped so that
# later verbs applied to it do not silently operate per source.
cell_type_percentages <- function(counts) {
  counts %>%
    group_by(source, cell_type) %>%
    summarize(total_cells = sum(ncells), .groups = "drop") %>%
    group_by(source) %>%
    mutate(percentage = (total_cells / sum(total_cells)) * 100) %>%
    ungroup()
}

# All cell types (first round of annotation)
vitro <- cell_type_percentages(read.csv("outputvitro_int_sctype.csv"))

# Interneuron annotation
vitro_gb <- cell_type_percentages(read.csv("outputvitro_gaba_int_sctype.csv"))

In [None]:
# Combine in vivo and in vitro data into one df.

# Rename the in vivo `type` column to `cell_type` so it matches the in vitro
# tables. any_of() turns the rename into a no-op when `type` is not present,
# so re-running this cell (or an empty `vivo_gb`) does not raise an error.
vivo <- vivo %>% rename(any_of(c(cell_type = "type")))
vivo_gb <- vivo_gb %>% rename(any_of(c(cell_type = "type")))

# bind_rows() matches columns by name (the in vivo and in vitro tables store
# them in a different order) and fills columns missing on one side with NA.

# All cell types from the first round of annotations
vivo_vitro <- bind_rows(vivo, vitro)

# All cell types from the second round of annotations: interneurons
vivo_vitro_gb <- bind_rows(vivo_gb, vitro_gb)

In [None]:
# Give every (source, cell type) pair a row, so that cell types that are not
# present in a dataset get a value of 0 instead of being absent.
df_all_combinations <- expand.grid(
  source = unique(vivo_vitro$source),
  cell_type = unique(vivo_vitro$cell_type)
)

# Left-merge the observed values onto the full grid; unmatched pairs come back
# as NA and are then set to 0.
combo_full <- merge(
  x = df_all_combinations,
  y = vivo_vitro,
  by = c("source", "cell_type"),
  all.x = TRUE
)
combo_full[is.na(combo_full)] <- 0


In [None]:
# Fix the order in which the datasets appear in the bar plot, and give each
# dataset its own colour.
# NOTE(review): any `source` value not listed here becomes NA in the factor.
source_order <- c(
  "Han_GW11.1", "Han_GW11.2", "Han_GW12", "Han_GW13",
  "Couturier_GW13", "Couturier_GW17", "Couturier_GW19",
  "Polioudakis_GW17-18", "Liu_GW17-19",
  "Herring_GW22", "Herring_GW24", "Herring_GW34", "Herring_Day2",
  "Bhaduri_H1S_5w", "Bhaduri_H1X_5w", "Bhaduri_H1S_8w",
  "Bhaduri_H1X_8w", "Bhaduri_H1S_10w", "Bhaduri_H1X_10w",
  "Trujillo_4w", "Trujillo_12w", "Trujillo_24w", "Trujillo_40w",
  "Xiangb1_4w", "Xiangb2_4w", "Xiang_10w", "Xiang_11w",
  "Fair_12w", "Fair_20w", "Velasco_12w", "Velasco_24w",
  "Popova_batch1_7w", "Popova_batch2_7w",
  "Giandomenico_10w", "Madhavan_12w"
)
combo_full$source <- ordered(combo_full$source, levels = source_order)

# Custom colour palette, keyed by dataset label
custom_colors <- c(
  "Han_GW11.1" = "#D3D3D3",
  "Han_GW11.2" = "#A9A9A9",
  "Han_GW12" = "#808080",
  "Han_GW13" = "#696969",
  "Couturier_GW13" = "#FFD700",
  "Couturier_GW17" = "#DAA520",
  "Couturier_GW19" = "#B8860B",
  "Polioudakis_GW17-18" = "#402C49",
  "Liu_GW17-19" = "#BC8F8F",
  "Herring_GW22" = "#F4A460",
  "Herring_GW24" = "#D2691E",
  "Herring_GW34" = "#A0522D",
  "Herring_Day2" = "#8B4513",
  "Bhaduri_H1S_5w" = "#6495ED",
  "Bhaduri_H1X_5w" = "#1E90FF",
  "Bhaduri_H1S_8w" = "#0000FF",
  "Bhaduri_H1X_8w" = "#0000CD",
  "Bhaduri_H1S_10w" = "#00008B",
  "Bhaduri_H1X_10w" = "#000080",
  "Fair_12w" = "#40E0D0",
  "Fair_20w" = "#48D1CC",
  "Giandomenico_10w" = "#008080",
  "Madhavan_12w" = "#C44733",
  "Popova_batch1_7w" = "#9E7DBA",
  "Popova_batch2_7w" = "#896FA1",
  "Trujillo_4w" = "#A9414B",
  "Trujillo_12w" = "#9B2A35",
  "Trujillo_24w" = "#841E28",
  "Trujillo_40w" = "#730F19",
  "Velasco_12w" = "#DEB887",
  "Velasco_24w" = "#D2B48C",
  "Xiangb1_4w" = "#599035",
  "Xiangb2_4w" = "#598A38",
  "Xiang_10w" = "#426828",
  "Xiang_11w" = "#2F5416"
)


In [None]:
# Grouped bar chart of all cell types: one dodged bar per dataset, with the
# bar height taken directly from `percentage`.
bar <- ggplot(combo_full, aes(x = cell_type, y = percentage, fill = source)) +
  geom_col(position = "dodge", width = 0.8)

# Add labels, the dataset palette, wrapped and rotated x tick labels, and a
# plain background with black axis lines.
bar +
  scale_fill_manual(values = custom_colors) +
  scale_x_discrete(labels = function(lbl) stringr::str_wrap(lbl, width = 15)) +
  labs(
    title = "scType Annotation",
    y = "Proportion (%)",
    fill = "scRNAseq Data"
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.text = element_text(size = 9),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank()
  )

In [None]:
# To make a stacked area chart, another column with the age (GW) of each
# dataset is added.
# Polioudakis & Liu ages are the average value and Herring Day2 is converted
# to weeks.
age_values <- c(
  "Han_GW11.1" = 11,
  "Han_GW11.2" = 11,
  "Han_GW12" = 12,
  "Han_GW13" = 13,
  "Couturier_GW13" = 13,
  "Couturier_GW17" = 17,
  # Was 17 (copy of the GW17 line), which merged GW19 into the GW17 average.
  "Couturier_GW19" = 19,
  "Polioudakis_GW17-18" = 17.5,
  "Liu_GW17-19" = 18,
  "Herring_GW22" = 22,
  "Herring_GW24" = 24,
  "Herring_GW34" = 34,
  "Herring_Day2" = 40
)

# Look the age up by dataset label. as.character() guarantees a by-name lookup
# even if `source` is a factor; unname() keeps the new column a plain numeric.
vivo$age <- unname(age_values[as.character(vivo$source)])
if (anyNA(vivo$age)) {
  stop(
    "No age defined for source(s): ",
    paste(unique(vivo$source[is.na(vivo$age)]), collapse = ", ")
  )
}

# Calculate the average proportion for datasets that share the same age;
# the result is returned ungrouped.
avg_percentages <- vivo %>%
  group_by(age, cell_type) %>%
  summarise(avg_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")


In [None]:
# Fix the stacking order of the cell types in the stacked area plot, and give
# each cell type its own colour.
# NOTE(review): the level strings must match the `cell_type` values exactly
# (including case); any value not listed here becomes NA.
cell_type_order <- c(
  "outer radial glia", "ventral Radial Glia", "Intermediate Progenitors",
  "Immature Excitatory", "Maturing Excitatory", "Migrating Excitatory",
  "Deep Excitatory Layers", "Upper Excitatory Layers",
  "Interneuron Precursors", "MGE INs", "CGE INs",
  "Glioblasts", "Immature Astrocytes", "Mature Astrocytes", "OPCs",
  "Oligodendrocytes", "Microglia", "Endothelial Cells", "Mural"
)
avg_percentages$cell_type <- factor(
  avg_percentages$cell_type,
  levels = cell_type_order
)

# Fill colour per cell type
colors <- c(
  "CGE INs" = "#2E8B57",
  "MGE INs" = "#15693A",
  "Interneuron Precursors" = "#3CB371",
  "Glioblasts" = "#DDA0DD",
  "Immature Astrocytes" = "#DB7093",
  "Mature Astrocytes" = "#C71585",
  "Immature Excitatory" = "#87CEFA",
  "Maturing Excitatory" = "#00BFFF",
  "Migrating Excitatory" = "#6495ED",
  "Deep Excitatory Layers" = "#7B68EE",
  "Upper Excitatory Layers" = "#6A5ACD",
  "Microglia" = "#8A2BE2",
  "outer radial glia" = "#DEB887",
  "ventral Radial Glia" = "#D2B48C",
  "Intermediate Progenitors" = "#F4A460",
  "Endothelial Cells" = "#B22222",
  "Mural" = "#BDB76B",
  "OPCs" = "#BA55D3",
  "Oligodendrocytes" = "#9932CC"
)


In [None]:
# Stacked area chart of the average cell-type percentage against age, on a
# white background with black axis lines, no grid and no y tick labels.
ggplot(
  data = avg_percentages,
  mapping = aes(x = age, y = avg_percentage, fill = cell_type)
) +
  geom_area() +
  labs(x = "Age (GW)", y = NULL, fill = "Cell Type") +
  scale_fill_manual(values = colors) +
  theme_minimal() +
  theme(
    # No grid or border
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    plot.background = element_rect(fill = "white"),
    # Black axis lines; y tick labels hidden
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text.y = element_blank()
  )

In [None]:
# Rose plot (stacked bars drawn in polar coordinates) for the in vitro and
# in vivo data combined: one wedge per cell type, stacked by dataset.
ggplot(combo_full, aes(x = cell_type, y = percentage, fill = source)) +
  geom_col() +
  coord_polar() +
  scale_fill_manual(values = custom_colors) +
  labs(x = "", y = "", fill = "scRNAseq") +
  theme(axis.text.x = element_text(size = 5, vjust = 0.3))

In [None]:
# Rose plot for the in vitro data; spaces in the cell-type labels are turned
# into line breaks so the labels fit around the circle.
ggplot(vitro, aes(x = cell_type, y = percentage, fill = source)) +
  geom_col() +
  coord_polar() +
  scale_x_discrete(labels = function(lbl) str_replace_all(lbl, " ", "\n")) +
  scale_fill_manual(values = custom_colors) +
  labs(x = "", y = "", fill = "scRNAseq") +
  theme(axis.text.x = element_text(size = 6, vjust = 0.3))

In [None]:
# Rose plot for the in vivo data; spaces in the cell-type labels are turned
# into line breaks so the labels fit around the circle.
ggplot(vivo, aes(x = cell_type, y = percentage, fill = source)) +
  geom_col() +
  coord_polar() +
  scale_x_discrete(labels = function(lbl) str_replace_all(lbl, " ", "\n")) +
  scale_fill_manual(values = custom_colors) +
  labs(x = "", y = "", fill = "scRNAseq") +
  theme(axis.text.x = element_text(size = 6, vjust = 0.3))