<div style="border: 2px solid red; padding: 10px; border-radius: 5px;">
<strong>Notebook description:</strong>
    
This notebook plots metadata by LoF group.

<strong>Input Data:</strong>

`stats_input_data_0825.rds`

<strong>Figures:</strong>

`FS1`

<strong>Last Run All:</strong>
June 18, 2025




In [1]:

source('../../ABCA7lof2/plotting.r')
library(SingleCellExperiment)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(patchwork)

library(reshape2)

library(RColorBrewer)
library(tidyHeatmap)
library(ComplexHeatmap)


plot_coords_by_grp = function (df, x_name, y_name, annotation_name, alpha_name, colors) 
{
    df = (df[, c(x_name, y_name, annotation_name, alpha_name)])
    colnames(df) = c("x_name", "y_name", "annotation_name", "alpha_name")
    plot = ggplot(df, aes(x = x_name, y = y_name, color = annotation_name, 
        alpha = alpha_name)) + geom_point(size = 0.001) + theme_void() + 
        theme(panel.background = element_rect(colour = "black", 
            size = 0), legend.position = "none") + scale_color_manual(values = (colors[as.character(unique(df$annotation_name))])) + 
        ggtitle("") + theme(text = element_text(size = (11))) + 
        theme(legend.position = "bottom") + guides(colour = guide_legend(override.aes = list(size = 5))) #+ 
        #theme(legend.title = element_blank(), legend.position = "none")
    return(plot + theme(panel.background = element_rect(fill = "transparent"), 
        plot.background = element_rect(fill = "transparent", 
            color = NA), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), legend.background = element_rect(fill = "transparent"), 
        legend.box.background = element_rect(fill = "transparent")))
    return(plot)
}

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges

load the data

In [2]:
colors = readRDS('../../processed_data/Cell_group_colors.rds')
all_data = readRDS('../../processed_data/stats_input_data_0825.rds')
# per-cell marker expression
marker_logcounts_melted = read.csv('../../processed_data/marker_logcounts_melted.csv')
colData = read.csv('../../processed_data/colData.csv')
# various QC/stats metrics
df = read.csv('../../processed_data/celltype_annos_qc.csv')
data = readRDS('../../processed_data/celltype_anno_counts.rds')

In [3]:
# get coords data
coords = colData[,c('x', 'y', 'ABCA7LoF', 'projid', 'leiden_clusters', 'annotations2', 'gaussian_liklihood')]
coords$ABCA7LoF = ifelse(coords$ABCA7LoF==1, 'LoF', 'Con')
coords$leiden_clusters = as.character(coords$leiden_clusters)

In [4]:
# plot marker genes
marker_genes = c('SYT1', 'NRGN', 'GAD1', 'AQP4', 'CSF1R', 'MBP', 'PLP1', 'VCAN',  'PDGFRB', 'FLT1')
marker_logcounts_melted$Var1 = factor(marker_logcounts_melted$Var1, levels = marker_genes)
plt0 = ggplot(marker_logcounts_melted, aes(x = celltype, y = value, fill = celltype)) + geom_boxplot(outlier.shape = NA)  + facet_wrap(Var1 ~ ., ncol = 5, scales = 'free')+ theme_classic() + theme( strip.background = element_blank() )  + scale_fill_manual(values = colors[marker_logcounts_melted$celltype])

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/marker_boxplot.pdf", plot = plt0, width = 10, height = 3, units = "in")


In [5]:

# plot fractions
cells = unique(df$celltype)
df$celltype = factor(df$celltype, levels = cells[order(cells)])
colourCount = length(unique(df$projid))
getPalette = colorRampPalette(brewer.pal(9, "Set1"))

plt1 = ggplot(df) + aes(x = factor(projid), fill = factor(celltype)) + geom_bar(color = "black", position = "fill") + ggtitle('individual-level cell fractions') + xlab("LoF variants") + ylab("fraction") + theme_classic() + labs(x = "", fill = 'fraction') + scale_fill_manual(values = colors[as.character(df$celltype)]) + facet_grid(. ~ LOF, scales = "free", space = "free") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/individual_fractions.pdf", plot = plt1, width = 10, height = 3, units = "in")


In [20]:
# plot correlation heatmap
C = unname(colors[c('Ex', 'In', 'Ast', 'Mic', 'Oli', 'Opc')])

d = data$ind_cor%>%melt(.)%>%mutate(X=Var1)%>%mutate(Y=Var2)%>%
separate(., 'Var1', c('From_celltype', 'From_projid'), '[.]')%>%
separate(., 'Var2', c('To_celltype', 'To_projid'), '[.]')

plt3 = d%>%as_tibble(.)%>%
tidyHeatmap::heatmap(column_title='',row_title='',.row = X,.column = Y,.value = value,show_heatmap_legend =TRUE,show_column_names=F, show_row_names=F, palette_value = circlize::colorRamp2(seq(0, 1, length.out = 11), rev(RColorBrewer::brewer.pal(11, "RdBu"))), clustering_distance_rows='pearson', clustering_distance_columns='pearson')%>%
add_tile(From_celltype, palette=C, show_annotation_name=FALSE)%>%
add_tile(To_celltype, palette=C, show_annotation_name=FALSE, show_legend=FALSE)%>%wrap_heatmap()

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/celltype_heatmap.pdf", plot = plt3, width = 6, height = 5, units = "in")


# plot correlation as boxplot
df_subset = data$cross_cors
df_subset$celltype1 = as.character(df_subset$celltype1)
df_subset$celltype2 = as.character(df_subset$celltype2)
plt4 = ggplot(df_subset) + aes(fill = factor(celltype1), x = factor(celltype1), y = value) + geom_boxplot() + ylab("individual correlations") + theme_classic() + scale_fill_manual(values = colors[as.character(df_subset$celltype1)]) + theme(legend.position = "none")

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/individual_correlations.pdf", plot = plt4, width = 3, height = 3, units = "in")


In [17]:
# plot barplots
df = data$median_cells
df$celltype = as.character(df$celltype)
plt5 = ggplot(df) + aes(fill = factor(celltype), x = factor(celltype), y = median) + geom_bar(color = "black", stat="identity") + ylab("Median cells/ind.") + theme_classic() + scale_fill_manual(values = colors[as.character(df$celltype)]) + theme(legend.position = "none")
df = data$N_cells
df$celltype = as.character(df$celltype)
plt6 = ggplot(df) + aes(fill = factor(celltype), x = factor(celltype), y = N) + geom_bar(color = "black", stat="identity") + ylab("N cells/ind.") + theme_classic() + scale_fill_manual(values = colors[as.character(df$celltype)]) + theme(legend.position = "none") + 
  geom_text(label = df$N, vjust = 1.5, colour = "white")

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/median_cells.pdf", plot = plt5, width = 3, height = 3, units = "in")


In [8]:
# show additional coord info 
n <- length(unique(coords$leiden_clusters))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
names(col_vector) = as.character(unique(coords$leiden_clusters))
C2 = plot_coords_by_grp(coords, 'x', 'y', 'leiden_clusters', 'gaussian_liklihood', col_vector)

C2_mod <- C2 +
  theme(legend.position = "none") +              # drop the legend
   labs(
    x     = "X coordinate",
    y     = "Y coordinate",
    title = "2D UMAP projection of cells colored by leiden clusters"
  ) +
  coord_equal() 

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/cell_proj_with_leiden.pdf", plot = C2_mod, width = 5, height = 5, units = "in")


“[1m[22mThe `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
[36mℹ[39m Please use the `linewidth` argument instead.”


In [9]:
# also show cluster enrichments below
df = read.csv('../../processed_data/celltype_scores.csv') %>% melt(., id.vars = 'X')
colData = read.csv('../../processed_data/colData.csv')
temp = coords[!duplicated(coords$leiden_clusters),]
rownames(temp) = temp$leiden_clusters
df$celltype = temp[as.character(df$X),'annotations2']

df$X = as.character(df$X)

plt8 = df%>%as_tibble(.)%>%
tidyHeatmap::heatmap(column_title='Celltype Marker',row_title='Leiden Clusters',.row = X,.column = variable,.value = value,show_heatmap_legend =TRUE,palette_value = circlize::colorRamp2(seq(-3, 3, length.out = 11), rev(RColorBrewer::brewer.pal(11, "RdBu"))), clustering_distance_rows='pearson', clustering_distance_columns='pearson', scale='row')%>%
add_tile(celltype, palette=C, show_annotation_name=FALSE)%>%wrap_heatmap()

ggsave("/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/leiden_heatmap.pdf", plot = plt8, width = 3, height = 5, units = "in")


In [10]:
# cladogram
temp = all_data$av_logcounts_by_cluster_full_matrix
names = unlist(lapply(colnames(temp), function(x) strsplit(x, '[.]')[[1]][1]))
out = apply(temp, 1, function(x){aggregate(x, by=list(names=names), 'mean')})
                      
out = do.call('cbind', out)

N = unlist(lapply(colnames(out), function(x) endsWith(x, '.x')))
out2 = out[,N]
rownames(out2) = out$SAMD11.names
out2 = t(out2)
cor_clusters = cor(out2[rowVars(out2)>0,])

df = colData[!duplicated(colData$leiden_clusters),c('leiden_clusters', 'annotations2')]
rownames(df) = as.character(df$leiden_clusters)
cellnames = df[colnames(cor_clusters),'annotations2']
Col = colors[cellnames]
rownames(cor_clusters) = unlist(lapply(1:length(colnames(cor_clusters)), function(x) paste0(cellnames[x], '.C', colnames(cor_clusters)[x])))
                  
options(repr.plot.width=5, repr.plot.height=8)

D <- dist(cor_clusters)
HC <- hclust(D)
pdf('/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/hierarchical_tree.pdf', width = 5, height=8)
P = plot(ape::as.phylo(HC), type="cladogram", tip.color=Col)
dev.off()

In [12]:
df = readRDS('../../processed_data/RefCellTypeMarkers.adultBrain.rds')
df = lapply(df, function(x) x[x%in%rownames(out2)])
            
out2 = all_data$av_logcounts_by_celltype
o = lapply(names(df), function(x) colMeans((out2[df[[x]],])))

temp = as.data.frame(do.call('rbind', o))
rownames(temp) = names(df)
temp = temp[!rownames(temp)%in%c('Per', 'Endo'),]           
temp = temp[c('Ex', 'In', 'Ast', 'Mic', 'Oli', 'Opc'),c('Ex', 'In', 'Ast', 'Mic', 'Oli', 'Opc')]
h1 = Heatmap((scale(t(temp))), name='z-score', cluster_columns = F,clustering_distance_columns='pearson', cluster_rows=F,column_title = 'reference 1\n(PsychEncode)',, rect_gp = gpar(col = "black", lwd = 1))

df = readRDS('../../processed_data/PanglaoDB.by.organ.by.celltype.rds')
df = df$Brain
df = lapply(df, function(x) x[x%in%rownames(out2)])
            
out2 = all_data$av_logcounts_by_celltype
o = lapply(names(df), function(x) colMeans((out2[df[[x]],])))
           
temp2 = as.data.frame(do.call('rbind', o))
rownames(temp2) = names(df)
temp2 = temp2[rownames(temp2)%in%c('Pyramidal cells', 'Neurons','Glutaminergic neurons', 'GABAergic neurons', 'Astrocytes', 'Oligodendrocytes', 'Microglia',  'Oligodendrocyte progenitor cells'),]
N = c('Neurons', 'Pyramidal cells', 'Glutaminergic neurons','GABAergic neurons', 'Astrocytes', 'Microglia', 'Oligodendrocytes', 'Oligodendrocyte progenitor cells')
temp2 = temp2[(N),c('Ex', 'In', 'Ast', 'Mic', 'Oli', 'Opc')]
h2 = Heatmap((scale(t(temp2))), clustering_distance_columns='pearson', cluster_columns = F,cluster_rows=F, column_title = 'reference 2\n(PanglaoDB)', show_heatmap_legend = F, rect_gp = gpar(col = "black", lwd = 1))

pdf('/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/fs1/marker_hmap.pdf', width = 4.5, height=4.5)
h1+h2
dev.off()