<div style="border: 2px solid red; padding: 10px; border-radius: 5px;">
<strong>Notebook description:</strong>
    
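This notebook plots expression of ABCA7 (alongside ABCA1, APOE and NEUROD1) in two datasets: paired NeuN- vs. NeuN+ bulk RNA-seq samples (control samples only), and per-individual cell-type averages grouped into neurons vs. glia. It also plots the fraction of cells per cell type in which ABCA7 is detected.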

<strong>Input Data:</strong>

`stats_input_data_0825.rds`

`human_bulk_RNAseq.sce.rds`

<strong>Figures:</strong>

`ED4`

<strong>Last Run All:</strong>
June 18, 2025
</div>




In [1]:

source('../../ABCA7lof2/plotting.r')
library(SingleCellExperiment)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(patchwork)

library(reshape2)

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: 'MatrixGenerics'


The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges

In [2]:
# Genes to plot, and the list that will hold one bulk RNA-seq panel per gene.
genes <- c('ABCA7', 'ABCA1', 'APOE', 'NEUROD1')
all_p <- list()

# Precomputed inputs; this notebook reads its `av_logcounts_by_ind` and
# `det.rate.celltype` elements.
all_data <- readRDS('../../processed_data/stats_input_data_0825.rds')

# Bulk RNA-seq object (Welch et al.); expression values are taken from its
# `log2rpkm` assay, indexed as genes x samples below.
data <- readRDS('../../processed_data/human_bulk_RNAseq.sce.rds')
norm_counts <- assays(data)$log2rpkm

# Bulk RNA-seq (Welch et al.) panels: for each gene, a paired NeuN- vs NeuN+
# boxplot restricted to control ('ctr') samples. Results go into all_p[[gene]].
for (n in genes) {
    # Name every column explicitly. Previously four names were assigned to a
    # five-column data frame, which left the AD column with an NA name.
    df <- data.frame(
        gene = norm_counts[n, ],
        label = colData(data)$population,
        grp = colData(data)$id,
        condition = colData(data)$condition,
        AD = colData(data)$AD,
        stringsAsFactors = FALSE
    )
    df <- df[df$condition == 'ctr', ]

    # Choose the test from per-population Shapiro-Wilk normality checks.
    # NOTE(review): for a paired test the relevant assumption is normality of
    # the within-pair differences -- confirm this selection rule is intended.
    p_pos <- shapiro.test(df$gene[df$label == 'NeuN+'])$p.value
    p_neg <- shapiro.test(df$gene[df$label == 'NeuN-'])$p.value
    test <- if (p_pos > 0.05 && p_neg > 0.05) 't.test' else 'wilcox.test'

    df$label <- factor(df$label, levels = c('NeuN-', 'NeuN+'))

    # The paired comparison matches observations by position within each
    # population, so put both populations in the same `grp` order.
    # Assumes each `grp` id occurs once per population -- TODO confirm.
    df <- df[order(df$label, df$grp), ]

    all_p[[n]] <- ggplot(df, aes(x = label, y = gene, fill = label)) +
        xlab('') +
        ylab('normalized_expression') +
        ggtitle(n) +
        geom_boxplot() +
        geom_point() +
        geom_line(aes(group = grp), alpha = .2) +
        stat_compare_means(method = test, paired = TRUE) +
        theme_classic() +
        scale_fill_manual(values = c("grey", "seagreen3")) +
        theme(legend.position = "none") +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))
}

# Per-individual panels: for each gene, compare mean expression in neurons
# ('Ex', 'In') against all remaining cell types ('glia'), paired by individual.
# Results go into all_p2[[gene]].
all_p2 <- list()

# Individuals (column names) that are present in every matrix of
# av_logcounts_by_ind, so each one has a value for every cell type.
ind <- Reduce(intersect, lapply(all_data$av_logcounts_by_ind, colnames))

for (n in genes) {
    # One row per element of av_logcounts_by_ind, one column per shared
    # individual. Named `expr_mat` (not `exp`) to avoid masking base::exp.
    expr_mat <- do.call('rbind', lapply(all_data$av_logcounts_by_ind, function(x) x[n, ind]))
    df <- melt(expr_mat)
    df$grp <- ifelse(df$Var1 %in% c('Ex', 'In'), 'neurons', 'glia')

    # Average within each group x individual combination; the interaction
    # label "<grp>.<individual>" is then split back into two columns.
    temp <- aggregate(df$value, list(interaction(df$grp, df$Var2)), 'mean')
    temp <- separate(
        temp,
        c('Group.1'),
        c('grp', 'projid'),
        sep = "[.]"
    )

    # The paired comparison matches observations by position within each
    # group, so make the individual order identical in both groups.
    temp <- temp[order(temp$grp, temp$projid), ]

    # Choose the test from per-group Shapiro-Wilk normality checks.
    p_neurons <- shapiro.test(temp[temp$grp == 'neurons', 'x'])$p.value
    p_glia <- shapiro.test(temp[temp$grp == 'glia', 'x'])$p.value
    test <- if (p_neurons > 0.05 && p_glia > 0.05) 't.test' else 'wilcox.test'

    # Dropped from the original call: a `col = 'black'` argument to ggplot()
    # (not used there) and a second, duplicate legend-hiding theme() layer.
    all_p2[[n]] <- ggplot(temp, aes(x = grp, y = x, fill = grp)) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        geom_boxplot(width = .8, alpha = 1) +
        theme_classic() +
        ggtitle(n) +
        ylab('normalized expression') +
        xlab('') +
        scale_fill_manual(values = c("grey", "seagreen3")) +
        geom_line(aes(group = projid), alpha = .2) +
        stat_compare_means(method = test, paired = TRUE) +
        theme(legend.position = "none")
}
                                  
# Notebook display size for the combined figure.
options(repr.plot.width = 5, repr.plot.height = 10)

# One column per gene (bulk panel stacked on top of the per-individual panel),
# with the columns placed side by side from left to right.
panel_genes <- c('ABCA7', 'ABCA1', 'APOE', 'NEUROD1')
gene_columns <- lapply(panel_genes, function(g) all_p[[g]] / all_p2[[g]])
plots1 <- Reduce(`|`, gene_columns)

In [3]:
# Lay out the four gene panels of each dataset in a single row:
# `welch` from the bulk RNA-seq panels, `sn` from the per-individual panels.
gene_order <- c('ABCA7', 'ABCA1', 'APOE', 'NEUROD1')
welch <- Reduce(`|`, all_p[gene_order])
sn <- Reduce(`|`, all_p2[gene_order])


In [4]:

# Write both panel rows to PDF (10 x 3 in each).
# Note: the destinations are absolute paths on one specific machine.
exports <- list(
    "/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/ed4/scRNAseq_bulk_rna.pdf" = sn,
    "/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/ed4/welch_et_al_bulk_rna.pdf" = welch
)

for (path in names(exports)) {
    ggsave(path, plot = exports[[path]], width = 10, height = 3, units = "in")
}



In [7]:
# Display the 'ABCA7' row of the per-cell-type detection-rate table.
det_rates <- all_data$det.rate.celltype
det_rates['ABCA7', ]

In [12]:
library(ggplot2)
library(forcats)  # provides fct_reorder()

# Fraction of cells with ABCA7 counts > 0, keyed by cell type.
# NOTE(review): these values appear to be hand-copied from the printed
# all_data$det.rate.celltype['ABCA7', ] row -- confirm they are still current.
abca7_rate <- c(
  Opc = 0.0124688279301746,
  Ast = 0.0811679240011176,
  Ex  = 0.296686818679488,
  Oli = 0.0199444404872142,
  Mic = 0.0691049439441279,
  In  = 0.15291098203431
)
df <- data.frame(
  cell_type = names(abca7_rate),
  value     = unname(abca7_rate)
)

# Order the bars from the highest to the lowest detection rate.
df$cell_type <- fct_reorder(df$cell_type, df$value, .desc = TRUE)

# Bar chart of detection rate per cell type.
pl <- ggplot(df, aes(x = cell_type, y = value)) +
  geom_col() +
  xlab("Cell type") +
  ylab("fraction of cells where ABCA7 counts > 0") +
  theme_classic()

ggsave(
  "/Users/djuna/Documents/ABCA7lof2/editorial_paper/main_panels_svgs/ed4/abca7_detection_rate.pdf",
  plot = pl,
  width = 3,
  height = 3,
  units = "in"
)
