In [70]:
source('./ABCA7lof2/prep_data.r')
library(tidyr)

In [71]:
# Notebook configuration: cell-type label vector, input object and output location.
# `order` is not referenced by the cells shown in this notebook section; the name
# is kept as-is in case other code relies on it.
order <- c('Ex', 'In', 'Ast', 'Mic', 'Oli', 'Opc', 'Vascular')
# Object read from disk; later cells access its colData slot and its
# 'counts' / 'logcounts' assays.
sce <- readRDS(file = './processed_data/single_cell/sce.rds')
# Destination of the list written by the final saveRDS() call.
output_path <- './processed_data/single_cell/stats_input_data.rds'

In [72]:
# Sum counts and logcounts per (cell type x individual) group via sum_counts()
# (defined outside this notebook section, presumably in prep_data.r).
print('summing...')
meta <- sce@colData
cell_labels <- rownames(meta)
# One label per cell of the form "<annotations2>.<projid>" (interaction() joins
# its factors with '.'). The as.data.frame() argument is left exactly as written
# because R derives the resulting column name from that expression.
labels <- as.data.frame(as.character(interaction(meta$annotations2, meta$projid)))
summed_counts_cellxind <- sum_counts(assays(sce)[['counts']], labels, cell_labels)
summed_logcounts_cellxind <- sum_counts(assays(sce)[['logcounts']], labels, cell_labels)

[1] "summing..."


In [73]:
# Sum logcounts per cell type, pooling cells from all individuals.
# The as.data.frame() argument is left exactly as written because R derives the
# resulting column name from that expression.
summed_logcounts_cell <- sum_counts(
    assays(sce)[['logcounts']],
    label = as.data.frame(meta$annotations2),
    cell_labels
)

In [74]:
# Turn both summed logcount matrices into averages: every row of `summed_counts`
# is divided element-wise by the matching `ncells` vector (presumably the number
# of cells behind each column -- confirm against sum_counts). apply() returns
# the rows as columns, hence the transpose back.
avs_logcounts_cellxind <- t(apply(
    summed_logcounts_cellxind$summed_counts,
    1,
    function(row_sums) row_sums / summed_logcounts_cellxind$ncells
))
avs_logcounts_cell <- t(apply(
    summed_logcounts_cell$summed_counts,
    1,
    function(row_sums) row_sums / summed_logcounts_cell$ncells
))

In [75]:
# Per cell type, in what fraction of cells is each gene detected (count > 0)?
counts_nonzero <- assays(sce)[['counts']] > 0
# Summing the logical matrix per cell type counts the cells with a non-zero entry.
# The as.data.frame() argument is left exactly as written because R derives the
# resulting column name from that expression.
detected_genes_cell <- sum_counts(
    counts_nonzero,
    label = as.data.frame(meta$annotations2),
    cell_labels
)
# Divide each row by `ncells` to turn the per-cell-type counts into fractions.
fraction_detected_genes_cell <- t(apply(
    detected_genes_cell$summed_counts,
    1,
    function(row_detected) row_detected / detected_genes_cell$ncells
))

In [76]:
# Expressed-gene lists from the per-cell-type detection fractions, using
# cut-offs of 0.25 and 0.10 (get_expressed_genes is defined outside this
# notebook section; how it applies the cut-off is not visible here).
expressed25 <- get_expressed_genes(fraction_detected_genes_cell, 0.25)
expressed10 <- get_expressed_genes(fraction_detected_genes_cell, 0.10)

In [77]:
# Split the (gene x celltype.individual) average-logcount matrix into one
# gene x individual matrix per cell type, stored in `avs_by_ind_out` keyed by
# cell type.
print('summarizing experiment by individual...')
# Column names are split on '.' and read as "<celltype>.<individual>".
# NOTE(review): this assumes neither part contains a '.' itself -- confirm
# against the labels produced upstream.
x = strsplit(colnames(avs_logcounts_cellxind), '[.]')
# vapply (instead of unlist(lapply(1:length(x), ...))) iterates the list
# directly and guarantees exactly one string per column.
celltype = vapply(x, function(parts) parts[[1]], character(1))
individual = vapply(x, function(parts) parts[[2]], character(1))
celltype_unique = unique(celltype)
avs_by_ind_out = list()
for(i in celltype_unique){
    index = celltype==i
    # drop = FALSE keeps a matrix even when a cell type has a single individual;
    # without it the subset collapses to a vector and the colnames<- below fails.
    df = avs_logcounts_cellxind[, index, drop = FALSE]
    colnames(df) = individual[index]
    avs_by_ind_out[[i]] = df
}


[1] "summarizing experiment by individual..."


In [78]:
# Per-individual metadata table, with rows named by the 'projid' column.
summary <- read.csv(
    file = './raw_data/metadata/single_cell_individual_metadata.csv',
    row.names = 'projid'
)
# APOE4 flag: 1 when apoe_genotype is one of 24, 44 or 34, otherwise 0.
summary$APOE4 <- ifelse(summary$apoe_genotype %in% c(24, 44, 34), 1, 0)
# LOF is a copy of the ABCA7LoF column under a shorter name.
summary$LOF <- summary$ABCA7LoF

In [79]:
# Collect every derived object into one named list for serialization.
all_data <- list()

# sum_counts() result for the celltype x individual grouping
all_data$summed_counts_by_ind <- summed_counts_cellxind
# average logcounts per cell type
all_data$av_logcounts_by_celltype <- avs_logcounts_cell
# average logcounts per individual: split by cell type, and as one full matrix
all_data$av_logcounts_by_ind <- avs_by_ind_out
all_data$av_logcounts_by_ind_full_matrix <- avs_logcounts_cellxind
# fraction of cells per cell type in which each gene has a non-zero count
all_data$det.rate.celltype <- fraction_detected_genes_cell
# expressed-gene lists at the 0.10 and 0.25 cut-offs
all_data$expressed10 <- expressed10
all_data$expressed25 <- expressed25
# per-individual metadata table
all_data$summary <- summary

In [80]:
# Write the assembled list to the path configured in `output_path`.
saveRDS(object = all_data, file = output_path)