In [None]:
library(nebula)
library(Seurat)

In [None]:
# Load the labelled object and keep the cells to be analysed

object <- readRDS("parentDir/project_postLabel_date.rds")

# Drop every cell whose `model` value is "NOT"
GENO <- subset(object, subset = model != "NOT")

# Derive a per-cell condition label from orig.ident.
# `from` and `to` are left empty here; presumably they need to be filled in
# with the sample -> condition mapping before running (TODO confirm).
condition <- plyr::mapvalues(
  x = GENO@meta.data$orig.ident,
  from = c(),
  to = c()
)
GENO@meta.data$condition <- condition

# NOTE(review): the per-cell-type cells in this notebook subset objects named
# HT and KO, which are not created in this cell -- confirm where they come from.

In [None]:
# Prepare a Seurat object's data and fit nebula on it.
#
# Arguments:
#   seur        Seurat object. Counts are read from seur@assays$RNA@counts and
#               cell-level covariates from seur@meta.data.
#   form_fixed  Formula handed to model.matrix() to build the fixed-effect
#               design matrix from the metadata.
#   sampleCol   Name of the metadata column passed to nebula as the id vector.
#   cpc         Forwarded to nebula's `cpc` argument (default 0.1).
#
# Cells (count columns and metadata rows) are sorted by `sampleCol` before the
# fit so that cells sharing an id are contiguous. The metadata column
# nCount_RNA is supplied as the offset.
#
# Returns whatever nebula() returns.
RunNebula <- function(seur, form_fixed, sampleCol, cpc = .1) {
  cell_meta <- seur@meta.data
  counts <- seur@assays$RNA@counts
  print(dim(counts))

  print("Reorder")
  # One ordering, applied to both the count columns and the metadata rows,
  # keeps the two aligned
  sample_order <- order(cell_meta[, sampleCol])
  counts <- counts[, sample_order]
  cell_meta <- cell_meta[sample_order, ]

  print("Run DE!")
  design <- model.matrix(form_fixed, data = cell_meta)
  print(head(design))
  print(head(cell_meta[, sampleCol]))

  nebula(
    counts,
    cell_meta[, sampleCol],
    pred = design,
    offset = cell_meta$nCount_RNA,
    cpc = cpc
  )
}

In [None]:
# Astrocytes: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
astro <- subset(HT, subset = cellType == "Astrocytes")
astro

# Make condition a factor with WT as the reference level
astro@meta.data$condition <- factor(astro@meta.data[, "condition"])
astro@meta.data["condition"] <- relevel(
  astro@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(astro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(astro)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "astrocytes/astro_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Endothelial: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
endo <- subset(HT, subset = cellType == "Endothelial")
endo

# Make condition a factor with WT as the reference level
endo@meta.data$condition <- factor(endo@meta.data[, "condition"])
endo@meta.data["condition"] <- relevel(
  endo@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(endo, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(endo)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "endothelial/endo_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Excitatory neurons: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
excit <- subset(HT, subset = cellType == "Excitatory Neurons")
excit

# Make condition a factor with WT as the reference level
excit@meta.data$condition <- factor(excit@meta.data[, "condition"])
excit@meta.data["condition"] <- relevel(
  excit@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(excit, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(excit)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "excitatoryNeurons/excit_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Fibroblast-like cells: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
fibro <- subset(HT, subset = cellType == "Fibroblast-like")
fibro

# Make condition a factor with WT as the reference level
fibro@meta.data$condition <- factor(fibro@meta.data[, "condition"])
fibro@meta.data["condition"] <- relevel(
  fibro@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(fibro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(fibro)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "fibroblastLike/fibro_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Inhibitory neurons: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
inhib <- subset(HT, subset = cellType == "Inhibitory Neurons")
inhib

# Make condition a factor with WT as the reference level
inhib@meta.data$condition <- factor(inhib@meta.data[, "condition"])
inhib@meta.data["condition"] <- relevel(
  inhib@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(inhib, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(inhib)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "inhibitoryNeurons/inhib_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Microglia: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
micro <- subset(HT, subset = cellType == "Microglia")
micro

# Make condition a factor with WT as the reference level
micro@meta.data$condition <- factor(micro@meta.data[, "condition"])
micro@meta.data["condition"] <- relevel(
  micro@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(micro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(micro)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "microglia/micro_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# ODC: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
ODC <- subset(HT, subset = cellType == "ODC")
ODC

# Make condition a factor with WT as the reference level
ODC@meta.data$condition <- factor(ODC@meta.data[, "condition"])
ODC@meta.data["condition"] <- relevel(
  ODC@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(ODC, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(ODC)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "ODC/ODC_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# OPC: HT vs WT differential expression with nebula

# Subset to this cell type and echo the object
OPC <- subset(HT, subset = cellType == "OPC")
OPC

# Make condition a factor with WT as the reference level
OPC@meta.data$condition <- factor(OPC@meta.data[, "condition"])
OPC@meta.data["condition"] <- relevel(
  OPC@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(OPC, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(OPC)

# Results table for the HT coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionHT,
  p_value = summ$p_conditionHT
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "OPC/OPC_HTWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Neurogenesis cells (cellType label "Neurogenisis"): KO vs WT differential
# expression with nebula

# Subset to this cell type and echo the object
neuro <- subset(KO, subset = cellType == "Neurogenisis")
neuro

# Make condition a factor with WT as the reference level
neuro@meta.data$condition <- factor(neuro@meta.data[, "condition"])
neuro@meta.data["condition"] <- relevel(
  neuro@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(neuro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(neuro)

# Results table for the KO coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionKO,
  p_value = summ$p_conditionKO
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "neurogenisis/neuro_KOWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Choroid plexus: KO vs WT differential expression with nebula

# Subset to this cell type and echo the object
choroidPlexus <- subset(KO, subset = cellType == "Choroid Plexus")
choroidPlexus

# Make condition a factor with WT as the reference level
choroidPlexus@meta.data$condition <- factor(
  choroidPlexus@meta.data[, "condition"]
)
choroidPlexus@meta.data["condition"] <- relevel(
  choroidPlexus@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(choroidPlexus, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(choroidPlexus)

# Results table for the KO coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionKO,
  p_value = summ$p_conditionKO
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "choroidPlexus/choroidPlexus_KOWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Ependyma: KO vs WT differential expression with nebula

# Subset to this cell type and echo the object
ependyma <- subset(KO, subset = cellType == "Ependyma")
ependyma

# Make condition a factor with WT as the reference level
ependyma@meta.data$condition <- factor(ependyma@meta.data[, "condition"])
ependyma@meta.data["condition"] <- relevel(
  ependyma@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(ependyma, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(ependyma)

# Results table for the KO coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionKO,
  p_value = summ$p_conditionKO
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table. The file name follows the same
# <cellType>_<contrast>_nebulaDE_date.csv pattern as the other cell types.
write.csv(
  summ_full,
  file = "ependyma/ependyma_KOWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Cajal-Retzius cells: KO vs WT differential expression with nebula

# Subset to this cell type and echo the object
cajalRetzius <- subset(KO, subset = cellType == "Cajal Retzius")
cajalRetzius

# Make condition a factor with WT as the reference level
cajalRetzius@meta.data$condition <- factor(
  cajalRetzius@meta.data[, "condition"]
)
cajalRetzius@meta.data["condition"] <- relevel(
  cajalRetzius@meta.data[, "condition"],
  ref = "WT"
)

# Fit the model, using orig.ident as the sample column
re <- RunNebula(cajalRetzius, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Every gene in the subset; used below to add back genes missing from summ
allGenes <- rownames(cajalRetzius)

# Results table for the KO coefficient, plus FDR-adjusted p-value and a
# signed -log(p) statistic
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ$logFC_conditionKO,
  p_value = summ$p_conditionKO
)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- sign(summ_filt$logFC) * (-log(summ_filt$p_value))

# Genes in the object that are absent from the nebula summary
naGenes <- allGenes[!(allGenes %in% summ_filt$gene)]

# All-NA placeholder rows for those genes, carrying only the gene name
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- setNames(
  data.frame(matrix(NA, nrow = length(naGenes), ncol = 5)),
  cols
)
naRows$gene <- naGenes

# Append the placeholders, sort by gene, and inspect
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Save the full table
write.csv(
  summ_full,
  file = "cajalRetzius/cajalRetzius_KOWT_nebulaDE_date.csv",
  row.names = FALSE
)

In [None]:
# Count cells per cell type in the full object, to confirm that every type
# present was covered by one of the analyses above
cellTypeCounts <- table(object@meta.data$cellType)
as.data.frame(cellTypeCounts)