In [None]:
# Install required packages only if they are not already installed
# (avoids re-downloading devtools / rebuilding nebula on every notebook run)
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("nebula", quietly = TRUE)) {
  install_github("lhe17/nebula")
}

In [None]:
# Load required packages
library(nebula)
library(Seurat)

In [None]:
# Load the data object and keep only the genotype of interest plus the WT replicates
#   - replace "parentDir/project_postLabel_date.rds" with the path to your data object
#   - replace GENO (globally) with the genotype of choice (e.g. HT)
#   - replace NOT with the genotype that you are not analyzing (e.g. KO);
#     every cell whose `model` differs from it is kept

object <- readRDS(file = "parentDir/project_postLabel_date.rds")
GENO <- subset(x = object, subset = model != "NOT")
# Copy the genotype labels into `condition`, the column used in the ~condition formula below
GENO@meta.data[["condition"]] <- GENO@meta.data[["model"]]

In [None]:
# Function for properly passing data to and running nebula
#
# Args:
#   seur:       object with a `meta.data` slot and RNA assay counts
#               (read via seur@assays$RNA@counts).
#   form_fixed: one-sided formula for the fixed effects; evaluated against the
#               metadata with model.matrix().
#   sampleCol:  metadata column identifying the sample each cell belongs to;
#               passed to nebula as the grouping id.
#   cpc:        forwarded unchanged to nebula's `cpc` argument (default 0.1).
#
# Returns: whatever nebula() returns (callers below read `$summary` from it).
RunNebula <- function(seur, form_fixed, sampleCol, cpc = .1)
{
  meta <- seur@meta.data
  if (is.character(sampleCol) && !all(sampleCol %in% colnames(meta))) {
    stop("sampleCol '", sampleCol, "' is not a column of the metadata",
         call. = FALSE)
  }

  # NOTE(review): direct slot access assumes the RNA assay exposes a `counts`
  # slot; confirm this holds for the installed Seurat version.
  dat <- seur@assays$RNA@counts
  print(dim(dat))

  # Sort cells by sample so that cells from the same sample are contiguous;
  # the ordering is computed once and applied to both counts and metadata.
  print("Reorder")
  ord <- order(meta[, sampleCol])
  dat <- dat[, ord, drop = FALSE]
  meta <- meta[ord, , drop = FALSE]

  print("Run DE!")
  df <- model.matrix(form_fixed, data = meta)
  # model.matrix() silently drops rows with missing predictor values, which
  # would leave the design matrix misaligned with the count matrix columns.
  if (nrow(df) != ncol(dat)) {
    stop("design matrix has ", nrow(df), " rows but there are ", ncol(dat),
         " cells; check the formula variables for NA values", call. = FALSE)
  }
  print(head(df))
  print(head(meta[, sampleCol]))

  nebula(dat, meta[, sampleCol], pred = df, offset = meta$nCount_RNA, cpc = cpc)
}

In [None]:
# Run the analysis for each cell type in your dataset
    # Only the common neuronal cell types are included here, so you may need to add or remove cells for your own cell types
    # Replace GENO everywhere with the genotype being analyzed

# Astrocytes

astro <- subset(GENO, subset = cellType == "Astrocytes")
astro

# Use WT as the reference level so the fitted coefficient is condition vs WT
astro@meta.data$condition <- relevel(factor(astro@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(astro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# Get all gene names so you can add back the filtered out gene names to the final results
    # This makes it easier to compare results across experiments
allGenes <- rownames(astro)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order in alphabetical order by gene name, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "astrocytes/astro_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Endothelial

endo <- subset(GENO, subset = cellType == "Endothelial")
endo

# Use WT as the reference level so the fitted coefficient is condition vs WT
endo@meta.data$condition <- relevel(factor(endo@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(endo, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(endo)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "endothelial/endo_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Excitatory Neurons


excit <- subset(GENO, subset = cellType == "Excitatory Neurons")
excit

# Use WT as the reference level so the fitted coefficient is condition vs WT
excit@meta.data$condition <- relevel(factor(excit@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(excit, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(excit)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "excitatoryNeurons/excit_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Fibroblast-like

fibro <- subset(GENO, subset = cellType == "Fibroblast-like")
fibro

# Use WT as the reference level so the fitted coefficient is condition vs WT
fibro@meta.data$condition <- relevel(factor(fibro@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(fibro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(fibro)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "fibroblastLike/fibro_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Inhibitory Neurons


inhib <- subset(GENO, subset = cellType == "Inhibitory Neurons")
inhib

# Use WT as the reference level so the fitted coefficient is condition vs WT
inhib@meta.data$condition <- relevel(factor(inhib@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(inhib, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(inhib)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "inhibitoryNeurons/inhib_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Microglia

micro <- subset(GENO, subset = cellType == "Microglia")
micro

# Use WT as the reference level so the fitted coefficient is condition vs WT
micro@meta.data$condition <- relevel(factor(micro@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(micro, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(micro)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "microglia/micro_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# ODC

ODC <- subset(GENO, subset = cellType == "ODC")
ODC

# Use WT as the reference level so the fitted coefficient is condition vs WT
ODC@meta.data$condition <- relevel(factor(ODC@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(ODC, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(ODC)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "ODC/ODC_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# OPC

OPC <- subset(GENO, subset = cellType == "OPC")
OPC

# Use WT as the reference level so the fitted coefficient is condition vs WT
OPC@meta.data$condition <- relevel(factor(OPC@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(OPC, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(OPC)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "OPC/OPC_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# otherVascular

otherVascular <- subset(GENO, subset = cellType == "otherVascular")
otherVascular

# Use WT as the reference level so the fitted coefficient is condition vs WT
otherVascular@meta.data$condition <- relevel(factor(otherVascular@meta.data[, "condition"]), ref = "WT")
re <- RunNebula(otherVascular, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Stop with a clear message if the expected coefficient columns are absent
# (e.g. the genotype placeholder in the column names was not replaced)
coefCols <- c("logFC_conditionGENO", "p_conditionGENO")
if (!all(coefCols %in% colnames(summ))) {
  stop("nebula summary is missing column(s): ",
       paste(setdiff(coefCols, colnames(summ)), collapse = ", "), call. = FALSE)
}

# All gene names, so rows for filtered-out genes can be added back
allGenes <- rownames(otherVascular)

# Set up results table, add padj (FDR) and a signed -log(p) stat column
summ_filt <- data.frame(gene = summ$gene, logFC = summ$logFC_conditionGENO, p_value = summ$p_conditionGENO)
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression (absent from the nebula summary)
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes, same columns as summ_filt
cols <- c("gene", "logFC", "p_value", "padj", "stat")
naRows <- data.frame(matrix(ncol = length(cols), nrow = length(naGenes)))
colnames(naRows) <- cols
naRows$gene <- naGenes

# Combine df's, order, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv; create the output folder first so a missing
# directory does not abort the cell after the model has already been fitted
outFile <- "otherVascular/otherVascular_GENOWT_nebulaDE_date.csv"
dir.create(dirname(outFile), showWarnings = FALSE, recursive = TRUE)
write.csv(summ_full, outFile, row.names = FALSE)

In [None]:
# Sanity check: list every cellType label in the full object with its cell count,
# so you can confirm that each one has a corresponding analysis cell above
as.data.frame(table(object@meta.data[["cellType"]]))