In [None]:
# Install required packages if they are not already installed
# (each install is skipped when the package can already be loaded, so
# re-running this cell does not re-download or rebuild anything)
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("nebula", quietly = TRUE)) {
  install_github("lhe17/nebula")
}

In [None]:
library(nebula)
library(Seurat)

In [None]:
# Data loading, subset by desired genotype
    # Replace "parentDir/project_postLabel_date.rds" with the path to your data object
    # Replace GENO (globally) with the genotype of choice (e.g. HT)
    # Replace NOT with the genotype that you are not analyzing (a genotype other than GENO and WT)
        # This way, only the genotype you want to analyze and the WT replicates will be chosen
    # NOTE(review): NOT appears as a bare token in the subset expression below, so it
    # presumably has to become a quoted label (e.g. "KO") or the name of an existing
    # variable holding that label -- confirm before running

# Read the saved object from disk
object <- readRDS("parentDir/project_postLabel_date.rds")
# Keep the entries of `object` whose `model` value differs from NOT
GENO <- subset(object, subset=model!=NOT)
# Copy the `model` metadata column into a `condition` column
GENO@meta.data$condition <- GENO@meta.data$model

In [None]:
# Function for properly passing data to and running nebula
#
# Arguments:
#   seur       object exposing `@meta.data` (one row per cell) and
#              `@assays$RNA@counts` (genes x cells count matrix).
#              NOTE(review): direct slot access presumably assumes a Seurat
#              v3/v4 style assay; v5 assays may store counts differently --
#              confirm against the Seurat version in use.
#   form_fixed one-sided formula for the fixed effects, evaluated against the
#              cell metadata with model.matrix() (e.g. ~condition).
#   sampleCol  metadata column identifying the sample each cell belongs to;
#              passed to nebula as the grouping id.
#   cpc        forwarded unchanged to nebula's `cpc` argument.
#
# Returns whatever nebula() returns.
#
# Cells are sorted by sample before the call so that all cells of one sample
# are contiguous in the count matrix, the id vector, the design matrix and the
# offset (meta$nCount_RNA).
RunNebula <- function(seur, form_fixed, sampleCol, cpc = .1) {
  meta <- seur@meta.data
  dat <- seur@assays$RNA@counts

  # Fail early with a readable message instead of "undefined columns selected"
  if (is.character(sampleCol) && !all(sampleCol %in% colnames(meta))) {
    stop("sampleCol '", sampleCol, "' is not a column of the cell metadata",
         call. = FALSE)
  }

  print(dim(dat))
  print("Reorder")
  # Compute the permutation once and apply it to both objects so counts and
  # metadata cannot drift apart; drop = FALSE keeps matrix / data.frame shape
  # even for a single cell or a single metadata column.
  cell_order <- order(meta[, sampleCol])
  dat <- dat[, cell_order, drop = FALSE]
  meta <- meta[cell_order, , drop = FALSE]

  print("Run DE!")
  df <- model.matrix(form_fixed, data = meta)
  # model.matrix() silently drops rows with missing predictors; that would
  # leave the design matrix shorter than (and misaligned with) the counts.
  if (nrow(df) != ncol(dat)) {
    stop(sprintf(
      "design matrix has %d rows but there are %d cells; check for NA values in the variables of form_fixed",
      nrow(df), ncol(dat)
    ), call. = FALSE)
  }
  print(head(df))
  print(head(meta[, sampleCol]))

  nebula(dat, meta[, sampleCol], pred = df, offset = meta$nCount_RNA, cpc = cpc)
}

In [None]:
# Variables to replace
    # PROJECT: the name of the dataset you are analyzing (likely GENE_AGEREGION)
    # DATE: the date of the analysis

# Set-up and run nebula
# Make condition a factor with WT as the reference level, so the fitted
# coefficient is named after (and describes) the non-WT genotype.
GENO@meta.data$condition <- relevel(
  factor(GENO@meta.data[, "condition"]),
  ref = "WT"
)
re <- RunNebula(GENO, ~condition, "orig.ident", cpc = 0.1)
summ <- re$summary

# Names of the summary columns holding the genotype effect. Check them up
# front: a missing column would otherwise be dropped silently by data.frame()
# and only fail later with an unrelated-looking error.
logfc_col <- "logFC_conditionGENO"
p_col <- "p_conditionGENO"
missing_cols <- setdiff(c("gene", logfc_col, p_col), colnames(summ))
if (length(missing_cols) > 0) {
  stop(
    "nebula summary is missing expected column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Get all gene names so you can add back the filtered out gene names to the final results
    # This makes it easier to compare results across experiments
# Taken from the same RNA count matrix that RunNebula passes to nebula, so the
# gene universe matches what was tested even if the default assay is not RNA.
allGenes <- rownames(GENO@assays$RNA@counts)

# Set up results table, add padj and stat column
summ_filt <- data.frame(
  gene = summ$gene,
  logFC = summ[[logfc_col]],
  p_value = summ[[p_col]]
)
# FDR adjustment is computed over the tested genes only
summ_filt$padj <- p.adjust(summ_filt$p_value, method = "fdr")
# Signed statistic: -log(p) (natural log) carrying the sign of the logFC
summ_filt$stat <- -log(summ_filt$p_value) * sign(summ_filt$logFC)

# Find genes filtered for low expression
naGenes <- allGenes[!allGenes %in% summ_filt$gene]

# Make df with NA values for dropped genes
# (columns are typed explicitly so the frame is well-formed even when no gene
# was dropped)
cols <- c("gene", "logFC", "p_value", "padj", "stat")
na_fill <- rep(NA_real_, length(naGenes))
naRows <- data.frame(
  gene = naGenes,
  logFC = na_fill,
  p_value = na_fill,
  padj = na_fill,
  stat = na_fill
)

# Combine df's, order in alphabetical order by gene name, check
summ_full <- rbind(summ_filt, naRows)
summ_full <- summ_full[order(summ_full$gene), ]
head(summ_full)
dim(summ_full)

# Write nebula results to csv
write.csv(summ_full, "PROJECT_GENOWT_nebulaDE_DATE.csv", row.names = FALSE)