In [4]:
suppressMessages(library(tidyverse))     # tidyverse will pull in ggplot2, readr, other useful libraries
suppressMessages(library(magrittr))      # provides the %>% operator
suppressMessages(library("genefilter"))
suppressMessages(library(DESeq2))
suppressMessages(library(tximport))
suppressMessages(library("readr"))
suppressMessages(library("tximportData"))
suppressMessages(library(data.table))

suppressMessages(library(matrixStats))
suppressMessages(library(org.Hs.eg.db))
suppressMessages(library(topGO))

suppressMessages(library(pheatmap))
suppressMessages(library(gplots))
suppressMessages(library(matrixStats))

suppressMessages(library(data.table))

# Keep character columns as plain strings when data frames are built.
options(stringsAsFactors = FALSE)

# Output paths used when the DESeq2 results are saved.
# NOTE(review): "covic" in the RDS path looks like a typo for "covid"; it is
# kept unchanged because other code may read this exact path - confirm.
RDS_FILENAME <- "daa_output/misc-covic_paired.rds"
GENE_TABLE_FILENAME <- "./tables/cf_misc-covid_paired_DESeq.tsv"

---
## Load Metadata

In [5]:
# Diagnosis groups compared in this analysis. The same vector is passed to the
# DESeq2 contrast, so its order determines the direction of the comparison.
GROUPS <- c("MIS-C", "COVID-19")

# Sample sheet restricted to acute-timepoint samples from the two groups.
meta_data <- read.csv("../../1_sample-data/STable6_cfrna-samples.csv") %>%
    filter(Diagnosis %in% GROUPS, timepoint == "acute")

# Keep only key rows that have both a cfRNA and a whole-blood RNA sample id.
paired <- read.delim("../../1_sample-data/paired_sample_key.tsv") %>%
    filter(!is.na(cfrna_sample_id), !is.na(wbrna_sample_id))

# Restrict to samples present in the paired key. `all_of()` is a tidyselect
# column-selection helper and does not belong inside a row filter, so the
# id vector is used with `%in%` directly.
meta_data <- meta_data %>%
    filter(cfrna_sample_id %in% paired$cfrna_sample_id)

samples <- meta_data

# Grouping variable used in the DESeq2 design formula.
samples$expGroup <- samples$Diagnosis

# Show the number of samples per group.
table(samples$expGroup)


COVID-19    MIS-C 
      12       29 

---

In [6]:
##------------------------------------
# PREPARE COUNTS
##------------------------------------

##------------------------------------
# Load the count table and keep only the columns of the selected samples
sample_ids <- unique(samples$cfrna_sample_id)

counts <- read.delim("../../1_sample-data/cfrna_ftcounts.txt")
rownames(counts) <- counts$Geneid

# Stop with an explicit message if a selected sample has no count column.
missing_ids <- setdiff(sample_ids, colnames(counts))
if (length(missing_ids) > 0) {
    stop("Samples missing from the count table: ",
         paste(missing_ids, collapse = ", "),
         call. = FALSE)
}

# drop = FALSE keeps a data frame even if only one sample is selected.
counts <- counts[, sample_ids, drop = FALSE]

##------------------------------------
# Remove genes listed in the exclusion gene list
# (described in the original notes as ChrX, ChrY, ChrM, and RB genes)
gene.list <- read.delim("../../0_support-files/genelist.hs.tsv",
                        col.names = c("type", "ENSMBL", "gene_symbol"))

# Drop everything from the first "." onward in each row id
# (presumably the Ensembl version suffix - confirm) before comparing.
gene.ids <- sub("\\..*", "", rownames(counts))

exclude.idx <- gene.ids %in% gene.list$ENSMBL

counts <- counts[!exclude.idx, , drop = FALSE]


##------------------------------------
# RUN DESEQ2
##------------------------------------

##------------------------------------
# Construct DESeq Data Set
# Encode the grouping variable as a factor (levels ordered as in GROUPS)
# before building the data set, so DESeq2 does not have to coerce a character
# column and no later re-leveling is needed. A copy is used so `samples`
# itself is left unchanged.
col_data <- samples
col_data$expGroup <- factor(col_data$expGroup, levels = GROUPS)

dds <- DESeqDataSetFromMatrix(round(counts),
                              colData = col_data,
                              design = ~ expGroup + 0)

##------------------------------------
# Add Gene metadata
annotation <- fread(file = "../../0_support-files/gencode.biotype.name.key.tsv")
annotation <- annotation[match(rownames(dds), annotation$gene_id), ]

# Warn instead of silently continuing when the annotation rows do not line up
# with the genes in dds.
# NOTE(review): rows are matched on `gene_id` but verified against
# `ftcount_id` - confirm that using two different columns is intended.
annotation_aligned <- length(annotation$ftcount_id) == nrow(dds) &&
    isTRUE(all(rownames(dds) == annotation$ftcount_id))
if (!annotation_aligned) {
    warning("Gene annotation rows do not all match rownames(dds); ",
            "gene_name/gene_type may be missing or misassigned.",
            call. = FALSE)
}

mcols(dds) <- cbind(mcols(dds), annotation)

##------------------------------------
# Pre-filter
# keep <- rowSums(counts(dds)) >= 10
# dds <- dds[keep,]

##------------------------------------
# DAA
dds <- DESeq(dds)


##------------------------------------
# ANALYZE
##------------------------------------

# Extract results for the expGroup contrast defined by GROUPS, using 0.05 as
# the adjusted p-value cutoff.
contrast_spec <- c("expGroup", GROUPS)
res <- results(dds, contrast = contrast_spec, alpha = 0.05)

# Copy gene name and gene type from the row metadata of dds onto the results.
gene_meta <- mcols(dds)
res$gene_name <- gene_meta$gene_name
res$gene_type <- gene_meta$gene_type

summary(res)

# tg_up <- suppressMessages(topGO(res,SIG_THRESH = 0.05,"+"))
# tg_down <- suppressMessages(topGO(res,SIG_THRESH = 0.05,"-"))

##------------------------------------
# SAVE
##------------------------------------

# Create the output directories if needed, so the writes below do not fail
# on a missing folder after the model has already been fit.
for (out_path in c(RDS_FILENAME, GENE_TABLE_FILENAME)) {
    dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)
}

# Bundle the fitted data set and the results table into one object.
rslts <- list(dds = dds, res = res)
# rslts[['TopGO+']] <- tg_up
# rslts[['TopGO-']] <- tg_down

saveRDS(rslts, file = RDS_FILENAME)

# Write the results as a tab-separated table with gene ids in a GeneID column.
data.frame(res) %>%
    rownames_to_column(var = "GeneID") %>%
    write.table(GENE_TABLE_FILENAME, sep = "\t", row.names = FALSE)

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters



estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use



out of 47083 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 152, 0.32%
LFC < 0 (down)     : 210, 0.45%
outliers [1]       : 0, 0%
low counts [2]     : 36124, 77%
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

