In [1]:
library(edgeR)
library(ggplot2)
library(glue)

Loading required package: limma

“package ‘ggplot2’ was built under R version 4.0.0”


## 1. load in data

In [2]:
# Input files, given relative to the notebook's working directory.
# Gene-level read counts (.gct); later used as the count matrix.
raw_filename     <- '../data/16p12_lcl_gene_reads_underscores.gct'
# Per-sample phenotype table (tab-separated).
pheno_filename   <- '../data/pheno_final.tsv'
# Gene-name mapping table; not read by any of the cells shown here.
mapping_filename <- '../data/gene_names_mapping_simple.tsv'

In [3]:
# Read the tab-separated phenotype table (first line is the header) and key
# its rows by the `sample` column so rows can be looked up by sample ID.
pheno <- read.table(file = pheno_filename, header = TRUE, sep = '\t')
row.names(pheno) <- pheno$sample

In [4]:
# Read the tab-separated count table (first line is the header), move the
# `Name` column into the row names, then drop it so that only the remaining
# columns are left as data.
rawdf <- read.table(file = raw_filename, header = TRUE, sep = '\t')
row.names(rawdf) <- rawdf$Name
rawdf$Name <- NULL

In [5]:
# Strip everything from the first '.' onwards from each identifier, e.g.
# 'ENSG00000223972.5' -> 'ENSG00000223972'.
#
# s: character vector of identifiers (a single string works as before).
# Returns an unnamed character vector of the same length as `s`. An element
# without a '.' is returned unchanged; an empty string yields NA, exactly as
# the scalar-only implementation did.
gencode2ensembl <- function(s) {
    # strsplit() is already vectorised; keep only the first piece of each
    # element. fixed = TRUE makes '.' a literal dot rather than a regex.
    pieces <- strsplit(s, '.', fixed = TRUE)
    vapply(pieces, function(p) p[1], character(1), USE.NAMES = FALSE)
}

# Key the count table by the identifier prefix returned by gencode2ensembl()
# and convert it to a matrix.
rownames(rawdf) <- unlist(lapply(rownames(rawdf), gencode2ensembl))
rawmat          <- as.matrix(rawdf)

# Re-order the phenotype table to follow the column (sample) order of the
# count matrix. Fail early when a sample has no phenotype row: indexing a
# data frame by an unknown row name does not error, it silently inserts an
# all-NA row (or partially matches another row name), which only surfaces
# later as a confusing failure.
missing_samples <- setdiff(colnames(rawmat), rownames(pheno))
if (length(missing_samples) > 0) {
    stop('samples in count matrix without a phenotype row: ',
         paste(missing_samples, collapse = ', '))
}
pheno <- pheno[colnames(rawmat), ]

In [6]:
# Locate the design-matrix column that corresponds to one level of `group`.
#
# design: matrix with column names; the column looked up is the one named
#         paste0('group', group).
# group:  level name without the 'group' prefix (e.g. 'carrier').
# Returns the integer position(s) of the matching column, or integer(0) when
# no column has that name.
contrast_map <- function(design, group) {
    target <- paste0('group', group)
    # which() replaces the manual (1:n)[mask] lookup: it behaves the same for
    # ordinary input, is safe when there are no columns, and never returns NA.
    which(colnames(design) == target)
}

In [7]:
# Leave-one-subject-out differential expression with edgeR.
#
# For every subject in `pheno`, all samples of that subject are removed and the
# remaining samples are filtered, normalised and fitted with a quasi-likelihood
# GLM (~0 + group + family); carrier is then tested against non_carrier.
# Two files are written per excluded subject into output/edgr_exclude_one/:
#   keep.<subject>.txt  IDs of the genes kept by filterByExpr()
#   <subject>.tsv       genes with adjusted p-value (FDR) <= 0.05
dir.create('output/edgr_exclude_one', recursive = TRUE, showWarnings = FALSE)

for (excl_sub in unique(pheno$subject)) {
    # A bare expression is not auto-printed inside a loop, so report explicitly.
    message('excluding subject: ', excl_sub)

    excl_pheno <- pheno[pheno$subject != excl_sub, ]
    # as.character(): if `sample` was read as a factor, indexing the matrix
    # with it would use the integer level codes instead of the column names
    # and silently pick the wrong columns.
    samples <- as.character(excl_pheno$sample)
    excl_rawmat <- rawmat[, samples, drop = FALSE]

    group  <- excl_pheno$status3
    family <- excl_pheno$family
    sex    <- excl_pheno$sex  # extracted as before; not part of the design below
    # Dropping a subject can leave factor levels with no samples; such levels
    # would add all-zero columns to the design matrix.
    if (is.factor(group)) {
        group <- droplevels(group)
    }
    if (is.factor(family)) {
        family <- droplevels(family)
    }

    y <- DGEList(counts = excl_rawmat, group = group)
    keep <- filterByExpr(y)
    write.table(names(keep)[keep], glue('output/edgr_exclude_one/keep.{excl_sub}.txt'),
                row.names = FALSE, col.names = FALSE, quote = FALSE)
    y <- y[keep, , keep.lib.sizes = FALSE]
    y <- calcNormFactors(y)

    design <- model.matrix(~0 + group + family)
    rownames(design) <- colnames(y)
    y <- estimateDisp(y, design, robust = TRUE)
    fit <- glmQLFit(y, design, robust = TRUE)

    # Contrast vector: +1 on the carrier column, -1 on the non_carrier column.
    pos_carrier     <- contrast_map(design, 'carrier')
    pos_non_carrier <- contrast_map(design, 'non_carrier')
    if (length(pos_carrier) != 1L || length(pos_non_carrier) != 1L) {
        # Without this check a missing column would leave its entry at 0 and
        # a different hypothesis would be tested without any warning.
        stop('design matrix lacks a groupcarrier/groupnon_carrier column ',
             'after excluding subject ', excl_sub)
    }
    contrast <- numeric(ncol(design))
    contrast[pos_non_carrier] <- -1
    contrast[pos_carrier] <- 1

    qlf <- glmQLFTest(fit, contrast = contrast)
    # Rank every tested gene (no hard-coded gene count) and apply the 5% FDR
    # cut-off here, so that an iteration with no significant gene still yields
    # a well-formed, header-only table regardless of how topTags() represents
    # an empty result.
    topdf <- topTags(qlf, n = nrow(qlf$table))$table
    topdf <- topdf[which(topdf$FDR <= 0.05), , drop = FALSE]
    message(nrow(topdf), ' genes at FDR <= 0.05')

    diff_save <- cbind(rownames(topdf), topdf)
    colnames(diff_save)[1] <- "ensembl"
    outfile <- glue('output/edgr_exclude_one/{excl_sub}.tsv')
    write.table(diff_save, outfile, sep = '\t', row.names = FALSE, col.names = TRUE)
}