In [2]:
library(dplyr)
library(edgeR)
library(readr)

In [3]:
# Print the installed edgeR version (the recorded cell output shows 3.40.2).
packageVersion("edgeR")

[1] '3.40.2'

In [4]:
# Build the vector of project identifiers to analyse: read the project table,
# keep the rows whose `normal` column is greater than 20, and extract `project`.
# NOTE(review): `normal` is presumably the number of normal samples per
# project -- confirm against the spreadsheet.
tcga_groups <- readxl::read_excel("../data/processed/tcga_groups.xlsx") %>%
    dplyr::filter(normal > 20) %>%
    dplyr::pull(project)

In [5]:
# For every project in `tcga_groups`: read its estimated-count table and design
# matrix, run an edgeR quasi-likelihood F-test on the `condition` column, and
# write the full result table to ../results/general/<project>_edger.csv.
for (ctype in tcga_groups) {
    est_counts <- readr::read_csv(sprintf("../data/processed/tcga_type_data/%s_est_counts.csv", ctype), show_col_types = FALSE)

    # Isoform IDs become row names; the remaining columns (one per sample) are
    # put in sorted order.
    est_counts <- est_counts %>%
        tibble::column_to_rownames("isoform_id") %>%
        dplyr::select(order(colnames(.)))

    design_matrix <- readr::read_csv(sprintf("../data/processed/tcga_type_data/%s_dm.csv", ctype), show_col_types = FALSE)

    # Align design rows to the count columns by sample ID instead of sorting
    # both independently: order() collates in the session locale whereas
    # dplyr::arrange() (>= 1.1.0) uses the C locale, so two separate sorts are
    # not guaranteed to agree, and a mismatch would silently mislabel samples.
    sample_ids <- colnames(est_counts)
    row_idx <- match(sample_ids, design_matrix$sampleID)
    if (nrow(design_matrix) != length(sample_ids) || anyNA(row_idx)) {
        stop(
            sprintf("Sample IDs of the count table and the design matrix do not match for '%s'", ctype),
            call. = FALSE
        )
    }
    design_matrix <- design_matrix[row_idx, , drop = FALSE]

    conditions <- design_matrix$condition

    deg <- edgeR::DGEList(counts = est_counts, group = conditions)

    # Drop low-expression rows and recompute library sizes from what remains.
    filter_edgeR <- edgeR::filterByExpr(deg)
    deg <- deg[filter_edgeR, , keep.lib.sizes = FALSE]

    deg <- edgeR::calcNormFactors(deg)
    design <- model.matrix(~conditions)

    deg <- edgeR::estimateDisp(deg, design)

    fit <- edgeR::glmQLFit(deg, design)
    # coef = 2 tests the second design column, i.e. the first non-reference
    # level of `conditions`.
    # NOTE(review): which level is the reference depends on the values in
    # `condition` -- confirm it is the intended baseline.
    qlf <- edgeR::glmQLFTest(fit, coef = 2)

    # n = number of tested rows, so every row is written, not just the top hits.
    write.csv(edgeR::topTags(qlf, n = nrow(qlf$table)), sprintf("../results/general/%s_edger.csv", ctype))

    # Progress line, matching the "<project> done" lines in the recorded output.
    message(sprintf("%s done", ctype))
}

brca done

kirc done

thca done

luad done

prad done

lusc done

lihc done

hnsc done

coad done

stad done

kirp done

kich done

