In [1]:
library(tidyverse)
library(TCGAbiolinks)
library(HDF5Array)
library(SummarizedExperiment)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: DelayedArray
Loading required package: stats4
Loading required package: matrixStats

Attaching package: ‘matrixStats’

The following object is masked from ‘package:dplyr’:

    count

Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:dplyr’:

    co

# Constants

In [2]:
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
projects <- c("TCGA-CESC", "TCGA-OV", "TCGA-UCS", "TCGA-UCEC", "TCGA-BRCA")
project_paths <- unlist(map(projects, function(prj) paste0(dirs$data_dir, "/", prj)))
biolinks_dir <- "tcga_biolinks_downloads"
RSE_objects_dir <- "saved_RSE_objects"
TCGA_dest_dir <- paste0(dirs$data_dir, "/", biolinks_dir)
RSE_objects_dest_dir <- paste0(dirs$data_dir, "/", RSE_objects_dir)

# Functions

In [3]:
rna_seq_query <- function(p) {
    return(GDCquery(
        project = p,
        data.category = "Transcriptome Profiling",
        data.type = "Gene Expression Quantification",
        workflow.type = "HTSeq - Counts"
    ))
}


prep_and_save_count_data <- function(rses, label_field, dest_dir, dest_subdir) {
    id_symbol_map <- as_tibble(rowData(rses[[1]]))
    
    # Does the matrix data directory exist? If not, create it
    dir.create(paste0(dest_dir, "/", dest_subdir))
    
    for (n in names(rses)) {
        counts_df <- assays(rses[[n]])[["HTSeq - Counts"]] %>%
            as_tibble(rownames = "ensembl_gene_id") %>%
            inner_join(id_symbol_map, by = "ensembl_gene_id") %>%
            dplyr::select(ensembl_gene_id, external_gene_name, everything()) %>%
            dplyr::select(-original_ensembl_gene_id)
        
        condition_labels <- rses[[n]][[label_field]]
        coldata_df <- as_tibble(colnames(counts_df)[-c(1:2)]) %>%
            dplyr::rename(sample_name = value) %>%
            mutate(condition = condition_labels) %>%
            mutate(project = n)
        
        write_tsv(counts_df, path = paste0(dest_dir, "/", dest_subdir, "/", n, "_counts.tsv"))
        write_tsv(coldata_df, path = paste0(dest_dir, "/", dest_subdir, "/", n, "_coldata.tsv"))
    }
}

# Download and save data
No loops used here since API is really finicky and quits abruptly

In [None]:
proj_idx <- 1
q <- rna_seq_query(projects[proj_idx])
GDCdownload(q, method = "api", directory = TCGA_dest_dir, files.per.chunk = 10)
data <- GDCprepare(q, directory = TCGA_dest_dir)
data
saveHDF5SummarizedExperiment(data, dir = RSE_objects_dest_dir, prefix = paste0(projects[proj_idx], "_RNA_"))

In [None]:
proj_idx <- 2
q <- rna_seq_query(projects[proj_idx])
GDCdownload(q, method = "api", directory = TCGA_dest_dir, files.per.chunk = 10)
data <- GDCprepare(q, directory = TCGA_dest_dir)
data
saveHDF5SummarizedExperiment(data, dir = RSE_objects_dest_dir, prefix = paste0(projects[proj_idx], "_RNA_"))

In [None]:
proj_idx <- 3
q <- rna_seq_query(projects[proj_idx])
GDCdownload(q, method = "api", directory = TCGA_dest_dir, files.per.chunk = 10)
data <- GDCprepare(q, directory = TCGA_dest_dir)
data
saveHDF5SummarizedExperiment(data, dir = RSE_objects_dest_dir, prefix = paste0(projects[proj_idx], "_RNA_"))

In [None]:
proj_idx <- 4
q <- rna_seq_query(projects[proj_idx])
GDCdownload(q, method = "api", directory = TCGA_dest_dir, files.per.chunk = 10)
data <- GDCprepare(q, directory = TCGA_dest_dir)
data
saveHDF5SummarizedExperiment(data, dir = RSE_objects_dest_dir, prefix = paste0(projects[proj_idx], "_RNA_"))

In [4]:
proj_idx <- 5
q <- rna_seq_query(projects[proj_idx])
GDCdownload(q, method = "api", directory = TCGA_dest_dir, files.per.chunk = 10)
data <- GDCprepare(q, directory = TCGA_dest_dir)
data
saveHDF5SummarizedExperiment(data, dir = RSE_objects_dest_dir, prefix = paste0(projects[proj_idx], "_RNA_"))

--------------------------------------
o GDCquery: Searching in GDC database
--------------------------------------
Genome of reference: hg38
--------------------------------------------
oo Accessing GDC. This might take a while...
--------------------------------------------
ooo Project: TCGA-BRCA
--------------------
oo Filtering results
--------------------
ooo By data.type
ooo By workflow.type
----------------
oo Checking data
----------------
ooo Check if there are duplicated cases
ooo Check if there results for the query
-------------------
o Preparing output
-------------------
Downloading data for project TCGA-BRCA
GDCdownload will download 1222 files. A total of 310.760859 MB
Downloading chunk 1 of 123 (10 files, size = 2.551892 MB) as Tue_Sep_15_16_16_15_2020_0.tar.gz


Downloading: 2.5 MB     

Downloading chunk 2 of 123 (10 files, size = 2.553879 MB) as Tue_Sep_15_16_16_15_2020_1.tar.gz


Downloading: 2.5 MB       

Downloading chunk 3 of 123 (10 files, size = 2.555859 MB) as Tue_Sep_15_16_16_15_2020_2.tar.gz


Downloading: 2.5 MB       

Downloading chunk 4 of 123 (10 files, size = 2.551648 MB) as Tue_Sep_15_16_16_15_2020_3.tar.gz


Downloading: 2.5 MB       

Downloading chunk 5 of 123 (10 files, size = 2.544234 MB) as Tue_Sep_15_16_16_15_2020_4.tar.gz


Downloading: 2.5 MB       

Downloading chunk 6 of 123 (10 files, size = 2.548533 MB) as Tue_Sep_15_16_16_15_2020_5.tar.gz


Downloading: 2.5 MB       

Downloading chunk 7 of 123 (10 files, size = 2.550204 MB) as Tue_Sep_15_16_16_15_2020_6.tar.gz


Downloading: 2.5 MB       

Downloading chunk 8 of 123 (10 files, size = 2.538471 MB) as Tue_Sep_15_16_16_15_2020_7.tar.gz


Downloading: 2.5 MB       

Downloading chunk 9 of 123 (10 files, size = 2.543641 MB) as Tue_Sep_15_16_16_15_2020_8.tar.gz


Downloading: 2.5 MB       

Downloading chunk 10 of 123 (10 files, size = 2.537036 MB) as Tue_Sep_15_16_16_15_2020_9.tar.gz


Downloading: 2.5 MB       

Downloading chunk 11 of 123 (10 files, size = 2.538664 MB) as Tue_Sep_15_16_16_15_2020_10.tar.gz


Downloading: 2.5 MB     

Downloading chunk 12 of 123 (10 files, size = 2.530501 MB) as Tue_Sep_15_16_16_15_2020_11.tar.gz


Downloading: 2.5 MB       

Downloading chunk 13 of 123 (10 files, size = 2.532912 MB) as Tue_Sep_15_16_16_15_2020_12.tar.gz


Downloading: 2.5 MB       

Downloading chunk 14 of 123 (10 files, size = 2.543346 MB) as Tue_Sep_15_16_16_15_2020_13.tar.gz


Downloading: 2.5 MB       

Downloading chunk 15 of 123 (10 files, size = 2.554133 MB) as Tue_Sep_15_16_16_15_2020_14.tar.gz


Downloading: 2.5 MB       

Downloading chunk 16 of 123 (10 files, size = 2.5527 MB) as Tue_Sep_15_16_16_15_2020_15.tar.gz


Downloading: 2.5 MB     

Downloading chunk 17 of 123 (10 files, size = 2.543714 MB) as Tue_Sep_15_16_16_15_2020_16.tar.gz


Downloading: 2.5 MB       

Downloading chunk 18 of 123 (10 files, size = 2.537472 MB) as Tue_Sep_15_16_16_15_2020_17.tar.gz


Downloading: 2.5 MB       

Downloading chunk 19 of 123 (10 files, size = 2.551843 MB) as Tue_Sep_15_16_16_15_2020_18.tar.gz


Downloading: 2.5 MB       

Downloading chunk 20 of 123 (10 files, size = 2.54571 MB) as Tue_Sep_15_16_16_15_2020_19.tar.gz


Downloading: 2.5 MB       

Downloading chunk 21 of 123 (10 files, size = 2.534238 MB) as Tue_Sep_15_16_16_15_2020_20.tar.gz


Downloading: 2.5 MB     

Downloading chunk 22 of 123 (10 files, size = 2.532046 MB) as Tue_Sep_15_16_16_15_2020_21.tar.gz


Downloading: 2.5 MB       

Downloading chunk 23 of 123 (10 files, size = 2.534137 MB) as Tue_Sep_15_16_16_15_2020_22.tar.gz


Downloading: 2.5 MB       

Downloading chunk 24 of 123 (10 files, size = 2.55986 MB) as Tue_Sep_15_16_16_15_2020_23.tar.gz


Downloading: 2.6 MB       

Downloading chunk 25 of 123 (10 files, size = 2.560033 MB) as Tue_Sep_15_16_16_15_2020_24.tar.gz


Downloading: 2.6 MB       

Downloading chunk 26 of 123 (10 files, size = 2.535914 MB) as Tue_Sep_15_16_16_15_2020_25.tar.gz


Downloading: 2.5 MB       

Downloading chunk 27 of 123 (10 files, size = 2.544914 MB) as Tue_Sep_15_16_16_15_2020_26.tar.gz


Downloading: 2.5 MB       

Downloading chunk 28 of 123 (10 files, size = 2.549801 MB) as Tue_Sep_15_16_16_15_2020_27.tar.gz


Downloading: 2.5 MB       

Downloading chunk 29 of 123 (10 files, size = 2.537587 MB) as Tue_Sep_15_16_16_15_2020_28.tar.gz


Downloading: 2.5 MB       

Downloading chunk 30 of 123 (10 files, size = 2.548569 MB) as Tue_Sep_15_16_16_15_2020_29.tar.gz


Downloading: 2.5 MB       

Downloading chunk 31 of 123 (10 files, size = 2.544384 MB) as Tue_Sep_15_16_16_15_2020_30.tar.gz


Downloading: 2.5 MB       

Downloading chunk 32 of 123 (10 files, size = 2.547873 MB) as Tue_Sep_15_16_16_15_2020_31.tar.gz


Downloading: 2.5 MB       

Downloading chunk 33 of 123 (10 files, size = 2.558422 MB) as Tue_Sep_15_16_16_15_2020_32.tar.gz


Downloading: 2.6 MB       

Downloading chunk 34 of 123 (10 files, size = 2.538785 MB) as Tue_Sep_15_16_16_15_2020_33.tar.gz


Downloading: 2.5 MB       

Downloading chunk 35 of 123 (10 files, size = 2.532187 MB) as Tue_Sep_15_16_16_15_2020_34.tar.gz


Downloading: 2.5 MB     

Downloading chunk 36 of 123 (10 files, size = 2.542375 MB) as Tue_Sep_15_16_16_15_2020_35.tar.gz


Downloading: 2.5 MB       

Downloading chunk 37 of 123 (10 files, size = 2.543608 MB) as Tue_Sep_15_16_16_15_2020_36.tar.gz


Downloading: 2.5 MB       

Downloading chunk 38 of 123 (10 files, size = 2.534536 MB) as Tue_Sep_15_16_16_15_2020_37.tar.gz


Downloading: 2.5 MB       

Downloading chunk 39 of 123 (10 files, size = 2.526378 MB) as Tue_Sep_15_16_16_15_2020_38.tar.gz


Downloading: 2.5 MB       

Downloading chunk 40 of 123 (10 files, size = 2.551036 MB) as Tue_Sep_15_16_16_15_2020_39.tar.gz


Downloading: 2.5 MB       

Downloading chunk 41 of 123 (10 files, size = 2.539465 MB) as Tue_Sep_15_16_16_15_2020_40.tar.gz


Downloading: 2.5 MB     

Downloading chunk 42 of 123 (10 files, size = 2.534972 MB) as Tue_Sep_15_16_16_15_2020_41.tar.gz


Downloading: 2.5 MB       

Downloading chunk 43 of 123 (10 files, size = 2.548431 MB) as Tue_Sep_15_16_16_15_2020_42.tar.gz


Downloading: 2.5 MB     

Downloading chunk 44 of 123 (10 files, size = 2.559977 MB) as Tue_Sep_15_16_16_15_2020_43.tar.gz


Downloading: 2.6 MB       

Downloading chunk 45 of 123 (10 files, size = 2.540564 MB) as Tue_Sep_15_16_16_15_2020_44.tar.gz


Downloading: 2.5 MB       

Downloading chunk 46 of 123 (10 files, size = 2.553497 MB) as Tue_Sep_15_16_16_15_2020_45.tar.gz


Downloading: 2.5 MB       

Downloading chunk 47 of 123 (10 files, size = 2.554181 MB) as Tue_Sep_15_16_16_15_2020_46.tar.gz


Downloading: 2.5 MB       

Downloading chunk 48 of 123 (10 files, size = 2.516413 MB) as Tue_Sep_15_16_16_15_2020_47.tar.gz


Downloading: 2.5 MB       

Downloading chunk 49 of 123 (10 files, size = 2.54021 MB) as Tue_Sep_15_16_16_15_2020_48.tar.gz


Downloading: 2.5 MB       

Downloading chunk 50 of 123 (10 files, size = 2.536234 MB) as Tue_Sep_15_16_16_15_2020_49.tar.gz


Downloading: 2.5 MB       

Downloading chunk 51 of 123 (10 files, size = 2.543287 MB) as Tue_Sep_15_16_16_15_2020_50.tar.gz


Downloading: 2.5 MB       

Downloading chunk 52 of 123 (10 files, size = 2.552016 MB) as Tue_Sep_15_16_16_15_2020_51.tar.gz


Downloading: 2.5 MB       

Downloading chunk 53 of 123 (10 files, size = 2.545517 MB) as Tue_Sep_15_16_16_15_2020_52.tar.gz


Downloading: 2.5 MB       

Downloading chunk 54 of 123 (10 files, size = 2.555137 MB) as Tue_Sep_15_16_16_15_2020_53.tar.gz


Downloading: 2.5 MB       

Downloading chunk 55 of 123 (10 files, size = 2.553134 MB) as Tue_Sep_15_16_16_15_2020_54.tar.gz


Downloading: 2.5 MB     

Downloading chunk 56 of 123 (10 files, size = 2.529342 MB) as Tue_Sep_15_16_16_15_2020_55.tar.gz


Downloading: 2.5 MB       

Downloading chunk 57 of 123 (10 files, size = 2.549819 MB) as Tue_Sep_15_16_16_15_2020_56.tar.gz


Downloading: 2.5 MB     

Downloading chunk 58 of 123 (10 files, size = 2.55505 MB) as Tue_Sep_15_16_16_15_2020_57.tar.gz


Downloading: 2.5 MB       

Downloading chunk 59 of 123 (10 files, size = 2.547319 MB) as Tue_Sep_15_16_16_15_2020_58.tar.gz


Downloading: 2.5 MB     

Downloading chunk 60 of 123 (10 files, size = 2.538856 MB) as Tue_Sep_15_16_16_15_2020_59.tar.gz


Downloading: 2.5 MB       

Downloading chunk 61 of 123 (10 files, size = 2.525881 MB) as Tue_Sep_15_16_16_15_2020_60.tar.gz


Downloading: 2.5 MB       

Downloading chunk 62 of 123 (10 files, size = 2.527676 MB) as Tue_Sep_15_16_16_15_2020_61.tar.gz


Downloading: 2.5 MB       

Downloading chunk 63 of 123 (10 files, size = 2.549812 MB) as Tue_Sep_15_16_16_15_2020_62.tar.gz


Downloading: 2.5 MB       

Downloading chunk 64 of 123 (10 files, size = 2.540966 MB) as Tue_Sep_15_16_16_15_2020_63.tar.gz


Downloading: 2.5 MB       

Downloading chunk 65 of 123 (10 files, size = 2.518445 MB) as Tue_Sep_15_16_16_15_2020_64.tar.gz


Downloading: 2.5 MB     

Downloading chunk 66 of 123 (10 files, size = 2.532663 MB) as Tue_Sep_15_16_16_15_2020_65.tar.gz


Downloading: 2.5 MB       

Downloading chunk 67 of 123 (10 files, size = 2.518073 MB) as Tue_Sep_15_16_16_15_2020_66.tar.gz


Downloading: 2.5 MB     

Downloading chunk 68 of 123 (10 files, size = 2.539858 MB) as Tue_Sep_15_16_16_15_2020_67.tar.gz


Downloading: 2.5 MB       

Downloading chunk 69 of 123 (10 files, size = 2.55855 MB) as Tue_Sep_15_16_16_15_2020_68.tar.gz


Downloading: 2.6 MB       

Downloading chunk 70 of 123 (10 files, size = 2.527749 MB) as Tue_Sep_15_16_16_15_2020_69.tar.gz


Downloading: 2.5 MB       

Downloading chunk 71 of 123 (10 files, size = 2.532069 MB) as Tue_Sep_15_16_16_15_2020_70.tar.gz


Downloading: 2.5 MB     

Downloading chunk 72 of 123 (10 files, size = 2.545364 MB) as Tue_Sep_15_16_16_15_2020_71.tar.gz


Downloading: 2.5 MB       

Downloading chunk 73 of 123 (10 files, size = 2.547647 MB) as Tue_Sep_15_16_16_15_2020_72.tar.gz


Downloading: 2.5 MB     

Downloading chunk 74 of 123 (10 files, size = 2.55715 MB) as Tue_Sep_15_16_16_15_2020_73.tar.gz


Downloading: 2.6 MB       

Downloading chunk 75 of 123 (10 files, size = 2.522673 MB) as Tue_Sep_15_16_16_15_2020_74.tar.gz


Downloading: 2.5 MB     

Downloading chunk 76 of 123 (10 files, size = 2.545072 MB) as Tue_Sep_15_16_16_15_2020_75.tar.gz


Downloading: 2.5 MB       

Downloading chunk 77 of 123 (10 files, size = 2.552045 MB) as Tue_Sep_15_16_16_15_2020_76.tar.gz


Downloading: 2.5 MB     

Downloading chunk 78 of 123 (10 files, size = 2.544992 MB) as Tue_Sep_15_16_16_15_2020_77.tar.gz


Downloading: 2.5 MB       

Downloading chunk 79 of 123 (10 files, size = 2.544958 MB) as Tue_Sep_15_16_16_15_2020_78.tar.gz


Downloading: 2.5 MB     

Downloading chunk 80 of 123 (10 files, size = 2.533673 MB) as Tue_Sep_15_16_16_15_2020_79.tar.gz


Downloading: 2.5 MB     

Downloading chunk 81 of 123 (10 files, size = 2.561907 MB) as Tue_Sep_15_16_16_15_2020_80.tar.gz


Downloading: 2.6 MB       

Downloading chunk 82 of 123 (10 files, size = 2.538314 MB) as Tue_Sep_15_16_16_15_2020_81.tar.gz


Downloading: 2.5 MB       

Downloading chunk 83 of 123 (10 files, size = 2.552011 MB) as Tue_Sep_15_16_16_15_2020_82.tar.gz


Downloading: 2.5 MB       

Downloading chunk 84 of 123 (10 files, size = 2.540321 MB) as Tue_Sep_15_16_16_15_2020_83.tar.gz


Downloading: 2.5 MB     

Downloading chunk 85 of 123 (10 files, size = 2.546799 MB) as Tue_Sep_15_16_16_15_2020_84.tar.gz


Downloading: 2.5 MB       

Downloading chunk 86 of 123 (10 files, size = 2.523706 MB) as Tue_Sep_15_16_16_15_2020_85.tar.gz


Downloading: 2.5 MB     

Downloading chunk 87 of 123 (10 files, size = 2.538568 MB) as Tue_Sep_15_16_16_15_2020_86.tar.gz


Downloading: 2.5 MB       

Downloading chunk 88 of 123 (10 files, size = 2.527232 MB) as Tue_Sep_15_16_16_15_2020_87.tar.gz


Downloading: 2.5 MB       

Downloading chunk 89 of 123 (10 files, size = 2.542667 MB) as Tue_Sep_15_16_16_15_2020_88.tar.gz


Downloading: 2.5 MB       

Downloading chunk 90 of 123 (10 files, size = 2.548135 MB) as Tue_Sep_15_16_16_15_2020_89.tar.gz


Downloading: 2.5 MB     

Downloading chunk 91 of 123 (10 files, size = 2.541777 MB) as Tue_Sep_15_16_16_15_2020_90.tar.gz


Downloading: 2.5 MB       

Downloading chunk 92 of 123 (10 files, size = 2.533706 MB) as Tue_Sep_15_16_16_15_2020_91.tar.gz


Downloading: 2.5 MB     

Downloading chunk 93 of 123 (10 files, size = 2.552891 MB) as Tue_Sep_15_16_16_15_2020_92.tar.gz


Downloading: 2.5 MB       

Downloading chunk 94 of 123 (10 files, size = 2.544355 MB) as Tue_Sep_15_16_16_15_2020_93.tar.gz


Downloading: 2.5 MB     

Downloading chunk 95 of 123 (10 files, size = 2.557938 MB) as Tue_Sep_15_16_16_15_2020_94.tar.gz


Downloading: 2.6 MB     

Downloading chunk 96 of 123 (10 files, size = 2.537086 MB) as Tue_Sep_15_16_16_15_2020_95.tar.gz


Downloading: 2.5 MB       

Downloading chunk 97 of 123 (10 files, size = 2.545981 MB) as Tue_Sep_15_16_16_15_2020_96.tar.gz


Downloading: 2.5 MB       

Downloading chunk 98 of 123 (10 files, size = 2.540058 MB) as Tue_Sep_15_16_16_15_2020_97.tar.gz


Downloading: 2.5 MB       

Downloading chunk 99 of 123 (10 files, size = 2.516888 MB) as Tue_Sep_15_16_16_15_2020_98.tar.gz


Downloading: 2.5 MB     

Downloading chunk 100 of 123 (10 files, size = 2.535845 MB) as Tue_Sep_15_16_16_15_2020_99.tar.gz


Downloading: 2.5 MB       

Downloading chunk 101 of 123 (10 files, size = 2.542519 MB) as Tue_Sep_15_16_16_15_2020_100.tar.gz


Downloading: 2.5 MB     

Downloading chunk 102 of 123 (10 files, size = 2.526626 MB) as Tue_Sep_15_16_16_15_2020_101.tar.gz


Downloading: 2.5 MB       

Downloading chunk 103 of 123 (10 files, size = 2.558204 MB) as Tue_Sep_15_16_16_15_2020_102.tar.gz


Downloading: 2.6 MB     

Downloading chunk 104 of 123 (10 files, size = 2.541304 MB) as Tue_Sep_15_16_16_15_2020_103.tar.gz


Downloading: 2.5 MB       

Downloading chunk 105 of 123 (10 files, size = 2.548615 MB) as Tue_Sep_15_16_16_15_2020_104.tar.gz


Downloading: 2.5 MB     

Downloading chunk 106 of 123 (10 files, size = 2.571523 MB) as Tue_Sep_15_16_16_15_2020_105.tar.gz


Downloading: 2.6 MB       

Downloading chunk 107 of 123 (10 files, size = 2.533453 MB) as Tue_Sep_15_16_16_15_2020_106.tar.gz


Downloading: 2.5 MB     

Downloading chunk 108 of 123 (10 files, size = 2.538887 MB) as Tue_Sep_15_16_16_15_2020_107.tar.gz


Downloading: 2.5 MB       

Downloading chunk 109 of 123 (10 files, size = 2.552716 MB) as Tue_Sep_15_16_16_15_2020_108.tar.gz


Downloading: 2.5 MB       

Downloading chunk 110 of 123 (10 files, size = 2.557063 MB) as Tue_Sep_15_16_16_15_2020_109.tar.gz


Downloading: 2.6 MB       

Downloading chunk 111 of 123 (10 files, size = 2.557587 MB) as Tue_Sep_15_16_16_15_2020_110.tar.gz


Downloading: 2.6 MB       

Downloading chunk 112 of 123 (10 files, size = 2.555867 MB) as Tue_Sep_15_16_16_15_2020_111.tar.gz


Downloading: 2.5 MB     

Downloading chunk 113 of 123 (10 files, size = 2.529252 MB) as Tue_Sep_15_16_16_15_2020_112.tar.gz


Downloading: 2.5 MB       

Downloading chunk 114 of 123 (10 files, size = 2.559467 MB) as Tue_Sep_15_16_16_15_2020_113.tar.gz


Downloading: 2.6 MB     

Downloading chunk 115 of 123 (10 files, size = 2.512709 MB) as Tue_Sep_15_16_16_15_2020_114.tar.gz


Downloading: 2.5 MB       

Downloading chunk 116 of 123 (10 files, size = 2.544341 MB) as Tue_Sep_15_16_16_15_2020_115.tar.gz


Downloading: 2.5 MB     

Downloading chunk 117 of 123 (10 files, size = 2.550581 MB) as Tue_Sep_15_16_16_15_2020_116.tar.gz


Downloading: 2.5 MB       

Downloading chunk 118 of 123 (10 files, size = 2.547026 MB) as Tue_Sep_15_16_16_15_2020_117.tar.gz


Downloading: 2.5 MB       

Downloading chunk 119 of 123 (10 files, size = 2.553045 MB) as Tue_Sep_15_16_16_15_2020_118.tar.gz


Downloading: 2.5 MB     

Downloading chunk 120 of 123 (10 files, size = 2.53485 MB) as Tue_Sep_15_16_16_15_2020_119.tar.gz


Downloading: 2.5 MB       

Downloading chunk 121 of 123 (10 files, size = 2.548373 MB) as Tue_Sep_15_16_16_15_2020_120.tar.gz


Downloading: 2.5 MB     

Downloading chunk 122 of 123 (10 files, size = 2.545924 MB) as Tue_Sep_15_16_16_15_2020_121.tar.gz


Downloading: 2.5 MB       

Downloading chunk 123 of 123 (2 files, size = 502.93 KB) as Tue_Sep_15_16_16_15_2020_122.tar.gz




Starting to add information to samples
 => Add clinical information to samples
Add FFPE information. More information at: 
=> https://cancergenome.nih.gov/cancersselected/biospeccriteria 
=> http://gdac.broadinstitute.org/runs/sampleReports/latest/FPPP_FFPE_Cases.html
 => Adding subtype information to samples
brca subtype information from:doi.org/10.1016/j.ccell.2018.03.014
Accessing www.ensembl.org to get gene information
Downloading genome information (try:0) Using: Human genes (GRCh38.p13)
“`select_()` is deprecated as of dplyr 0.7.0.
Please use `select()` instead.
“`filter_()` is deprecated as of dplyr 0.7.0.
Please use `filter()` instead.
See vignette('programming') for more help


class: RangedSummarizedExperiment 
dim: 56457 1222 
metadata(1): data_release
assays(1): HTSeq - Counts
rownames(56457): ENSG00000000003 ENSG00000000005 ... ENSG00000281912
  ENSG00000281920
rowData names(3): ensembl_gene_id external_gene_name
  original_ensembl_gene_id
colnames(1222): TCGA-A2-A0T0-01A-22R-A084-07
  TCGA-BH-A0W4-01A-11R-A109-07 ... TCGA-LL-A5YL-01A-12R-A29R-07
  TCGA-A8-A08X-01A-21R-A00Z-07
colData names(91): sample patient ... subtype_PARADIGM.Clusters
  subtype_Pan.Gyn.Clusters

# Save data in count matrix + coldata format

Save data in a format ready for DESeq/clustering/etc.

In [5]:
data_ls <- load_RSE_objects(RSE_objects_dest_dir, projects, paste0(projects, "_RNA_"))

In [6]:
prep_and_save_count_data(
    rses = data_ls,
    label_field = "definition",
    dest_dir = dirs$data_dir,
    dest_subdir = "TCGA_RNA_matrix_count_data"
)

“'/mnt/d/fogg_lab_gyn_cancer_data/TCGA_RNA_matrix_count_data' already exists”