In [1]:
library(tidyverse)
library(survival)
library(survminer)
library(DESeq2)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: ggpubr
Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, 

In [2]:
# Project directory locations, as returned by the rutils helper for the
# given dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
# NOTE(review): four projects are listed but only three unified datasets
# below -- confirm whether TCGA-OV is intentionally left out.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Sub-directories of dirs$data_dir, one per unified dataset.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list (TSV) under the data directory.
matrisome_list <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [3]:
# Load the matrisome master list once; its gene symbols are used below to
# subset every dataset.
matrisome_df <- rutils::load_matrisome_df(matrisome_list)

# For each unified dataset: read the raw counts, apply DESeq2's
# variance-stabilizing transformation, and write three tables into the
# dataset's own directory:
#   matrisome_counts.tsv      - rounded raw counts, matrisome genes only
#   norm_matrisome_counts.tsv - transformed counts, matrisome genes only
#   norm_counts.tsv           - transformed counts, all genes
for (i in seq_along(unified_dsets)) {
    dset <- unified_dsets[i]
    # Trailing newline keeps each progress line separate from later output.
    cat("Processing dataset: ", dset, "\n", sep = "")

    # Build the dataset directory once instead of re-pasting it per file.
    dset_dir <- file.path(dirs$data_dir, dset)

    counts_df <- read_tsv(file.path(dset_dir, "counts.tsv")) %>%
        dplyr::rename(geneID = Hugo_Symbol) %>%
        dplyr::select(-Entrez_Gene_Id) %>%
        # Round every numeric column to whole numbers before the
        # transformation, which converts the counts to integer mode.
        dplyr::mutate(dplyr::across(where(is.numeric), round))

    # One row per gene; drop the gene column by name rather than by position
    # so the matrix stays numeric regardless of column order.
    counts <- as.matrix(dplyr::select(counts_df, -geneID))
    rownames(counts) <- counts_df$geneID
    norm_counts <- varianceStabilizingTransformation(counts)

    # Convert the transformed matrix to a tibble once and reuse it for both
    # the full and the matrisome-only outputs.
    norm_counts_df <- as_tibble(norm_counts, rownames = "geneID")

    matrisome_counts_df <- counts_df %>%
        dplyr::filter(geneID %in% matrisome_df$gene_symbol)

    norm_matrisome_counts_df <- norm_counts_df %>%
        dplyr::filter(geneID %in% matrisome_df$gene_symbol)

    write_tsv(matrisome_counts_df, file.path(dset_dir, "matrisome_counts.tsv"))
    write_tsv(norm_matrisome_counts_df, file.path(dset_dir, "norm_matrisome_counts.tsv"))

    write_tsv(norm_counts_df, file.path(dset_dir, "norm_counts.tsv"))
}

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


Processing dataset: unified_cervical_data

Parsed with column specification:
cols(
  .default = col_double(),
  Hugo_Symbol = col_character()
)
See spec(...) for full column specifications.
converting counts to integer mode


Processing dataset: unified_uterine_data

Parsed with column specification:
cols(
  .default = col_double(),
  Hugo_Symbol = col_character()
)
See spec(...) for full column specifications.
converting counts to integer mode


Processing dataset: unified_uterine_endometrial_data

Parsed with column specification:
cols(
  .default = col_double(),
  Hugo_Symbol = col_character()
)
See spec(...) for full column specifications.
converting counts to integer mode
