In [1]:
library(tidyverse)

library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Look up the project directories listed in the dev paths file.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)

# TCGA project identifiers.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Names of the unified datasets; one of these is picked by index below.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)


In [3]:
# Position in `unified_dsets` of the dataset to work with; 3 selects
# "unified_uterine_endometrial_data".
dset_idx <- 3

In [6]:
# Load the sample metadata (coldata) and the normalized matrisome counts for
# the selected unified dataset. Paths are assembled with file.path() rather
# than by pasting "/" separators by hand.
coldata_df <- read_tsv(
    file.path(dirs$data_dir, unified_dsets[dset_idx], "coldata.tsv")
)
nm_counts_df <- read_tsv(
    file.path(
        dirs$data_dir,
        unified_dsets[dset_idx],
        "norm_matrisome_counts.tsv"
    )
)


Parsed with column specification:
cols(
  sample_name = col_character(),
  condition = col_character(),
  data_source = col_character()
)
Parsed with column specification:
cols(
  .default = col_double(),
  geneID = col_character()
)
See spec(...) for full column specifications.


In [10]:
# Preview the first rows of the sample metadata and of the counts table.
coldata_df %>% head()
nm_counts_df %>% head()

sample_name,condition,data_source
<chr>,<chr>,<chr>
GTEX-T6MO-1526-SM-4DM57,healthy,GTEx
GTEX-11P81-1626-SM-5BC52,healthy,GTEx
GTEX-13N11-1126-SM-5KM41,healthy,GTEx
GTEX-RTLS-2426-SM-46MUO,healthy,GTEx
GTEX-ZP4G-0726-SM-4WWF2,healthy,GTEx
GTEX-WEY5-0726-SM-4LMID,healthy,GTEx


geneID,GTEX-T6MO-1526-SM-4DM57,GTEX-11P81-1626-SM-5BC52,GTEX-13N11-1126-SM-5KM41,GTEX-RTLS-2426-SM-46MUO,GTEX-ZP4G-0726-SM-4WWF2,GTEX-WEY5-0726-SM-4LMID,GTEX-T2IS-2226-SM-4DM65,GTEX-PX3G-2026-SM-48U1H,GTEX-U3ZN-0726-SM-4DXT5,⋯,TCGA-EO-A3AU-01A-21R-A19W-07,TCGA-AJ-A3EK-01A-11R-A19W-07,TCGA-5S-A9Q8-01A-11R-A40A-07,TCGA-BK-A139-01A-11R-A277-07,TCGA-D1-A3DG-01A-11R-A19W-07,TCGA-EO-A22X-01A-11R-A180-07,TCGA-KP-A3W1-01A-11R-A22K-07,TCGA-PG-A916-01A-11R-A37O-07,TCGA-EY-A4KR-01A-11R-A27V-07,TCGA-KP-A3VZ-01A-11R-A22K-07
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
PGF,8.879329,7.434462,9.278404,8.18746,8.792995,9.564547,10.653829,9.316795,8.192681,⋯,7.05046,7.624509,8.627896,7.096096,12.074398,8.025731,9.248411,7.392115,7.831148,7.805377
TIMP4,9.005961,5.660309,7.91117,8.361972,9.741767,9.226092,7.402267,6.374619,5.67524,⋯,4.726243,3.955199,4.346853,3.586865,3.586865,4.853974,4.014704,5.070334,4.91037,4.950378
C1QTNF6,9.673245,9.462604,10.114966,9.754473,9.638398,9.233075,10.469207,10.462055,10.258132,⋯,10.83655,9.292369,10.98022,7.743795,7.467023,10.036937,11.672567,12.174879,10.847096,8.363901
TNC,14.523969,15.208986,15.814109,8.871503,15.468258,14.770415,13.120999,11.409056,14.223366,⋯,14.592956,12.317,9.776307,11.74469,11.440915,14.177643,10.027468,13.623124,12.005405,7.412211
PRL,8.63802,7.422681,9.385488,5.484139,5.487284,5.40635,7.726366,9.383876,8.027712,⋯,3.97543,3.955199,3.586865,3.586865,6.540419,3.980093,4.616692,4.100709,4.502431,5.0319
OGN,12.01316,12.713526,9.189995,11.430048,12.185254,12.325556,13.610493,12.716741,12.454832,⋯,4.94175,4.669302,5.356919,6.399441,4.795412,6.061354,3.586865,7.12206,3.965857,4.30989


In [31]:
# Reshape the counts to samples-as-rows / genes-as-columns: drop the leading
# geneID column, transpose, then label the columns with the gene IDs.
transpose_counts <- nm_counts_df %>%
    select(-1) %>%
    t()
colnames(transpose_counts) <- nm_counts_df[["geneID"]]

# Convert the matrix to a tibble, moving its row names (the sample names)
# into a "sample_name" column.
transpose_counts_df <- as_tibble(transpose_counts, rownames = "sample_name")
head(transpose_counts_df)

# Attach the per-sample metadata: keep only samples present in both tables,
# matched on sample_name.
joined_df <- inner_join(coldata_df, transpose_counts_df, by = "sample_name")

sample_name,PGF,TIMP4,C1QTNF6,TNC,PRL,OGN,C1QL3,FGB,NDNF,⋯,PIK3IP1,C1QTNF2,PCSK5,ANXA1,HGF,VWA2,FGF3,POSTN,NTF3,S100A6
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
GTEX-T6MO-1526-SM-4DM57,8.879329,9.005961,9.673245,14.523969,8.63802,12.01316,5.560305,4.100968,6.997296,⋯,11.70869,8.907417,9.731991,13.25319,8.515582,5.144458,3.586865,12.412159,7.984279,14.72407
GTEX-11P81-1626-SM-5BC52,7.434462,5.660309,9.462604,15.208986,7.422681,12.713526,5.526591,5.742299,7.536329,⋯,11.91527,7.503173,9.370995,13.07284,9.795993,6.261985,3.586865,11.534521,5.526591,13.44674
GTEX-13N11-1126-SM-5KM41,9.278404,7.91117,10.114966,15.814109,9.385488,9.189995,4.990444,7.190144,6.650881,⋯,11.784,8.474505,10.253858,13.17389,9.593789,6.236732,3.586865,11.939159,6.690134,14.10837
GTEX-RTLS-2426-SM-46MUO,8.18746,8.361972,9.754473,8.871503,5.484139,11.430048,4.697911,5.256237,7.329749,⋯,11.85426,6.890333,9.881287,11.85601,8.597905,5.535091,3.586865,9.319952,7.495292,13.38047
GTEX-ZP4G-0726-SM-4WWF2,8.792995,9.741767,9.638398,15.468258,5.487284,12.185254,6.279104,4.149321,6.982618,⋯,10.44837,8.36906,9.52885,13.01616,8.968487,6.279104,3.985832,11.067682,5.941299,15.23562
GTEX-WEY5-0726-SM-4LMID,9.564547,9.226092,9.233075,14.770415,5.40635,12.325556,4.953755,3.586865,6.779393,⋯,9.91643,8.901689,10.012876,12.74452,8.567741,5.557855,3.586865,10.232552,7.357694,14.66298


In [None]:
# NOTE(review): unfinished scratch cell. Unless `c` is bound to a vector
# elsewhere, it resolves to the base function `c` rather than column data --
# confirm the intended values before running.
df1 <- tibble("col" = c)