In [1]:
library(tidyverse)
library(survival)
library(survminer)
library(WGCNA)

# Custom package
library(rutils)


# ---- Constants ----
# Project directories, resolved by rutils from the dev-paths file
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)
# Dataset directory names; one of these is picked by index further down
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)
matrisome_path <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

# Vital-status label -> event indicator, handed to load_survival_df()
event_code <- list(Alive = 0, Dead = 1)
# Outcome columns kept next to sample_name when filtering the survival data
dep_cols <- c("vital_status", "survival_time")


-- [1mAttaching packages[22m ------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ---------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: ggpubr

Loading required package: dynamicTreeCut

Loading required package: fastcluster


Attaching package: 'fastcluster'


The following object is masked from 'package:stats':

    hclust





Attachi

In [2]:
# Position in `unified_dsets` of the dataset analysed by the cells below
# (1 selects "unified_cervical_data")
dset_idx <- 1

In [3]:
# Load and filter survival data
dset_name <- unified_dsets[dset_idx]
dset_dir <- file.path(dirs$data_dir, dset_name)

survival_path <- file.path(dset_dir, "survival_data.tsv")
survival_df <- load_survival_df(survival_path, event_code)
umsmg_demg_list <- read_lines(
    file.path(dirs$analysis_dir, "gene_lists", paste0(dset_name, "_umsmg_demg_list.txt"))
)
cutoff_df <- read_tsv(
    file.path(dirs$analysis_dir, "survival", paste0(dset_name, "_expression_cutoffs.tsv"))
)

# Keep the sample identifier plus the outcome columns, and drop every sample
# with a missing value in any of them. all_of() fails loudly if a required
# column is absent instead of only warning (one_of() is superseded).
filtered_survival_df <- survival_df %>%
    dplyr::select(dplyr::all_of(c("sample_name", dep_cols))) %>%
    tidyr::drop_na()

# Load normalized matrisome count data
norm_matrisome_counts_df <- read_tsv(file.path(dset_dir, "norm_matrisome_counts.tsv"))
# Restrict to samples that survived the filter above, then transpose so that
# the result can be joined on sample_name
norm_matrisome_counts_t_df <- norm_matrisome_counts_df %>%
    dplyr::select(dplyr::all_of(c("geneID", filtered_survival_df$sample_name))) %>%
    transpose_df("geneID", "sample_name")

# Combine survival data and normalized count data
joined_df <- filtered_survival_df %>%
    dplyr::inner_join(norm_matrisome_counts_t_df, by = "sample_name")

# Genes from the list that have no column in the joined data are skipped by
# any_of() below; report them explicitly so the omission is not silent.
missing_genes <- setdiff(umsmg_demg_list, colnames(joined_df))
if (length(missing_genes) > 0) {
    warning(
        "Genes in umsmg_demg_list not found in count data: ",
        paste(missing_genes, collapse = ", "),
        call. = FALSE
    )
}

filtered_joined_df <- joined_df %>%
    dplyr::select(
        dplyr::all_of(c("sample_name", dep_cols)),
        dplyr::any_of(umsmg_demg_list)
    ) %>%
    # cannot have survival times of 0 for univariate Cox PH analysis
    dplyr::filter(survival_time > 0)



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------------------------------[39m
cols(
  sample_name = [31mcol_character()[39m,
  vital_status = [31mcol_character()[39m,
  survival_time = [32mcol_double()[39m,
  figo_stage = [31mcol_character()[39m,
  days_to_last_follow_up = [32mcol_double()[39m,
  days_to_death = [32mcol_double()[39m,
  age_at_diagnosis = [32mcol_double()[39m,
  age_at_index = [32mcol_double()[39m,
  height = [32mcol_double()[39m,
  weight = [32mcol_double()[39m,
  bmi = [32mcol_double()[39m,
  race = [31mcol_character()[39m,
  ethnicity = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  cutoff = [32mcol_double()[39m
)



[36m--[39m 

In [16]:
# Label each row "high" or "low" by comparing one column against a cutoff.
#
# Arguments:
#   df       data frame (or tibble) containing the column named by `col_str`
#   col_str  name of the column to threshold, given as a single string
#   cutoff   a single threshold value
#
# Returns `df` with a character column `high_low`: "high" where the value is
# strictly greater than `cutoff`, "low" otherwise, and NA where the comparison
# is NA. All other columns are returned unchanged.
get_high_low <- function(df, col_str, cutoff) {
    stopifnot(
        is.character(col_str),
        length(col_str) == 1,
        col_str %in% names(df),
        length(cutoff) == 1
    )
    # Index the column by name and use the `cutoff` argument directly: this
    # avoids data masking (a column called `cutoff` would otherwise shadow the
    # argument) and needs no temporary `high` column that could clobber an
    # existing column of that name.
    df[["high_low"]] <- ifelse(df[[col_str]] > cutoff, "high", "low")
    df
}

In [108]:
# Build the univariate Cox PH formula `Surv(survival_time, vital_status) ~ gene`.
#
# The gene name is backtick-quoted so that a symbol which is not a syntactic R
# name (for example one containing "-") is treated as a single variable rather
# than being parsed as arithmetic. The formula's environment is set to the
# caller's, as it would be for a formula written inline.
build_cph_formula <- function(gene) {
    stopifnot(is.character(gene), length(gene) == 1)
    as.formula(
        paste0("Surv(survival_time, vital_status) ~ `", gene, "`"),
        env = parent.frame()
    )
}

gene <- "CCL17"
# gene <- "CXCL2"
cph_formula <- build_cph_formula(gene)
# Kaplan-Meier comparison is done on the high/low expression grouping
km_formula <- Surv(survival_time, vital_status) ~ high_low

In [109]:
# Look up the expression cutoff recorded for the selected gene
cutoff <- cutoff_df %>%
    dplyr::filter(geneID == gene) %>%
    dplyr::pull(cutoff)
# The high/low split needs exactly one threshold; a gene that is absent from
# (or duplicated in) the cutoff table would otherwise fail later with an
# unrelated-looking error or recycle silently.
if (length(cutoff) != 1) {
    stop(
        "Expected exactly one cutoff for gene '", gene, "', found ", length(cutoff),
        call. = FALSE
    )
}
# Reduce to the columns needed for the Kaplan-Meier analysis and add the
# high/low expression label. all_of() makes the use of an external character
# vector explicit and errors if the gene column is missing.
simp_df <- filtered_joined_df %>%
    dplyr::select(dplyr::all_of(c("sample_name", "survival_time", "vital_status", gene))) %>%
    get_high_low(gene, cutoff)

In [110]:
# Test for a difference in survival between the high_low groups
km_diff <- survdiff(formula = km_formula, data = simp_df)
# Kaplan-Meier survival estimate for each high_low group
km_fit <- survfit(formula = km_formula, data = simp_df, type = "kaplan-meier")

In [111]:
# Display the group-difference test, then the per-group Kaplan-Meier fit
km_diff
km_fit

Call:
survdiff(formula = km_formula, data = simp_df)

                N Observed Expected (O-E)^2/E (O-E)^2/V
high_low=high  78       14     19.9     1.766      2.55
high_low=low  172       52     46.1     0.764      2.55

 Chisq= 2.5  on 1 degrees of freedom, p= 0.1 

Call: survfit(formula = km_formula, data = simp_df, type = "kaplan-meier")

                n events median 0.95LCL 0.95UCL
high_low=high  78     14   3097    2052      NA
high_low=low  172     52   3046    1453      NA

In [89]:
# Fit the univariate Cox proportional-hazards model for the selected gene.
# NOTE(review): the output recorded below this cell reports CXCL2 while `gene`
# is currently set to "CCL17" (this cell's execution count is lower than the
# cell that defines `gene`) -- re-run after changing `gene` so the fit matches.
cph_fit <- coxph(cph_formula, data = filtered_joined_df)

In [90]:
# Display the fitted Cox model
cph_fit

Call:
coxph(formula = cph_formula, data = filtered_joined_df)

         coef exp(coef) se(coef)     z        p
CXCL2 0.22530   1.25270  0.06551 3.439 0.000584

Likelihood ratio test=11.04  on 1 df, p=0.0008898
n= 250, number of events= 66 

In [28]:
# Extract the fitted coefficient(s) as a bare numeric vector (names dropped).
# The element is spelled out in full: `$coefficient` only worked through `$`
# partial matching on the fit object.
as.numeric(cph_fit$coefficients)