In [4]:
# Project root: four directory levels above the current working directory.
wd <- Reduce(function(path, .level) dirname(path), seq_len(4L), getwd())

# Shared project configuration. E_DIR (used below) is not defined in this
# file -- presumably it is set by treasure_map.R; TODO confirm.
source(paste0(wd, "/mission_control/treasure_map.R"))
library(tidyverse)

# Input/output locations. Each keeps its trailing slash because file names
# are appended to it with paste0().
CLIN_DIR <- paste0(E_DIR, "/val_hebron/clinical/clean/")
I_DIR <- paste0(E_DIR, "/val_hebron/rna/raw/")
O_DIR <- paste0(E_DIR, "/val_hebron/rna/clean/")

### 0 - Read data sources

In [5]:
# Cleaned clinical table (not referenced again in the cells shown here).
clinical <- readRDS(file = paste0(CLIN_DIR, "combine.Rds"))

# Count table: semicolon-separated, with a `genes` column that is later
# used for the gene names.
rna <- read.csv(
  file = paste0(I_DIR, "nanostring_norm_counts.csv"),
  sep = ";",
  stringsAsFactors = FALSE
)

# Per-sample annotations: semicolon-separated, later joined on LAB.ID.
annotations <- read.csv(
  file = paste0(I_DIR, "annotations_share.csv"),
  sep = ";",
  stringsAsFactors = FALSE
)

### 1 - Prep RNA Data

In [19]:
# Transpose the count table: every column of `rna` except `genes` becomes a
# row, and every gene becomes a column.
tmp0 <- rna %>%
  select(-genes) %>%
  t() %>%
  data.frame()
colnames(tmp0) <- rna[["genes"]]

# Natural-log transform of every value.
# NOTE(review): a count of exactly 0 becomes -Inf, which is.na() does not
# flag -- confirm the normalised counts are strictly positive.
tmp1 <- log(tmp0)

In [20]:
# Number of NA entries per column of the log-transformed table.
step1 <- vapply(tmp1, function(values) sum(is.na(values)), integer(1))

# One row per gene, ordered from fewest to most missing values.
show <- data.frame(
  gene = names(step1),
  missing_values = step1
) %>%
  arrange(missing_values)

##### Add gene-set scores (tgfb, prolif, tcell)

In [21]:
# Genes that make up each score.
gene_sets <- list(
  "tgfb" = c("HEYL", "NOTCH3", "TGFb3", "TGFb1", "NOTCH4", "DLL4", "SERPINE1"),
  "prolif" = c("BRCA1", "BRCA2", "TUBB"),
  "tcell" = c("CXCL9", "CXCL10", "CXCL11", "GZMA", "GZMB", "IFNg")
)

# For each set, add a column named after the set holding the row-wise mean
# of its genes. mean() is called without na.rm, so a row with an NA in any
# gene of the set gets an NA score.
for (set_name in names(gene_sets)) {
  tmp1[[set_name]] <- apply(tmp1[, gene_sets[[set_name]]], 1, mean)
}

### 2 - Combine with annotation data

In [22]:
# Move the row names of the expression table into an explicit LAB.ID column
# so it can be joined to the annotations.
tmp2 <- tmp1 %>% rownames_to_column(var = "LAB.ID")

# Keep every annotation row and attach its expression values.
# The join key is passed through `by`: dplyr has no `on` argument, so the
# previous `on = "LAB.ID"` never selected the key and the call fell back to a
# natural join on every shared column name.
# NHC is renamed to patient_id and stored as character.
rna_ready <- annotations %>%
  left_join(tmp2, by = "LAB.ID") %>%
  rename(patient_id = NHC) %>%
  mutate(patient_id = as.character(patient_id))

Joining, by = "LAB.ID"


In [23]:
# Persist the full annotated expression table.
saveRDS(rna_ready, file = paste0(O_DIR, "rna_full.Rds"))

In [15]:
# Panel label = third space-separated token of Panel.de.ensayos
# (NA when the entry has fewer than three tokens).
rna_ready$panel <- vapply(
  strsplit(rna_ready$Panel.de.ensayos, " "),
  function(tokens) tokens[3],
  character(1),
  USE.NAMES = FALSE
)

# Keep "v12" and "v13" as they are; every other value, including NA, is
# pooled into "v<=11".
rna_ready$panel <- ifelse(
  rna_ready$panel %in% c("v12", "v13"),
  rna_ready$panel,
  "v<=11"
)

### 3 - Select and Output

In [8]:
# Final analysis table: identifiers, cohort, panel, the three standardised
# gene-set scores and CD274 exposed as pdl1.
#
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes. as.numeric() flattens it so each score is stored
# as an ordinary numeric column instead of a matrix column.
#
# Standardisation happens before drop_na(), so the centre and spread of each
# score are computed over all of its non-missing values, including rows that
# are removed afterwards because another score is missing.
rna_out <- rna_ready %>%
  transmute(
    LAB.ID,
    patient_id,
    Cohort,
    panel,
    tcell = as.numeric(scale(tcell)),
    tgfb = as.numeric(scale(tgfb)),
    prolif = as.numeric(scale(prolif)),
    pdl1 = CD274
  ) %>%
  drop_na(tcell, tgfb, prolif)

### 4 - Send it !

In [9]:
# Persist the final analysis table.
saveRDS(rna_out, file = paste0(O_DIR, "rna_go.Rds"))