In [1]:
# Raise the repr display limits so tables of up to 600 rows x 200 columns
# are shown without truncation.
options(
  repr.matrix.max.rows = 600,
  repr.matrix.max.cols = 200
)

library("Hmisc")


#----------NA row remove function----------
# Drop the rows of `data` that contain NA in the selected columns.
#   data: a data frame or matrix.
#   cols: optional column names or indices to check for NA; when omitted,
#         every column is checked.
# Returns `data` restricted to its complete rows, with all columns kept and
# the input's two-dimensional shape preserved.
na_rm <- function(data, cols) {
  # `cols` names columns, so it must go in the column position of `[`;
  # drop = FALSE keeps a single selected column two-dimensional.
  checked <- if (missing(cols)) data else data[, cols, drop = FALSE]
  mask <- complete.cases(checked)
  data[mask, , drop = FALSE]
}
#mRNA_f <- t(na_rm(t(mRNA_f)))
#mRNA <- mRNA_f[1:nrow(mRNA_f)-1,]
#----------NA row remove function----------

Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Loading required package: ggplot2

Attaching package: ‘Hmisc’

The following objects are masked from ‘package:base’:

    format.pval, round.POSIXt, trunc.POSIXt, units



---
### data import: data.csv

In [2]:
# Read the clinical table and keep only the three columns used downstream.
clinical1 <- read.csv("data.csv", header = TRUE)
clinical2 <- clinical1[, c("multifocality", "BRAF", "name")]


# Shorten sample names to their first 15 characters,
# e.g. TCGA.BJ.A0YZ.01A becomes TCGA.BJ.A0YZ.01.
clinical2$name <- substr(clinical1$name, 1, 15)


# Recode focality as numeric: "multifocal" -> 1, "unifocal" -> 0.
# Any other label goes through as.numeric() unchanged, so it becomes NA
# unless it already parses as a number.
clinical2$multifocality <- local({
  labels <- as.character(clinical1$multifocality)
  labels[clinical1$multifocality == "multifocal"] <- 1
  labels[clinical1$multifocality == "unifocal"] <- 0
  as.numeric(labels)
})


# Keep only the rows whose BRAF value matches "1".
is_braf_1 <- clinical2$BRAF %in% "1"
clinical_p <- clinical2[is_braf_1, ]

---
### loading mRNA: mRNA.RData, mRNA_selected.csv

In [3]:
# Load the expression data; the code below expects it to provide `mRNA`.
load("mRNA.RData")


# Move the sample identifiers out of the row names into a `name` column.
name <- rownames(mRNA)
rownames(mRNA) <- NULL
mRNA <- cbind(name, mRNA)


# Change the name format:
# TCGA-DJ-A2Q5-01A-11R-A18B-13 type >>> TCGA.BJ.A0YZ.01 type (as in clinical2)
mRNA$name <- gsub("-", ".", mRNA$name)
mRNA$name <- substr(mRNA$name, 1, 15)


selected_mRNA <- read.csv("mRNA_selected.csv")

# Append the selected-mRNA row (row 1: mRNA names, row 2: 1) below the data.
# Columns that are absent from selected_mRNA get NA in that appended row.
mRNA_f <- dplyr::bind_rows(mRNA, selected_mRNA)


#----------NA row remove function----------
# Transpose so columns become rows, drop every row (= original column) that
# holds an NA, and transpose back. Besides the unselected columns this also
# drops any column with an NA in the data itself.
# Note: t() goes through a matrix, so with the character `name` column the
# values come back as character.
mRNA_f <- t(na_rm(t(mRNA_f)))
# Remove the appended marker row again. head(x, -1) drops the last row and
# keeps the matrix shape; the former `1:nrow(mRNA_f)-1` evaluated to
# 0:(n - 1) and only worked because index 0 is silently ignored.
mRNA <- head(mRNA_f, -1)
#----------NA row remove function----------

# Inner join on sample name, then drop the leading clinical columns
# (the key plus the remaining clinical_p columns) so only mRNA columns remain.
mrna_p <- merge(clinical_p, mRNA, by = "name")
mrna_p <- mrna_p[-seq_len(ncol(clinical_p))]

“binding character and factor vector, coercing into character vector”

---
### loading miRNA

In [4]:
# Load the miRNA data; the code below expects it to provide `miRNA`.
load("miRNA.RData")


# Move the sample identifiers out of the row names into a `name` column.
name <- rownames(miRNA)
rownames(miRNA) <- NULL
miRNA <- cbind(name, miRNA)


# Change the name format:
# TCGA-DJ-A2Q5-01A-11R-A18B-13 type >>> TCGA.BJ.A0YZ.01 type (as in clinical2)
miRNA$name <- gsub("-", ".", miRNA$name)
miRNA$name <- substr(miRNA$name, 1, 15)


# Number of zero entries per column. vapply keeps the result a plain numeric
# vector regardless of input, and seq_len() is safe for a zero-column input.
count.zero <- vapply(
  seq_len(ncol(miRNA)),
  function(i) sum(miRNA[, i] == 0),
  numeric(1)
)
# Keep the columns in which fewer than 10% of the rows are zero; the `name`
# column is part of this selection and is needed for the merge below.
selected.mirna <- names(miRNA)[which(count.zero / nrow(miRNA) < 0.1)]
del.mirna <- setdiff(names(miRNA), selected.mirna)

mirna <- miRNA[, selected.mirna]
#mirna[mirna == 0] <- NA

# Left join: every clinical_p row is kept, with NA where no miRNA row matches.
# NOTE(review): the mRNA table is merged without all.x (inner join); the later
# data.frame(mrna_p, mirna_p) needs both to have matching rows - confirm.
p_miRNA <- merge(clinical_p, mirna, by.x = "name", by.y = "name", all.x = TRUE)

# Drop the leading clinical columns so only miRNA columns remain.
mirna_p <- p_miRNA[-seq_len(ncol(clinical_p))]

---
### correlation test (Spearman, mRNA vs. miRNA)

In [5]:
# Put the mRNA columns first and the miRNA columns last in one table.
corr_rna <- data.frame(mrna_p, mirna_p)

# Spearman correlations (r) and p-values (P) between all pairs of columns.
result_rcorr <- rcorr(as.matrix(corr_rna), type = "spearman")

# Keep only the mRNA-rows x miRNA-columns corner of both matrices.
# The index ranges are taken from the full matrix before either is cut down.
total_cols <- ncol(result_rcorr$r)
mrna_rows <- 1:ncol(mrna_p)
mirna_cols <- (total_cols - ncol(mirna_p) + 1):total_cols

result_rcorr$r <- result_rcorr$r[mrna_rows, mirna_cols]
result_rcorr$P <- result_rcorr$P[mrna_rows, mirna_cols]

---
### data selection by pathway

In [6]:
# Gene-symbol vectors, one per pathway; one of them is chosen as `target`
# in the next cell.
wnt_pluri <- c("AXIN2", "CCND2", "FZD3", "FZD4", "CD44")
neural    <- c("AXIN2", "ETS1", "FZD3", "HEY2", "TCF4")
O_linked  <- c("ADAMTS9", "ADAMTSL2", "B4GALT6", "THSD7B")  # GALNT16 missing
axon      <- c("BMPR2", "BOC", "FZD3", "PIK3R3", "SLIT3", "TRPC4")
TCF       <- c("AXIN2", "FZD4", "HIST1H2AC", "HIST1H4H", "RNF146", "TCF4")
sig_pluri <- c("AXIN2", "BMPR2", "FZD3", "FZD4", "PIK3R3")
ectoderm  <- c("BCAS3", "BOC", "ELOVL2", "FZD4", "PODXL")
phosphoD  <- c("AGPAT5", "PDGFD", "PIK3R3", "RALB", "RAPGEF4")
rap1      <- c("ARAP3", "PDGFD", "PIK3R3", "RALB", "RAPGEF4", "TEK")
breast    <- c("AXIN2", "FZD3", "FZD4", "HEY2", "PIK3R3")
gastric   <- c("ABCB1", "AXIN2", "FZD3", "FZD4", "PIK3R3")
sig_wnt   <- c(
  "AXIN2", "FZD3", "FZD4", "HIST1H2AC", "HIST1H4H", "RNF146", "TCF4"
)
hippo     <- c("AXIN2", "BMPR2", "CCND2", "FZD3", "FZD4")

In [7]:
# Gene set analysed in this run; swap in another pathway vector to change it.
target <- hippo

# Transposed copies of the correlation and p-value matrices, so that the
# target genes can be picked by column name.
result_t      <- t(result_rcorr$r)
result_t_pval <- t(result_rcorr$P)

# Pick the target-gene columns, then transpose back so that each target gene
# is a row again.
result_path      <- t(result_t[, target])
result_path_pval <- t(result_t_pval[, target])

In [8]:
#corr_value check (>-0.1 = NA)
del <- ""
for(i in 1:ncol(result_path)-1){
    if(max(result_path[,i+1])>(-0.1)){
        del <- c(del, NA)
    }else{
        del <- c(del, 1)
    }
}


#making NA col
del <- del[2:length(del)]
del <- t(data.frame(del))


#NA col : result binding
result_path <- rbind(result_path, del)
result_path_pval <- rbind(result_path_pval, del)


#----------NA row remove function----------
result_path <- t(na_rm(t(result_path)))
result_path_pval <- t(na_rm(t(result_path)))
result_path <- result_path[1:nrow(result_path)-1,]
result_path_pval <- result_path_pval[1:nrow(result_path_pval)-1,]
#----------NA row remove function----------


#data output
write.csv(result_path, "result_path.csv")
write.csv(result_path_pval, "result_path_pval.csv")