Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 50dc0a6
Showing
13 changed files
with
576 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Package: IGST | ||
Type: Package | ||
Title: Informative Gene Selection Tool | ||
Version: 0.1.0 | ||
Author: Nitesh Kumar Sharma, Dwijesh Chandra Mishra, Neeraj Budhlakoti and Md. Samir Farooqi | ||
Maintainer: Nitesh Kumar Sharma <sharmanitesh.iasri@gmail.com> | ||
Description: Mining informative genes with certain biological meanings is important for clinical diagnosis of disease and discovery of disease mechanisms in plants and animals. This process involves identification of relevant genes and removal of redundant genes as much as possible from a whole gene set. This package selects the informative genes related to a specific trait using a gene expression dataset. These trait-specific genes are considered informative genes. This package returns the informative gene set from high-dimensional gene expression data using a combination of the SVM and MRMR methods (for feature selection) with a bootstrapping procedure. | ||
Depends: R (>= 3.5) | ||
Imports: e1071, BootMRMR | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
NeedsCompilation: no | ||
Packaged: 2020-01-21 10:19:01 UTC; Nitesh-PC | ||
Repository: CRAN | ||
Date/Publication: 2020-01-31 16:00:06 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
d26c3f949d3f5ccac0b0fd254c6d364c *DESCRIPTION | ||
015891f2d765b557316761f08fa3a009 *NAMESPACE | ||
aa99b1857cd75fb3ef05c5bc4210fed9 *R/IGST.bootmrmrsvm.pval.cutoff.R | ||
cb009fa34bb2b7f9e0e2b7049e4bd567 *R/IGST.bootmrmrsvm.weight.cutoff.R | ||
0fc6cc84658cee09c4a6f47777b737c7 *R/IGST.pval.bootmrmrsvm.R | ||
4b6a2b8c81b83a5ba3a2f97aa1e0b4c4 *R/IGST.weight.bootmrmrsvm.R | ||
0e85bb93ea49f3120097f34d2140f192 *data/rice_cold.RData | ||
ca71f92cb5014e547d6301a96ad23db1 *man/IGST.bootmrmrsvm.pval.cutoff.Rd | ||
70dc9368346ed32f9b6a3a508372d4ab *man/IGST.bootmrmrsvm.weight.cutoff.Rd | ||
f2e3022332700f01bd428559940fc646 *man/IGST.pval.bootmrmrsvm.Rd | ||
890e3a3568777ce85dd7697529da377f *man/IGST.weight.bootmrmrsvm.Rd | ||
39ed6bf4ce380848969cf6e1e6fcbd75 *man/rice_cold.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom("stats", "cor", "pnorm", "var") | ||
import(e1071) | ||
import(BootMRMR) | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
########################################### | ||
requireNamespace("e1071") | ||
requireNamespace("BootMRMR") | ||
|
||
# Select the `s` genes with the smallest bootstrap p-values.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene gets a combined criterion
#   v * |linear-SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is the squared deviation of the two class means from the
# overall mean divided by the summed class variances, and redundancy is the
# gene's mean absolute Pearson correlation with all other genes. Genes are
# ranked by that criterion (largest first) and the rank is mapped to a score
# in (0, 1]. Across resamples, each gene's scores are compared against `Q`
# with the normal approximation of the Wilcoxon signed-rank statistic.
#
# Arguments:
#   x   data frame of expression values, genes in rows (row names are the
#       gene names), samples in columns.
#   y   numeric vector of class labels 1 / -1, one per column of `x`.
#   s   number of genes to return; must not exceed nrow(x).
#   Q   value in [0, 1] the rank scores are tested against; defaults to 0.5
#       when not supplied.
#   v   value in [0, 1] weighting the SVM term against the relevance /
#       redundancy term.
#   re  number of bootstrap replicates.
#
# Returns a character vector of `s` gene names with class
# "Informative geneset", ordered by increasing p-value.
IGST.bootmrmrsvm.pval.cutoff <- function(x, y, s, Q, v, re) {
  stopifnot(!is.null(x), !is.null(y))

  # The default has to be applied before Q is read by the range check below.
  if (missing(Q)) {
    Q <- 0.5
  }

  # inherits()/is.numeric() instead of class(x) == "...": class() may have
  # length > 1 (matrix, tibble), which makes if() fail on current R.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  # `||`, not `&`: a value cannot be below 0 and above 1 at the same time.
  if (Q < 0 || Q > 1) {
    warning("Q is the quartile value of rank scores and must be within 0 and 1")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a positive number of bootstrap replicates")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (s > nrow(x)) {
    stop("Number of informative genes to be selected must be less than total number of genes")
  }

  labels <- as.numeric(y)
  genes <- rownames(x)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (b in seq_len(re)) {
    boot <- sample(n_samples, n_samples, replace = TRUE)
    xb <- expr[, boot, drop = FALSE]
    yb <- labels[boot]

    # Redundancy: mean absolute correlation with the other genes (the
    # identity matrix removes each gene's correlation with itself).
    gene_cor <- cor(t(xb), method = "pearson", use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    pos <- which(yb == 1)
    neg <- which(yb == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      overall <- mean(xb[i, ])
      between <- (mean(xb[i, pos]) - overall)^2 +
        (mean(xb[i, neg]) - overall)^2
      between / (var(xb[i, pos]) + var(xb[i, neg]))
    }, numeric(1))

    # Qualified call: requireNamespace() loads e1071 but does not attach it.
    fit <- e1071::svm(t(xb), as.matrix(yb), cost = 10, cachesize = 500,
                      scale = FALSE, type = "C-classification",
                      kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Largest criterion first, so the best gene gets rank 1 and score 1.
    # (An ascending sort here would hand the top score to the worst gene.)
    ranked <- order(-criterion)
    gene_rank <- order(ranked)  # inverse permutation: rank of each gene
    rank_scores[, b] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # Signed-rank test of each gene's scores against Q (normal approximation,
  # upper tail): consistently high scores give small p-values.
  centred <- rank_scores - Q
  pvalue <- vapply(seq_len(n_genes), function(i) {
    z <- centred[i, ]
    z <- z[z != 0]
    n <- length(z)
    r <- rank(abs(z))
    t_plus <- sum(r[z > 0])
    pnorm(t_plus, n * (n + 1) / 4, sqrt(n * (n + 1) * (2 * n + 1) / 24),
          lower.tail = FALSE)
  }, numeric(1))

  select.gene <- genes[order(pvalue)[seq_len(s)]]
  class(select.gene) <- "Informative geneset"
  select.gene
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
################################################ | ||
requireNamespace("e1071") | ||
requireNamespace("BootMRMR") | ||
|
||
# Select the `s` genes with the largest accumulated bootstrap rank score.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene gets a combined criterion
#   v * |linear-SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is the squared deviation of the two class means from the
# overall mean divided by the summed class variances, and redundancy is the
# gene's mean absolute Pearson correlation with all other genes. Genes are
# ranked by that criterion (largest first), the rank is mapped to a score in
# (0, 1], and the scores are summed over all resamples.
#
# Arguments:
#   x   data frame of expression values, genes in rows (row names are the
#       gene names), samples in columns.
#   y   numeric vector of class labels 1 / -1, one per column of `x`.
#   s   number of genes to return; must not exceed nrow(x).
#   v   value in [0, 1] weighting the SVM term against the relevance /
#       redundancy term.
#   re  number of bootstrap replicates.
#
# Returns a character vector of `s` gene names with class
# "Informative geneset", ordered by decreasing summed rank score.
IGST.bootmrmrsvm.weight.cutoff <- function(x, y, s, v, re) {
  # inherits()/is.numeric() instead of class(x) == "...": class() may have
  # length > 1 (matrix, tibble), which makes if() fail on current R.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  # `||`, not `&`: a value cannot be below 0 and above 1 at the same time.
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a positive number of bootstrap replicates")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (s > nrow(x)) {
    stop("Number of informative genes to be selected must be less than total number of genes")
  }

  labels <- as.numeric(y)
  genes <- rownames(x)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (b in seq_len(re)) {
    boot <- sample(n_samples, n_samples, replace = TRUE)
    xb <- expr[, boot, drop = FALSE]
    yb <- labels[boot]

    # Redundancy: mean absolute correlation with the other genes (the
    # identity matrix removes each gene's correlation with itself).
    gene_cor <- cor(t(xb), method = "pearson", use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    pos <- which(yb == 1)
    neg <- which(yb == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      overall <- mean(xb[i, ])
      between <- (mean(xb[i, pos]) - overall)^2 +
        (mean(xb[i, neg]) - overall)^2
      between / (var(xb[i, pos]) + var(xb[i, neg]))
    }, numeric(1))

    # Qualified call: requireNamespace() loads e1071 but does not attach it.
    fit <- e1071::svm(t(xb), as.matrix(yb), cost = 10, cachesize = 500,
                      scale = FALSE, type = "C-classification",
                      kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Largest criterion first, so the best gene gets rank 1 and score 1.
    ranked <- order(-criterion)
    gene_rank <- order(ranked)  # inverse permutation: rank of each gene
    rank_scores[, b] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # Aggregate over replicates and keep the s highest totals.
  total_score <- as.vector(rowSums(rank_scores))
  select.gene <- genes[order(-total_score)[seq_len(s)]]
  class(select.gene) <- "Informative geneset"
  select.gene
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
########################################################### | ||
requireNamespace("e1071") | ||
requireNamespace("BootMRMR") | ||
|
||
# Compute a bootstrap p-value for every gene.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene gets a combined criterion
#   v * |linear-SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is the squared deviation of the two class means from the
# overall mean divided by the summed class variances, and redundancy is the
# gene's mean absolute Pearson correlation with all other genes. Genes are
# ranked by that criterion (largest first) and the rank is mapped to a score
# in (0, 1]. Across resamples, each gene's scores are compared against `Q`
# with the normal approximation of the Wilcoxon signed-rank statistic.
#
# Arguments:
#   x   data frame of expression values, genes in rows, samples in columns.
#   y   numeric vector of class labels 1 / -1, one per column of `x`.
#   re  number of bootstrap replicates.
#   Q   value in [0, 1] the rank scores are tested against; defaults to 0.5
#       when not supplied.
#   v   value in [0, 1] weighting the SVM term against the relevance /
#       redundancy term.
#
# Returns a numeric vector with one p-value per row of `x` (same order),
# carrying the class "p values".
IGST.pval.bootmrmrsvm <- function(x, y, re, Q, v) {
  # The default has to be applied before Q is read by the range check below.
  if (missing(Q)) {
    Q <- 0.5
  }

  # inherits()/is.numeric() instead of class(x) == "...": class() may have
  # length > 1 (matrix, tibble), which makes if() fail on current R.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  # `||`, not `&`: a value cannot be below 0 and above 1 at the same time.
  if (Q < 0 || Q > 1) {
    warning("Q is the quartile value of rank scores and must be within 0 and 1")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a positive number of bootstrap replicates")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }

  labels <- as.numeric(y)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (b in seq_len(re)) {
    boot <- sample(n_samples, n_samples, replace = TRUE)
    xb <- expr[, boot, drop = FALSE]
    yb <- labels[boot]

    # Redundancy: mean absolute correlation with the other genes (the
    # identity matrix removes each gene's correlation with itself).
    gene_cor <- cor(t(xb), method = "pearson", use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    pos <- which(yb == 1)
    neg <- which(yb == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      overall <- mean(xb[i, ])
      between <- (mean(xb[i, pos]) - overall)^2 +
        (mean(xb[i, neg]) - overall)^2
      between / (var(xb[i, pos]) + var(xb[i, neg]))
    }, numeric(1))

    # Qualified call: requireNamespace() loads e1071 but does not attach it.
    fit <- e1071::svm(t(xb), as.matrix(yb), cost = 10, cachesize = 500,
                      scale = FALSE, type = "C-classification",
                      kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Largest criterion first, so the best gene gets rank 1 and score 1.
    ranked <- order(-criterion)
    gene_rank <- order(ranked)  # inverse permutation: rank of each gene
    rank_scores[, b] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # Signed-rank test of each gene's scores against Q (normal approximation,
  # upper tail): consistently high scores give small p-values.
  centred <- rank_scores - Q
  pval.vec <- vapply(seq_len(n_genes), function(i) {
    z <- centred[i, ]
    z <- z[z != 0]
    n <- length(z)
    r <- rank(abs(z))
    t_plus <- sum(r[z > 0])
    pnorm(t_plus, n * (n + 1) / 4, sqrt(n * (n + 1) * (2 * n + 1) / 24),
          lower.tail = FALSE)
  }, numeric(1))

  class(pval.vec) <- "p values"
  pval.vec
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
################################################################# | ||
requireNamespace("e1071") | ||
requireNamespace("BootMRMR") | ||
|
||
# Compute the combined SVM / relevance-redundancy weight of every gene.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene gets the weight
#   v * |linear-SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is the squared deviation of the two class means from the
# overall mean divided by the summed class variances, and redundancy is the
# gene's mean absolute Pearson correlation with all other genes.
#
# NOTE(review): the value returned is the weight vector of the LAST bootstrap
# replicate only; earlier replicates do not contribute to the result. This
# matches the previous behaviour. IGST.bootmrmrsvm.weight.cutoff aggregates
# rank scores over all replicates instead - confirm which one is intended.
#
# Arguments:
#   x   data frame of expression values, genes in rows, samples in columns.
#   y   numeric vector of class labels 1 / -1, one per column of `x`.
#   re  number of bootstrap replicates.
#   v   value in [0, 1] weighting the SVM term against the relevance /
#       redundancy term.
#
# Returns a numeric vector with one weight per row of `x` (same order),
# carrying the class "Weight values".
IGST.weight.bootmrmrsvm <- function(x, y, re, v) {
  # inherits()/is.numeric() instead of class(x) == "...": class() may have
  # length > 1 (matrix, tibble), which makes if() fail on current R.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  # `||`, not `&`: a value cannot be below 0 and above 1 at the same time.
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a positive number of bootstrap replicates")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }

  labels <- as.numeric(y)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  Weight <- NULL
  # Every replicate is still drawn and fitted so that, for a given random
  # seed, the returned (last) replicate is the same one as before.
  for (b in seq_len(re)) {
    boot <- sample(n_samples, n_samples, replace = TRUE)
    xb <- expr[, boot, drop = FALSE]
    yb <- labels[boot]

    # Redundancy: mean absolute correlation with the other genes (the
    # identity matrix removes each gene's correlation with itself).
    gene_cor <- cor(t(xb), method = "pearson", use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    pos <- which(yb == 1)
    neg <- which(yb == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      overall <- mean(xb[i, ])
      between <- (mean(xb[i, pos]) - overall)^2 +
        (mean(xb[i, neg]) - overall)^2
      between / (var(xb[i, pos]) + var(xb[i, neg]))
    }, numeric(1))

    # Qualified call: requireNamespace() loads e1071 but does not attach it.
    fit <- e1071::svm(t(xb), as.matrix(yb), cost = 10, cachesize = 500,
                      scale = FALSE, type = "C-classification",
                      kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    Weight <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)
  }

  class(Weight) <- "Weight values"
  Weight
}
Binary file not shown.
Oops, something went wrong.