Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
nitzzzzzzz authored and cran-robot committed Jan 31, 2020
0 parents commit 50dc0a6
Show file tree
Hide file tree
Showing 13 changed files with 576 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,16 @@
Package: IGST
Type: Package
Title: Informative Gene Selection Tool
Version: 0.1.0
Author: Nitesh Kumar Sharma, Dwijesh Chandra Mishra, Neeraj Budhlakoti and Md. Samir Farooqi
Maintainer: Nitesh Kumar Sharma <sharmanitesh.iasri@gmail.com>
Description: Mining informative genes with certain biological meanings is important for clinical diagnosis of disease and discovery of disease mechanisms in plants and animals. This process involves identification of relevant genes and removal of redundant genes as much as possible from a whole gene set. This package selects the informative genes related to a specific trait using a gene expression dataset. These trait-specific genes are considered informative genes. This package returns the informative gene set from high-dimensional gene expression data using a combination of the SVM and MRMR methods (for feature selection) with a bootstrapping procedure.
Depends: R (>= 3.5)
Imports: e1071, BootMRMR
License: GPL-3
Encoding: UTF-8
LazyData: true
NeedsCompilation: no
Packaged: 2020-01-21 10:19:01 UTC; Nitesh-PC
Repository: CRAN
Date/Publication: 2020-01-31 16:00:06 UTC
12 changes: 12 additions & 0 deletions MD5
@@ -0,0 +1,12 @@
d26c3f949d3f5ccac0b0fd254c6d364c *DESCRIPTION
015891f2d765b557316761f08fa3a009 *NAMESPACE
aa99b1857cd75fb3ef05c5bc4210fed9 *R/IGST.bootmrmrsvm.pval.cutoff.R
cb009fa34bb2b7f9e0e2b7049e4bd567 *R/IGST.bootmrmrsvm.weight.cutoff.R
0fc6cc84658cee09c4a6f47777b737c7 *R/IGST.pval.bootmrmrsvm.R
4b6a2b8c81b83a5ba3a2f97aa1e0b4c4 *R/IGST.weight.bootmrmrsvm.R
0e85bb93ea49f3120097f34d2140f192 *data/rice_cold.RData
ca71f92cb5014e547d6301a96ad23db1 *man/IGST.bootmrmrsvm.pval.cutoff.Rd
70dc9368346ed32f9b6a3a508372d4ab *man/IGST.bootmrmrsvm.weight.cutoff.Rd
f2e3022332700f01bd428559940fc646 *man/IGST.pval.bootmrmrsvm.Rd
890e3a3568777ce85dd7697529da377f *man/IGST.weight.bootmrmrsvm.Rd
39ed6bf4ce380848969cf6e1e6fcbd75 *man/rice_cold.Rd
8 changes: 8 additions & 0 deletions NAMESPACE
@@ -0,0 +1,8 @@
exportPattern("^[[:alpha:]]+")
importFrom("stats", "cor", "pnorm", "var")
import(e1071)
import(BootMRMR)




101 changes: 101 additions & 0 deletions R/IGST.bootmrmrsvm.pval.cutoff.R
@@ -0,0 +1,101 @@
###########################################
requireNamespace("e1071")
requireNamespace("BootMRMR")

# Select the `s` most informative genes by bootstrap p-value.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene (row of `x`) is scored as
#   v * |linear SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is an F-type between-class measure and redundancy is the
# mean absolute Pearson correlation of the gene with all other genes. The
# scores are turned into rank scores in (0, 1] (1 = highest score). A
# one-sided Wilcoxon signed-rank test (normal approximation) of each gene's
# rank scores against `Q` then yields one p-value per gene.
#
# Arguments:
#   x   data frame of expression values; rows are genes (row names are the
#       gene names), columns are samples.
#   y   numeric vector of class labels coded 1 / -1, one per column of `x`.
#   s   number of genes to select; must not exceed nrow(x).
#   Q   quartile value of the rank scores used as the test location, within
#       0 and 1. Defaults to 0.5 when not supplied.
#   v   trade-off between the SVM and MRMR parts of the score, within 0 and 1.
#   re  number of bootstrap replicates (at least 1; a warning is raised for
#       50 or fewer).
#
# Returns: the names of the `s` genes with the smallest p-values, as a
# character vector of class "Informative geneset".
IGST.bootmrmrsvm.pval.cutoff <- function(x, y, s, Q, v, re) {
  stopifnot(!is.null(x), !is.null(y))

  # The default must be resolved before Q is read by any check below.
  if (missing(Q)) {
    Q <- 0.5
  }

  # inherits()/is.numeric() stay scalar for every input; comparing class()
  # with == gives a length-2 condition for matrices.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a single number giving at least 1 bootstrap replicate")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (Q < 0 || Q > 1) {
    warning("Q is the quartile value of rank scores and must be within 0 and 1")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (s > nrow(x)) {
    stop("Number of informative genes to be selected must be less than total number of genes")
  }

  labels <- as.numeric(y)
  gene_names <- rownames(x)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (j in seq_len(re)) {
    picked <- sample(n_samples, n_samples, replace = TRUE)
    boot_x <- expr[, picked, drop = FALSE]
    boot_y <- labels[picked]

    # Redundancy: mean absolute correlation with the other genes (the
    # diagonal of ones is subtracted so a gene is not compared with itself).
    gene_cor <- cor(t(boot_x), method = "pearson",
                    use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    # Relevance: squared deviation of each class mean from the overall mean,
    # scaled by the sum of the within-class variances.
    pos <- which(boot_y == 1)
    neg <- which(boot_y == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      gene <- boot_x[i, ]
      overall <- mean(gene)
      ((mean(gene[pos]) - overall)^2 + (mean(gene[neg]) - overall)^2) /
        (var(gene[pos]) + var(gene[neg]))
    }, numeric(1))

    fit <- e1071::svm(t(boot_x), as.matrix(boot_y), cost = 10,
                      cachesize = 500, scale = FALSE,
                      type = "C-classification", kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Rank 1 goes to the largest criterion, so the most informative gene
    # receives the rank score 1 and the least informative 1 / n_genes.
    gene_rank <- order(order(-criterion))
    rank_scores[, j] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # One-sided signed-rank test of each gene's rank scores against Q, using
  # the normal approximation of the signed-rank statistic.
  centred <- rank_scores - Q
  pvalue <- vapply(seq_len(n_genes), function(i) {
    d <- centred[i, ]
    d <- d[d != 0]
    n <- length(d)
    t_plus <- sum(rank(abs(d))[d > 0])
    pnorm(t_plus, n * (n + 1) / 4, sqrt(n * (n + 1) * (2 * n + 1) / 24),
          lower.tail = FALSE)
  }, numeric(1))

  select.gene <- gene_names[order(pvalue)[seq_len(s)]]
  class(select.gene) <- "Informative geneset"
  select.gene
}
79 changes: 79 additions & 0 deletions R/IGST.bootmrmrsvm.weight.cutoff.R
@@ -0,0 +1,79 @@
################################################
requireNamespace("e1071")
requireNamespace("BootMRMR")

# Select the `s` most informative genes by bootstrap weight.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene (row of `x`) is scored as
#   v * |linear SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is an F-type between-class measure and redundancy is the
# mean absolute Pearson correlation of the gene with all other genes. The
# scores are turned into rank scores in (0, 1] (1 = highest score), and a
# gene's weight is the sum of its rank scores over all replicates.
#
# Arguments:
#   x   data frame of expression values; rows are genes (row names are the
#       gene names), columns are samples.
#   y   numeric vector of class labels coded 1 / -1, one per column of `x`.
#   s   number of genes to select; must not exceed nrow(x).
#   v   trade-off between the SVM and MRMR parts of the score, within 0 and 1.
#   re  number of bootstrap replicates (at least 1; a warning is raised for
#       50 or fewer).
#
# Returns: the names of the `s` genes with the largest weights, as a
# character vector of class "Informative geneset".
IGST.bootmrmrsvm.weight.cutoff <- function(x, y, s, v, re) {
  # inherits()/is.numeric() stay scalar for every input; comparing class()
  # with == gives a length-2 condition for matrices.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a single number giving at least 1 bootstrap replicate")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }
  if (s > nrow(x)) {
    stop("Number of informative genes to be selected must be less than total number of genes")
  }

  labels <- as.numeric(y)
  gene_names <- rownames(x)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (j in seq_len(re)) {
    picked <- sample(n_samples, n_samples, replace = TRUE)
    boot_x <- expr[, picked, drop = FALSE]
    boot_y <- labels[picked]

    # Redundancy: mean absolute correlation with the other genes (the
    # diagonal of ones is subtracted so a gene is not compared with itself).
    gene_cor <- cor(t(boot_x), method = "pearson",
                    use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    # Relevance: squared deviation of each class mean from the overall mean,
    # scaled by the sum of the within-class variances.
    pos <- which(boot_y == 1)
    neg <- which(boot_y == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      gene <- boot_x[i, ]
      overall <- mean(gene)
      ((mean(gene[pos]) - overall)^2 + (mean(gene[neg]) - overall)^2) /
        (var(gene[pos]) + var(gene[neg]))
    }, numeric(1))

    fit <- e1071::svm(t(boot_x), as.matrix(boot_y), cost = 10,
                      cachesize = 500, scale = FALSE,
                      type = "C-classification", kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Rank 1 goes to the largest criterion, so the most informative gene
    # receives the rank score 1 and the least informative 1 / n_genes.
    gene_rank <- order(order(-criterion))
    rank_scores[, j] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # Aggregate over replicates and keep the s genes with the largest totals.
  Weight <- as.vector(rowSums(rank_scores))
  select.gene <- gene_names[order(-Weight)[seq_len(s)]]
  class(select.gene) <- "Informative geneset"
  select.gene
}
85 changes: 85 additions & 0 deletions R/IGST.pval.bootmrmrsvm.R
@@ -0,0 +1,85 @@
###########################################################
requireNamespace("e1071")
requireNamespace("BootMRMR")

# Compute a bootstrap p-value for every gene.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene (row of `x`) is scored as
#   v * |linear SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is an F-type between-class measure and redundancy is the
# mean absolute Pearson correlation of the gene with all other genes. The
# scores are turned into rank scores in (0, 1] (1 = highest score). A
# one-sided Wilcoxon signed-rank test (normal approximation) of each gene's
# rank scores against `Q` then yields one p-value per gene.
#
# Arguments:
#   x   data frame of expression values; rows are genes, columns are samples.
#   y   numeric vector of class labels coded 1 / -1, one per column of `x`.
#   re  number of bootstrap replicates (at least 1; a warning is raised for
#       50 or fewer).
#   Q   quartile value of the rank scores used as the test location, within
#       0 and 1. Defaults to 0.5 when not supplied.
#   v   trade-off between the SVM and MRMR parts of the score, within 0 and 1.
#
# Returns: numeric vector of p-values, one per row of `x` in row order, with
# class "p values".
IGST.pval.bootmrmrsvm <- function(x, y, re, Q, v) {
  # The default must be resolved before Q is read by any check below.
  if (missing(Q)) {
    Q <- 0.5
  }

  # inherits()/is.numeric() stay scalar for every input; comparing class()
  # with == gives a length-2 condition for matrices.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a single number giving at least 1 bootstrap replicate")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (Q < 0 || Q > 1) {
    warning("Q is the quartile value of rank scores and must be within 0 and 1")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }

  labels <- as.numeric(y)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (j in seq_len(re)) {
    picked <- sample(n_samples, n_samples, replace = TRUE)
    boot_x <- expr[, picked, drop = FALSE]
    boot_y <- labels[picked]

    # Redundancy: mean absolute correlation with the other genes (the
    # diagonal of ones is subtracted so a gene is not compared with itself).
    gene_cor <- cor(t(boot_x), method = "pearson",
                    use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    # Relevance: squared deviation of each class mean from the overall mean,
    # scaled by the sum of the within-class variances.
    pos <- which(boot_y == 1)
    neg <- which(boot_y == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      gene <- boot_x[i, ]
      overall <- mean(gene)
      ((mean(gene[pos]) - overall)^2 + (mean(gene[neg]) - overall)^2) /
        (var(gene[pos]) + var(gene[neg]))
    }, numeric(1))

    fit <- e1071::svm(t(boot_x), as.matrix(boot_y), cost = 10,
                      cachesize = 500, scale = FALSE,
                      type = "C-classification", kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Rank 1 goes to the largest criterion, so the most informative gene
    # receives the rank score 1 and the least informative 1 / n_genes.
    gene_rank <- order(order(-criterion))
    rank_scores[, j] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # One-sided signed-rank test of each gene's rank scores against Q, using
  # the normal approximation of the signed-rank statistic.
  centred <- rank_scores - Q
  pval.vec <- vapply(seq_len(n_genes), function(i) {
    d <- centred[i, ]
    d <- d[d != 0]
    n <- length(d)
    t_plus <- sum(rank(abs(d))[d > 0])
    pnorm(t_plus, n * (n + 1) / 4, sqrt(n * (n + 1) * (2 * n + 1) / 24),
          lower.tail = FALSE)
  }, numeric(1))

  class(pval.vec) <- "p values"
  pval.vec
}
66 changes: 66 additions & 0 deletions R/IGST.weight.bootmrmrsvm.R
@@ -0,0 +1,66 @@
#################################################################
requireNamespace("e1071")
requireNamespace("BootMRMR")

# Compute a bootstrap weight for every gene.
#
# For each of `re` bootstrap resamples of the samples (columns of `x`), every
# gene (row of `x`) is scored as
#   v * |linear SVM weight| + (1 - v) * (relevance / redundancy)
# where relevance is an F-type between-class measure and redundancy is the
# mean absolute Pearson correlation of the gene with all other genes. The
# scores are turned into rank scores in (0, 1] (1 = highest score), and a
# gene's weight is the sum of its rank scores over all replicates, so every
# replicate contributes to the result.
#
# Arguments:
#   x   data frame of expression values; rows are genes, columns are samples.
#   y   numeric vector of class labels coded 1 / -1, one per column of `x`.
#   re  number of bootstrap replicates (at least 1; a warning is raised for
#       50 or fewer).
#   v   trade-off between the SVM and MRMR parts of the score, within 0 and 1.
#
# Returns: numeric vector of weights, one per row of `x` in row order, with
# class "Weight values". Larger weights mark more informative genes.
IGST.weight.bootmrmrsvm <- function(x, y, re, v) {
  # inherits()/is.numeric() stay scalar for every input; comparing class()
  # with == gives a length-2 condition for matrices.
  if (!inherits(x, "data.frame")) {
    warning("x must be a data frame and rows as gene names")
  }
  if (!is.numeric(y)) {
    warning("y must be a vector of 1/-1's for two class problems")
  }
  if (length(y) != ncol(x)) {
    warning("Number of samples in x must have same number of sample labels in y")
  }
  if (!is.numeric(re) || length(re) != 1L || is.na(re) || re < 1) {
    stop("re must be a single number giving at least 1 bootstrap replicate")
  }
  if (re <= 50) {
    warning("re must be numeric and sufficiently large")
  }
  if (v < 0 || v > 1) {
    warning("v is the tradeoff value between svm and mrmr and must be within 0 and 1 ")
  }

  labels <- as.numeric(y)
  expr <- as.matrix(x)
  n_genes <- nrow(expr)
  n_samples <- ncol(expr)

  # One column of rank scores per bootstrap replicate.
  rank_scores <- matrix(0, n_genes, re)
  for (j in seq_len(re)) {
    picked <- sample(n_samples, n_samples, replace = TRUE)
    boot_x <- expr[, picked, drop = FALSE]
    boot_y <- labels[picked]

    # Redundancy: mean absolute correlation with the other genes (the
    # diagonal of ones is subtracted so a gene is not compared with itself).
    gene_cor <- cor(t(boot_x), method = "pearson",
                    use = "pairwise.complete.obs")
    redundancy <- rowSums(abs(gene_cor - diag(n_genes))) / (n_genes - 1)

    # Relevance: squared deviation of each class mean from the overall mean,
    # scaled by the sum of the within-class variances.
    pos <- which(boot_y == 1)
    neg <- which(boot_y == -1)
    relevance <- vapply(seq_len(n_genes), function(i) {
      gene <- boot_x[i, ]
      overall <- mean(gene)
      ((mean(gene[pos]) - overall)^2 + (mean(gene[neg]) - overall)^2) /
        (var(gene[pos]) + var(gene[neg]))
    }, numeric(1))

    fit <- e1071::svm(t(boot_x), as.matrix(boot_y), cost = 10,
                      cachesize = 500, scale = FALSE,
                      type = "C-classification", kernel = "linear")
    svm_weight <- abs(as.vector(t(fit$coefs) %*% fit$SV))

    criterion <- v * svm_weight + (1 - v) * (abs(relevance) / redundancy)

    # Rank 1 goes to the largest criterion, so the most informative gene
    # receives the rank score 1 and the least informative 1 / n_genes.
    gene_rank <- order(order(-criterion))
    rank_scores[, j] <- (n_genes + 1 - gene_rank) / n_genes
  }

  # Aggregate the rank scores over all replicates; returning the criterion
  # of a single replicate would make the bootstrap pointless.
  Weight <- as.vector(rowSums(rank_scores))
  class(Weight) <- "Weight values"
  Weight
}
Binary file added data/rice_cold.RData
Binary file not shown.

0 comments on commit 50dc0a6

Please sign in to comment.