-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3d54f69
Showing
36 changed files
with
1,984 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
Package: M2SMJF | ||
Title: Multi-Modal Similarity Matrix Joint Factorization | ||
Version: 1.0 | ||
Authors@R: person("Xiaoyao", "Yin", email = "yinxy1992@sina.com", role = c("aut", "cre")) | ||
Description: A new method to implement clustering from multiple modality data of certain samples, | ||
the function M2SMjF() jointly factorizes multiple similarity matrices into a shared sub-matrix | ||
and several modality private sub-matrices, which is further used for clustering. Along with | ||
this method, we also provide function to calculate the similarity matrix and function to | ||
evaluate the best cluster number from the original data. | ||
Imports: dplyr, MASS, stats | ||
Depends: R (>= 3.4.0) | ||
License: GPL (>= 2) | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.1.1 | ||
Suggests: knitr, rmarkdown | ||
VignetteBuilder: knitr | ||
NeedsCompilation: no | ||
Packaged: 2020-11-20 00:49:57 UTC; DELL | ||
Author: Xiaoyao Yin [aut, cre] | ||
Maintainer: Xiaoyao Yin <yinxy1992@sina.com> | ||
Repository: CRAN | ||
Date/Publication: 2020-11-23 08:40:06 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
0c144947c22cb86d28a3e8019dfad1a8 *DESCRIPTION | ||
321dda5c58fc307aa9c82250e286269f *NAMESPACE | ||
8d8239d90fee1f0fdeff0d9dd7f3dd9a *R/Cal_NMI.R | ||
0f4d1e0f64648cf3560b5548e7a1fa8d *R/M2SMJF.R | ||
460ebd9b20101bdf93eddbfad7abb0d4 *R/Standard_Normalization.R | ||
48e45a970f0497430edd675c2ee7b3c5 *R/affinityMatrix.R | ||
cd0867a38b2f9591c471fd90dc7099b8 *R/cost.R | ||
2d6f15baa103c9f74d0f5dad0132f967 *R/dist2bin.R | ||
6a67c51711542a9ad6847739ccce0dab *R/dist2chi.R | ||
b10b4def4b5d466c0d356e4d3d4da943 *R/dist2eu.R | ||
ff51d39ce5eb9db7291961e72f2cc854 *R/initialization.R | ||
897a30e8b26b5dbb79236da735986176 *R/initialize_WL.R | ||
89664a65d91574f54eb15457e2c04144 *R/new_modularity.R | ||
1dbd5ddc2361ea82f336e34843564ddd *R/simu_data_gen.R | ||
1da7d8ca2b21337c375db9cdc932b358 *R/update_L.R | ||
8bb0a4aa64915d9d273812b7beb1c9c9 *R/update_alpha.R | ||
575a3975ffd3213ca4cbfc8c95e28c32 *build/vignette.rds | ||
3238ab2475e4eda0c7f0d9e3cd406daf *inst/doc/introduction.R | ||
8caf5d73bd739b0bbea1dd5da88421ac *inst/doc/introduction.Rmd | ||
ed87cb375c2a87afcd0e0ed198269e7f *inst/doc/introduction.html | ||
0ea9ff08b4dba6aaf12d555005691c19 *man/Cal_NMI.Rd | ||
c3eeb7120f0ba916c83f3232d65cf204 *man/M2SMJF.Rd | ||
7b3b5e7c8539b89951c63b8b58f2330f *man/Standard_Normalization.Rd | ||
114dc3134be0eef025fda4254db983bd *man/affinityMatrix.Rd | ||
edd535b7b028ad42e3cbfccd713d6d49 *man/cost.Rd | ||
081abdcd3ca4869da5565779db1fc0ca *man/dist2bin.Rd | ||
88910a2ab9c51553d55b3904c23e8b86 *man/dist2chi.Rd | ||
aa52dc00dd69beb490748c1d70a96ecf *man/dist2eu.Rd | ||
9b23641ab445a83d3749fc768cf26877 *man/initialization.Rd | ||
717c946279064b8237116e216d39bd0d *man/initialize_WL.Rd | ||
cb900189b8823ac0c286f2c4ea920b95 *man/new_modularity.Rd | ||
961dbfb0b6b602c1d1a23467de548843 *man/simu_data_gen.Rd | ||
4ddf67926edb14c885c072bc14b4787e *man/update_L.Rd | ||
9a9f0feb15b5c93d790bf6e07dd1ec68 *man/update_alpha.Rd | ||
8caf5d73bd739b0bbea1dd5da88421ac *vignettes/introduction.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(Cal_NMI) | ||
export(M2SMJF) | ||
export(Standard_Normalization) | ||
export(affinityMatrix) | ||
export(cost) | ||
export(dist2bin) | ||
export(dist2chi) | ||
export(dist2eu) | ||
export(initialization) | ||
export(initialize_WL) | ||
export(new_modularity) | ||
export(simu_data_gen) | ||
export(update_L) | ||
export(update_alpha) | ||
import(dplyr) | ||
importFrom(MASS,ginv) | ||
importFrom(stats,kmeans) | ||
importFrom(stats,rnorm) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#' calculate the normalized mutual information. | ||
#' | ||
#' calculate the normalized mutual information of two vectors x and y. | ||
#' | ||
#' @param x A vector | ||
#' | ||
#' @param y A vector as long as x | ||
#' | ||
#' @return A number between 0 and 1 indicating the normalized mutual information | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' x <- c(0.1,0.2,0.3,0.4) | ||
#' y <- c(0.1,0.2,0.3,0.4) | ||
#' NMI <- Cal_NMI(x,y) | ||
#' @rdname Cal_NMI | ||
#' @export | ||
Cal_NMI <- function(x, y) { | ||
x = as.vector(x) | ||
y = as.vector(y) | ||
return(max(0, mutualInformation(x, y)/sqrt(entropy(x) * entropy(y)), na.rm=TRUE)) | ||
} | ||
|
||
# Calculate the mutual information between vectors x and y. | ||
mutualInformation <- function(x, y) { | ||
classx <- unique(x) | ||
classy <- unique(y) | ||
nx <- length(x) | ||
ncx <- length(classx) | ||
ncy <- length(classy) | ||
|
||
probxy <- matrix(NA, ncx, ncy) | ||
for (i in 1:ncx) { | ||
for (j in 1:ncy) { | ||
probxy[i, j] <- sum((x == classx[i]) & (y == classy[j])) / nx | ||
} | ||
} | ||
|
||
probx <- matrix(rowSums(probxy), ncx, ncy) | ||
proby <- matrix(colSums(probxy), ncx, ncy, byrow=TRUE) | ||
result <- sum(probxy * log(probxy / (probx * proby), 2), na.rm=TRUE) | ||
return(result) | ||
} | ||
|
||
# Calculate the entropy of vector x. | ||
entropy <- function(x) { | ||
class <- unique(x) | ||
nx <- length(x) | ||
nc <- length(class) | ||
|
||
prob <- rep.int(NA, nc) | ||
for (i in 1:nc) { | ||
prob[i] <- sum(x == class[i])/nx | ||
} | ||
|
||
result <- -sum(prob * log(prob, 2)) | ||
return(result) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#' the main part for M2SMJF and clustering result | ||
#' | ||
#' jointly factorize multiple matrices into a shared sub-matrix and multiple private sub-matrices | ||
#' | ||
#' @param WL A list of similarity matrices | ||
#' | ||
#' @param lambda A parameter to set the relative weight of the group sparsity constraints | ||
#' | ||
#' @param theta A parameter to determine the convergence | ||
#' | ||
#' @param k A parameter to specify the cluster number | ||
#' @return A list containing the clustering result | ||
#' \item{sub_matrices}{a list containing all the sub-matrices} | ||
#' \item{cluster_res}{the clustering result which is as long as the number of samples} | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' WL <- simu_data_gen() | ||
#' res <- M2SMJF(WL,0.25,10^-4,5) | ||
#' @rdname M2SMJF | ||
#' @export | ||
#' @importFrom MASS ginv | ||
#' @import dplyr | ||
#' @importFrom stats kmeans rnorm | ||
M2SMJF <- function(WL,lambda=0.25,theta=10^-4,k){ | ||
if (!is.list(WL)) | ||
{ | ||
stop('Error:please provide a data list by WL') | ||
} | ||
if (is.na(k)) | ||
{ | ||
stop('Error:please provide a cluster number by k') | ||
} | ||
N <- length(WL) ##length of the similarity matrices list | ||
new_WL_list <- initialize_WL(WL) ## factorize the each of the similairty matrix Si into Ci*t(Ci) | ||
init_list <- initialization(new_WL_list,k) ## initialize the sub-matrix Ci into alpha*Li | ||
### minimize the objective function until convergence | ||
divergence <- 1 ##initialize the divergence | ||
iter_num <- 1 ##initialize the iteration number | ||
old_cost <- 0 ##initialize the cost | ||
while (divergence>theta) ##while not convergent | ||
{ | ||
update_L_list <- update_L(new_WL_list,init_list) ##update all the L with update_L | ||
update_alpha_list <- update_alpha(new_WL_list,update_L_list,lambda) ##update all the alpha with update_alpha | ||
init_list <- update_alpha_list | ||
new_cost <- cost(new_WL_list,init_list,lambda) ## calculate the cost of objective function | ||
divergence <- abs((new_cost-old_cost)/new_cost) ## calculate the relative variation of cost | ||
old_cost <- new_cost | ||
iter_num <- iter_num+1 | ||
} | ||
alpha <- init_list[[(N+1)]] ## sub_matrix alpha is the last one in init_list | ||
clu_res <- kmeans(alpha,k,1000,20) ## clustering the samples by kmeans on the sub_matrix alpha, which is a conventional method in matrix factorization based clustering. | ||
cluster_res <- clu_res$cluster ## get the clustering result, the ith element indicates the cluster of the ith sample | ||
result <- vector("list",2) | ||
names(result) <- c("sub_matrices","cluster_res") | ||
sub_result <- init_list | ||
names(sub_result) <- c(paste("sub_matrix_L",c(1:N),sep='_'),'alpha') | ||
result[[1]] <- sub_result | ||
result[[2]] <- cluster_res | ||
return(result) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#' Normalize the input matrix by column | ||
#' | ||
#' Normalize each column of x to have mean 0 and standard deviation 1. | ||
#' | ||
#' @param x A sample-feature matrix with rows as samples and columns as features | ||
#' | ||
#' @return A sample-feature matrix with rows as samples and columns as features,each column of the matrix have mean 0 and standard deviation 1 | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' data_list <- simu_data_gen() | ||
#' x <- data_list[[1]] | ||
#' data_matrix <- Standard_Normalization(x) | ||
#' @rdname Standard_Normalization | ||
#' @export | ||
Standard_Normalization = function(x) { | ||
x = as.matrix(x); | ||
mean = apply(x, 2, mean) | ||
sd = apply(x, 2, sd) | ||
sd[sd==0] = 1 | ||
xNorm = t((t(x) - mean) / sd) | ||
return(xNorm) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' To calculate the similarity matrix | ||
#' | ||
#' calculate the affinity matrix from the diff matrix with 20 neighbors | ||
#' | ||
#' @param Diff A diff matrix | ||
#' | ||
#' @param K The number of neighbors in consideration | ||
#' | ||
#' @param sigma A parameter to determine the scale | ||
#' | ||
#' @return W The similarity matrix | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' data_list <- simu_data_gen() | ||
#' Diff <- dist2eu(Standard_Normalization(data_list[[1]]),Standard_Normalization(data_list[[1]])) | ||
#' simi <- affinityMatrix(Diff,20,0.5) | ||
#' @rdname affinityMatrix | ||
#' @export | ||
affinityMatrix <- function(Diff,K=20,sigma=0.5) { | ||
N = nrow(Diff) | ||
|
||
Diff = (Diff + t(Diff)) / 2 | ||
diag(Diff) = 0; | ||
sortedColumns = as.matrix(t(apply(Diff,2,sort))) | ||
finiteMean <- function(x) { mean(x[is.finite(x)]) } | ||
means = apply(sortedColumns[,1:K+1],1,finiteMean)+.Machine$double.eps; | ||
|
||
avg <- function(x,y) (x+y) | ||
Sig = outer(means,means,avg)/3 + Diff/3 + .Machine$double.eps; | ||
Sig[Sig <= .Machine$double.eps] = .Machine$double.eps | ||
#densities = dnorm(Diff,0,sigma*Sig,log = FALSE) | ||
densities <- exp(-Diff/(sigma*Sig)) | ||
|
||
normalize <- function(X) | ||
{ | ||
D <- apply(X,1,sum) | ||
Y <- diag(1/sqrt(D)) | ||
Z <- Y%*%X%*%Y | ||
return (Z) | ||
} | ||
W = normalize(densities) | ||
return(W) | ||
# | ||
# return(densities) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#' Calculate the cost | ||
#' | ||
#' A function to calculate the cost of the objective function | ||
#' | ||
#' @param new_WL_list A list of matrices factorized from the similarity matrices list WL | ||
#' | ||
#' @param init_list A list containing the updated result in this iteration | ||
#' | ||
#' @param lambda A parameter to set the relative weight of the group sparsity constraints | ||
#' | ||
#' @return A number indicating the total cost of the objective function | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' WL <- simu_data_gen() | ||
#' WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) | ||
#' WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) | ||
#' new_WL_list <- initialize_WL(WL) | ||
#' k <- 5 | ||
#' lambda <- 0.25 | ||
#' init_list <- initialization(new_WL_list,k) | ||
#' update_L_list <- update_L(new_WL_list,init_list) | ||
#' update_alpha_list <- update_alpha(new_WL_list,update_L_list,lambda) | ||
#' init_list <- update_alpha_list | ||
#' new_loss <- cost(new_WL_list,init_list,lambda) | ||
#' @rdname cost | ||
#' @export | ||
cost <- function(new_WL_list,init_list,lambda) | ||
{ | ||
res <- 0 | ||
NN <- length(new_WL_list) | ||
alpha <- init_list[[(NN+1)]] | ||
for (i in 1:NN) | ||
{ | ||
matr_loss <- new_WL_list[[i]]-alpha%*%init_list[[i]] | ||
res <- res+(norm(matr_loss,"F")^2)/2 | ||
} | ||
res <- res+lambda*init_list[[(NN+2)]] | ||
return(res) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#' Calculate the agreement-based measurement | ||
#' | ||
#' Calculate the agreement-based measurement of two any pair-wise samples x_i and x_j for binary variables | ||
#' | ||
#' @param X A sample-feature matrix with rows as samples and columns as features | ||
#' | ||
#' @param C The same as X | ||
#' | ||
#' @return A matrix whose elements at (i,j) is the agreement-based measurement of two any pair-wise samples x_i and x_j | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' data_list <- simu_data_gen() | ||
#' X <- data_list[[1]] | ||
#' C <- X | ||
#' Diff <- dist2bin(X,C) | ||
#' @rdname dist2bin | ||
#' @export | ||
dist2bin <- function(X,C) { | ||
ndata = nrow(X) | ||
ncentres = nrow(C) | ||
|
||
res = matrix(0,ndata,ncentres) | ||
for (i in 1:ndata) | ||
{ | ||
for (j in 1:ncentres) | ||
{ | ||
res[i,j] = sum(X[i,]!=C[j,]) | ||
} | ||
} | ||
return(res) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#' Calculate the chi-squared distance | ||
#' | ||
#' Calculate the chi-squared distance of two any pair-wise samples x_i and x_j for discrete variables | ||
#' | ||
#' @param X A sample-feature matrix with rows as samples and columns as features | ||
#' | ||
#' @param C The same as X | ||
#' | ||
#' @return A matrix whose elements at (i,j) is the chi-squared distance of two any pair-wise samples x_i and x_j | ||
#' @author Xiaoyao Yin | ||
#' @examples | ||
#' | ||
#' data_list <- simu_data_gen() | ||
#' X <- data_list[[1]] | ||
#' C <- X | ||
#' Diff <- dist2chi(X,C) | ||
#' @rdname dist2chi | ||
#' @export | ||
dist2chi <- function(X,C) { | ||
ndata = nrow(X) | ||
ncentres = nrow(C) | ||
meanX = apply(X,2,mean) | ||
meanC = apply(C,2,mean) | ||
|
||
res = matrix(0,ndata,ncentres) | ||
for (i in 1:ndata) | ||
{ | ||
for (j in 1:ncentres) | ||
{ | ||
res[i,j] = sum((X[i,]-C[j,])^2/(X[i,]+C[j,]))/2 | ||
} | ||
} | ||
return(res) | ||
} |
Oops, something went wrong.