Skip to content

Commit

Permalink
version 0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Solenne Gaucher authored and cran-robot committed Dec 9, 2020
1 parent a4ec7ad commit 1e3096d
Show file tree
Hide file tree
Showing 15 changed files with 721 additions and 80 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: gsbm
Type: Package
Title: Estimate Parameters in the Generalized SBM
Version: 0.1.1
Version: 0.2.1
Authors@R: c(person("Genevieve", "Robin", role = c("aut"), email = "genevieve.robin@inria.fr"),
person("Solenne", "Gaucher", role = c("aut", "cre"), email = "solenne.gaucher@math.u-psud.fr"))
Maintainer: Solenne Gaucher <solenne.gaucher@math.u-psud.fr>
Expand All @@ -11,12 +11,13 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
Depends: R (>= 3.5.0)
Imports: softImpute, RSpectra
Suggests: knitr, rmarkdown, igraph, missSBM, RColorBrewer, Matrix
Imports: softImpute, RSpectra, doParallel, Matrix, foreach
Suggests: knitr, rmarkdown, igraph, missSBM, RColorBrewer, combinat,
testthat
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2020-07-09 14:58:34 UTC; Mathilde
Packaged: 2020-12-08 11:27:58 UTC; Mathilde
Author: Genevieve Robin [aut],
Solenne Gaucher [aut, cre]
Repository: CRAN
Date/Publication: 2020-07-09 15:30:03 UTC
Date/Publication: 2020-12-09 10:30:02 UTC
26 changes: 14 additions & 12 deletions MD5
@@ -1,30 +1,32 @@
835ff16c981970368443155b54be336e *DESCRIPTION
95e4d40dcbac84615de626357dbc6025 *NAMESPACE
6662814dad71b280d85b7a269e2a3191 *NEWS.md
ad8f5978ec55d4c4b2f0ae1716a77c97 *DESCRIPTION
148eb1c6412f98c43cc1768d4756667e *NAMESPACE
eb8f1bd259e9d4c1018dc9cf587f362b *NEWS.md
8cdccfe9a53a8a5f74161a795652fdc2 *R/PrimarySchool.R
3320634fb68f02db006eabd5c2a181dc *R/blogosphere.R
aea7baf9d052143808421a1f0019da37 *R/cross_validation.R
a2035fd85404e3262ce8b4529e400fde *R/gsbm_mcgd.R
b2175eb09ba8852b09ef7b25445c0628 *R/gsbm_mcgd.R
0f81378c38c49403bbb99d5550973494 *R/gsbm_mcgd_parallel.R
e30e19116c3fbe577515de4126558fca *R/les_miserables.R
08237c350054a60addcb4cc151fd0c40 *README.md
707c46ee4a9f262a6749cd8555d8a9cd *build/vignette.rds
744ed95ada5aa4e5a67c7f42db396936 *data/PrimarySchool.RData
ae50a8f2a07c919a73bb03be9793c5c8 *data/blogosphere.RData
7ee26a6f1cfe9a0bf05d1aa40157c96b *data/les_miserables.RData
8f92876665be71b05cc23f92f057a6de *inst/doc/PrimarySchool.R
3ea91d2031c50ab397dece245e77cc34 *inst/doc/PrimarySchool.Rmd
018895fe51c1d2848de14504046ed044 *inst/doc/PrimarySchool.html
ea91f5b567103d674ea4808600a93c3f *inst/doc/PrimarySchool.R
ee0b9cb0e76c0e2e7d1431f8e4eac063 *inst/doc/PrimarySchool.Rmd
75ddd567fa5b2b2269d932357391a15d *inst/doc/PrimarySchool.html
d774debe9d53de46fb848857145028d1 *inst/doc/blogosphere.R
8a2a3d1db74e3b89111cde36777492fa *inst/doc/blogosphere.Rmd
9fef1ede66e885c5f1720aa767293c82 *inst/doc/blogosphere.html
9be011b77a215825543bc5ac850587e0 *inst/doc/les_miserables.R
2f967a4a35b04bfe06e82bf3447d17be *inst/doc/les_miserables.Rmd
5452fe681d95d93a435f620f3c3fe353 *inst/doc/les_miserables.html
caf92677d5e9e88cef4147020b6846ff *inst/doc/les_miserables.R
3eaf210648454500932b413b019771b8 *inst/doc/les_miserables.Rmd
7cdd9e75f730dec98ac0addb8703e1a5 *inst/doc/les_miserables.html
387d3146c267defe49f511cd5fb3f3f5 *man/PrimarySchool.Rd
834669cfcb12d36058568906af07b265 *man/blogosphere.Rd
c1198af29f06d597b2ab152f8b3fd0f0 *man/crossval.Rd
21958ba85a99eb0570e20609164b6a17 *man/gsbm_mcgd.Rd
7299c273ba49ecfbaeb0f23840926357 *man/gsbm_mcgd_parallel.Rd
9d7704008efb5d4e32db59f35cb7a08e *man/les_miserables.Rd
3ea91d2031c50ab397dece245e77cc34 *vignettes/PrimarySchool.Rmd
ee0b9cb0e76c0e2e7d1431f8e4eac063 *vignettes/PrimarySchool.Rmd
8a2a3d1db74e3b89111cde36777492fa *vignettes/blogosphere.Rmd
2f967a4a35b04bfe06e82bf3447d17be *vignettes/les_miserables.Rmd
3eaf210648454500932b413b019771b8 *vignettes/les_miserables.Rmd
6 changes: 6 additions & 0 deletions NAMESPACE
Expand Up @@ -2,6 +2,12 @@

export(crossval)
export(gsbm_mcgd)
export(gsbm_mcgd_parallel)
import(Matrix)
import(doParallel)
import(foreach)
import(parallel)
importFrom(RSpectra,eigs_sym)
importFrom(softImpute,softImpute)
importFrom(stats,aggregate)
importFrom(utils,globalVariables)
4 changes: 1 addition & 3 deletions NEWS.md
Expand Up @@ -2,6 +2,4 @@

## Minor update

Computationally more efficient version of the function gsbm_mcgd

New vignette on the application of GSBM to analyze interactions in a primary school
Computationally more efficient version of the function gsbm_mcgd.
8 changes: 5 additions & 3 deletions R/gsbm_mcgd.R
Expand Up @@ -70,9 +70,10 @@ gsbm_mcgd <- function(A, lambda1, lambda2, epsilon=0.1, U = NULL, maxit = 100, t
Omega <- !is.na(A)
if(is.null(S0)) {
S0 <- matrix(0,n,n)
#S0 <- as(Matrix(0,n,n, sparse=T), "dgCMatrix")
}
if(is.null(L0)) L0 <- matrix(0,n,n)
if(is.null(R0)) R0 <- sqrt((eigs_sym(L0, 1)$values)^2)
if(is.null(L0)) L0 <- matrix(0,n,n)#L0 <- as(Matrix(0,n,n, sparse=T), "dgCMatrix")
if(is.null(R0)) R0 <- 0
if(is.null(U)) {
U <- (1/2)*sum((A - S0)^2, na.rm = T)/lambda1
}
Expand All @@ -95,6 +96,7 @@ gsbm_mcgd <- function(A, lambda1, lambda2, epsilon=0.1, U = NULL, maxit = 100, t
flag <- TRUE
while(flag){
step <- 0.5*step
#mat <- t(pmax(1-step*lambda2/sqrt(colSums((S-step*G_S)^2, na.rm=T)),0)*t(S-G_S))
mat <- sapply(1:n, function(j){
max(1-step*lambda2/sqrt(sum((S[,j]-step*G_S[,j])^2, na.rm=T)),0)*(S[,j]-step*G_S[,j])
})
Expand All @@ -103,7 +105,7 @@ gsbm_mcgd <- function(A, lambda1, lambda2, epsilon=0.1, U = NULL, maxit = 100, t
flag <- obj > obj0
}
S <- mat
G_L <- -Omega*(A - S - t(S) - L) + epsilon*L
G_L <- -1*Omega*(A - S - t(S) - L) + epsilon*L
obj0 <- (1/2)*sum((A0-S-t(S)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(S^2)))+epsilon*(norm(L, type="F")^2+norm(S, type="F")^2)
step <- 2
flag <- TRUE
Expand Down
101 changes: 101 additions & 0 deletions R/gsbm_mcgd_parallel.R
@@ -0,0 +1,101 @@
#' Fit a Generalized Stochastic Block Model
#'
#' @description Given an adjacency matrix with missing observations, the function \code{gsbm_mgcd}
#' robustly estimates the probabilities of connections between nodes.
#'
#' @param A nxn adjacency matrix
#' @param lambda1 regularization parameter for nuclear norm penalty (positive number)
#' @param lambda2 regularization parameter for 2,1-norm penalty (positive number)
#' @param epsilon regularization parameter for the L2-norm penalty (positive number, if NULL, default method is applied)
#' @param maxit maximum number of iterations (positive integer, if NULL, default method is applied)
#' @param trace.it whether messages about convergence should be printed (boolean, if NULL, default is FALSE)
#' @param step_S step size for the gradient step of S parameter (positive number)
#' @param step_L step size for the gradient step of L parameter (positive number)
#' @param n_cores number of cores to parallellize on (integer number, default is set with detectCores())
#' @param save whether or not value of current estimates should be saved at each iteration (boolean)
#' @param file if save is set to TRUE, name of the folder where current estimates should be saved (character string, file saved in file/L_iter.txt at iteration iter)
#'
#' @return The estimate for the nxn matrix of probabilities of connections between nodes. It is
#' given as the sum of a low-rank nxn matrix L, corresponding to connections between inlier
#' nodes, and a column sparse nxn matrix S, corresponding to connections between outlier
#' nodes and the rest of the network. The matrices L and S are such that
#'
#' E(A) = L - diag(L) + S + S'
#'
#' where E(A) is the expectation of the adjacency matrix, diag(L) is a nxn diagonal
#' matrix with diagonal entries equal to those of L, and S' means S transposed.
#'
#' The return value is a list of components
#' \itemize{
#' \item{\code{A}}{ the adjacency matrix.}
#' \item{\code{L}}{ estimate for the low-rank component.}
#' \item{\code{S}}{ estimate for the column-sparse component.}
#' \item{\code{objective}}{ the value of the objective function.}
#' \item{\code{R}}{ a bound on the nuclear norm of the low-rank component.}
#' \item{\code{iter}}{ number of iterations between convergence of the objective function.}
#' }
#'
#' @importFrom stats aggregate
#' @importFrom RSpectra eigs_sym
#' @importFrom utils globalVariables
#' @import parallel
#' @import foreach
#' @import doParallel
#' @import Matrix
#' @export
#'

gsbm_mcgd_parallel <- function(A, lambda1, lambda2, epsilon=0.1, maxit = 100,
step_L = 1e-2, step_S=0.1, trace.it = FALSE,
n_cores=detectCores(), save = FALSE, file=NULL){
n <- nrow(A)
S <- Matrix(0, n, n, sparse=TRUE)
L <- Matrix(0, n, n, sparse=TRUE)
R <- 0
U <- (1/2)*sum((A)^2, na.rm = T)/lambda1
iter <- 0
Omega <- mcmapply(function(i){!is.na(A[,i])}, 1:n, mc.cores=n_cores)
A[is.na(A)] <- 0 #mcmapply(function(i){A[i,][is.na(A[i,])] <- 0; return(A[i,])}, 1:n, mc.cores=n_cores)
cl <- makeForkCluster(nnodes = n_cores)
registerDoParallel(cl)
obj <- foreach(i = 1:n,.combine=c) %dopar% {sum(0.5*Omega[i,]*(A[i,])^2, na.rm=T)}
objective <- sum(obj)+lambda1*R
while(iter < maxit){
iter <- iter + 1
G_S <- mcmapply(function(i){-2*Omega[,i]*(A[,i] - L[,i]-S[i,]-S[,i])+epsilon*S[,i]}, 1:n, mc.cores=n_cores)
S <- mcmapply(function(i){
max(0,1-step_S*lambda2/norm(S[,i]-step_S*G_S[,i], type="2"))*(S[,i]-step_S*G_S[,i])
}, 1:n, mc.cores = n_cores)
G_L <- mcmapply(function(i){-Omega[,i]*(A[,i] - L[,i]-S[,i]-S[i,])+epsilon*L[,i]}, 1:n, mc.cores=n_cores)
svdL <- eigs_sym(G_L, 1)
D_t <- - sign(svdL$value)*svdL$vectors%*%t(svdL$vectors)
if(lambda1 >= - sum(D_t*G_L)){
R_tilde <- 0
L_tilde <- Matrix(0,n,n, sparse=TRUE)
L <- L + step_L*(L_tilde - L)
R <- R + step_L*(R_tilde - R)
} else{
R_tilde <- U
L_tilde <- U*D_t
L <- L + step_L*(L_tilde - L)
R <- R + step_L*(R_tilde - R)
}
obj <- foreach(i = 1:n,.combine=c) %dopar% {sum(0.5*Omega[i,]*(A[i,]-S[i,]-S[,i]-L[i,])^2, na.rm=T)+lambda2*sqrt(sum(S[,i]^2))+epsilon*sum(L[i,]^2)+epsilon*sum(S[,i]^2)}
obj <- sum(obj)+lambda1*R
objective <- c(objective, obj)
U <- obj/lambda1
if(trace.it && (iter%%10 == 0) ){
print(paste("iter ", iter))
}
if(save){
save(S, file=paste(file, "/S_", iter,".Rdata", sep=""))
save(L, file=paste(file, "/L_", iter,".Rdata", sep=""))
save(objective, file=paste(file, "/objective_", iter,".Rdata", sep=""))
}

}
stopCluster(cl)
return(list(A = A, L = L, S = S, objective = objective, R = R))
}

globalVariables(c("i"))
103 changes: 102 additions & 1 deletion inst/doc/PrimarySchool.R
Expand Up @@ -9,7 +9,108 @@ library(Matrix)
library(gsbm)
library(igraph)
library(RColorBrewer)
library(combinat)

data(PrimarySchool)
summary(PrimarySchool)
A <- PrimarySchool$A
class <- PrimarySchool$class
n <- dim(A)[1]
class.names <- levels(class)
class.effectifs <- table(class)

## ----compute frequency of interaction-----------------------------------------
# Compute frequency of interactions
K = length(class.names)
Q = matrix(rep(0, K*K), ncol = K, nrow = K)
for (k1 in 1:K){
com1 = class.names[k1]
for (k2 in 1:K){
com2 = class.names[k2]
Q[k1, k2] <- mean(A[class == com1, class == com2], na.rm = TRUE)
Q[k2, k1] <- Q[k1, k2]
}
}
Q <- round(Q, 2)
colnames(Q) <- class.names
rownames(Q) <- class.names
print(Q)

## ----Plot network-------------------------------------------------------------
# Vertex labels
v.label <- rep(NA, n)

# Graph without NA's
A.noNA <- matrix(0, ncol = n, nrow = n)
A.noNA[!is.na(A)] <- A[!is.na(A)]
g <- graph_from_adjacency_matrix(A.noNA, mode = "undirected")

# Layout
W<- matrix(0, n, n)
for (i in 1:n){
for (j in 1:n){
if ((!is.na(A[i,j])) && A[i,j]==1){
if (class[i]==class[j] && class[i]!= "teachers"){
W[i,j] <- 2
}
else{
W[i,j] <- 1
}
}
}
}
gw <- graph_from_adjacency_matrix(W, mode = "undirected")
lay <- layout_with_fr(gw)
pal <- brewer.pal(11, "Paired")

# Plot network
plot(g, vertex.label = v.label, vertex.size = (2 + colSums(A, na.rm =T)/5), vertex.color = class, layout = lay, palette = pal, edge.width = 0.3, edge.curved=0.1)

legend("bottomleft", legend = class.names, col = pal, pch = 1, bty = 'n', cex = 0.5, ncol = 1)

## ----run GSBM-----------------------------------------------------------------
# Investigate these outliers
T1<-Sys.time()
res <- gsbm_mcgd(A, 8.5, 8, maxit = 100)
T2<-Sys.time()
Tdiff= difftime(T1, T2)
print(paste0("running time : ", Tdiff))
outliers <- which(colSums(res$S)>0)
no <- length(outliers)
outliers.class <- class[outliers]
outliers.Q <- matrix(0,nrow = no, ncol = K)
for (o in 1:no){
for (k in 1:K){
comk = class.names[k]
outliers.Q[o,k] <- mean(A[outliers[o],class == comk], na.rm = T)
}
print(paste0("node ", outliers[o], " is in class ", outliers.class[o]))
print(outliers.Q[o,], 2)
print("")
}

## ----recover the classes------------------------------------------------------
# Compute svd and run k-means
L.U <- svd(res$L[-outliers, -outliers], nu = 10)$u
est_class <- kmeans(L.U, 10, nstart = 100)$cluster
length(est_class)

# Recover the class among the permutations of labels
equ_label = combinat::permn(1:10)
error <- rep(0, length(equ_label))
for (i in 1:length(equ_label)){
error[i] <- sum(class[-outliers] != (class.names[unlist(equ_label[i])])[est_class])
}
error[which.min(error)]
class_est <- class.names[unlist(equ_label[which.min(error)])][est_class]
class_est_out <- rep(NA, n)
class_est_out[outliers] <- "outlier"
class_est_out[-outliers] <- class_est
class_est_out <- as.factor(class_est_out)

# Plot network
plot(g, vertex.label = v.label, vertex.size = (2 + colSums(A, na.rm =T)/5), vertex.color = class_est_out, layout = lay, palette = pal, edge.width = 0.3, edge.curved=0.1)

class_est_out <- as.factor(class_est_out)



0 comments on commit 1e3096d

Please sign in to comment.