-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit baca314
Showing
10 changed files
with
468 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Package: clusEvol | ||
Type: Package | ||
Title: A Procedure for Cluster Evolution Analytics | ||
Version: 1.0.0 | ||
Date: 2024-02-19 | ||
Authors@R: c(person("Víctor", "Morales-Oñate",role=c("aut","cre"),email="victor.morales@uv.cl",comment = c(ORCID = "0000-0003-1922-6571")),person("Bolívar", "Morales-Oñate", role = "aut",email = "bmoralesonate@gmail.com",comment = c(ORCID = "0000-0003-4980-8759"))) | ||
Maintainer: Víctor Morales-Oñate <victor.morales@uv.cl> | ||
Description: Cluster Evolution Analytics allows us to use exploratory what if questions in the sense that the present information of an object is plugged-in a dataset in a previous time frame so that we can explore its evolution (and of its neighbors) to the present. See the URL for the papers associated with this package, as for instance, Morales-Oñate and Morales-Oñate (2024) <https://mpra.ub.uni-muenchen.de/120220>. | ||
Depends: R (>= 4.1.0) | ||
License: GPL (>= 3) | ||
Encoding: UTF-8 | ||
Imports: ggplot2,plotly,cluster,fpc,viridis,clusterSim,dplyr | ||
Repository: CRAN | ||
URL: https://github.com/vmoprojs/clusEvol | ||
BugReports: https://github.com/vmoprojs/clusEvol/issues | ||
LazyData: true | ||
NeedsCompilation: no | ||
Packaged: 2024-02-23 23:27:37 UTC; victormorales | ||
Author: Víctor Morales-Oñate [aut, cre] | ||
(<https://orcid.org/0000-0003-1922-6571>), | ||
Bolívar Morales-Oñate [aut] (<https://orcid.org/0000-0003-4980-8759>) | ||
Date/Publication: 2024-02-26 18:50:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
60665db9e6325e3ac65fd43b1871179e *DESCRIPTION | ||
0550f6ae158da25b920fe71c0ebd0a44 *NAMESPACE | ||
f5a6d11e305f15d8dff3dccdcb4e9162 *R/clusEvol.R | ||
9af57303507736d5e81d7777f16d16c6 *data/actpas.RData | ||
899ff227b3d63fec5e613739eda8d3cb *data/pwt1001.RData | ||
d21ee51e8ce6673757f54d9eded0e4b5 *man/actpas.Rd | ||
84305660e4106539a8ca77f30b84caa4 *man/clusEvol.Rd | ||
325ad04944ce4e4c5b4a9f8cdd74f8ce *man/plot.clusEvol.Rd | ||
c71d3326e0e4041c3ec6ec53bd4a79e4 *man/pwt1001.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
export(clusEvol) | ||
importFrom("stats", "formula","terms","dist") | ||
importFrom("utils", "stack") | ||
importFrom("fpc", "cluster.stats") | ||
importFrom("cluster", "pam") | ||
importFrom("viridis", "scale_fill_viridis") | ||
importFrom("ggplot2", "ggplot", "aes", "stat_ecdf", "geom_boxplot","geom_point","geom_line","aes_string","geom_tile") | ||
importFrom("plotly","ggplotly") | ||
S3method(print,clusEvol) | ||
S3method(plot,clusEvol) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,279 @@ | ||
#################################################### | ||
### File name: clusEvol.r | ||
#################################################### | ||
|
||
|
||
clusEvol <- function (x=NULL,objects=NULL, time = NULL,target.vars = NULL, | ||
time.base=NULL,sel.obj=NULL,init = NULL,logscale = FALSE, | ||
ng = NULL,clm = "pam",scale=TRUE,clstats = FALSE,...){ | ||
|
||
# x: dataframe | ||
# objects: variable name of objects | ||
# time: variable name of time | ||
# labels: objects labels (same lenght as objects) | ||
# target.vars: selected variables for CEA | ||
# time.base: selected year for CEA | ||
# sel.obj: selected object for CEA | ||
# init: initiation year | ||
# logscale: TRUE if data should be logscaled | ||
# ng: number of desired clusters | ||
# clm: (pam,kmeans,choose) | ||
# ... parameters of the clm chosen | ||
|
||
# revisar paquete que recomienda el clustering del TASK VIEW | ||
|
||
datos <- x[,c(objects,time,target.vars)] | ||
|
||
# ST: if init is missing, take minimum year | ||
if(is.null(init)) { | ||
init <- min(datos[,time]) | ||
}else{ | ||
datos <- datos[which(datos[time]>=init),] | ||
} | ||
# END: if init is missing, take minimum year | ||
|
||
# yrs.base <- input$yrs1 | ||
summary(datos) | ||
|
||
|
||
nas <- apply(datos,2,function(x) sum(is.na(x))) | ||
if(any(nas>0)){ | ||
warning("Selected variables have missing values, use a balaced panel data,NA's will be replaced with mean values") | ||
|
||
if(length(target.vars)>1) | ||
{ | ||
mm <- colMeans(datos[,target.vars],na.rm = TRUE) | ||
}else{ | ||
mm <- mean(datos[,target.vars],na.rm = TRUE) | ||
} | ||
|
||
for(tv in 1:length(target.vars)) | ||
{ | ||
anas <- is.na(datos[,target.vars[tv]]) | ||
datos[which(anas),target.vars[tv]] <- mm[tv] | ||
} | ||
|
||
} | ||
|
||
yrs <- unique(datos[,time]) | ||
|
||
sol.yrs <- NULL #cheks if sel.obj is in everytime period | ||
for(t in 1:length(yrs)) | ||
{ | ||
fil <- datos[which(datos[,time]==yrs[t]),] | ||
sol.yrs <- c(sol.yrs,sel.obj %in% fil[,objects]) | ||
} | ||
if(!all(sol.yrs)) | ||
{ | ||
# warning("Times with no selected object: \n") | ||
# print(yrs[which(sol.yrs)]) | ||
stop("Selected object must be included in all analyzed time periods") | ||
} | ||
|
||
# ST: dataframe for time.base: | ||
refdatos <- datos[datos[,time]==time.base,c(objects,time,target.vars)] | ||
if(logscale & length(target.vars)>1){ | ||
refdatos[,target.vars] <- apply(refdatos[,target.vars],2,log) | ||
} | ||
if(logscale & length(target.vars)==1){ | ||
refdatos[,target.vars] <- log(refdatos[,target.vars]) | ||
} | ||
# END: dataframe for time.base | ||
|
||
# ST: ECbase dataframe for sel.obj | ||
ECbase <- refdatos[refdatos[,objects]==sel.obj,c(objects,time,target.vars)] | ||
# END: ECbase dataframe for sel.obj | ||
|
||
results <- list() | ||
contador <- 1 | ||
|
||
Ksol = NULL | ||
nbelongTot <- NULL | ||
kmodelSol <- NULL | ||
clusterStats <- NULL | ||
for (j in 1:length(yrs)) | ||
{ | ||
|
||
refdatos <- datos[which(datos[,time]==yrs[j]),c(objects,time,target.vars)] | ||
|
||
if(logscale & length(target.vars)>1){ | ||
refdatos[,target.vars] <- apply(refdatos[,target.vars],2,log) | ||
} | ||
if(logscale & length(target.vars)==1){ | ||
refdatos[,target.vars] <- log(refdatos[,target.vars]) | ||
} | ||
|
||
|
||
refdatos[which(refdatos[,objects]==sel.obj),] <- ECbase #replace ECU values | ||
# Include data imputation here if needed | ||
|
||
temp1 <- as.matrix(refdatos[,target.vars],ncol = length(target.vars)) | ||
rownames(temp1) <- refdatos[,objects] | ||
|
||
|
||
# Clustering: | ||
if(clm =="pam") | ||
{ | ||
if(scale) | ||
{ | ||
clm.args <- list(x=scale(temp1),k=ng,...) | ||
}else{ | ||
clm.args <- list(x=temp1,k=ng,...) | ||
} | ||
kmodel <- do.call(cluster::pam,args = clm.args) | ||
kmodelSol[[j]] <- kmodel | ||
|
||
if(clstats) | ||
{ | ||
clusterStats[[j]] <- fpc::cluster.stats(kmodel$diss,kmodel$clustering) | ||
} | ||
|
||
clase <- kmodel$clustering | ||
} | ||
|
||
if(clm =="kmeans") | ||
{ | ||
if(scale) | ||
{ | ||
clm.args <- list(x=scale(temp1),centers=ng,...) | ||
}else{ | ||
clm.args <- list(x=temp1,centers=ng,...) | ||
} | ||
|
||
kmodel <- do.call(stats::kmeans,args = clm.args) | ||
kmodelSol[[j]] <- kmodel | ||
|
||
if(clstats) | ||
{ | ||
if(scale) | ||
{ | ||
clusterStats[[j]] <- fpc::cluster.stats(stats::dist(temp1),kmodel$cluster) | ||
}else{ | ||
clusterStats[[j]] <- fpc::cluster.stats((temp1),kmodel$cluster) | ||
} | ||
} | ||
|
||
|
||
clase <- kmodel$cluster | ||
} | ||
if(clm == "choose") | ||
{ | ||
clm.args <- list(x=temp1,...) | ||
kmodel <- do.call(clusterSim::cluster.Sim,args = clm.args) | ||
kmodelSol[[j]] <- kmodel | ||
clase <- kmodel$optClustering | ||
} | ||
|
||
|
||
auxK = cbind(clase,yrs[j]) | ||
Ksol = rbind(Ksol,auxK ) | ||
|
||
# ST: Find sel.obj's group and data | ||
gbase <- list() | ||
for(i in 1:ng) | ||
{ | ||
gbase[[i]] <- as.vector(refdatos[,1][clase==i]) | ||
} | ||
|
||
nbelong <- grep(sel.obj,gbase)# number of group which sel.obj belongs to | ||
nbelongTot <- c(nbelongTot,nbelong) | ||
g.fin <- refdatos[match(gbase[[nbelong]],refdatos[,objects]),]# Data in sel.obj group | ||
g.fin$time = yrs[j] | ||
results[[contador]] <- g.fin | ||
contador <- contador+1 | ||
} | ||
|
||
Ksol = data.frame( objects = rownames(Ksol), cluster = Ksol[,1],time = Ksol[,2]) | ||
# datos: input data | ||
# target.vars: selected variables | ||
# results: data frame of negighbours of sel.obj | ||
# ECk: cluster that sel.obj belongs to | ||
# sumdat: summary statistics of "datos" | ||
# kmodelSol: output of clustering algorithm in each iteration | ||
# kmodelSol: cluster statistics in each clustering | ||
# sl: cluster evolution in time | ||
|
||
sl <- dplyr::bind_rows(results, .id = "time") | ||
|
||
sumdat <- summary(datos[,target.vars]) | ||
|
||
clusEvol <- list(datos = datos,target.vars=target.vars, | ||
results = results,ECk = nbelong,ECkTot = nbelongTot, | ||
Clus = Ksol,sumdat=sumdat,kmodelSol=kmodelSol, | ||
clusterStats=clusterStats,sl = sl,sel=c(objects,time),sel.obj=sel.obj) | ||
structure(c(clusEvol, call = call), class = c("clusEvol")) | ||
} | ||
|
||
print.clusEvol <- function(x, digits = max(3, getOption("digits") - 3), ...) | ||
{ | ||
if(!inherits(x,"clusEvol")) stop("Enter an object obtained from the function clusEvol\n") | ||
|
||
tn <- unique(x$Clus$time) | ||
sel.obj <- x$sel.obj | ||
# number of elements in sel.obj cluster: | ||
nc <- sapply(x$results,nrow)-1 | ||
names(nc) <- tn | ||
# Cluster that sel.obj belongs to: | ||
# xs <- data.frame(time = unique(x$Clus$time),clus = x$ECkTot) | ||
xs <- x$ECkTot | ||
names(xs) <- tn | ||
# Clusters in time: | ||
tt <- table(x$Clus$time,x$Clus$cluster) | ||
|
||
cat('\n##################################################################') | ||
cat('\nclusEvol: Cluster Evolution Analytics\n') | ||
cat('\n\nNumber of neighbours ', sel.obj,'is a group member: ', '\n') | ||
print(nc) | ||
cat('\n\nCluster that ', sel.obj, 'belongs to: ','\n') | ||
print(xs) | ||
options(digits = digits) | ||
cat('\nClusters in time:\n') | ||
print.default(tt, digits = digits, print.gap = 2, | ||
quote = FALSE) | ||
} | ||
|
||
|
||
plot.clusEvol <- function(x,target,type = "heat",plotly=FALSE,...) | ||
{ | ||
if(!inherits(x,"clusEvol")) stop("Enter an object obtained from the function clusEvol\n") | ||
oldop <- options() | ||
on.exit(options(oldop)) | ||
sl <- x$sl | ||
time <- x$sel[2] | ||
objs <- x$sel[1] | ||
if(type=="heat" | type == "all") | ||
{ | ||
p1 <-ggplot(sl,aes_string(time,objs,fill =target ))+ | ||
geom_tile(color= "white") + | ||
scale_fill_viridis(option ="C") | ||
|
||
if(plotly) | ||
{ | ||
suppressWarnings(print(plotly::ggplotly(p1))) | ||
}else{print(p1)} | ||
} | ||
|
||
if(type=="line" | type == "all") | ||
{ | ||
p2 <- ggplot(sl, aes_string(x=time, y=target, group=objs)) + | ||
geom_line(aes_string(color=objs))+ | ||
geom_point(aes_string(color=objs)) | ||
if(plotly) | ||
{ | ||
print(plotly::ggplotly(p2)) | ||
}else{print(p2)} | ||
} | ||
if(type=="boxplot" | type == "all") | ||
{ | ||
p3 <- ggplot(sl, aes_string(x=time, y=target, color=objs)) + | ||
geom_boxplot() | ||
if(plotly) | ||
{ | ||
print(plotly::ggplotly(p3)) | ||
}else{print(p3)} | ||
} | ||
|
||
} | ||
|
||
|
||
|
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
\name{actpas} | ||
\docType{data} | ||
\alias{actpas} | ||
\title{Assets and liabilities operations} | ||
\description{ | ||
Ecuador's amount of Assets and Liabilities Operations of the National Financial System: https://contenido.bce.fin.ec/home1/economia/tasas/IndiceSFN.htm | ||
} | ||
\usage{actpas} | ||
\format{A dataframe containing 358 observations and 25 columns.} | ||
\references{ | ||
Morales-Oñate, V., and Morales-Oñate, B. (2024). \emph{Cluster Evolution Analytics}. \url{https://mpra.ub.uni-muenchen.de/120220/} | ||
} | ||
\keyword{datasets} |
Oops, something went wrong.