Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
vmoprojs authored and cran-robot committed Feb 27, 2024
0 parents commit baca314
Show file tree
Hide file tree
Showing 10 changed files with 468 additions and 0 deletions.
22 changes: 22 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Package: clusEvol
Type: Package
Title: A Procedure for Cluster Evolution Analytics
Version: 1.0.0
Date: 2024-02-19
Authors@R: c(person("Víctor", "Morales-Oñate",role=c("aut","cre"),email="victor.morales@uv.cl",comment = c(ORCID = "0000-0003-1922-6571")),person("Bolívar", "Morales-Oñate", role = "aut",email = "bmoralesonate@gmail.com",comment = c(ORCID = "0000-0003-4980-8759")))
Maintainer: Víctor Morales-Oñate <victor.morales@uv.cl>
Description: Cluster Evolution Analytics allows us to use exploratory what if questions in the sense that the present information of an object is plugged-in a dataset in a previous time frame so that we can explore its evolution (and of its neighbors) to the present. See the URL for the papers associated with this package, as for instance, Morales-Oñate and Morales-Oñate (2024) <https://mpra.ub.uni-muenchen.de/120220>.
Depends: R (>= 4.1.0)
License: GPL (>= 3)
Encoding: UTF-8
Imports: ggplot2,plotly,cluster,fpc,viridis,clusterSim,dplyr
Repository: CRAN
URL: https://github.com/vmoprojs/clusEvol
BugReports: https://github.com/vmoprojs/clusEvol/issues
LazyData: true
NeedsCompilation: no
Packaged: 2024-02-23 23:27:37 UTC; victormorales
Author: Víctor Morales-Oñate [aut, cre]
(<https://orcid.org/0000-0003-1922-6571>),
Bolívar Morales-Oñate [aut] (<https://orcid.org/0000-0003-4980-8759>)
Date/Publication: 2024-02-26 18:50:02 UTC
9 changes: 9 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
60665db9e6325e3ac65fd43b1871179e *DESCRIPTION
0550f6ae158da25b920fe71c0ebd0a44 *NAMESPACE
f5a6d11e305f15d8dff3dccdcb4e9162 *R/clusEvol.R
9af57303507736d5e81d7777f16d16c6 *data/actpas.RData
899ff227b3d63fec5e613739eda8d3cb *data/pwt1001.RData
d21ee51e8ce6673757f54d9eded0e4b5 *man/actpas.Rd
84305660e4106539a8ca77f30b84caa4 *man/clusEvol.Rd
325ad04944ce4e4c5b4a9f8cdd74f8ce *man/plot.clusEvol.Rd
c71d3326e0e4041c3ec6ec53bd4a79e4 *man/pwt1001.Rd
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export(clusEvol)
importFrom("stats", "formula","terms","dist")
importFrom("utils", "stack")
importFrom("fpc", "cluster.stats")
importFrom("cluster", "pam")
importFrom("viridis", "scale_fill_viridis")
importFrom("ggplot2", "ggplot", "aes", "stat_ecdf", "geom_boxplot","geom_point","geom_line","aes_string","geom_tile")
importFrom("plotly","ggplotly")
S3method(print,clusEvol)
S3method(plot,clusEvol)
279 changes: 279 additions & 0 deletions R/clusEvol.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
####################################################
### File name: clusEvol.r
####################################################


clusEvol <- function (x=NULL,objects=NULL, time = NULL,target.vars = NULL,
time.base=NULL,sel.obj=NULL,init = NULL,logscale = FALSE,
ng = NULL,clm = "pam",scale=TRUE,clstats = FALSE,...){

# x: dataframe
# objects: variable name of objects
# time: variable name of time
# labels: objects labels (same lenght as objects)
# target.vars: selected variables for CEA
# time.base: selected year for CEA
# sel.obj: selected object for CEA
# init: initiation year
# logscale: TRUE if data should be logscaled
# ng: number of desired clusters
# clm: (pam,kmeans,choose)
# ... parameters of the clm chosen

# revisar paquete que recomienda el clustering del TASK VIEW

datos <- x[,c(objects,time,target.vars)]

# ST: if init is missing, take minimum year
if(is.null(init)) {
init <- min(datos[,time])
}else{
datos <- datos[which(datos[time]>=init),]
}
# END: if init is missing, take minimum year

# yrs.base <- input$yrs1
summary(datos)


nas <- apply(datos,2,function(x) sum(is.na(x)))
if(any(nas>0)){
warning("Selected variables have missing values, use a balaced panel data,NA's will be replaced with mean values")

if(length(target.vars)>1)
{
mm <- colMeans(datos[,target.vars],na.rm = TRUE)
}else{
mm <- mean(datos[,target.vars],na.rm = TRUE)
}

for(tv in 1:length(target.vars))
{
anas <- is.na(datos[,target.vars[tv]])
datos[which(anas),target.vars[tv]] <- mm[tv]
}

}

yrs <- unique(datos[,time])

sol.yrs <- NULL #cheks if sel.obj is in everytime period
for(t in 1:length(yrs))
{
fil <- datos[which(datos[,time]==yrs[t]),]
sol.yrs <- c(sol.yrs,sel.obj %in% fil[,objects])
}
if(!all(sol.yrs))
{
# warning("Times with no selected object: \n")
# print(yrs[which(sol.yrs)])
stop("Selected object must be included in all analyzed time periods")
}

# ST: dataframe for time.base:
refdatos <- datos[datos[,time]==time.base,c(objects,time,target.vars)]
if(logscale & length(target.vars)>1){
refdatos[,target.vars] <- apply(refdatos[,target.vars],2,log)
}
if(logscale & length(target.vars)==1){
refdatos[,target.vars] <- log(refdatos[,target.vars])
}
# END: dataframe for time.base

# ST: ECbase dataframe for sel.obj
ECbase <- refdatos[refdatos[,objects]==sel.obj,c(objects,time,target.vars)]
# END: ECbase dataframe for sel.obj

results <- list()
contador <- 1

Ksol = NULL
nbelongTot <- NULL
kmodelSol <- NULL
clusterStats <- NULL
for (j in 1:length(yrs))
{

refdatos <- datos[which(datos[,time]==yrs[j]),c(objects,time,target.vars)]

if(logscale & length(target.vars)>1){
refdatos[,target.vars] <- apply(refdatos[,target.vars],2,log)
}
if(logscale & length(target.vars)==1){
refdatos[,target.vars] <- log(refdatos[,target.vars])
}


refdatos[which(refdatos[,objects]==sel.obj),] <- ECbase #replace ECU values
# Include data imputation here if needed

temp1 <- as.matrix(refdatos[,target.vars],ncol = length(target.vars))
rownames(temp1) <- refdatos[,objects]


# Clustering:
if(clm =="pam")
{
if(scale)
{
clm.args <- list(x=scale(temp1),k=ng,...)
}else{
clm.args <- list(x=temp1,k=ng,...)
}
kmodel <- do.call(cluster::pam,args = clm.args)
kmodelSol[[j]] <- kmodel

if(clstats)
{
clusterStats[[j]] <- fpc::cluster.stats(kmodel$diss,kmodel$clustering)
}

clase <- kmodel$clustering
}

if(clm =="kmeans")
{
if(scale)
{
clm.args <- list(x=scale(temp1),centers=ng,...)
}else{
clm.args <- list(x=temp1,centers=ng,...)
}

kmodel <- do.call(stats::kmeans,args = clm.args)
kmodelSol[[j]] <- kmodel

if(clstats)
{
if(scale)
{
clusterStats[[j]] <- fpc::cluster.stats(stats::dist(temp1),kmodel$cluster)
}else{
clusterStats[[j]] <- fpc::cluster.stats((temp1),kmodel$cluster)
}
}


clase <- kmodel$cluster
}
if(clm == "choose")
{
clm.args <- list(x=temp1,...)
kmodel <- do.call(clusterSim::cluster.Sim,args = clm.args)
kmodelSol[[j]] <- kmodel
clase <- kmodel$optClustering
}


auxK = cbind(clase,yrs[j])
Ksol = rbind(Ksol,auxK )

# ST: Find sel.obj's group and data
gbase <- list()
for(i in 1:ng)
{
gbase[[i]] <- as.vector(refdatos[,1][clase==i])
}

nbelong <- grep(sel.obj,gbase)# number of group which sel.obj belongs to
nbelongTot <- c(nbelongTot,nbelong)
g.fin <- refdatos[match(gbase[[nbelong]],refdatos[,objects]),]# Data in sel.obj group
g.fin$time = yrs[j]
results[[contador]] <- g.fin
contador <- contador+1
}

Ksol = data.frame( objects = rownames(Ksol), cluster = Ksol[,1],time = Ksol[,2])
# datos: input data
# target.vars: selected variables
# results: data frame of negighbours of sel.obj
# ECk: cluster that sel.obj belongs to
# sumdat: summary statistics of "datos"
# kmodelSol: output of clustering algorithm in each iteration
# kmodelSol: cluster statistics in each clustering
# sl: cluster evolution in time

sl <- dplyr::bind_rows(results, .id = "time")

sumdat <- summary(datos[,target.vars])

clusEvol <- list(datos = datos,target.vars=target.vars,
results = results,ECk = nbelong,ECkTot = nbelongTot,
Clus = Ksol,sumdat=sumdat,kmodelSol=kmodelSol,
clusterStats=clusterStats,sl = sl,sel=c(objects,time),sel.obj=sel.obj)
structure(c(clusEvol, call = call), class = c("clusEvol"))
}

print.clusEvol <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
if(!inherits(x,"clusEvol")) stop("Enter an object obtained from the function clusEvol\n")

tn <- unique(x$Clus$time)
sel.obj <- x$sel.obj
# number of elements in sel.obj cluster:
nc <- sapply(x$results,nrow)-1
names(nc) <- tn
# Cluster that sel.obj belongs to:
# xs <- data.frame(time = unique(x$Clus$time),clus = x$ECkTot)
xs <- x$ECkTot
names(xs) <- tn
# Clusters in time:
tt <- table(x$Clus$time,x$Clus$cluster)

cat('\n##################################################################')
cat('\nclusEvol: Cluster Evolution Analytics\n')
cat('\n\nNumber of neighbours ', sel.obj,'is a group member: ', '\n')
print(nc)
cat('\n\nCluster that ', sel.obj, 'belongs to: ','\n')
print(xs)
options(digits = digits)
cat('\nClusters in time:\n')
print.default(tt, digits = digits, print.gap = 2,
quote = FALSE)
}


plot.clusEvol <- function(x,target,type = "heat",plotly=FALSE,...)
{
if(!inherits(x,"clusEvol")) stop("Enter an object obtained from the function clusEvol\n")
oldop <- options()
on.exit(options(oldop))
sl <- x$sl
time <- x$sel[2]
objs <- x$sel[1]
if(type=="heat" | type == "all")
{
p1 <-ggplot(sl,aes_string(time,objs,fill =target ))+
geom_tile(color= "white") +
scale_fill_viridis(option ="C")

if(plotly)
{
suppressWarnings(print(plotly::ggplotly(p1)))
}else{print(p1)}
}

if(type=="line" | type == "all")
{
p2 <- ggplot(sl, aes_string(x=time, y=target, group=objs)) +
geom_line(aes_string(color=objs))+
geom_point(aes_string(color=objs))
if(plotly)
{
print(plotly::ggplotly(p2))
}else{print(p2)}
}
if(type=="boxplot" | type == "all")
{
p3 <- ggplot(sl, aes_string(x=time, y=target, color=objs)) +
geom_boxplot()
if(plotly)
{
print(plotly::ggplotly(p3))
}else{print(p3)}
}

}



Binary file added data/actpas.RData
Binary file not shown.
Binary file added data/pwt1001.RData
Binary file not shown.
13 changes: 13 additions & 0 deletions man/actpas.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
\name{actpas}
\docType{data}
\alias{actpas}
\title{Assets and liabilities operations}
\description{
Ecuador's amount of Assets and Liabilities Operations of the National Financial System: https://contenido.bce.fin.ec/home1/economia/tasas/IndiceSFN.htm
}
\usage{actpas}
\format{A dataframe containing 358 observations and 25 columns.}
\references{
Morales-Oñate, V., and Morales-Oñate, B. (2024). \emph{Cluster Evolution Analytics}. \url{https://mpra.ub.uni-muenchen.de/120220/}
}
\keyword{datasets}

0 comments on commit baca314

Please sign in to comment.