Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Wolfgang Tschacher authored and cran-robot committed Nov 7, 2023
0 parents commit ebf0b25
Show file tree
Hide file tree
Showing 9 changed files with 526 additions and 0 deletions.
21 changes: 21 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Package: mvSUSY
Version: 0.1.0
Title: Multivariate Surrogate Synchrony
Authors@R: c(
person("Wolfgang","Tschacher", role=c("aut","cre"), email="wolfgang.tschacher@unibe.ch"),
person("Deborah","Meier", role="aut", email="meier_deborah@gmx.net"),
person("Jan","Gorecki", role="ctb"))
Description: Multivariate Surrogate Synchrony ('mvSUSY') estimates the synchrony within datasets that contain more than two time series. 'mvSUSY' was developed from Surrogate Synchrony ('SUSY') with respect to implementing surrogate controls, and extends synchrony estimation to multivariate data. 'mvSUSY' works as described in Meier & Tschacher (2021).
Imports: data.table, RcppAlgos, ggplot2, ggsci
Suggests: plotly
License: GPL-2
URL: https://wtschacher.github.io/mvSUSY/
BugReports: https://github.com/wtschacher/mvSUSY/issues
NeedsCompilation: no
Packaged: 2023-11-07 15:57:57 UTC; jan
Author: Wolfgang Tschacher [aut, cre],
Deborah Meier [aut],
Jan Gorecki [ctb]
Maintainer: Wolfgang Tschacher <wolfgang.tschacher@unibe.ch>
Repository: CRAN
Date/Publication: 2023-11-07 19:30:02 UTC
8 changes: 8 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
3cb939b116ba7b54077663afdcdc5b32 *DESCRIPTION
ca1ec829cfbae2c3bf105242254724a4 *NAMESPACE
4d5b87af99dbec7461a884e61c1b2d7a *R/mvsusy.R
728bc0c11abac1090b03ea8421e2733a *README.md
de30684f0d91f101c013d639f7f27b26 *man/as.data.frame.Rd
0aaeea1a418290a738910c01cbaac848 *man/mvsusy.Rd
09bf27d1bd7310f24a483adb4c1620d6 *man/plot.Rd
2af4e0c6153b6aef6941e73606dbdc29 *man/print.Rd
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
importFrom(stats, cov, cor, na.omit, t.test, wilcox.test, sd)
importFrom(RcppAlgos, permuteSample)
importFrom(data.table, as.data.table, dcast, ":=", melt)
import(ggplot2)
importFrom(ggsci, pal_npg)
importFrom(grDevices, colorRampPalette)
export(mvsusy)
S3method(plot, mvsusy)
S3method(print, mvsusy)
S3method(as.data.frame, mvsusy)
279 changes: 279 additions & 0 deletions R/mvsusy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
isTRUEorFALSE = function(x) {
isTRUE(x) || isFALSE(x)
}
omega = function(x, ...) {
Hact = det(abs(cov(x)))
Hpot = prod(diag(cov(x)))
1-(Hact/Hpot)
}
lambda_max = function(x, ...) {
corx = cor(x)
corx[is.na(corx)] = 0
eigenwerte = eigen(corx)
eigenv = eigenwerte$values
max(eigenv) / (sum(eigenv)*.01)
}
eigenvalue = function(x) {
eigenwerte = cor(x)
eigenwerte[is.na(eigenwerte)] = 0
eigen(eigenwerte)$values
}
as.mvsusy = function(x) {
if (inherits(x, "mvsusy"))
return(x)
if (!is.list(x))
stop("only list class objects can be turned into mvsusy objects")
class(x) = unique(c("mvsusy", class(x)))
x
}

mvsusy = function(x, segment, Hz,
method = c("lambda_max","omega"),
max_pseudo = 1000,
seed = 1) {
## 2.1 Warnings - check parameter settings
if (!is.data.frame(x))
stop("'x' must be a data.frame")
if (is.null(names(x)))
stop("'x' must have named columns")
if (!all(vapply(x, is.numeric, FALSE, USE.NAMES=FALSE)))
stop("'x' must have numeric columns")
if (!is.numeric(segment) || length(segment)!=1L || is.na(segment))
stop("'segment' must be scalar non-NA numeric")
if (!is.numeric(Hz) || length(Hz)!=1L || is.na(Hz))
stop("'Hz' must be scalar non-NA numeric")
cols = names(x)
N = length(cols)
x = na.omit(x)
data = x ## return for plot type time series
nx = ncol(x)
if (nx < 2L)
stop("'x' must have at least 2 columns")
segment = as.integer(segment)
Hz = as.integer(Hz)
method = match.arg(method)
segmentHz = segment*Hz
nsegment = floor(nrow(x)/segmentHz)
if (segmentHz > ((nsegment*segmentHz) / ncol(x)))
stop("Segment size is invalid: maximum segment size is ", (nsegment*segmentHz) / (ncol(x)*Hz))
# 3.0 Real synchrony
## 3.1 Reshape data for segmenting
if (nrow(x)%%segmentHz) { ## cut
x = x[seq_len(nsegment*segmentHz),, drop=FALSE]
}
x$segment = rep(seq_len(nsegment), each=segmentHz)
x$row = rep(seq_len(segmentHz), nsegment)
xx = as.data.frame(dcast(as.data.table(x), row ~ segment, value.var=cols))
xx$row = NULL
## 3.2 Generate list of real segment-combinations
comb_real = paste0(cols, "_", rep(seq_len(nsegment), each=nx))
comb_real = split(comb_real, ceiling(seq_along(comb_real)/nx))
names(comb_real) = paste0('segment_combo_', seq_along(comb_real))
## 3.3 Expand data based on list of real segment-combinations
df_real = lapply(comb_real, function(cols, x) x[cols], x=xx)
## 3.4 Define mv-synchrony function (based on method)
method_fun = if (method=="lambda_max") lambda_max else if (method=="omega") omega else stop("internal error: unsupported method should ba cought by now")
## 3.5 Apply synchrony function per real segment-combination
synchrony_real = sapply(df_real, method_fun)
## 3.6 Define eigenvalue function (based on method)
### done in global package namespace
# 4.0 Pseudo synchrony
## 4.1 Generate list of pseudo segment-combinations
err = character()
tryCatch(comb_pseudo <- permuteSample(
v = seq_len(nsegment), m = nx, n = max_pseudo, seed = seed,
FUN = function(i) paste(cols, i, sep="_")
), error = function(e) err <<- e$message)
if (length(err)) {
if (err=="n exceeds the maximum number of possible results")
stop("'max_pseudo' argument value exceeds the maximum number of permutations, decrease value of the argument")
else
stop("RcppAlgos::permuteSample returned error:\n", err)
}
## 4.2 Expand data based on list of pseudo segment-combinations
df_pseudo = lapply(comb_pseudo, function(cols, x) x[cols], x=xx)
names(df_pseudo) = paste0("segment_combo_pseudo_", seq_along(df_pseudo))
## 4.3 Apply synchrony function per pseudo segment-combination
synchrony_pseudo = sapply(df_pseudo, method_fun)
# 5.0 Summarize synchrony data
## 5.1 Reshape synchrony data
matrix_pseudo = data.frame(variable = "synchrony_pseudo", value=synchrony_pseudo)
matrix_real = data.frame(variable = "synchrony_real", value=synchrony_real)
synchrony = rbind(matrix_pseudo, matrix_real)
rownames(synchrony) = NULL
synchrony$variable = as.factor(synchrony$variable)
### 5.1.1 Parametric and nonparametric tests
t_tests = t.test(value ~ variable, data = synchrony)
wilcox_tests = wilcox.test(value ~ variable, data = synchrony)
### 5.1.2 Real and pseudo synchrony indices
real_mean = mean(matrix_real$value)
real_sd = sd(matrix_real$value)
pseudo_mean = mean(matrix_pseudo$value)
pseudo_sd = sd(matrix_pseudo$value)
### 5.1.3 Effect size
ES_synchrony = (real_mean - pseudo_mean) / pseudo_sd
## 5.2 Generate eigenvalue data
if (method=="lambda_max") {
eigenvalue_real = sapply(df_real, eigenvalue)
eigenvalue_pseudo = sapply(df_pseudo, eigenvalue)
eigenvalue_real = data.frame(data="real", segment=rep(colnames(eigenvalue_real), each=nx), value=c(eigenvalue_real))
eigenvalue_pseudo = data.frame(data="pseudo", segment=rep(colnames(eigenvalue_pseudo), each=nx), value=c(eigenvalue_pseudo))
ev = rbind(eigenvalue_pseudo, eigenvalue_real)
value = .N = NULL ## resolve NSE check notes only
ev = as.data.frame(as.data.table(ev)[order(-value), "segment_num" := seq_len(.N), by=c("data","segment")])
} else {
ev = as.data.frame(NULL)
}
ans = list(
method=method, n_col=nx, n_row=nrow(x), seed=seed, n_pseudo=length(comb_pseudo),
segment_size_s = segment, data_per_segment = segmentHz*nx,
real_mean=real_mean, real_sd=real_sd, pseudo_mean=pseudo_mean, pseudo_sd=pseudo_sd, ES_synchrony=ES_synchrony, EV=ev,
t_tests = t_tests, wilcox_tests = wilcox_tests,
nsegment = nsegment, max_pseudo = max_pseudo,
synchrony = synchrony, data = data, segmentHz = segmentHz
)
as.mvsusy(ans)
}

as.data.frame.mvsusy = function(x, row.names=NULL, optional=FALSE, ...) {
if (!inherits(x, "mvsusy"))
stop("'x' must be an object of class 'mvsusy'")
ans = data.frame(
x$method, x$n_col, x$n_row, x$seed, x$n_pseudo, x$segment_size_s, x$data_per_segment,
x$real_mean, x$real_sd, x$pseudo_mean, x$pseudo_sd, x$ES_synchrony,
x$t_tests$statistic, x$t_tests$p.value,
x$wilcox_tests$statistic, x$wilcox_tests$p.value
)
colnames(ans) = c(
"method", "ncol", "nrow", "seed", "npseudo", "segment_size_s", "data_per_segment",
"real_mean", "real_sd", "pseudo_mean", "pseudo_sd", "ES",
"t_statistic", "p_value", "statistic_nonpar", "p_value_nonpar"
)
as.data.frame(ans)
}

print.mvsusy = function(x, ...) {
if (!inherits(x, "mvsusy"))
stop("'x' must be an object of class 'mvsusy'")
df = as.data.frame(x)
df$real_mean = round(df$real_mean,5)
df$real_sd = round(df$real_sd,5)
df$pseudo_mean = round(df$pseudo_mean,5)
df$pseudo_sd = round(df$pseudo_sd,5)
df$ES = round(df$ES,5)
df$t_statistic = round(df$t_statistic,5)*-1 ## we want t-statistic to be positive unlike when surrogates set is bigger than real dataset
df$p_value = format(df$p_value, scientific=FALSE)
df$statistic_nonpar = format(df$statistic_nonpar, scientific=FALSE)
df$p_value_nonpar = format(df$p_value_nonpar, scientific=FALSE)
colnames(df) = c(
"method", "n(col)", "n(row)", "seed", "n(pseudo)", "segment size (s)", "data per segment",
"mean(synchrony real)", "sd(synchrony real)", "mean(synchrony pseudo)", "sd(synchrony pseudo)", "ES",
"t statistic", "p-value", "statistic nonpar", "p-value nonpar"
)
print(df, ...=...)
invisible(x)
}

## plot various types of plots using mvsusy object
plot.mvsusy = function(x, type=c("eigenvalue","density","free scale","segment-wise","time series"), ..., bins, plotly) {
if (!inherits(x, "mvsusy"))
stop("'x' must be an object of class 'mvsusy'")
type = match.arg(type)
if (!missing(plotly)) {
if (!(identical(plotly, TRUE) || identical(plotly, FALSE)))
stop("Argument 'plotly' must be TRUE or FALSE")
if (isTRUE(plotly) && type!="time series")
stop("Using 'plotly' is only implemented for time series type of mvSUSY plot")
}
if (!missing(bins) && type!="free scale") {
stop("Argument 'bins' is only used for free scale type of mvSUSY plot")
}
# check NSE notes
segment_num = segment = variable = . = mean.var = segment_id= NULL
if (type=="eigenvalue") {
if (x$method!="lambda_max")
stop("plot mvSUSY of type eigenvalue is only for mvSUSY computed using 'lambda_max' method")
p = ggplot(x$EV, aes(x = as.factor(segment_num), group = segment)) +
geom_line(aes(y = value, colour = data)) +
scale_color_manual(label = c('pseudo'= "surrogates", 'real'="real"),
values = c('real' = 'chartreuse4', 'pseudo' = 'brown3'))+
scale_y_continuous(breaks = seq(0, max(x$EV$value), by = 1)) +
facet_wrap(~data, labeller = as_labeller(c(`pseudo` = sprintf("%s surrogates", x$max_pseudo), `real` = sprintf("%s segments", x$nsegment)))) +
labs(y = "eigenvalue", x = "dimension",
title = "Eigenvalues for real and surrogate data", subtitle = paste( "segment-size =", x$segment_size_s)) +
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), text = element_text(vjust = 0, size = 15, family="serif"), strip.text = element_text(size=15))
} else if (type=="density") {
p = ggplot(x$synchrony, aes(x = value, colour = variable))+
geom_density(aes(x = value, fill = variable), alpha = .1, linewidth = 1, show.legend = FALSE)+
geom_rug(aes(x = value, y = 0))+
scale_colour_manual('data',
label = c('synchrony_pseudo'= "surrogates", 'synchrony_real'="real"),
values = c('synchrony_real' = 'chartreuse4', 'synchrony_pseudo' = 'brown3'))+
scale_fill_manual('data',
values = c('synchrony_real' = 'chartreuse4', 'synchrony_pseudo' = 'brown3'))+
labs(y = "density", x = "synchrony") +
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), text = element_text(vjust = 0, size = 12, family="serif"))+
labs(title = "Density: real vs. surrogate synchrony")
} else if (type=="free scale") {
value = NULL ## fix check NSE notes
vlines = as.data.frame(as.data.table(x$synchrony)[, .(mean.var=mean(value)), by="variable"])
rm(value)
p = ggplot(x$synchrony, aes(x = value, fill = variable))+
geom_histogram(color="#e9ecef",alpha=0.8, position = 'identity', bins = if (missing(bins)) x$nsegment*.5 else bins)+
geom_vline(data = vlines, aes(xintercept = mean.var), linetype = "dashed", linewidth = 0.8, alpha = 0.9)+
facet_wrap(~variable, scales ="free", labeller = as_labeller(c('synchrony_pseudo'= sprintf("%s surrogates", x$max_pseudo), 'synchrony_real'=sprintf("%s segments", x$nsegment))))+
theme_light()+
scale_fill_manual(values = c("brown3","chartreuse4"))+
theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
theme(text = element_text(vjust = 0, size = 15, family="serif"), strip.text = element_text(size=15))+
labs(x="synchrony", title = "Histogram of multivariate synchrony", subtitle = paste0("segment-size = ", x$segment_size_s, ", method = ", x$method))
} else if (type=="segment-wise") {
real = x$synchrony[x$synchrony$variable == "synchrony_real", , drop=FALSE]
real$segment_id = seq_len(nrow(real))
p = ggplot(real, aes(x=segment_id, y=value))+
geom_bar(stat = "identity", fill = "chartreuse4", alpha=.8, width=.9)+
geom_hline(aes(yintercept = x$pseudo_mean,
linetype = "mean surrogate synchrony"), color = "brown3", linewidth=1.5, alpha=.5)+
geom_hline(aes(yintercept = x$pseudo_mean-x$pseudo_sd,
linetype = "SD surrogate synchrony"), color = "brown3", linewidth=1, alpha=.5)+
geom_hline(aes(yintercept = x$pseudo_mean+x$pseudo_sd,
linetype = "SD surrogate synchrony"), color = "brown3", linewidth=1, alpha=.5, show.legend=FALSE)+
labs(title="Synchrony per segment", x = "segment", y = "synchrony")+
theme_minimal()+
theme(text = element_text(vjust = 0, size = 12, family="serif"), strip.text = element_text(size=12))+
theme(legend.position = "bottom", legend.title = element_blank())+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
} else if (type=="time series") {
data = x$data
data$counter = seq_len(nrow(data))
counter = column = NULL ## check NSE notes
data_ts = as.data.frame(melt(as.data.table(data), id.vars="counter", measure.vars=colnames(x$data), variable.name = "column")[order(counter, column)])
rm(counter, column)
sci_palette = pal_npg("nrc", alpha = 0.7)(9)
sci_palette = colorRampPalette(sci_palette)(ncol(x$data))
update_geom_defaults("line", list(color = sci_palette))
p = ggplot(data_ts, aes(x=counter, y=value, group=column))+
geom_line(linewidth=0.8, alpha=.5)+
geom_vline(xintercept = seq(0, nrow(data), by = x$segmentHz), alpha=0.25, linetype="longdash")+
labs(x="time", y="value", colour=NULL)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
theme(text = element_text(vjust = 0, size = 15, family="serif"))+
theme(legend.position = "none")
if (missing(plotly)) plotly = TRUE
if (plotly && requireNamespace("plotly", quietly=TRUE)) {
p = plotly::layout(
plotly::ggplotly(p),
title=paste("time series with segment size =", x$segment_size_s)
)
} else {
if (plotly) { ## verbose message disabled when plotly=FALSE
cat("For interactive mvSUSY time series plot install 'plotly' package\n")
}
p = p + ggtitle(paste("time series with segment size =", x$segment_size_s))
}
}
p
}
59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
mvSUSY
----

Multivariate Surrogate Synchrony (mvSUSY) estimates the synchrony within datasets that contain more than two time series. mvSUSY was developed from Surrogate Synchrony (SUSY) with respect to implementing surrogate controls, and extends synchrony estimation to multivariate data. 'mvSUSY' works as described in Meier & Tschacher (2021).

[R package website](https://wtschacher.github.io/mvSUSY/)

----

Installation
----

```r
#install.packages("mvSUSY") ## not yet on CRAN

## development version
install.packages("mvSUSY", repos=c("https://wtschacher.github.io/mvSUSY/","https://cloud.r-project.org"))
```

Usage
----

Note that the following example assumes that the source data are in a flat file and it has particular structure (column names in first row, whitespace as field separator). If you do not have such, then use the command in the comment below to mockup random data.

```r
library(mvSUSY)

## read in data from a flat file
data = read.csv(file.choose(), header=TRUE, sep=" ", na.strings=".")

## mockup random data if needed
#data = as.data.frame(replicate(5, sample(10, 5000, TRUE)))

## compute mvSUSY using 'lambda_max' method
res = mvsusy(data, segment=10, Hz=10)
res

## plot
plot(res, type="eigenvalue")
plot(res, type="density")
plot(res, type="free scale")
plot(res, type="segment-wise")
plot(res, type="time series")

## compute mvSUSY using 'omega' method
res = mvsusy(data, segment=10, Hz=10, method="omega")
res

plot(res, type="density")
plot(res, type="free scale")
plot(res, type="segment-wise")
plot(res, type="time series")

## export to flat file via data.frame and write.csv
df = as.data.frame(res)
df
```

[`mvsusy` function manual](https://wtschacher.github.io/mvSUSY/library/mvSUSY/html/mvsusy.html)

0 comments on commit ebf0b25

Please sign in to comment.