Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] updated dba to support custom parameters #454

Merged
merged 15 commits into from
May 27, 2023
48 changes: 0 additions & 48 deletions aeon/clustering/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from aeon.clustering.metrics.averaging import _resolve_average_callable
from aeon.clustering.partitioning import TimeSeriesLloyds
from aeon.distances import pairwise_distance


class TimeSeriesKMeans(TimeSeriesLloyds):
Expand Down Expand Up @@ -87,19 +86,6 @@ def __init__(
self._average_params = average_params
if self.average_params is None:
self._average_params = {}
if averaging_method == "dba":
self._dba_medoids_distance_metric = "dtw"
self._precomputed_pairwise = None
if "medoids_distance_metric" in self._average_params:
self._dba_medoids_distance_metric = self._average_params[
"medoids_distance_metric"
]
if "averaging_distance_metric" in self._average_params:
average_dist = self._average_params["averaging_distance_metric"]
if average_dist == "ddtw":
self._average_params["averaging_distance_metric"] = "dtw"
if average_dist == "wddtw":
self._average_params["averaging_distance_metric"] = "wdtw"

super(TimeSeriesKMeans, self).__init__(
n_clusters,
Expand All @@ -113,27 +99,6 @@ def __init__(
distance_params,
)

def _fit(self, X: np.ndarray, y=None) -> np.ndarray:
"""Fit time series clusterer to training data.

Parameters
----------
X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
(n_instances, n_dimensions, series_length))
Training time series instances to cluster.
y: ignored, exists for API consistency reasons.

Returns
-------
self:
Fitted estimator.
"""
if self.averaging_method == "dba":
self._precomputed_pairwise = pairwise_distance(
X, metric=self._dba_medoids_distance_metric, **self._average_params
)
return super()._fit(X, y)

def _compute_new_cluster_centers(
self, X: np.ndarray, assignment_indexes: np.ndarray
) -> np.ndarray:
Expand All @@ -155,19 +120,6 @@ def _compute_new_cluster_centers(
for i in range(self.n_clusters):
curr_indexes = np.where(assignment_indexes == i)[0]

if self.averaging_method == "dba":
distance_matrix = np.zeros((len(curr_indexes), len(curr_indexes)))
for j in range(len(curr_indexes)):
curr_j = curr_indexes[j]
for k in range(len(curr_indexes)):
distance_matrix[j, k] = self._precomputed_pairwise[
curr_j, curr_indexes[k]
]

self._average_params[
"precomputed_medoids_pairwise_distance"
] = distance_matrix

result = self._averaging_method(X[curr_indexes], **self._average_params)
if result.shape[0] > 0:
new_centers[i, :] = result
Expand Down
4 changes: 2 additions & 2 deletions aeon/clustering/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
"""Metric for clustering."""
__all__ = ["medoids", "dba", "mean_average"]
from aeon.clustering.metrics.averaging import dba, mean_average
__all__ = ["medoids", "elastic_barycenter_average", "mean_average"]
from aeon.clustering.metrics.averaging import elastic_barycenter_average, mean_average
from aeon.clustering.metrics.medoids import medoids
4 changes: 2 additions & 2 deletions aeon/clustering/metrics/averaging/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
"""Time series averaging metrics."""
__all__ = ["dba", "mean_average", "_resolve_average_callable"]
__all__ = ["elastic_barycenter_average", "mean_average", "_resolve_average_callable"]
from aeon.clustering.metrics.averaging._averaging import (
_resolve_average_callable,
dba,
elastic_barycenter_average,
mean_average,
)
10 changes: 8 additions & 2 deletions aeon/clustering/metrics/averaging/_averaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import numpy as np

from aeon.clustering.metrics.averaging._dba import dba
from aeon.clustering.metrics.averaging._barycenter_averaging import (
elastic_barycenter_average,
)


def mean_average(X: np.ndarray, **kwargs) -> np.ndarray:
Expand All @@ -27,7 +29,11 @@ def mean_average(X: np.ndarray, **kwargs) -> np.ndarray:
return X.mean(axis=0)


_AVERAGE_DICT = {"mean": mean_average, "dba": dba}
_AVERAGE_DICT = {
"mean": mean_average,
"dba": elastic_barycenter_average, # Kept for backwards compatibility
"ba": elastic_barycenter_average,
}


def _resolve_average_callable(
Expand Down
143 changes: 143 additions & 0 deletions aeon/clustering/metrics/averaging/_barycenter_averaging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
__author__ = ["chrisholder"]

from typing import Tuple

import numpy as np
from numba import njit

from aeon.clustering.metrics.medoids import medoids
from aeon.distances import (
ddtw_alignment_path,
dtw_alignment_path,
edr_alignment_path,
erp_alignment_path,
msm_alignment_path,
squared_distance,
twe_alignment_path,
wddtw_alignment_path,
wdtw_alignment_path,
)


def elastic_barycenter_average(
    X: np.ndarray,
    metric: str = "dtw",
    max_iters: int = 30,
    tol: float = 1e-5,
    precomputed_medoids_pairwise_distance: np.ndarray = None,
    verbose: bool = False,
    **kwargs,
) -> np.ndarray:
    """Compute the barycenter average of time series using an elastic distance.

    This implements an adapted version of 'petitjean' (original) DBA algorithm [1]_.
    The average is initialised with the medoid of ``X`` and then refined by
    repeated calls to ``_ba_update`` until the cost stops improving.

    Parameters
    ----------
    X: np.ndarray, of shape (n_instances, n_channels, n_timepoints) or
            (n_instances, n_timepoints)
        A collection of time series instances to take the average from.
        A 2D collection is treated as univariate, i.e. it is viewed as
        (n_instances, 1, n_timepoints).
    metric: str, defaults = 'dtw'
        Name of the distance used for the medoid initialisation and for the
        alignment paths. The update step dispatches on the names 'dtw',
        'ddtw', 'wdtw', 'wddtw', 'erp', 'edr', 'twe' and 'msm'; any other
        value makes it raise a ValueError.
    max_iters: int, defaults = 30
        Maximum number iterations for dba to update over.
    tol : float (default: 1e-5)
        Tolerance to use for early stopping: if the change in cost between
        two consecutive iterations is lower than this value, the procedure
        stops.
    precomputed_medoids_pairwise_distance: np.ndarray (of shape (len(X), len(X)),
                defaults = None
        Precomputed pairwise distance matrix forwarded to ``medoids``.
    verbose: bool, defaults = False
        If True, print the cost after every iteration that did not stop early.
    **kwargs
        Keyword arguments to pass to the distance metric. For 'wdtw' and
        'wddtw' the weight ``g`` defaults to 0.05 for the update step when it
        is not supplied.

    Returns
    -------
    np.ndarray of shape (n_channels, n_timepoints)
        Time series that is the average of the collection of instances provided
        (``n_channels`` is 1 for 2D input). If ``X`` holds at most one
        instance, ``X`` itself is returned unchanged.

    Raises
    ------
    ValueError
        If ``X`` holds more than one instance and is neither 2D nor 3D.

    References
    ----------
    .. [1] F. Petitjean, A. Ketterlin & P. Gancarski. A global averaging method
       for dynamic time warping, with applications to clustering. Pattern
       Recognition, Elsevier, 2011, Vol. 44, Num. 3, pp. 678-693
    """
    # Nothing to average: hand the (empty or single-instance) input back as is.
    if len(X) <= 1:
        return X

    # The update step unpacks exactly three dimensions, so promote univariate
    # 2D input to a single-channel 3D view and reject anything else up front.
    if X.ndim == 3:
        _X = X
    elif X.ndim == 2:
        _X = X.reshape((X.shape[0], 1, X.shape[1]))
    else:
        raise ValueError(
            "X must be a 2D (n_instances, n_timepoints) or 3D "
            f"(n_instances, n_channels, n_timepoints) array, got {X.ndim}D"
        )

    # Start from the medoid of the collection rather than the arithmetic mean.
    center = medoids(
        _X,
        distance_metric=metric,
        precomputed_pairwise_distance=precomputed_medoids_pairwise_distance,
        **kwargs,
    )

    # The weighted variants need a weight; fall back to 0.05 when none is given.
    if metric in ("wdtw", "wddtw"):
        kwargs.setdefault("g", 0.05)

    cost_prev = np.inf
    for i in range(max_iters):
        center, cost = _ba_update(center, _X, metric, **kwargs)
        # Stop once the cost has converged or has started to increase.
        if abs(cost_prev - cost) < tol or cost_prev < cost:
            break
        cost_prev = cost

        if verbose:
            print(f"[DBA aeon] epoch {i}, cost {cost}")  # noqa: T001, T201
    return center


@njit(cache=True, fastmath=True)
def _ba_update(
    center: np.ndarray,
    X: np.ndarray,
    metric: str = "dtw",
    window: float = None,
    g: float = 0.0,
    epsilon: float = None,
    nu: float = 0.001,
    lmbda: float = 1.0,
    independent: bool = True,
    c: float = 1.0,
) -> Tuple[np.ndarray, float]:
    """Perform one barycenter-averaging update of ``center`` against ``X``.

    Every series in ``X`` is aligned to ``center`` with the alignment-path
    function selected by ``metric``. Each aligned point of a series is added
    to the centre time point it is matched with, and the new centre is the
    per-time-point mean of those contributions.

    Parameters
    ----------
    center: np.ndarray
        Current average, indexed as ``center[:, k]`` per time point.
    X: np.ndarray, of shape (n_instances, n_channels, n_timepoints)
        Collection of series to average; must be 3D.
    metric: str, defaults = 'dtw'
        One of 'dtw', 'ddtw', 'wdtw', 'wddtw', 'erp', 'edr', 'twe', 'msm'.
    window, g, epsilon, nu, lmbda, independent, c
        Forwarded positionally to the alignment-path function of the chosen
        metric; their meaning is defined by that function.

    Returns
    -------
    Tuple[np.ndarray, float]
        The updated centre of shape (n_channels, n_timepoints) and the cost,
        i.e. the summed squared distance between aligned points of the series
        and the *incoming* ``center``, divided by the number of time points.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    X_size, X_dims, X_timepoints = X.shape
    # How many series points were aligned to each centre time point
    # (named ``counts`` so the builtin ``sum`` is not shadowed).
    counts = np.zeros(X_timepoints)

    # Running per-time-point sum of the aligned series values.
    alignment = np.zeros((X_dims, X_timepoints))
    cost = 0.0
    for i in range(X_size):
        curr_ts = X[i]
        # Explicit if/elif dispatch is kept so the jitted function can resolve
        # every call statically.
        if metric == "dtw":
            curr_alignment, _ = dtw_alignment_path(curr_ts, center, window)
        elif metric == "ddtw":
            curr_alignment, _ = ddtw_alignment_path(curr_ts, center, window)
        elif metric == "wdtw":
            curr_alignment, _ = wdtw_alignment_path(curr_ts, center, window, g)
        elif metric == "wddtw":
            curr_alignment, _ = wddtw_alignment_path(curr_ts, center, window, g)
        elif metric == "erp":
            curr_alignment, _ = erp_alignment_path(curr_ts, center, window, g)
        elif metric == "edr":
            curr_alignment, _ = edr_alignment_path(curr_ts, center, window, epsilon)
        elif metric == "twe":
            curr_alignment, _ = twe_alignment_path(curr_ts, center, window, nu, lmbda)
        elif metric == "msm":
            curr_alignment, _ = msm_alignment_path(
                curr_ts, center, window, independent, c
            )
        else:
            raise ValueError(f"Metric must be a known string, got {metric}")
        # j indexes the series, k the centre time point it is aligned with.
        for j, k in curr_alignment:
            alignment[:, k] += curr_ts[:, j]
            counts[k] += 1
            cost += squared_distance(curr_ts[:, j], center[:, k])

    return alignment / counts, cost / X_timepoints
124 changes: 0 additions & 124 deletions aeon/clustering/metrics/averaging/_dba.py

This file was deleted.