Merge pull request #31 from hichamjanati/dtw-diffsize

[WIP] DTW computation for time series with different lengths
johannfaouzi · Sep 22, 2019 · 706f302 · 706f302
2 parents 5b4310f + ca519b5
commit 706f302
Show file tree

Hide file tree

Showing 8 changed files with 638 additions and 228 deletions.
diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -4,6 +4,14 @@
 Change Log
 ==========
 
+
+Version 0.10.0
+--------------
+
+- Adapt `pyts.metrics.dtw` to compare time series with different lengths (Hicham Janati)
+
+
+
 Version 0.9.0
 -------------
 

diff --git a/examples/metrics/plot_dtw.py b/examples/metrics/plot_dtw.py
@@ -9,6 +9,9 @@
 as :func:`pyts.metrics.dtw`.
 """
 
+# Author: Johann Faouzi <johann.faouzi@gmail.com>
+# License: BSD-3-Clause
+
 import numpy as np
 import matplotlib.pyplot as plt
 from pyts.datasets import load_gunpoint
@@ -19,19 +22,25 @@
 # Parameters
 X, _, _, _ = load_gunpoint(return_X_y=True)
 x, y = X[0], X[1]
-n_timestamps = x.size
 
-plt.figure(figsize=(13, 14))
-timestamps = np.arange(n_timestamps + 1)
+# To compare time series of different lengths, we remove some observations
+mask = np.ones(x.size)
+mask[::5] = 0
+x = x[mask.astype(bool)]
+n_timestamps_1, n_timestamps_2 = x.size, y.size
+
+plt.figure(figsize=(10, 15))
+timestamps_1 = np.arange(n_timestamps_1 + 1)
+timestamps_2 = np.arange(n_timestamps_2 + 1)
 
 # Dynamic Time Warping: classic
 dtw_classic, path_classic = dtw(x, y, dist='square',
                                 method='classic', return_path=True)
-matrix_classic = np.zeros((n_timestamps + 1, n_timestamps + 1))
+matrix_classic = np.zeros((n_timestamps_2 + 1, n_timestamps_1 + 1))
 matrix_classic[tuple(path_classic)[::-1]] = 1.
 
 plt.subplot(2, 2, 1)
-plt.pcolor(timestamps, timestamps, matrix_classic,
+plt.pcolor(timestamps_1, timestamps_2, matrix_classic,
            edgecolors='k', cmap='Greys')
 plt.xlabel('x', fontsize=16)
 plt.ylabel('y', fontsize=16)
@@ -44,60 +53,67 @@
     x, y, dist='square', method='sakoechiba',
     options={'window_size': window_size}, return_path=True
 )
-band = sakoe_chiba_band(n_timestamps, window_size=window_size)
-matrix_sakoechiba = np.zeros((n_timestamps + 1, n_timestamps + 1))
-for i in range(n_timestamps):
+band = sakoe_chiba_band(n_timestamps_1, n_timestamps_2,
+                        window_size=window_size)
+matrix_sakoechiba = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
+for i in range(n_timestamps_1):
     matrix_sakoechiba[i, np.arange(*band[:, i])] = 0.5
-matrix_sakoechiba[tuple(path_sakoechiba)[::-1]] = 1.
+matrix_sakoechiba[tuple(path_sakoechiba)] = 1.
 
 plt.subplot(2, 2, 2)
-plt.pcolor(timestamps, timestamps, matrix_sakoechiba,
+plt.pcolor(timestamps_1, timestamps_2, matrix_sakoechiba.T,
            edgecolors='k', cmap='Greys')
 plt.xlabel('x', fontsize=16)
 plt.ylabel('y', fontsize=16)
 plt.title("{0}\nDTW(x, y) = {1:.2f}".format('sakoechiba', dtw_sakoechiba),
           fontsize=16)
 
 # Dynamic Time Warping: itakura
+slope = 1.2
 dtw_itakura, path_itakura = dtw(
     x, y, dist='square', method='itakura',
-    options={'max_slope': 2.}, return_path=True
+    options={'max_slope': slope}, return_path=True
 )
-parallelogram = itakura_parallelogram(n_timestamps, max_slope=2.)
-matrix_itakura = np.zeros((n_timestamps + 1, n_timestamps + 1))
-for i in range(n_timestamps):
+parallelogram = itakura_parallelogram(n_timestamps_1, n_timestamps_2,
+                                      max_slope=slope)
+matrix_itakura = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
+for i in range(n_timestamps_1):
     matrix_itakura[i, np.arange(*parallelogram[:, i])] = 0.5
-matrix_itakura[tuple(path_itakura)[::-1]] = 1.
-
+matrix_itakura[tuple(path_itakura)] = 1.
 plt.subplot(2, 2, 3)
-plt.pcolor(timestamps, timestamps, matrix_itakura,
+plt.pcolor(timestamps_1, timestamps_2, matrix_itakura.T,
            edgecolors='k', cmap='Greys')
 plt.xlabel('x', fontsize=16)
 plt.ylabel('y', fontsize=16)
 plt.title("{0}\nDTW(x, y) = {1:.2f}".format('itakura', dtw_itakura),
           fontsize=16)
 
 # Dynamic Time Warping: multiscale
-resolution, radius = 5, 4
+resolution, radius = 5, 2
 dtw_multiscale, path_multiscale = dtw(
     x, y, dist='square', method='multiscale',
     options={'resolution': resolution, 'radius': radius}, return_path=True
 )
+
 x_padded = x.reshape(-1, resolution).mean(axis=1)
 y_padded = y.reshape(-1, resolution).mean(axis=1)
+
 cost_mat_res = cost_matrix(x_padded, y_padded, dist='square', region=None)
 acc_cost_mat_res = accumulated_cost_matrix(cost_mat_res)
 path_res = _return_path(acc_cost_mat_res)
+
 multiscale_region = _multiscale_region(
-    n_timestamps, resolution, x_padded.size, path_res, radius=radius
+    n_timestamps_1, n_timestamps_2, resolution, x_padded.size, y_padded.size,
+    path_res,
+    radius=radius
 )
-matrix_multiscale = np.zeros((n_timestamps + 1, n_timestamps + 1))
-for i in range(n_timestamps):
+matrix_multiscale = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
+for i in range(n_timestamps_1):
     matrix_multiscale[i, np.arange(*multiscale_region[:, i])] = 0.5
-matrix_multiscale[tuple(path_multiscale)[::-1]] = 1.
+matrix_multiscale[tuple(path_multiscale)] = 1.
 
 plt.subplot(2, 2, 4)
-plt.pcolor(timestamps, timestamps, matrix_multiscale,
+plt.pcolor(timestamps_1, timestamps_2, matrix_multiscale.T,
            edgecolors='k', cmap='Greys')
 plt.xlabel('x', fontsize=16)
 plt.ylabel('y', fontsize=16)

diff --git a/examples/metrics/plot_itakura.py b/examples/metrics/plot_itakura.py
@@ -0,0 +1,101 @@
+"""
+=====================
+Itakura parallelogram
+=====================
+
+This example explains how to set the `max_slope` parameter of the itakura
+parallelogram when computing the Dynamic Time Warping (DTW) with
+`method` == "itakura". The Itakura parallelogram is defined through a
+`max_slope` parameter which determines the slope of the steeper side. It is
+implemented in :func:`pyts.metrics.dtw.itakura_parallelogram`. The slope of the
+other side is set to "1 / max_slope". For a feasible region, `max_slope`
+must be larger than 1. This example visualizes the itakura parallelogram with
+different slopes and temporal dimensions.
+"""
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+#         Johann Faouzi <johann.faouzi@gmail.com>
+# License: BSD-3-Clause
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+from pyts.metrics import itakura_parallelogram
+from pyts.metrics.dtw import _get_itakura_slopes
+
+# #####################################################################
+# We write a function to visualize the itakura parallelogram for different
+# time series lengths.
+
+
+def plot_itakura(n_timestamps_1, n_timestamps_2, max_slope=1.,
+                 ax=None):
+
+    region = itakura_parallelogram(n_timestamps_1, n_timestamps_2, max_slope)
+    max_slope, min_slope = _get_itakura_slopes(n_timestamps_1,
+                                               n_timestamps_2,
+                                               max_slope)
+
+    if ax is None:
+        _, ax = ax.subplots(1, 1, figsize=(n_timestamps_1 / 2,
+                                           n_timestamps_2 / 2))
+    mask = np.zeros((n_timestamps_2, n_timestamps_1))
+    for i, (j, k) in enumerate(region.T):
+        mask[j:k, i] = 1.
+
+    ax.imshow(mask, origin='lower', cmap='Wistia')
+
+    sz = max(n_timestamps_1, n_timestamps_2)
+    x = np.arange(-1, sz + 1)
+
+    low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
+        max_slope * np.arange(-1, sz + 1)
+    up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
+        min_slope * np.arange(-1, sz + 1)
+    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
+    ax.plot(x, diag, 'black', lw=1)
+    ax.plot(x, max_slope * np.arange(-1, sz + 1), 'b', lw=1.5)
+    ax.plot(x, min_slope * np.arange(-1, sz + 1), 'r', lw=1.5)
+    ax.plot(x, low_max_line, 'g', lw=1.5)
+    ax.plot(x, up_min_line, 'y', lw=1.5)
+
+    for i in range(n_timestamps_1):
+        for j in range(n_timestamps_2):
+            ax.plot(i, j, 'o', color='green', ms=1)
+
+    ax.set_xticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
+                  minor=True)
+    ax.set_yticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
+                  minor=True)
+    ax.grid(which='minor', color='b', linestyle='--', linewidth=1)
+
+    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
+    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))
+
+    return ax
+
+
+slopes = [1., 1.5, 3.]
+rc = {"font.size": 20, "axes.titlesize": 16,
+      "xtick.labelsize": 14, "ytick.labelsize": 14}
+plt.rcParams.update(rc)
+
+
+lengths = [(20, 20), (20, 10), (10, 20)]
+fig = plt.figure(constrained_layout=True, figsize=(16, 16))
+gs = gridspec.GridSpec(12, 12, figure=fig)
+spans = [(4, 4), (2, 4), (4, 2)]
+locs = [[[0, 0], [0, 4], [0, 8]],
+        [[5, 0], [5, 4], [5, 8]],
+        [[8, 1], [8, 5], [8, 9]]]
+for i, (n1, n2) in enumerate(lengths):
+    for j, slope in enumerate(slopes):
+        loc_x, loc_y = locs[i][j]
+        ax = fig.add_subplot(gs[loc_x: loc_x + spans[i][0],
+                                loc_y: loc_y + spans[i][1]])
+        if i == 0:
+            ax.set_title("Slope = {}".format(slope))
+        plot_itakura(n1, n2, max_slope=slope, ax=ax)
+        ax.set_xticks(np.arange(0, n1, 2))
+        ax.set_yticks(np.arange(0, n2, 2))
+plt.show()
diff --git a/examples/metrics/plot_sakoe_chiba.py b/examples/metrics/plot_sakoe_chiba.py
@@ -0,0 +1,103 @@
+"""
+================
+Sakoe-Chiba band
+================
+
+This example explains how to set the `window_size` parameter of the Sakoe-Chiba
+band when computing the Dynamic Time Warping (DTW) with
+`method` == "sakoechiba". The Sakoe-Chiba region is defined through a
+`window_size` parameter which determines the largest temporal shift allowed
+from the diagonal in the direction of the longest time series. It is
+implemented in :func:`pyts.metrics.dtw.sakoe_chiba_band`. The window size can
+be either set relatively to the length of the longest time series as a ratio
+between 0 and 1, or manually if an integer is given.
+
+This example visualizes the Sakoe-Chiba band in different scenarios: the
+degenerate case `window_size` = 0, a relative size `window_size` 0.2 and the
+absolute `window_size` = 4. The last two cases are equivalent since
+0.2 * 20 = 4.
+"""
+
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+#         Johann Faouzi <johann.faouzi@gmail.com>
+# License: BSD-3-Clause
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+
+from pyts.metrics import sakoe_chiba_band
+from pyts.metrics.dtw import _check_sakoe_chiba_params
+
+
+# #####################################################################
+# We write a function to visualize the sakoe-chiba band for different
+# time series lengths.
+
+
+def plot_sakoe_chiba(n_timestamps_1, n_timestamps_2, window_size=0.5,
+                     ax=None):
+
+    region = sakoe_chiba_band(n_timestamps_1, n_timestamps_2, window_size)
+    scale, horizontal_shift, vertical_shift = \
+        _check_sakoe_chiba_params(n_timestamps_1, n_timestamps_2, window_size)
+    mask = np.zeros((n_timestamps_2, n_timestamps_1))
+    for i, (j, k) in enumerate(region.T):
+        mask[j:k, i] = 1.
+
+    if ax is None:
+        _, ax = plt.subplots(1, 1, figsize=(n_timestamps_1 / 2,
+                                            n_timestamps_2 / 2))
+    ax.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)
+
+    sz = max(n_timestamps_1, n_timestamps_2)
+    x = np.arange(-1, sz + 1)
+    lower_bound = scale * (x - horizontal_shift) - vertical_shift
+    upper_bound = scale * (x + horizontal_shift) + vertical_shift
+    ax.plot(x, lower_bound, 'b', lw=2)
+    ax.plot(x, upper_bound, 'g', lw=2)
+    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
+    ax.plot(x, diag, 'black', lw=1)
+
+    for i in range(n_timestamps_1):
+        for j in range(n_timestamps_2):
+            ax.plot(i, j, 'o', color='green', ms=1)
+
+    ax.set_xticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
+                  minor=True)
+    ax.set_yticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
+                  minor=True)
+    ax.grid(which='minor', color='b', linestyle='--', linewidth=1)
+
+    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
+    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))
+
+    return ax
+
+
+window_sizes = [0, 0.2, 4]
+
+rc = {"font.size": 20, "axes.titlesize": 16,
+      "xtick.labelsize": 14, "ytick.labelsize": 14}
+plt.rcParams.update(rc)
+
+
+lengths = [(20, 20), (20, 10), (10, 20)]
+fig = plt.figure(constrained_layout=True, figsize=(16, 16))
+gs = gridspec.GridSpec(12, 12, figure=fig)
+spans = [(4, 4), (2, 4), (4, 2)]
+locs = [[[0, 0], [0, 4], [0, 8]],
+        [[5, 0], [5, 4], [5, 8]],
+        [[8, 1], [8, 5], [8, 9]]]
+for i, (n1, n2) in enumerate(lengths):
+    for j, window_size in enumerate(window_sizes):
+        loc_x, loc_y = locs[i][j]
+        ax = fig.add_subplot(gs[loc_x: loc_x + spans[i][0],
+                                loc_y: loc_y + spans[i][1]])
+        if i == 0:
+            ax.set_title("Window size = {}".format(window_size))
+        plot_sakoe_chiba(n1, n2, window_size, ax=ax)
+        ax.set_xticks(np.arange(0, n1, 2))
+        ax.set_yticks(np.arange(0, n2, 2))
+plt.show()
diff --git a/pyts/classification/knn.py b/pyts/classification/knn.py
@@ -142,7 +142,8 @@ def fit(self, X, y):
                     window_size = 0.1
                 else:
                     window_size = self.metric_params['window_size']
-                region = sakoe_chiba_band(n_timestamps, window_size)
+                region = sakoe_chiba_band(n_timestamps,
+                                          window_size=window_size)
             self._clf = SklearnKNN(
                 n_neighbors=self.n_neighbors, weights=self.weights,
                 algorithm='brute', metric=dtw_region,
@@ -159,7 +160,8 @@ def fit(self, X, y):
                     max_slope = 2.
                 else:
                     max_slope = self.metric_params['max_slope']
-                region = itakura_parallelogram(n_timestamps, max_slope)
+                region = itakura_parallelogram(n_timestamps,
+                                               max_slope=max_slope)
             self._clf = SklearnKNN(
                 n_neighbors=self.n_neighbors, weights=self.weights,
                 algorithm='brute', metric=dtw_region,

diff --git a/pyts/classification/tests/test_knn.py b/pyts/classification/tests/test_knn.py
@@ -22,7 +22,7 @@
      ({'metric': 'dtw_classic'}),
      ({'metric': 'dtw_sakoechiba'}),
      ({'metric': 'dtw_sakoechiba', 'metric_params': {}}),
-     ({'metric': 'dtw_sakoechiba', 'metric_params': {'window_size': 2}}),
+     ({'metric': 'dtw_sakoechiba', 'metric_params': {'window_size': 0.5}}),
      ({'metric': 'dtw_itakura'}),
      ({'metric': 'dtw_itakura', 'metric_params': {}}),
      ({'metric': 'dtw_itakura', 'metric_params': {'max_slope': 3.}}),