Skip to content

Commit

Permalink
Merge pull request #31 from hichamjanati/dtw-diffsize
Browse files Browse the repository at this point in the history
[WIP] DTW computation for time series with different lengths
  • Loading branch information
johannfaouzi committed Sep 22, 2019
2 parents 5b4310f + ca519b5 commit 706f302
Show file tree
Hide file tree
Showing 8 changed files with 638 additions and 228 deletions.
8 changes: 8 additions & 0 deletions doc/changelog.rst
Expand Up @@ -4,6 +4,14 @@
Change Log
==========


Version 0.10.0
--------------

- Adapt `pyts.metrics.dtw` to compare time series with different lengths (Hicham Janati)



Version 0.9.0
-------------

Expand Down
62 changes: 39 additions & 23 deletions examples/metrics/plot_dtw.py
Expand Up @@ -9,6 +9,9 @@
as :func:`pyts.metrics.dtw`.
"""

# Author: Johann Faouzi <johann.faouzi@gmail.com>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
from pyts.datasets import load_gunpoint
Expand All @@ -19,19 +22,25 @@
# Parameters
X, _, _, _ = load_gunpoint(return_X_y=True)
x, y = X[0], X[1]
n_timestamps = x.size

plt.figure(figsize=(13, 14))
timestamps = np.arange(n_timestamps + 1)
# To compare time series of different lengths, we remove some observations
mask = np.ones(x.size)
mask[::5] = 0
x = x[mask.astype(bool)]
n_timestamps_1, n_timestamps_2 = x.size, y.size

plt.figure(figsize=(10, 15))
timestamps_1 = np.arange(n_timestamps_1 + 1)
timestamps_2 = np.arange(n_timestamps_2 + 1)

# Dynamic Time Warping: classic
dtw_classic, path_classic = dtw(x, y, dist='square',
method='classic', return_path=True)
matrix_classic = np.zeros((n_timestamps + 1, n_timestamps + 1))
matrix_classic = np.zeros((n_timestamps_2 + 1, n_timestamps_1 + 1))
matrix_classic[tuple(path_classic)[::-1]] = 1.

plt.subplot(2, 2, 1)
plt.pcolor(timestamps, timestamps, matrix_classic,
plt.pcolor(timestamps_1, timestamps_2, matrix_classic,
edgecolors='k', cmap='Greys')
plt.xlabel('x', fontsize=16)
plt.ylabel('y', fontsize=16)
Expand All @@ -44,60 +53,67 @@
x, y, dist='square', method='sakoechiba',
options={'window_size': window_size}, return_path=True
)
band = sakoe_chiba_band(n_timestamps, window_size=window_size)
matrix_sakoechiba = np.zeros((n_timestamps + 1, n_timestamps + 1))
for i in range(n_timestamps):
band = sakoe_chiba_band(n_timestamps_1, n_timestamps_2,
window_size=window_size)
matrix_sakoechiba = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
for i in range(n_timestamps_1):
matrix_sakoechiba[i, np.arange(*band[:, i])] = 0.5
matrix_sakoechiba[tuple(path_sakoechiba)[::-1]] = 1.
matrix_sakoechiba[tuple(path_sakoechiba)] = 1.

plt.subplot(2, 2, 2)
plt.pcolor(timestamps, timestamps, matrix_sakoechiba,
plt.pcolor(timestamps_1, timestamps_2, matrix_sakoechiba.T,
edgecolors='k', cmap='Greys')
plt.xlabel('x', fontsize=16)
plt.ylabel('y', fontsize=16)
plt.title("{0}\nDTW(x, y) = {1:.2f}".format('sakoechiba', dtw_sakoechiba),
fontsize=16)

# Dynamic Time Warping: itakura
slope = 1.2
dtw_itakura, path_itakura = dtw(
x, y, dist='square', method='itakura',
options={'max_slope': 2.}, return_path=True
options={'max_slope': slope}, return_path=True
)
parallelogram = itakura_parallelogram(n_timestamps, max_slope=2.)
matrix_itakura = np.zeros((n_timestamps + 1, n_timestamps + 1))
for i in range(n_timestamps):
parallelogram = itakura_parallelogram(n_timestamps_1, n_timestamps_2,
max_slope=slope)
matrix_itakura = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
for i in range(n_timestamps_1):
matrix_itakura[i, np.arange(*parallelogram[:, i])] = 0.5
matrix_itakura[tuple(path_itakura)[::-1]] = 1.

matrix_itakura[tuple(path_itakura)] = 1.
plt.subplot(2, 2, 3)
plt.pcolor(timestamps, timestamps, matrix_itakura,
plt.pcolor(timestamps_1, timestamps_2, matrix_itakura.T,
edgecolors='k', cmap='Greys')
plt.xlabel('x', fontsize=16)
plt.ylabel('y', fontsize=16)
plt.title("{0}\nDTW(x, y) = {1:.2f}".format('itakura', dtw_itakura),
fontsize=16)

# Dynamic Time Warping: multiscale
resolution, radius = 5, 4
resolution, radius = 5, 2
dtw_multiscale, path_multiscale = dtw(
x, y, dist='square', method='multiscale',
options={'resolution': resolution, 'radius': radius}, return_path=True
)

x_padded = x.reshape(-1, resolution).mean(axis=1)
y_padded = y.reshape(-1, resolution).mean(axis=1)

cost_mat_res = cost_matrix(x_padded, y_padded, dist='square', region=None)
acc_cost_mat_res = accumulated_cost_matrix(cost_mat_res)
path_res = _return_path(acc_cost_mat_res)

multiscale_region = _multiscale_region(
n_timestamps, resolution, x_padded.size, path_res, radius=radius
n_timestamps_1, n_timestamps_2, resolution, x_padded.size, y_padded.size,
path_res,
radius=radius
)
matrix_multiscale = np.zeros((n_timestamps + 1, n_timestamps + 1))
for i in range(n_timestamps):
matrix_multiscale = np.zeros((n_timestamps_1 + 1, n_timestamps_2 + 1))
for i in range(n_timestamps_1):
matrix_multiscale[i, np.arange(*multiscale_region[:, i])] = 0.5
matrix_multiscale[tuple(path_multiscale)[::-1]] = 1.
matrix_multiscale[tuple(path_multiscale)] = 1.

plt.subplot(2, 2, 4)
plt.pcolor(timestamps, timestamps, matrix_multiscale,
plt.pcolor(timestamps_1, timestamps_2, matrix_multiscale.T,
edgecolors='k', cmap='Greys')
plt.xlabel('x', fontsize=16)
plt.ylabel('y', fontsize=16)
Expand Down
101 changes: 101 additions & 0 deletions examples/metrics/plot_itakura.py
@@ -0,0 +1,101 @@
"""
=====================
Itakura parallelogram
=====================
This example explains how to set the `max_slope` parameter of the itakura
parallelogram when computing the Dynamic Time Warping (DTW) with
`method` == "itakura". The Itakura parallelogram is defined through a
`max_slope` parameter which determines the slope of the steeper side. It is
implemented in :func:`pyts.metrics.dtw.itakura_parallelogram`. The slope of the
other side is set to "1 / max_slope". For a feasible region, `max_slope`
must be larger than 1. This example visualizes the itakura parallelogram with
different slopes and temporal dimensions.
"""

# Author: Hicham Janati <hicham.janati@inria.fr>
# Johann Faouzi <johann.faouzi@gmail.com>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from pyts.metrics import itakura_parallelogram
from pyts.metrics.dtw import _get_itakura_slopes

# #####################################################################
# We write a function to visualize the itakura parallelogram for different
# time series lengths.


def plot_itakura(n_timestamps_1, n_timestamps_2, max_slope=1.,
ax=None):

region = itakura_parallelogram(n_timestamps_1, n_timestamps_2, max_slope)
max_slope, min_slope = _get_itakura_slopes(n_timestamps_1,
n_timestamps_2,
max_slope)

if ax is None:
_, ax = ax.subplots(1, 1, figsize=(n_timestamps_1 / 2,
n_timestamps_2 / 2))
mask = np.zeros((n_timestamps_2, n_timestamps_1))
for i, (j, k) in enumerate(region.T):
mask[j:k, i] = 1.

ax.imshow(mask, origin='lower', cmap='Wistia')

sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)

low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
max_slope * np.arange(-1, sz + 1)
up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
min_slope * np.arange(-1, sz + 1)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
ax.plot(x, diag, 'black', lw=1)
ax.plot(x, max_slope * np.arange(-1, sz + 1), 'b', lw=1.5)
ax.plot(x, min_slope * np.arange(-1, sz + 1), 'r', lw=1.5)
ax.plot(x, low_max_line, 'g', lw=1.5)
ax.plot(x, up_min_line, 'y', lw=1.5)

for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
ax.plot(i, j, 'o', color='green', ms=1)

ax.set_xticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
minor=True)
ax.set_yticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
minor=True)
ax.grid(which='minor', color='b', linestyle='--', linewidth=1)

ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

return ax


slopes = [1., 1.5, 3.]
rc = {"font.size": 20, "axes.titlesize": 16,
"xtick.labelsize": 14, "ytick.labelsize": 14}
plt.rcParams.update(rc)


lengths = [(20, 20), (20, 10), (10, 20)]
fig = plt.figure(constrained_layout=True, figsize=(16, 16))
gs = gridspec.GridSpec(12, 12, figure=fig)
spans = [(4, 4), (2, 4), (4, 2)]
locs = [[[0, 0], [0, 4], [0, 8]],
[[5, 0], [5, 4], [5, 8]],
[[8, 1], [8, 5], [8, 9]]]
for i, (n1, n2) in enumerate(lengths):
for j, slope in enumerate(slopes):
loc_x, loc_y = locs[i][j]
ax = fig.add_subplot(gs[loc_x: loc_x + spans[i][0],
loc_y: loc_y + spans[i][1]])
if i == 0:
ax.set_title("Slope = {}".format(slope))
plot_itakura(n1, n2, max_slope=slope, ax=ax)
ax.set_xticks(np.arange(0, n1, 2))
ax.set_yticks(np.arange(0, n2, 2))
plt.show()
103 changes: 103 additions & 0 deletions examples/metrics/plot_sakoe_chiba.py
@@ -0,0 +1,103 @@
"""
================
Sakoe-Chiba band
================
This example explains how to set the `window_size` parameter of the Sakoe-Chiba
band when computing the Dynamic Time Warping (DTW) with
`method` == "sakoechiba". The Sakoe-Chiba region is defined through a
`window_size` parameter which determines the largest temporal shift allowed
from the diagonal in the direction of the longest time series. It is
implemented in :func:`pyts.metrics.dtw.sakoe_chiba_band`. The window size can
be either set relatively to the length of the longest time series as a ratio
between 0 and 1, or manually if an integer is given.
This example visualizes the Sakoe-Chiba band in different scenarios: the
degenerate case `window_size` = 0, a relative size `window_size` 0.2 and the
absolute `window_size` = 4. The last two cases are equivalent since
0.2 * 20 = 4.
"""


# Author: Hicham Janati <hicham.janati@inria.fr>
# Johann Faouzi <johann.faouzi@gmail.com>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from pyts.metrics import sakoe_chiba_band
from pyts.metrics.dtw import _check_sakoe_chiba_params


# #####################################################################
# We write a function to visualize the sakoe-chiba band for different
# time series lengths.


def plot_sakoe_chiba(n_timestamps_1, n_timestamps_2, window_size=0.5,
ax=None):

region = sakoe_chiba_band(n_timestamps_1, n_timestamps_2, window_size)
scale, horizontal_shift, vertical_shift = \
_check_sakoe_chiba_params(n_timestamps_1, n_timestamps_2, window_size)
mask = np.zeros((n_timestamps_2, n_timestamps_1))
for i, (j, k) in enumerate(region.T):
mask[j:k, i] = 1.

if ax is None:
_, ax = plt.subplots(1, 1, figsize=(n_timestamps_1 / 2,
n_timestamps_2 / 2))
ax.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)

sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)
lower_bound = scale * (x - horizontal_shift) - vertical_shift
upper_bound = scale * (x + horizontal_shift) + vertical_shift
ax.plot(x, lower_bound, 'b', lw=2)
ax.plot(x, upper_bound, 'g', lw=2)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
ax.plot(x, diag, 'black', lw=1)

for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
ax.plot(i, j, 'o', color='green', ms=1)

ax.set_xticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
minor=True)
ax.set_yticks(np.arange(-.5, max(n_timestamps_1, n_timestamps_2), 1),
minor=True)
ax.grid(which='minor', color='b', linestyle='--', linewidth=1)

ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

return ax


window_sizes = [0, 0.2, 4]

rc = {"font.size": 20, "axes.titlesize": 16,
"xtick.labelsize": 14, "ytick.labelsize": 14}
plt.rcParams.update(rc)


lengths = [(20, 20), (20, 10), (10, 20)]
fig = plt.figure(constrained_layout=True, figsize=(16, 16))
gs = gridspec.GridSpec(12, 12, figure=fig)
spans = [(4, 4), (2, 4), (4, 2)]
locs = [[[0, 0], [0, 4], [0, 8]],
[[5, 0], [5, 4], [5, 8]],
[[8, 1], [8, 5], [8, 9]]]
for i, (n1, n2) in enumerate(lengths):
for j, window_size in enumerate(window_sizes):
loc_x, loc_y = locs[i][j]
ax = fig.add_subplot(gs[loc_x: loc_x + spans[i][0],
loc_y: loc_y + spans[i][1]])
if i == 0:
ax.set_title("Window size = {}".format(window_size))
plot_sakoe_chiba(n1, n2, window_size, ax=ax)
ax.set_xticks(np.arange(0, n1, 2))
ax.set_yticks(np.arange(0, n2, 2))
plt.show()
6 changes: 4 additions & 2 deletions pyts/classification/knn.py
Expand Up @@ -142,7 +142,8 @@ def fit(self, X, y):
window_size = 0.1
else:
window_size = self.metric_params['window_size']
region = sakoe_chiba_band(n_timestamps, window_size)
region = sakoe_chiba_band(n_timestamps,
window_size=window_size)
self._clf = SklearnKNN(
n_neighbors=self.n_neighbors, weights=self.weights,
algorithm='brute', metric=dtw_region,
Expand All @@ -159,7 +160,8 @@ def fit(self, X, y):
max_slope = 2.
else:
max_slope = self.metric_params['max_slope']
region = itakura_parallelogram(n_timestamps, max_slope)
region = itakura_parallelogram(n_timestamps,
max_slope=max_slope)
self._clf = SklearnKNN(
n_neighbors=self.n_neighbors, weights=self.weights,
algorithm='brute', metric=dtw_region,
Expand Down
2 changes: 1 addition & 1 deletion pyts/classification/tests/test_knn.py
Expand Up @@ -22,7 +22,7 @@
({'metric': 'dtw_classic'}),
({'metric': 'dtw_sakoechiba'}),
({'metric': 'dtw_sakoechiba', 'metric_params': {}}),
({'metric': 'dtw_sakoechiba', 'metric_params': {'window_size': 2}}),
({'metric': 'dtw_sakoechiba', 'metric_params': {'window_size': 0.5}}),
({'metric': 'dtw_itakura'}),
({'metric': 'dtw_itakura', 'metric_params': {}}),
({'metric': 'dtw_itakura', 'metric_params': {'max_slope': 3.}}),
Expand Down

0 comments on commit 706f302

Please sign in to comment.