In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from numba import njit
import os

from aeon.utils.numba.general import z_normalise_series_2d

# from aeon.distances import euclidean_distance
# from aeon.distances import dtw_distance
from aeon.distances import get_distance_function

In [None]:
%load_ext autoreload
%autoreload 2

from ksfdtw.distance_measures import (
    usdtw_prime as ksfdtw_usdtw_prime, 
    psdtw_prime_vanilla as psdtw_prime_vanilla, 
    psedd_prime as psedd_prime, 
    psedd_prime_test as psedd_prime_test,
    cut_based_distance as cut_based_distance)
from ksfdtw.utils import precision_at_k

# Import Dataset

In [3]:
# A neat way to load the dataset, but more complicated to use
# data = np.load("../data_intermediate/GunPoint_preprocessed_P_3_l_2.0_len_150.npz")
# data_dict = {key: data[key] for key in data.files}

# An old way to load the dataset: pull every array out of the archive by name.
# The archive is opened in a `with` block so its file handle is closed as soon
# as the arrays have been read (np.load on an .npz returns a lazily-read
# NpzFile that otherwise stays open).
# NOTE: allow_pickle=True unpickles object arrays, which can execute arbitrary
# code -- only load archives produced by this project.
with np.load(
    "../data_intermediate/GunPoint_ps_P_3_l_2.0_len_150.npz",
    allow_pickle=True,
) as data:
    # Training split
    X_train_scaled = data["X_train_scaled"]
    X_train_ps = data["X_train_ps"]
    X_train_ps_noise = data["X_train_ps_noise"]
    y_train = data["y_train"]
    # Test split
    X_test_scaled = data["X_test_scaled"]
    X_test_ps = data["X_test_ps"]
    X_test_ps_noise = data["X_test_ps_noise"]
    y_test = data["y_test"]
    # Cut arrays are converted to plain Python lists
    X_train_cuts = data["X_train_cuts"].tolist()
    X_train_ps_cuts = data["X_train_ps_cuts"].tolist()
    X_test_cuts = data["X_test_cuts"].tolist()
    X_test_ps_cuts = data["X_test_ps_cuts"].tolist()

In [4]:
# X_train_scaled.shape, X_train_ps.shape, X_train_ps_noise.shape

In [5]:
# Z-normalise each training collection with aeon's 2D helper, in the order
# scaled -> ps -> ps_noise, and bind the results to the matching *_norm names.
X_train_scaled_norm, X_train_ps_norm, X_train_ps_noise_norm = (
    z_normalise_series_2d(collection)
    for collection in (X_train_scaled, X_train_ps, X_train_ps_noise)
)

# Querying

In [22]:
# *** Change here 1 ***
# Query set: leave exactly one of the assignments below active.
# query_set = X_train_ps
# query_set = X_train_ps_noise
# query_set = X_train_ps_noise_norm
query_set = X_train_ps_norm

# Target set: leave exactly one of the assignments below active.
# target_set = X_train_scaled
target_set = X_train_scaled_norm

# Both collections must contain the same number of series.
if not (len(query_set) == len(target_set)):
    raise ValueError("query_set and target_set have different sizes!")

# Pruning (only for PSED and PSDTW)
Since PSEDD takes the cuts produced by PSED as its input, the two methods have the same `total_count_dist_calls`.

In [7]:
# Load previously saved results (distances, distance-call counts, cuts and
# summary metrics) from disk. The archive is opened in a `with` block so its
# file handle is closed once everything has been read.
# NOTE: allow_pickle=True unpickles object arrays, which can execute arbitrary
# code -- only load archives produced by this project.
# NOTE(review): the file name refers to X_train_ps_noise_norm; make sure it
# matches the query_set selected in the "Querying" section -- TODO confirm.
with np.load(
    "../results_temp/X_train_ps_noise_norm_psdtw.npz",
    allow_pickle=True,
) as data:
    # Force C-contiguous float64 copies/views of the stored arrays.
    all_distances = np.ascontiguousarray(data["all_distances"], dtype=np.float64)
    all_count_dist_calls = np.ascontiguousarray(
        data["all_count_dist_calls"], dtype=np.float64
    )
    all_cuts = np.ascontiguousarray(data["all_cuts"], dtype=np.float64)

    # Echo the summary metrics that were stored alongside the arrays.
    for k in (1, 3, 5, 7):
        print(f"precision@{k} =", data[f"precision_at_{k}"])
    print("elapsed time =", data["elapsed_time"])

precision@1 = 0.42
precision@3 = 0.48
precision@5 = 0.64
precision@7 = 0.72
elapsed time = 39486.585753917694


In [8]:
# Check total count_dist_calls: add up the per-row sums, accumulating row by
# row starting from 0.
total_count_dist_calls = sum(np.sum(row) for row in all_count_dist_calls)
print(total_count_dist_calls)

4141475524.0


In [9]:
# Re-score every query against every target with cut_based_distance, reusing
# the stored cuts in all_cuts, then accumulate precision@k for k = 1, 3, 5, 7.
# NOTE(review): the positional arguments 2, 3, 0.1 and dist_method=15 are
# passed through unchanged; their meaning is defined by cut_based_distance.
hits_at_k = {1: 0, 3: 0, 5: 0, 7: 0}
for i in range(len(query_set)):
    cuts_for_query = all_cuts[i]
    per_target = []
    for j in range(len(target_set)):
        per_target.append(
            cut_based_distance(
                query_set[i],
                target_set[j],
                2,
                3,
                0.1,
                dist_method=15,
                cuts=cuts_for_query[j],
            )
        )
    distances = np.array(per_target)
    for k in hits_at_k:
        hits_at_k[k] += precision_at_k(distances, i, k)

# Expose the accumulated totals under the usual names.
precision_at_1 = hits_at_k[1]
precision_at_3 = hits_at_k[3]
precision_at_5 = hits_at_k[5]
precision_at_7 = hits_at_k[7]

# Report the mean over all queries, two decimals each, comma-separated.
print(
    ", ".join(
        f"{total / len(query_set):.2f}"
        for total in (precision_at_1, precision_at_3, precision_at_5, precision_at_7)
    )
)

0.40, 0.50, 0.64, 0.74


# Testing

In [10]:
# Q = query_set[0]
# C = target_set[0]
# psdtw_prime(Q, C, l=2, P=3, r=0.1)

In [11]:
# all_cuts[0][0]

In [12]:
# X_train_ps_norm.shape, X_train_ps_noise_norm.shape

In [32]:
# %%time
# Spot check of psedd_prime_test on a single pair: query index 2 of
# X_train_ps_norm against target index 3 of X_train_scaled_norm.
# NOTE(review): the trailing positional arguments 2, 3, 0.1 presumably map to
# l, P and r, as in the commented-out psdtw_prime(Q, C, l=2, P=3, r=0.1) call
# earlier in this section -- TODO confirm against the function signature.
psedd_prime_test(X_train_ps_norm[2], X_train_scaled_norm[3], 2, 3, 0.1)

[[  0  52   0  37]
 [ 52 112  37 103]
 [112 150 103 150]]


1.729257404108439

In [88]:
%%time
# Time psdtw_prime_vanilla on the first query/target pair with the last two
# arguments set to 0.
# NOTE(review): the printed tuple looks like (distance, number of distance
# calls, cuts) -- TODO confirm against the function's documentation.
print(psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0, 0))

(3.3884497705100824, 1799848, array([[  0,  40,   0,  48],
       [ 40, 109,  48, 107],
       [109, 150, 107, 150]]))
CPU times: user 1.17 s, sys: 9.12 ms, total: 1.18 s
Wall time: 1.19 s


In [92]:
%%time
# Same call as the psdtw_prime_vanilla cell above, but using the *_test
# variant, so the two printed results and timings can be compared.
# NOTE(review): psdtw_prime_vanilla_test must already be imported from
# ksfdtw.distance_measures when this cell runs.
print(psdtw_prime_vanilla_test(query_set[0], target_set[0], 2, 3, 0, 0))

psdtw_prime_vanilla_test 8
(3.3884497705100824, 1679732, array([[  0,  40,   0,  48],
       [ 40, 109,  48, 107],
       [109, 150, 107, 150]]))
CPU times: user 1.3 s, sys: 8.06 ms, total: 1.31 s
Wall time: 1.31 s


In [177]:
%%time
# Time psdtw_prime_vanilla on the first query/target pair, this time with the
# last two arguments set to 0.1 and 1 (the commented line below is the
# 0, 0 variant).
print(psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0.1, 1))
# print(psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0, 0))


(2.616443648118869, 1790932, array([[  0,  44,   0,  44],
       [ 44, 114,  44, 112],
       [114, 150, 112, 150]]))
CPU times: user 11.7 s, sys: 65.9 ms, total: 11.8 s
Wall time: 12.1 s


In [180]:
%%time
%load_ext autoreload
%autoreload 2
from ksfdtw.distance_measures import psdtw_prime_vanilla_test as psdtw_prime_vanilla_test
print(psdtw_prime_vanilla_test(query_set[0], target_set[0], 2, 3, 0.1, 1))
# print(psdtw_prime_vanilla_test(query_set[0], target_set[0], 2, 3, 0, 0))
# 1175051
# 957706

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
psdtw_prime_vanilla_test
(2.616443648118869, 823404, array([[  0,  44,   0,  44],
       [ 44, 114,  44, 112],
       [114, 150, 112, 150]]))
CPU times: user 6.07 s, sys: 40.6 ms, total: 6.11 s
Wall time: 6.12 s


In [15]:
import datetime

# Stamp the notebook with the wall-clock time at which this final cell ran.
# NOTE(review): the stamp only reflects a true end-to-end run when the
# notebook was executed via Restart Kernel -> Run All.
message = "This notebook was last run end-to-end on: {}\n".format(
    datetime.datetime.now()
)
print(message)

This notebook was last run end-to-end on: 2025-09-23 16:01:56.726246

