In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from numba import njit
import os

from aeon.utils.numba.general import z_normalise_series_2d

# from aeon.distances import euclidean_distance
# from aeon.distances import dtw_distance
from aeon.distances import get_distance_function

In [2]:
%load_ext autoreload
%autoreload 2

from ksfdtw.distance_measures import (
    usdtw_prime as ksfdtw_usdtw_prime, 
    psdtw_prime_vanilla as psdtw_prime_vanilla, 
    psdtw_prime_vanilla_lb as psdtw_prime_vanilla_lb, 
    # psedd_prime as psedd_prime, 
    # psedd_prime_test as psedd_prime_test,
    cut_based_distance as cut_based_distance)
from ksfdtw.utils import precision_at_k

# Import Dataset

In [3]:
# A neat way to load the dataset, but more complicated to use
# data = np.load("../data_intermediate/GunPoint_preprocessed_P_3_l_2.0_len_150.npz")
# data_dict = {key: data[key] for key in data.files}

# An older, explicit way to load the dataset: one variable per stored array.
# The archive is opened in a `with` block so its file handle is closed as soon
# as every array has been read, instead of staying open for the kernel's life.
# NOTE: allow_pickle=True unpickles object arrays, which can run arbitrary
# code; only load archives that were produced by this project.
with np.load(
    "../data_intermediate/GunPoint_ps_P_3_l_2.0_len_150.npz",
    allow_pickle=True,
) as data:
    # Training split.
    X_train_scaled = data["X_train_scaled"]
    X_train_ps = data["X_train_ps"]
    X_train_ps_noise = data["X_train_ps_noise"]
    y_train = data["y_train"]
    # Test split.
    X_test_scaled = data["X_test_scaled"]
    X_test_ps = data["X_test_ps"]
    X_test_ps_noise = data["X_test_ps_noise"]
    y_test = data["y_test"]
    # Cut arrays are converted to plain Python lists.
    X_train_cuts = data["X_train_cuts"].tolist()
    X_train_ps_cuts = data["X_train_ps_cuts"].tolist()
    X_test_cuts = data["X_test_cuts"].tolist()
    X_test_ps_cuts = data["X_test_ps_cuts"].tolist()

In [4]:
# X_train_scaled.shape, X_train_ps.shape, X_train_ps_noise.shape

In [5]:
# Run each training variant through z_normalise_series_2d, in the same order
# as the names on the left-hand side.
X_train_scaled_norm, X_train_ps_norm, X_train_ps_noise_norm = (
    z_normalise_series_2d(series_2d)
    for series_2d in (X_train_scaled, X_train_ps, X_train_ps_noise)
)

# Querying

In [6]:
# *** Change here 1 ***
# Query set: keep exactly one of the assignments below active.
# query_set = X_train_ps
query_set = X_train_ps_norm
# query_set = X_train_ps_noise
# query_set = X_train_ps_noise_norm


# Target set: keep exactly one of the assignments below active.
# target_set = X_train_scaled
target_set = X_train_scaled_norm

# Fail fast when the two sets do not contain the same number of series.
n_queries, n_targets = len(query_set), len(target_set)
if n_queries != n_targets:
    raise ValueError("query_set and target_set have different sizes!")

# Pruning (only for PSED and PSDTW)
Because PSEDD takes the cuts produced by PSED as its input, the two methods report the same `total_count_dist_calls`.

In [7]:
# Load the cached results (distances, distance-call counts, cuts and the
# precision/timing summary) from disk.
# NOTE(review): the file name refers to X_train_ps_noise_norm, while the
# query-set cell above activates X_train_ps_norm; confirm that the cached
# cuts really belong to the active query_set.
# The archive is opened in a `with` block so its file handle is closed once
# everything needed has been read.
with np.load(
    "../results_temp/X_train_ps_noise_norm_psdtw.npz",
    allow_pickle=True,
) as data:
    # Force C-contiguous float64 copies/views of the stored arrays.
    all_distances = np.ascontiguousarray(data["all_distances"], dtype=np.float64)
    all_count_dist_calls = np.ascontiguousarray(
        data["all_count_dist_calls"], dtype=np.float64
    )
    all_cuts = np.ascontiguousarray(data["all_cuts"], dtype=np.float64)

    # Summary values that were stored together with the arrays.
    print("precision@1 =", data["precision_at_1"])
    print("precision@3 =", data["precision_at_3"])
    print("precision@5 =", data["precision_at_5"])
    print("precision@7 =", data["precision_at_7"])
    print("elapsed time =", data["elapsed_time"])

precision@1 = 0.42
precision@3 = 0.48
precision@5 = 0.64
precision@7 = 0.72
elapsed time = 39486.585753917694


In [8]:
# Check total count_dist_calls
# Row sums are added one after another, starting from 0.
total_count_dist_calls = sum(np.sum(row) for row in all_count_dist_calls)
print(total_count_dist_calls)

4141475524.0


In [9]:
# Recompute the distance from every query to every target with the cached
# cuts, then accumulate precision@k for k = 1, 3, 5, 7.
cutoffs = (1, 3, 5, 7)
precision_sums = [0 for _ in cutoffs]
for i in range(len(query_set)):
    query = query_set[i]
    distances = np.array(
        [
            cut_based_distance(
                query, target, 2, 3, 0.1, dist_method=15, cuts=all_cuts[i][j]
            )
            for j, target in enumerate(target_set)
        ]
    )
    for slot, k in enumerate(cutoffs):
        precision_sums[slot] += precision_at_k(distances, i, k)
precision_at_1, precision_at_3, precision_at_5, precision_at_7 = precision_sums

# Averages over the query set, two decimals each, separated by ", ".
print(", ".join(f"{total / len(query_set):.2f}" for total in precision_sums))

0.36, 0.52, 0.62, 0.76


# Testing

In [10]:
# Q = query_set[0]
# C = target_set[0]
# psdtw_prime(Q, C, l=2, P=3, r=0.1)

In [11]:
# all_cuts[0][0]

In [12]:
# X_train_ps_norm.shape, X_train_ps_noise_norm.shape

In [13]:
# %%time
# psedd_prime_test(X_train_ps_norm[2], X_train_scaled_norm[3], 2, 3, 0.1)

In [55]:
%load_ext autoreload
%autoreload 2
from ksfdtw.distance_measures import psdtw_prime_vanilla_lb  as psdtw_prime_vanilla_lb, psdtw_prime_vanilla_lb_cache as psdtw_prime_vanilla_lb_cache, psdtw_prime_vanilla_lb_testing as psdtw_prime_vanilla_lb_testing, psdtw_prime_vanilla_lb_testing2 as psdtw_prime_vanilla_lb_testing2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
%%time
# Time psdtw_prime_vanilla on the first query/target pair with the two
# trailing arguments set to 0.
# NOTE(review): 2 and 3 presumably correspond to l and P, as in the
# commented-out psdtw_prime(Q, C, l=2, P=3, r=0.1) call above — confirm
# against the ksfdtw signature.
print(psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0, 0))
# Wall time: 1.22 s (hand-written note; the output stored with this cell reports a different value)

(3.3884497705100824, 1799848, array([[  0,  40,   0,  48],
       [ 40, 109,  48, 107],
       [109, 150, 107, 150]]))
CPU times: user 2.93 s, sys: 31.4 ms, total: 2.96 s
Wall time: 2.97 s


In [16]:
%%time
# Same pair and arguments as the psdtw_prime_vanilla timing, run through
# psdtw_prime_vanilla_lb for comparison.
print(psdtw_prime_vanilla_lb(query_set[0], target_set[0], 2, 3, 0, 0))
# Wall time: 1.2 s (hand-written note; the output stored with this cell reports a different value)

(3.3884497705100824, 1435449, array([[  0,  40,   0,  48],
       [ 40, 109,  48, 107],
       [109, 150, 107, 150]]))
CPU times: user 2.21 s, sys: 17.7 ms, total: 2.23 s
Wall time: 2.24 s


In [17]:
%%time
# Same pair and arguments again, this time through
# psdtw_prime_vanilla_lb_cache.
print(psdtw_prime_vanilla_lb_cache(query_set[0], target_set[0], 2, 3, 0, 0))

psdtw_prime_vanilla_lb_cache
(3.3884497705100824, 1181015, array([[  0,  40,   0,  48],
       [ 40, 109,  48, 107],
       [109, 150, 107, 150]]))
CPU times: user 2.78 s, sys: 1.77 s, total: 4.56 s
Wall time: 5.39 s


In [18]:
%%time
# Time psdtw_prime_vanilla on the first query/target pair with the two
# trailing arguments changed from (0, 0) to (0.1, 1).
print(psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0.1, 1))
# Wall time: 11.9 s (hand-written note; the output stored with this cell reports a different value)

(2.616443648118869, 1790932, array([[  0,  44,   0,  44],
       [ 44, 114,  44, 112],
       [114, 150, 112, 150]]))
CPU times: user 12.5 s, sys: 105 ms, total: 12.6 s
Wall time: 12.7 s


In [56]:
%%time
# psdtw_prime_vanilla_lb on the first query/target pair with trailing
# arguments (0.1, 1), for comparison with psdtw_prime_vanilla.
print(psdtw_prime_vanilla_lb(query_set[0], target_set[0], 2, 3, 0.1, 1))

psdtw_prime_vanilla_lb 3
(2.616443648118869, 1215285, array([[  0,  44,   0,  44],
       [ 44, 114,  44, 112],
       [114, 150, 112, 150]]))
CPU times: user 11.7 s, sys: 110 ms, total: 11.8 s
Wall time: 12 s


In [20]:
# Show only element 0 of the returned tuple (presumably the distance —
# confirm against ksfdtw).
psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0.1, 1)[0]

2.616443648118869

In [21]:
# Do both implementations return (numerically) the same first element for
# query 0 against target 0?
vanilla_value = psdtw_prime_vanilla(query_set[0], target_set[0], 2, 3, 0.1, 1)[0]
lb_value = psdtw_prime_vanilla_lb(query_set[0], target_set[0], 2, 3, 0.1, 1)[0]
np.isclose(vanilla_value, lb_value)

np.True_

In [None]:
# Full result tuple of psdtw_prime_vanilla for query 0 against target 3; this
# is the pair that the comparison loop further down reports as differing.
psdtw_prime_vanilla(query_set[0], target_set[3], 2, 3, 0.1, 1)

(1.9306479431060048,
 1628673,
 array([[  0,  49,   0,  38],
        [ 49, 109,  38, 106],
        [109, 150, 106, 150]]))

In [None]:
# Query 0 against target 3 through psdtw_prime_vanilla_lb.
# NOTE(review): the stored outputs disagree for this pair. psdtw_prime_vanilla
# shows 1.9306... while this call shows 1.9716... with different cuts, so the
# two variants are not equivalent here.
psdtw_prime_vanilla_lb(query_set[0], target_set[3], 2, 3, 0.1, 1)

psdtw_prime_vanilla_lb 3


(1.971639583410792,
 1141890,
 array([[  0,  49,   0,  39],
        [ 49, 109,  39, 107],
        [109, 150, 107, 150]]))

In [46]:
# Query 0 against target 3 through psdtw_prime_vanilla_lb_testing.
# NOTE(review): the stored output (2.0275...) matches neither
# psdtw_prime_vanilla (1.9306...) nor psdtw_prime_vanilla_lb (1.9716...) for
# this pair.
psdtw_prime_vanilla_lb_testing(query_set[0], target_set[3], 2, 3, 0.1, 1)

psdtw_prime_vanilla_lb_testing


(2.027574738758849,
 1014854,
 array([[  0,  49,   0,  37],
        [ 49, 110,  37, 107],
        [110, 150, 107, 150]]))

In [51]:
# Query 0 against target 3 through psdtw_prime_vanilla_lb_testing2.
# NOTE(review): the stored output equals the psdtw_prime_vanilla result for
# this pair in all three tuple elements. The label printed by the call reads
# "psdtw_prime_vanilla_lb_testing", not "...testing2" — check the print
# statement inside ksfdtw.
psdtw_prime_vanilla_lb_testing2(query_set[0], target_set[3], 2, 3, 0.1, 1)

psdtw_prime_vanilla_lb_testing


(1.9306479431060048,
 1628673,
 array([[  0,  49,   0,  38],
        [ 49, 109,  38, 106],
        [109, 150, 106, 150]]))

In [None]:
# (2.027574738758849,
#  1014854,
#  array([[  0,  49,   0,  37],
#         [ 49, 110,  37, 107],
#         [110, 150, 107, 150]]))

In [58]:
def test_assert(a, b):
    assert a == b
    print("Assertion passed")

In [60]:
# Demonstrate the failure path of test_assert. The error is caught and
# displayed so that a Restart & Run All is not aborted at this cell.
try:
    test_assert(1, 5)
except AssertionError as exc:
    print(f"AssertionError: {exc}")

AssertionError: 

In [31]:
# Number of series in the target set.
len(target_set)

50

In [None]:
# Compare the first tuple element returned by both implementations for
# query 0 against the first ten targets and report every pair that disagrees.
for i in range(1):
    for j in range(10):
        vanilla_value = psdtw_prime_vanilla(
            query_set[i], target_set[j], 2, 3, 0.1, 1
        )[0]
        lb_value = psdtw_prime_vanilla_lb(
            query_set[i], target_set[j], 2, 3, 0.1, 1
        )[0]
        if not np.isclose(vanilla_value, lb_value):
            print(f"Values differ at i={i}, j={j}")
# Values differ at i=0, j=3

Values differ at i=0, j=3


SystemError: CPUDispatcher(<function psdtw_prime_vanilla_lb at 0x153a7f740>) returned a result with an exception set

In [25]:
# Side-by-side results of both implementations for query 0 against target 1.
vanilla_result = psdtw_prime_vanilla(query_set[0], target_set[1], 2, 3, 0.1, 1)
lb_result = psdtw_prime_vanilla_lb(query_set[0], target_set[1], 2, 3, 0.1, 1)
vanilla_result, lb_result

((3.8098028757174984,
  1683779,
  array([[  0,  49,   0,  43],
         [ 49, 114,  43, 111],
         [114, 150, 111, 150]])),
 (3.8098028757174984,
  973162,
  array([[  0,  49,   0,  43],
         [ 49, 114,  43, 111],
         [114, 150, 111, 150]])))

In [26]:
%%time
# psdtw_prime_vanilla_lb_cache on the first query/target pair with trailing
# arguments (0.1, 1).
print(psdtw_prime_vanilla_lb_cache(query_set[0], target_set[0], 2, 3, 0.1, 1))

psdtw_prime_vanilla_lb_cache
(2.616443648118869, 857807, array([[  0,  44,   0,  44],
       [ 44, 114,  44, 112],
       [114, 150, 112, 150]]))
CPU times: user 7.55 s, sys: 1.58 s, total: 9.13 s
Wall time: 10.2 s


In [27]:
import datetime

# Stamp the output with the moment this cell was executed.
finished_at = datetime.datetime.now()
print(f"This notebook was last run end-to-end on: {finished_at}\n")
###
###
###

This notebook was last run end-to-end on: 2025-10-22 20:44:31.222554

