In [1]:
# Imported Python files do not automatically reload after you modify them
# M1: Automatically Reload
%load_ext autoreload
%autoreload 2
# M2: Manual Reload
# import importlib
# import ksfdtw
# importlib.reload(ksfdtw)

# https://stackoverflow.com/questions/20309456/how-do-i-call-a-function-from-another-py-file
from ksfdtw import *

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tslearn.metrics import dtw, lb_keogh
from sklearn.metrics import accuracy_score
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from tslearn.metrics import dtw as tsln_dtw
from tqdm import tqdm

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [None]:
# # A neat way to load the dataset
# data = np.load("../data/gunpoint_preprocessed.npz")
# data_dict = {key: data[key] for key in data.files}


In [32]:
# An older, explicit way to load the dataset: one variable per array stored in
# the "gunpoint_preprocessed.npz" archive.
# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. Only load archives that were produced locally.
# The archive is opened in a `with` block so the underlying file handle is closed
# once every array has been read into memory.
with np.load("../data/gunpoint_preprocessed.npz", allow_pickle=True) as data:
    # Training split.
    X_train_scaled = data["X_train_scaled"]
    X_train_proc = data["X_train_proc"]
    X_train_proc_noisy = data["X_train_proc_noisy"]
    y_train = data["y_train"]
    # Test split.
    X_test_scaled = data["X_test_scaled"]
    X_test_proc = data["X_test_proc"]
    X_test_proc_noisy = data["X_test_proc_noisy"]
    y_test = data["y_test"]
    # Cutting information is converted from NumPy arrays to plain Python lists.
    train_cutting_orig = data["train_cutting_orig"].tolist()
    train_cutting_final = data["train_cutting_final"].tolist()
    test_cutting_orig = data["test_cutting_orig"].tolist()
    test_cutting_final = data["test_cutting_final"].tolist()


In [12]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two sequences.

    Both inputs are converted to floating-point NumPy arrays first, so plain
    Python lists are accepted and unsigned-integer inputs cannot wrap around
    when subtracted.

    Parameters
    ----------
    a, b : array-like
        Numeric sequences whose shapes are equal or broadcast-compatible.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [None]:
def test_querying_ed_dtw(query_ts_idx, dist_method, candidates=None):
    """Run a 1-nearest-neighbour query for one noisy training series and plot it.

    The query is ``X_train_proc_noisy[query_ts_idx]``. It is compared against
    the series in ``candidates`` with the chosen distance, the closest
    candidate is reported on stdout, and the query, its reference version
    (``candidates[query_ts_idx]``) and the best match are plotted together.

    Parameters
    ----------
    query_ts_idx : int
        Index of the query series in ``X_train_proc_noisy``.
    dist_method : {"dtw", "psdtw"}
        ``"dtw"`` uses ``tsln_dtw``; ``"psdtw"`` uses ``ps_distance_p`` and
        takes element 0 of each returned result as the distance.
    candidates : sequence of array-like, optional
        Series to search. Defaults to ``X_train_scaled``.
        NOTE(review): the original code referenced an ``X_train`` variable that
        is never defined in this notebook; ``X_train_scaled`` is assumed to be
        the intended reference set -- confirm.

    Raises
    ------
    ValueError
        If ``dist_method`` is neither ``"dtw"`` nor ``"psdtw"``.

    Notes
    -----
    The ``"psdtw"`` branch reads a notebook-level variable ``L`` that must be
    defined before this function is called.
    """
    if candidates is None:
        candidates = X_train_scaled

    query_ts = X_train_proc_noisy[query_ts_idx]
    query_ts_original = candidates[query_ts_idx]

    if dist_method == "dtw":
        distances = np.array([tsln_dtw(query_ts, x) for x in candidates])
    elif dist_method == "psdtw":
        # NOTE(review): only the first 5 candidates are searched here (kept from
        # the original, presumably to keep the run short), so a query with
        # index >= 5 can never retrieve its own reference series.
        ps_results = [ps_distance_p(query_ts, x, 2, 0.1, L, 3) for x in candidates[:5]]
        distances = np.array([r[0] for r in ps_results])
    else:
        raise ValueError("Must be either dtw or psdtw")

    # Find best match
    best_idx = np.argmin(distances)
    best_match = candidates[best_idx]

    print(f"Best match index: {best_idx}")
    print(f"Distance: {distances[best_idx]:.4f}")

    fig, ax = plt.subplots()
    ax.plot(query_ts, label="Query", linewidth=2, color="red")
    ax.plot(query_ts_original, label="Original version of Query", linestyle="--", color="green")
    ax.plot(best_match, label="Best Match", linestyle="--", color="blue")
    ax.legend()
    # The title names the distance that was actually used for the query.
    ax.set_title(f"1-NN Query ({dist_method})")
    plt.show()


# Query the first six noisy training series (top-level driver loop; it must not
# be indented into the function body above).
for i in range(0, 6):
    test_querying_ed_dtw(i, "psdtw")



In [None]:

# Derive the length parameter L from the lengths of a query series and its
# noisy counterpart: one third (rounded down) of min(ceil(l * m), n).
# NOTE(review): query_ts and query_ts_noisy are not assigned at notebook level
# in any visible cell, so this cell relies on leftover kernel state -- confirm
# which series they are meant to be.
m = len(query_ts)
n = len(query_ts_noisy)
l = 2
L = int(min(np.ceil(l * m), n) // 3)

In [75]:
# Compare each of the first 50 processed training series against the first 50
# scaled training series with ps_distance_p (DTW distance, lb_kim_fl passed as
# the lower-bound method). results[i][j] holds whatever ps_distance_p returns
# for query i and candidate j. Rows are appended one query at a time, so an
# interrupted run still leaves the completed rows in `results`.
results = []
results_noisy = []
for query_idx in range(50):  # X_train_proc.shape[0]
    query = X_train_proc[query_idx]
    results.append([
        ps_distance_p(query, candidate, 2, 0.1, 10, 3,
                      distance_method='dtw', lower_bound_method=lb_kim_fl)
        for candidate in X_train_scaled[:50]
    ])
# Disabled: the same experiment with the noisy queries, plus saving both runs.
# NOTE(review): the disabled loop appends to `results`, not `results_noisy`.
# for i in range(0,50): # X_train_proc.shape[0]
#     results.append([ps_distance_p(X_train_proc_noisy[i], x, 2, 0.1, 10, 3, distance_method='dtw', lower_bound_method=lb_kim_fl) for x in X_train_scaled])
# np.savez("../results/results.npz", results=np.array(results, dtype=object))
# np.savez("../results/results_noisy.npz", results=np.array(results_noisy, dtype=object))

KeyboardInterrupt: 

In [None]:
# Repeat the 50-query experiment against every scaled training series twice:
#   1. ps_distance_p with lb_shen passed as the lower-bound method;
#   2. ps_distance_p_without_prune with lb_dummy passed as the lower-bound method.
# Each run is collected in its own list so that the archives written below
# contain the matching results and the earlier `results` list is left untouched.
results_lb_shen = []
results_no_prune = []
for i in range(0, 50):  # X_train_proc.shape[0]
    results_lb_shen.append([
        ps_distance_p(X_train_proc[i], x, 2, 0.1, 10, 3,
                      distance_method='dtw', lower_bound_method=lb_shen)
        for x in X_train_scaled
    ])
for i in range(0, 50):  # X_train_proc.shape[0]
    results_no_prune.append([
        ps_distance_p_without_prune(X_train_proc[i], x, 2, 0.1, 10, 3,
                                    distance_method='dtw', lower_bound_method=lb_dummy)
        for x in X_train_scaled
    ])
# dtype=object keeps each per-pair result as an opaque Python object.
np.savez("../results/results_lb_shen.npz", results=np.array(results_lb_shen, dtype=object))
np.savez("../results/results_no_prune.npz", results=np.array(results_no_prune, dtype=object))

In [73]:
# Sanity check on the first (up to) 10 queries: the nearest candidate of query i
# should be candidate i itself. Element 0 of each per-pair result is used as the
# distance. The range is capped at len(results) so a partially filled `results`
# (e.g. after an interrupted run) does not raise IndexError.
for i in range(min(10, len(results))):
    distances = np.array([r[0] for r in results[i]])
    best_idx = np.argmin(distances)
    if i != best_idx:
        print(i, "cannot retrieve the original time series")
        

In [9]:
###