In [2]:
# Imported Python files do not automatically reload after you modify them
# M1: Automatically Reload
%load_ext autoreload
%autoreload 2
# M2: Manual Reload
# import importlib
# import ksfdtw
# importlib.reload(ksfdtw)

# https://stackoverflow.com/questions/20309456/how-do-i-call-a-function-from-another-py-file
from ksfdtw import *

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tslearn.metrics import dtw, lb_keogh
from sklearn.metrics import accuracy_score
from tqdm import tqdm

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [4]:

import numpy as np

from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from sklearn.metrics import accuracy_score
from tslearn.metrics import dtw as tsln_dtw


In [5]:
# Open the preprocessed GunPoint archive. allow_pickle=True lets NumPy unpickle
# object arrays stored in the file, so only point this at a trusted file.
data = np.load("../data/gunpoint_preprocessed_with_cuts.npz", allow_pickle=True)

# Training split; X_train is scaled to 50.
X_train, X_train_proc, X_train_proc_noisy, y_train = (
    data[key]
    for key in ("X_train", "X_train_proc", "X_train_proc_noisy", "y_train")
)
# Test split, same four kinds of entries as the training split.
X_test, X_test_proc, X_test_proc_noisy, y_test = (
    data[key]
    for key in ("X_test", "X_test_proc", "X_test_proc_noisy", "y_test")
)

# The four *_cutting_* entries are converted from arrays to plain Python lists.
train_cutting_orig, train_cutting_final, test_cutting_orig, test_cutting_final = (
    data[key].tolist()
    for key in (
        "train_cutting_orig",
        "train_cutting_final",
        "test_cutting_orig",
        "test_cutting_final",
    )
)


In [6]:
# Euclidean distance between two series of the same (or broadcastable) shape
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    Parameters
    ----------
    a, b : array-like
        Numeric sequences or arrays. They are converted to float64 arrays, so
        plain lists are accepted and unsigned-integer inputs cannot wrap
        around when subtracted.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((a - b) ** 2))``.
    """
    # Convert first: `a - b` on unsigned integer arrays wraps modulo 2**k,
    # and on Python lists it raises TypeError.
    diff = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

In [7]:
# Query pair taken from index 0 of the *training* arrays:
# the original series and its processed, noisy counterpart.
query_ts = X_train[0]
query_ts_noisy = X_train_proc_noisy[0]

In [8]:

# m: length of the original query, n: length of the noisy query.
m, n = len(query_ts), len(query_ts_noisy)
l = 2
# L = floor(min(ceil(l * m), n) / 3), computed in two steps.
L = min(np.ceil(l * m), n)
L = int(np.floor(L / 3))
# ps_distance_p(query_ts, X_train_proc[0], 2, 0.1, L, 3) 

In [16]:
def test_querying_ed_dtw(query_ts_idx, dist_method, psdtw_limit=5):
    """Find and plot the nearest training series for one noisy query.

    The query is ``X_train_proc_noisy[query_ts_idx]``; candidates are taken
    from ``X_train``. The best match index and its distance are printed, and
    the query, its original version and the best match are plotted together.

    Parameters
    ----------
    query_ts_idx : int
        Index of the query in ``X_train_proc_noisy`` (its original version is
        read from ``X_train`` at the same index).
    dist_method : {"dtw", "psdtw"}
        ``"dtw"`` scores every series in ``X_train`` with ``tsln_dtw``.
        ``"psdtw"`` scores the leading ``psdtw_limit`` series with
        ``ps_distance_p(query, x, 2, 0.1, L, 3)``, using the module-level ``L``.
    psdtw_limit : int or None, default 5
        Number of leading ``X_train`` series searched by ``"psdtw"``.
        ``None`` searches all of them. Ignored for ``"dtw"``.

    Raises
    ------
    ValueError
        If ``dist_method`` is neither ``"dtw"`` nor ``"psdtw"``.
    """
    query_ts = X_train_proc_noisy[query_ts_idx]
    query_ts_original = X_train[query_ts_idx]
    if dist_method == "dtw":
        distances = np.array([tsln_dtw(query_ts, x) for x in X_train])
    elif dist_method == "psdtw":
        # Only element 0 of each ps_distance_p result (the distance) is needed here.
        distances = np.array([
            ps_distance_p(query_ts, x, 2, 0.1, L, 3)[0]
            for x in X_train[:psdtw_limit]
        ])
    else:
        raise ValueError("Must be either dtw or psdtw")

    # Find best match; candidates are a prefix of X_train, so the index maps directly.
    best_idx = np.argmin(distances)
    best_match = X_train[best_idx]

    print(f"Best match index: {best_idx}")
    print(f"Distance: {distances[best_idx]:.4f}")

    fig, ax = plt.subplots()
    ax.plot(query_ts, label="Query", linewidth=2, color="red")
    ax.plot(query_ts_original, label="Original version of Query", linestyle="--", color="green")
    ax.plot(best_match, label="Best Match", linestyle="--", color="blue")
    ax.legend()
    # The title names the distance actually used instead of a fixed "Euclidean Distance".
    ax.set_title(f"1-NN Query ({dist_method})")
    plt.show()



In [17]:
# Run the "psdtw" query for query indices 0 through 5.
for i in range(6):
    test_querying_ed_dtw(i, "psdtw")


Total iterations: 136292

Total time: 14.18 seconds


KeyboardInterrupt: 

In [47]:
# Query X_train_proc[0] against every series in X_train using the
# *_without_prune variant with the lb_dummy lower bound.
results0_no_prune = [
    ps_distance_p_without_prune(
        X_train_proc[0], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_dummy,
    )
    for candidate in X_train
]
# dtype=object stores the results without forcing a rectangular numeric array.
np.savez(
    "../results/results0_no_prune.npz",
    results=np.array(results0_no_prune, dtype=object),
)

Total iterations: 224885

Total time: 24.67 seconds
Total iterations: 219366

Total time: 22.80 seconds
Total iterations: 218925

Total time: 22.65 seconds
Total iterations: 215734

Total time: 22.37 seconds
Total iterations: 218750

Total time: 22.63 seconds
Total iterations: 209487

Total time: 21.60 seconds
Total iterations: 342120

Total time: 35.02 seconds
Total iterations: 333125

Total time: 33.35 seconds
Total iterations: 315655

Total time: 32.22 seconds
Total iterations: 256681

Total time: 26.43 seconds
Total iterations: 237861

Total time: 24.51 seconds
Total iterations: 261662

Total time: 26.84 seconds
Total iterations: 277699

Total time: 28.51 seconds
Total iterations: 248408

Total time: 25.84 seconds
Total iterations: 227854

Total time: 23.70 seconds
Total iterations: 250364

Total time: 26.08 seconds
Total iterations: 217568

Total time: 22.98 seconds
Total iterations: 224788

Total time: 23.35 seconds
Total iterations: 250369

Total time: 26.62 seconds
Total iterat

In [60]:
# Element 2 of every no-prune result is its iteration count.
iterations = np.array(
    [result[2] for result in results0_no_prune]
)

In [61]:
# Total iteration count over `iterations`; np.sum reduces the array in NumPy
# instead of looping element by element with the builtin sum.
b = np.sum(iterations)
b

np.int64(13082814)

In [57]:
# Element 2 of every results0 entry is its iteration count.
iterations = np.array(
    [result[2] for result in results0]
)

In [58]:
# Total iteration count over `iterations`; np.sum reduces the array in NumPy
# instead of looping element by element with the builtin sum.
a = np.sum(iterations)
a

np.int64(7107385)

In [63]:
# Relative reduction in total iterations: b is the iteration total taken from
# results0_no_prune and a is the iteration total taken from results0.
(b-a)/b

np.float64(0.45673881781090825)

In [10]:
# Query X_train_proc[0] against every series in X_train with the lb_kim_fl lower bound.
results0 = [
    ps_distance_p(
        X_train_proc[0], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]

Total iterations: 133029

Total time: 22.08 seconds


KeyboardInterrupt: 

In [10]:
# Queries X_train_proc[0], [1] and [2], each scored against every series in
# X_train with the lb_kim_fl lower bound. Kept as three separate statements so
# an interrupted run still leaves the finished lists bound.
results0 = [
    ps_distance_p(
        X_train_proc[0], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]
results1 = [
    ps_distance_p(
        X_train_proc[1], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]
results2 = [
    ps_distance_p(
        X_train_proc[2], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]


Total iterations: 133029

Total time: 22.27 seconds
Total iterations: 129271

Total time: 21.83 seconds
Total iterations: 125679

Total time: 21.48 seconds
Total iterations: 122577

Total time: 21.18 seconds
Total iterations: 124093

Total time: 21.30 seconds
Total iterations: 118940

Total time: 20.76 seconds
Total iterations: 192436

Total time: 28.57 seconds
Total iterations: 198078

Total time: 29.07 seconds
Total iterations: 157025

Total time: 24.89 seconds
Total iterations: 126761

Total time: 21.60 seconds
Total iterations: 121751

Total time: 21.15 seconds
Total iterations: 130210

Total time: 22.00 seconds
Total iterations: 147634

Total time: 23.88 seconds
Total iterations: 123931

Total time: 21.53 seconds
Total iterations: 133736

Total time: 23.33 seconds
Total iterations: 127290

Total time: 22.38 seconds
Total iterations: 122441

Total time: 21.89 seconds
Total iterations: 130877

Total time: 22.90 seconds
Total iterations: 125493

Total time: 21.70 seconds
Total iterat

In [None]:
# Persist results0, results1 and results2, one archive per query,
# each stored as an object array under the key "results".
np.savez(
    "../results/no-noise-results0.npz",
    results=np.array(results0, dtype=object),
)
np.savez(
    "../results/no-noise-results1.npz",
    results=np.array(results1, dtype=object),
)
np.savez(
    "../results/no-noise-results2.npz",
    results=np.array(results2, dtype=object),
)

In [15]:
# Query X_train_proc[3] against every series in X_train, then save it.
results3 = [
    ps_distance_p(
        X_train_proc[3], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]
np.savez(
    "../results/no-noise-results3.npz",
    results=np.array(results3, dtype=object),
)

# Same for X_train_proc[4].
results4 = [
    ps_distance_p(
        X_train_proc[4], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]
np.savez(
    "../results/no-noise-results4.npz",
    results=np.array(results4, dtype=object),
)

Total iterations: 135972

Total time: 22.51 seconds
Total iterations: 119800

Total time: 20.79 seconds
Total iterations: 118873

Total time: 20.68 seconds
Total iterations: 110421

Total time: 19.81 seconds
Total iterations: 102862

Total time: 19.04 seconds
Total iterations: 105362

Total time: 19.36 seconds
Total iterations: 189146

Total time: 28.22 seconds
Total iterations: 180896

Total time: 27.34 seconds
Total iterations: 151609

Total time: 26.00 seconds
Total iterations: 140574

Total time: 23.15 seconds
Total iterations: 118941

Total time: 20.82 seconds
Total iterations: 143901

Total time: 23.60 seconds
Total iterations: 165640

Total time: 25.77 seconds
Total iterations: 137255

Total time: 22.81 seconds
Total iterations: 124207

Total time: 21.73 seconds
Total iterations: 139335

Total time: 23.16 seconds
Total iterations: 121475

Total time: 21.12 seconds
Total iterations: 128002

Total time: 21.89 seconds
Total iterations: 136767

Total time: 22.90 seconds
Total iterat

In [69]:
# NOTE(review): the file name says "lb-shen", but the visible cells compute
# results0 with lb_kim_fl — confirm which lower bound produced this results0.
np.savez(
    "../results/no-noise-results0_lb-shen.npz",
    results=np.array(results0, dtype=object),
)

In [None]:
# NOTE(review): results0 is rebound to the same object as results2 before
# saving, so results0.npz holds whatever results2 currently is — confirm intended.
results0 = results2
np.savez(
    "../results/results0.npz",
    results=np.array(results0, dtype=object),
)


In [None]:
# Query the noisy series X_train_proc_noisy[1] against every series in X_train
# with the lb_kim_fl lower bound.
results1 = [
    ps_distance_p(
        X_train_proc_noisy[1], candidate, 2, 0.1, L, 3,
        distance_method='dtw', lower_bound_method=lb_kim_fl,
    )
    for candidate in X_train
]
# Save the list just computed (results1). The earlier version wrote results0
# into results1.npz, so the file did not match its name.
np.savez(
    "../results/results1.npz",
    results=np.array(results1, dtype=object),
)

Total iterations: 152466

Total time: 24.15 seconds
Total iterations: 139532

Total time: 22.78 seconds
Total iterations: 147373

Total time: 23.53 seconds
Total iterations: 140806

Total time: 22.82 seconds
Total iterations: 125768

Total time: 21.26 seconds
Total iterations: 132377

Total time: 21.85 seconds
Total iterations: 219917

Total time: 31.28 seconds
Total iterations: 227713

Total time: 32.01 seconds
Total iterations: 159271

Total time: 25.05 seconds
Total iterations: 169541

Total time: 26.63 seconds
Total iterations: 148072

Total time: 23.70 seconds
Total iterations: 171168

Total time: 26.09 seconds
Total iterations: 197002

Total time: 28.68 seconds
Total iterations: 166770

Total time: 25.59 seconds
Total iterations: 140041

Total time: 22.69 seconds
Total iterations: 168435

Total time: 25.74 seconds
Total iterations: 148758

Total time: 24.62 seconds
Total iterations: 144654

Total time: 23.18 seconds
Total iterations: 165144

Total time: 25.67 seconds
Total iterat

In [None]:
# results2 = [ps_distance_p(X_train_proc[2], x, 2, 0.1, L, 3, distance_method='dtw', lower_bound_method=lb_kim_fl) for x in X_train]
# # np.savez("../results/results2.npz", results=np.array(results2, dtype=object))
# results3 = [ps_distance_p(X_train_proc_noisy[3], x, 2, 0.1, L, 3, distance_method='dtw', lower_bound_method=lb_kim_fl) for x in X_train]
# np.savez("../results/results3.npz", results=np.array(results3, dtype=object))
# results4 = [ps_distance_p(X_train_proc_noisy[4], x, 2, 0.1, L, 3, distance_method='dtw', lower_bound_method=lb_kim_fl) for x in X_train]
# np.savez("../results/results4.npz", results=np.array(results4, dtype=object))
# # results0 = [ps_distance_p(X_train_proc_noisy[0], x, 2, 0.1, L, 3, distance_method='dtw', lower_bound_method=lb_shen) for x in X_train]



NameError: name 'X_train' is not defined

In [66]:
# Split every results0 entry into its three fields:
# element 0 -> distances, element 1 -> cuts, element 2 -> iterations.
distances = np.array([entry[0] for entry in results0])
cuts = [entry[1] for entry in results0]
iterations = np.array([entry[2] for entry in results0])

In [None]:
# Store `results` as an object array; without dtype=object NumPy rejects the
# inhomogeneous entries (see the ValueError output of the cells below).
np.savez(
    "../data/results.npz",
    results=np.array(results, dtype=object),
)


In [None]:
# np.savez("../data/results.npz", results=results)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (50, 3) + inhomogeneous part.

In [None]:
# np.savez("../data/results.npz",
    # results = results)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (50, 3) + inhomogeneous part.

In [67]:
# Position of the smallest value in `distances`, i.e. the closest candidate.
best_idx = np.argmin(distances)
best_idx

np.int64(0)

In [68]:
# Print the total of all values in `iterations`.
print(np.sum(iterations))



4984336


In [None]:
# Read the saved results back. np.load on an .npz returns a file-backed NpzFile,
# so use it as a context manager to close the archive once the array is read.
# allow_pickle=True unpickles the object array — only use on trusted files.
with np.load("../results/results1.npz", allow_pickle=True) as loaded:
    results = loaded["results"]

In [18]:
# Distances are element 0 of every results4 entry; report the index of the smallest.
distances = np.array([entry[0] for entry in results4])
# cuts = [r[1] for r in results]
# iterations = np.array([r[2] for r in results])  
best_idx = np.argmin(distances)
best_idx

np.int64(4)

np.int64(0)

In [None]:
# Display the full `distances` array.
distances

array([ 1.25034484,  1.29817677,  1.64221159,  1.51172763,  1.62919089,
        1.5349059 ,  7.02067051, 11.1848986 ,  5.24360274,  4.16472302,
        3.03769362,  3.92132728,  4.42737663,  3.92668811,  1.6041911 ,
        3.65408967,  1.77893272,  1.36668177,  3.57454898,  5.89536633,
        2.35162312,  3.30358593,  3.83719064, 10.4894105 ,  1.76833791,
        3.93123432,  3.9244545 ,  3.76499981,  5.64646358,  6.84661703,
        2.84181071,  7.5659781 ,  4.60589786,  1.59839608,  1.52662152,
        1.69022669,  6.35265813,  1.70249262, 10.2323002 ,  6.1913796 ,
        1.35151151,  4.0281034 ,  3.24874207,  3.23798151,  5.8800925 ,
        1.46031656,  1.67935595,  1.49279246,  1.43148067,  8.23074155])