In [1]:
import os
import json
import time
import numpy as np
from tqdm import tqdm 
from source.jabba import dtw
from source.ABBA import ABBA
from fABBA import digitize
from fABBA import inverse_digitize
from fABBA import inverse_compress
from sklearn.metrics import mean_squared_error as mse
from fABBA import fABBA
from scipy.io import arff
import matplotlib.pyplot as plt
from source.jabba import JABBA
from software.qabba import QABBA
from source.jabba.storage import compute_storage1, compute_storage2

import warnings
warnings.filterwarnings("ignore")

In [2]:
def preprocess(data):
    """Turn a loaded ARFF dataset into a dense float32 array with NaNs zeroed.

    ``data[0]`` is iterated record by record; the first field of each record is
    itself iterated, and every inner item is converted to a plain list.  The
    nested lists are stacked into one array, cast to ``float32``, and passed
    through ``np.nan_to_num``.
    """
    records = data[0]
    nested = [[list(channel) for channel in record[0]] for record in records]
    as_float32 = np.array(nested).astype(np.float32)
    return np.nan_to_num(as_float32)


def compute_dtw(ts, rts):
    """Sum the DTW distances between matching rows of ``ts`` and ``rts``.

    For each row index in ``range(ts.shape[0])`` the NaN entries of both rows
    are dropped and both rows are cut to their common length before ``dtw`` is
    called.  Rows that have no non-NaN values left on either side are skipped.
    """
    distances = []
    for row in range(ts.shape[0]):
        original = np.array(ts[row])[~np.isnan(ts[row])]
        rebuilt = np.array(rts[row])[~np.isnan(rts[row])]
        common = min(len(original), len(rebuilt))
        if common < 1:
            # Nothing comparable in this row.
            continue
        distances.append(dtw(original[:common], rebuilt[:common]))
    return np.sum(distances)

def compute_mse(ts, rts):
    """Sum the mean squared errors between matching rows of ``ts`` and ``rts``.

    For each row index in ``range(ts.shape[0])`` the NaN entries of both rows
    are dropped and both rows are cut to their common length before ``mse`` is
    called.  Rows that have no non-NaN values left on either side are skipped.
    """
    errors = []
    for row in range(ts.shape[0]):
        original = np.array(ts[row])[~np.isnan(ts[row])]
        rebuilt = np.array(rts[row])[~np.isnan(rts[row])]
        common = min(len(original), len(rebuilt))
        if common < 1:
            # Nothing comparable in this row.
            continue
        errors.append(mse(original[:common], rebuilt[:common]))
    return np.sum(errors)
    
def test_info(multivariate_ts, tol=0.01):
    """Benchmark four symbolic-approximation configurations on every sample.

    Each sample ``multivariate_ts[i]`` is standardised along axis 1 and then
    processed by:

    * ``jabba1`` -- ``JABBA`` with ``init="agg"`` (compress + digitize),
    * ``jabba2`` -- ``JABBA`` with ``init="kmeans"`` and ``k`` set to the number
      of centers found by ``jabba1``; it digitizes the pieces produced by
      ``jabba1`` and is reconstructed from ``jabba1.start_set``,
    * ``qabba1`` / ``qabba2`` -- the same two setups using ``QABBA`` with
      ``bits_for_len=8`` and ``bits_for_inc=12``.

    Parameters
    ----------
    multivariate_ts : array
        Indexed along axis 0 by sample; ``shape[1]`` is used as the number of
        parallel jobs for compression.
    tol : float, default 0.01
        Tolerance forwarded to every ``JABBA`` / ``QABBA`` constructor.

    Returns
    -------
    tuple of 4 tuples
        One tuple per configuration, in the order jabba1, jabba2, qabba1,
        qabba2.  Each is ``(storage, dtw, mse, timing, nm_symbols,
        digit_timing)`` where every element is a list with one entry per
        sample.  ``timing`` is compression time plus digitization time; for the
        k-means variants the compression time is the one measured for the
        corresponding "agg" model, whose pieces they reuse.
    """
    dtw_jabba1 = list()
    mse_jabba1 = list()
    timing_jabba1 = list()
    nm_symbols_jabba1 = list()

    dtw_jabba2 = list()
    mse_jabba2 = list()
    timing_jabba2 = list()
    nm_symbols_jabba2 = list()

    dtw_qabba1 = list()
    mse_qabba1 = list()
    timing_qabba1 = list()
    nm_symbols_qabba1 = list()

    dtw_qabba2 = list()
    mse_qabba2 = list()
    timing_qabba2 = list()
    nm_symbols_qabba2 = list()

    digit_timing_jabba1 = list()
    digit_timing_jabba2 = list()
    digit_timing_qabba1 = list()
    digit_timing_qabba2 = list()

    storage_jabba1 = list()
    storage_jabba2 = list()
    storage_qabba1 = list()
    storage_qabba2 = list()

    for i in tqdm(range(multivariate_ts.shape[0])):
        # Standardise every row of the sample (a row with zero std yields
        # non-finite values here).
        mu = multivariate_ts[i].mean(axis=1)
        std = multivariate_ts[i].std(axis=1)
        norm_ts = ((multivariate_ts[i].T - mu) / std).T

        # ------------------------- JABBA, init="agg" -------------------------
        st_comp = time.time()
        jabba1 = JABBA(tol=tol, init="agg", alpha=0.1, auto_digitize=False, verbose=0)
        collect_pieces = jabba1.parallel_compress(norm_ts, n_jobs=multivariate_ts.shape[1])
        et_comp = time.time()

        st_digit = time.time()
        symbols_jabba1 = jabba1.digitize(norm_ts, collect_pieces)
        et_digit = time.time()
        # Record one digitization time per sample; previously the list was
        # overwritten by a scalar so only the last sample's time survived.
        digit_timing_jabba1.append(et_digit - st_digit)

        storage_jabba1.append(compute_storage1(jabba1.parameters.centers,
                                               np.sum([len(j) for j in symbols_jabba1]),
                                               len(jabba1.start_set),
                                               bits_for_len=32, bits_for_inc=32, bits_for_ts=32
                                               ))

        timing_jabba1.append(et_digit + et_comp - st_digit - st_comp)
        reconst_jabba1 = jabba1.inverse_transform(symbols_jabba1, n_jobs=-1)
        nsb_jabba = jabba1.parameters.centers.shape[0]
        nm_symbols_jabba1.append(nsb_jabba)

        # ------------------------ JABBA, init="kmeans" -----------------------
        # Uses the same number of symbols and the same pieces as jabba1.
        jabba2 = JABBA(tol=tol, init="kmeans", r=1, k=nsb_jabba, verbose=0)

        st_digit = time.time()
        symbols_jabba2 = jabba2.digitize(norm_ts, collect_pieces)
        et_digit = time.time()
        digit_timing_jabba2.append(et_digit - st_digit)

        storage_jabba2.append(compute_storage1(jabba2.parameters.centers,
                                               np.sum([len(j) for j in symbols_jabba2]),
                                               len(jabba1.start_set),
                                               bits_for_len=32, bits_for_inc=32, bits_for_ts=32
                                               )
                              )

        jabba2.return_series_univariate = False

        # et_comp / st_comp still hold jabba1's compression timestamps.
        timing_jabba2.append(et_digit + et_comp - st_digit - st_comp)
        reconst_jabba2 = jabba2.inverse_transform(symbols_jabba2,
                                                  jabba1.start_set,
                                                  n_jobs=-1)
        nm_symbols_jabba2.append(nsb_jabba)

        dtw_jabba1.append(compute_dtw(norm_ts, reconst_jabba1))
        dtw_jabba2.append(compute_dtw(norm_ts, reconst_jabba2))

        mse_jabba1.append(compute_mse(norm_ts, reconst_jabba1))
        mse_jabba2.append(compute_mse(norm_ts, reconst_jabba2))

        # ------------------------- QABBA, init="agg" -------------------------
        st_comp = time.time()
        qabba1 = QABBA(tol=tol, init="agg", alpha=0.1, bits_for_len=8, bits_for_inc=12, auto_digitize=False, verbose=0)
        collect_pieces = qabba1.parallel_compress(norm_ts, n_jobs=multivariate_ts.shape[1])
        et_comp = time.time()

        st_digit = time.time()
        symbols_qabba1 = qabba1.digitize(norm_ts, collect_pieces)
        et_digit = time.time()
        digit_timing_qabba1.append(et_digit - st_digit)

        storage_qabba1.append(compute_storage2(qabba1.parameters.centers,
                                               np.sum([len(j) for j in symbols_qabba1]),
                                               len(qabba1.start_set),
                                               bits_for_len=8, bits_for_inc=12, bits_for_sz=32, bits_for_ts=32
                                               ))

        timing_qabba1.append(et_digit + et_comp - st_digit - st_comp)
        reconst_qabba1 = qabba1.inverse_transform(symbols_qabba1, n_jobs=-1)
        nsb_qabba = qabba1.parameters.centers.shape[0]
        nm_symbols_qabba1.append(nsb_qabba)

        # ------------------------ QABBA, init="kmeans" -----------------------
        # Uses the same number of symbols and the same pieces as qabba1.
        qabba2 = QABBA(tol=tol, init="kmeans", k=nsb_qabba, bits_for_len=8, bits_for_inc=12, verbose=0)

        st_digit = time.time()
        symbols_qabba2 = qabba2.digitize(norm_ts, collect_pieces)
        et_digit = time.time()
        digit_timing_qabba2.append(et_digit - st_digit)

        storage_qabba2.append(compute_storage2(qabba2.parameters.centers,
                                               np.sum([len(j) for j in symbols_qabba2]),
                                               len(qabba1.start_set),
                                               bits_for_len=8, bits_for_inc=12, bits_for_sz=32, bits_for_ts=32
                                               ))

        qabba2.return_series_univariate = False

        # et_comp / st_comp still hold qabba1's compression timestamps.
        timing_qabba2.append(et_digit + et_comp - st_digit - st_comp)
        reconst_qabba2 = qabba2.inverse_transform(symbols_qabba2,
                                                  qabba1.start_set,
                                                  n_jobs=-1)
        nm_symbols_qabba2.append(nsb_qabba)

        dtw_qabba1.append(compute_dtw(norm_ts, reconst_qabba1))
        dtw_qabba2.append(compute_dtw(norm_ts, reconst_qabba2))

        mse_qabba1.append(compute_mse(norm_ts, reconst_qabba1))
        mse_qabba2.append(compute_mse(norm_ts, reconst_qabba2))

    return (storage_jabba1, dtw_jabba1, mse_jabba1, timing_jabba1, nm_symbols_jabba1, digit_timing_jabba1),\
           (storage_jabba2, dtw_jabba2, mse_jabba2, timing_jabba2, nm_symbols_jabba2, digit_timing_jabba2), \
           (storage_qabba1, dtw_qabba1, mse_qabba1, timing_qabba1, nm_symbols_qabba1, digit_timing_qabba1), \
           (storage_qabba2, dtw_qabba2, mse_qabba2, timing_qabba2, nm_symbols_qabba2, digit_timing_qabba2)

In [3]:
# Root folder holding one sub-folder per dataset.
_dir = 'UEA2018'

# Datasets to benchmark ('Heartbeat' and 'StandWalkJump' are currently left out).
test_data = [
    'AtrialFibrillation',
    'BasicMotions',
    'CharacterTrajectories',
    'LSST',
    'Epilepsy',
    'NATOPS',
    'UWaveGestureLibrary',
    'JapaneseVowels',
]

# One tolerance per dataset, in the same order as `test_data`
# (0.0001 and 0.001 were also tried).
tols = [0.01] * len(test_data)

In [4]:
# Run the benchmark on every dataset in `test_data`; the TRAIN and TEST splits
# are stacked so that each dataset is evaluated as a whole.
jabba1_info, jabba2_info, qabba1_info, qabba2_info = [], [], [], []
i = 0  # position in `tols`, advanced once per dataset
for filename in test_data:
    dataset_dir = os.path.join(_dir, filename)

    data = arff.loadarff(os.path.join(dataset_dir, filename+'_TRAIN.arff'))
    multivariate_ts_train = preprocess(data)
    data = arff.loadarff(os.path.join(dataset_dir, filename+'_TEST.arff'))
    multivariate_ts_test = preprocess(data)

    multivariate_ts = np.vstack((multivariate_ts_train, multivariate_ts_test)).astype(np.float32)
    print(filename, multivariate_ts.shape)

    _jabba1_info, _jabba2_info, _qabba1_info, _qabba2_info = test_info(multivariate_ts, tol=tols[i])
    for collected, latest in (
        (jabba1_info, _jabba1_info),
        (jabba2_info, _jabba2_info),
        (qabba1_info, _qabba1_info),
        (qabba2_info, _qabba2_info),
    ):
        collected.append(latest)

    i += 1

AtrialFibrillation (30, 2, 640)


100%|██████████| 30/30 [30:56<00:00, 61.87s/it]


BasicMotions (80, 6, 100)


100%|██████████| 80/80 [11:32<00:00,  8.65s/it]


CharacterTrajectories (2858, 3, 182)


100%|██████████| 2858/2858 [8:37:53<00:00, 10.87s/it]  


LSST (4925, 6, 36)


100%|██████████| 4925/4925 [6:12:35<00:00,  4.54s/it]  


Epilepsy (275, 3, 206)


100%|██████████| 275/275 [41:38<00:00,  9.08s/it]


NATOPS (360, 24, 51)


100%|██████████| 360/360 [42:15<00:00,  7.04s/it]


UWaveGestureLibrary (440, 3, 315)


100%|██████████| 440/440 [4:42:31<00:00, 38.53s/it]  


JapaneseVowels (640, 12, 29)


100%|██████████| 640/640 [41:31<00:00,  3.89s/it]


In [5]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy arrays and scalars to built-in types."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        NumPy arrays become nested lists, NumPy floating scalars become
        ``float`` and NumPy integer scalars become ``int``.  Anything else is
        handed to the base class ``default``.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        return super().default(obj)


In [6]:
# Persist the collected benchmark results, one JSON file per configuration.
# The output folder is created first so that a missing "results" directory
# cannot abort the save after the long benchmark run.
os.makedirs("results", exist_ok=True)

for out_path, info in (
    ("results/jabba1_info2.json", jabba1_info),
    ("results/jabba2_info2.json", jabba2_info),
    ("results/qabba1_info2.json", qabba1_info),
    ("results/qabba2_info2.json", qabba2_info),
):
    with open(out_path, 'w') as f:
        # NpEncoder converts the NumPy scalars/arrays inside the results.
        json.dump(info, f, cls=NpEncoder, indent=2)