In [1]:
import os
import json
import time
import numpy as np
from tqdm import tqdm 
from source.jabba import dtw
from source.ABBA import ABBA
from fABBA import digitize
from fABBA import inverse_digitize
from fABBA import inverse_compress
from sklearn.metrics import mean_squared_error as mse
from fABBA import fABBA
from scipy.io import arff
import matplotlib.pyplot as plt
from fABBA import JABBA
from software.qabba import QABBA
from source.jabba.storage import compute_storage1, compute_storage2, compute_storage1_sep, compute_storage2_sep

import warnings
warnings.filterwarnings("ignore")

In [2]:
def preprocess(data):
    """Turn a loaded ARFF result into a dense float32 array without NaNs.

    Only ``data[0]`` is read. For every record in it, the record's first
    field is iterated and each item of that field is converted to a list,
    giving a nested list of the form ``[record][row][value]``.
    (Presumably rows are channels and values are time steps - TODO confirm
    against the ARFF files.)

    Parameters
    ----------
    data : sequence
        Object whose first element is an iterable of records, e.g. the
        tuple returned by ``scipy.io.arff.loadarff``.

    Returns
    -------
    numpy.ndarray
        float32 array built from the nested list, passed through
        ``np.nan_to_num`` (NaN becomes 0).
    """
    nested = [[list(row) for row in record[0]] for record in data[0]]
    dense = np.array(nested).astype(np.float32)
    return np.nan_to_num(dense)



def compute_err(ts, rts):
    """Return the mean squared difference between ``ts`` and ``rts``.

    The squared element-wise differences are summed and divided by the
    number of entries of ``ts`` (the product of ``ts.shape``).

    Parameters
    ----------
    ts : numpy.ndarray
        Reference array; its shape determines the normalisation.
    rts : numpy.ndarray
        Array compared against ``ts`` (subtracted element-wise).
    """
    residual = ts - rts
    squared_total = np.sum(residual ** 2)
    n_entries = np.prod(ts.shape)
    return squared_total / n_entries

    
    
def _storage_record(size_centers, size_chr, size_others):
    """Pack the three storage components into the dict layout of the results.

    Every value is wrapped in a single-element list, matching the structure
    that is later dumped to JSON.
    """
    return {'centers': [size_centers], 'strs': [size_chr], 'others': [size_others]}


def _benchmark_pair(model_cls, storage_fn, multivariate_ts, tol,
                    agg_kwargs, kmeans_kwargs, storage_kwargs):
    """Benchmark one model class with ``init="agg"`` and then ``init="kmeans"``.

    The first model compresses and digitizes ``multivariate_ts``. The second
    model never runs compression itself: it digitizes the pieces produced by
    the first model, uses the first model's number of centers as ``k`` and
    the first model's ``start_set`` for reconstruction.

    Parameters
    ----------
    model_cls : class
        ``QABBA`` or ``JABBA``.
    storage_fn : callable
        ``compute_storage2_sep`` or ``compute_storage1_sep``; must return
        three values (centers size, string size, other size).
    multivariate_ts : numpy.ndarray
        Input data; ``shape[1]`` is used as ``n_jobs`` for compression.
    tol : float
        Tolerance forwarded to both model constructors.
    agg_kwargs, kmeans_kwargs : dict
        Extra constructor keywords for the first / second model.
    storage_kwargs : dict
        Bit-width keywords forwarded to ``storage_fn``.

    Returns
    -------
    tuple
        ``(info_agg, info_kmeans)``, each a tuple
        ``(storage dict, [error], [total time], [n symbols], digitize time)``.
    """
    # perf_counter is a monotonic high-resolution clock, so the measured
    # durations cannot be distorted by system clock adjustments.
    st_comp = time.perf_counter()
    model1 = model_cls(tol=tol, init="agg", alpha=0.005, auto_digitize=False,
                       verbose=0, **agg_kwargs)
    collect_pieces = model1.parallel_compress(multivariate_ts,
                                              n_jobs=multivariate_ts.shape[1])
    comp_time = time.perf_counter() - st_comp

    st_digit = time.perf_counter()
    symbols1 = model1.digitize(multivariate_ts, collect_pieces)
    digit_time1 = time.perf_counter() - st_digit

    storage1 = _storage_record(*storage_fn(model1.parameters.centers,
                                           np.sum([len(j) for j in symbols1]),
                                           len(model1.start_set),
                                           **storage_kwargs))

    reconst1 = model1.inverse_transform(symbols1, n_jobs=-1)
    n_symbols = model1.parameters.centers.shape[0]

    # Second variant: same pieces, k-means digitization with the same number
    # of symbols the first variant ended up with.
    model2 = model_cls(tol=tol, init="kmeans", k=n_symbols, verbose=0,
                       **kmeans_kwargs)

    st_digit = time.perf_counter()
    symbols2 = model2.digitize(multivariate_ts, collect_pieces)
    digit_time2 = time.perf_counter() - st_digit

    storage2 = _storage_record(*storage_fn(model2.parameters.centers,
                                           np.sum([len(j) for j in symbols2]),
                                           len(model1.start_set),
                                           **storage_kwargs))

    model2.return_series_univariate = False
    reconst2 = model2.inverse_transform(symbols2, model1.start_set, n_jobs=-1)

    # Both reconstructions are reshaped by the first model, the only one
    # that ran compression on the input.
    err1 = compute_err(multivariate_ts, model1.recast_shape(reconst1))
    err2 = compute_err(multivariate_ts, model1.recast_shape(reconst2))

    # The reported total time of the second variant includes the shared
    # compression time of the first one, as the pieces are reused.
    info1 = (storage1, [err1], [comp_time + digit_time1], [n_symbols], digit_time1)
    info2 = (storage2, [err2], [comp_time + digit_time2], [n_symbols], digit_time2)
    return info1, info2


def test_info(multivariate_ts, tol=0.01):
    """Benchmark QABBA and JABBA (agg and k-means variants) on one dataset.

    QABBA is run first, then JABBA. For each, storage sizes, reconstruction
    error, timings and the number of symbols are collected.

    Parameters
    ----------
    multivariate_ts : numpy.ndarray
        Data to symbolize; ``shape[1]`` is used as the number of parallel
        jobs. (Presumably shaped (samples, channels, time steps) - TODO
        confirm.)
    tol : float, default 0.01
        Tolerance passed to every model.

    Returns
    -------
    tuple
        ``(jabba1, jabba2, qabba1, qabba2)``; each entry is
        ``(storage, err, timing, nm_symbols, digit_timing)`` where
        ``storage`` is a dict with keys ``'centers'``, ``'strs'`` and
        ``'others'`` (single-element lists), ``err``, ``timing`` and
        ``nm_symbols`` are single-element lists and ``digit_timing`` is a
        float number of seconds.
    """
    qabba1_info, qabba2_info = _benchmark_pair(
        QABBA, compute_storage2_sep, multivariate_ts, tol,
        agg_kwargs=dict(bits_for_len=8, bits_for_inc=12),
        kmeans_kwargs=dict(bits_for_len=8, bits_for_inc=12),
        storage_kwargs=dict(bits_for_len=8, bits_for_inc=12,
                            bits_for_sz=32, bits_for_ts=32),
    )

    jabba1_info, jabba2_info = _benchmark_pair(
        JABBA, compute_storage1_sep, multivariate_ts, tol,
        agg_kwargs=dict(),
        kmeans_kwargs=dict(r=1),
        storage_kwargs=dict(bits_for_len=32, bits_for_inc=32, bits_for_ts=32),
    )

    return jabba1_info, jabba2_info, qabba1_info, qabba2_info

In [3]:
# Folder holding the datasets; each one is read from
# <_dir>/<name>/<name>_TRAIN.arff and <name>_TEST.arff.
_dir = 'UEA2018'

# Datasets to benchmark ('Heartbeat' and 'StandWalkJump' are left out).
test_data = [
    'AtrialFibrillation',
    'BasicMotions',
    'CharacterTrajectories',
    'Epilepsy',
    'JapaneseVowels',
    'LSST',
    'NATOPS',
    'UWaveGestureLibrary',
]

# One tolerance per dataset, all identical (0.0001 and 0.001 were other
# candidate values).
tols = [0.005] * len(test_data)

In [4]:
# Run the benchmark on every dataset and collect, per method, the result
# tuple returned by test_info (one list entry per dataset).
jabba1_info, jabba2_info, qabba1_info, qabba2_info = list(), list(), list(), list()

# Fail before any expensive work instead of part-way through the run when a
# dataset has no matching tolerance.
if len(tols) < len(test_data):
    raise ValueError("tols must provide one tolerance per entry of test_data")

for filename, tol in zip(test_data, tols):
    dataset_dir = os.path.join(_dir, filename)

    # TRAIN and TEST splits are loaded separately and stacked along axis 0.
    data = arff.loadarff(os.path.join(dataset_dir, filename + '_TRAIN.arff'))
    multivariate_ts_train = preprocess(data)
    data = arff.loadarff(os.path.join(dataset_dir, filename + '_TEST.arff'))
    multivariate_ts_test = preprocess(data)
    multivariate_ts = np.vstack((multivariate_ts_train, multivariate_ts_test)).astype(np.float32)
    print(filename, multivariate_ts.shape)

    _jabba1_info, _jabba2_info, _qabba1_info, _qabba2_info = test_info(multivariate_ts, tol=tol)
    jabba1_info.append(_jabba1_info)
    jabba2_info.append(_jabba2_info)
    qabba1_info.append(_qabba1_info)
    qabba2_info.append(_qabba2_info)

AtrialFibrillation (30, 2, 640)
BasicMotions (80, 6, 100)
CharacterTrajectories (2858, 3, 182)
Epilepsy (275, 3, 206)
JapaneseVowels (640, 12, 29)
LSST (4925, 6, 36)
NATOPS (360, 24, 51)
UWaveGestureLibrary (440, 3, 315)


In [5]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Handles ``np.bool_``, NumPy integers, NumPy floats and ``np.ndarray``;
    anything else is delegated to ``json.JSONEncoder.default``, which raises
    ``TypeError`` for unsupported objects.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``."""
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


In [6]:
# Write each method's collected results to its own JSON file.
# The output folder is created first so a missing directory cannot discard
# the results of the benchmark run.
os.makedirs("results", exist_ok=True)

for path, info in (
    ("results/jabba1_info1.json", jabba1_info),
    ("results/jabba2_info1.json", jabba2_info),
    ("results/qabba1_info1.json", qabba1_info),
    ("results/qabba2_info1.json", qabba2_info),
):
    with open(path, 'w') as f:
        json.dump(info, f, cls=NpEncoder, indent=2)