In [1]:
import math
from pathlib import Path

import iisignature
import numpy as np
import pandas as pd
import sigkernel
import torch
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import LabelEncoder
from tqdm.notebook import tqdm
from tslearn.datasets import UCR_UEA_datasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.svm import TimeSeriesSVC

In [None]:
# Datasets to benchmark, wrapped in a progress bar so the outer loop can report
# which dataset is currently being processed.
datasets = tqdm([
    'ArticularyWordRecognition',
    'BasicMotions',
    'Cricket',
    'ERing',
    'Epilepsy',
    'FingerMovements',
    'HandMovementDirection',
    'Handwriting',
    'Heartbeat',
    'Libras',
    'NATOPS',
    'RacketSports',
    'SelfRegulationSCP1',
    'SelfRegulationSCP2',
    'UWaveGestureLibrary',
])

# SVC hyperparameter grid shared by every kernel:
#   C     in {1e0, ..., 1e4}
#   gamma in {1e-4, ..., 1e4} plus the 'auto' setting
svc_parameters = dict(
    C=np.logspace(0, 4, 5),
    gamma=[*np.logspace(-4, 4, 9), 'auto'],
)

# multiplicative scales applied to the paths by the path-transform
_scales = [
    1e-3, 5e-3,
    1e-2, 5e-2,
    1e-1, 5e-1,
    1.,
]

# bandwidths of the RBF static kernel used by the signature PDE kernel
_sigmas = [
    1e-3, 2.5e-3, 5e-3, 7.5e-3,
    1e-2, 2.5e-2, 5e-2, 7.5e-2,
    1e-1, 2.5e-1, 5e-1, 7.5e-1,
    1., 5.,
]

# start grid-search
#
# For every dataset, sweep the path-transforms (add-time / lead-lag) and the input
# scales, and for each configuration evaluate:
#   * linear / RBF / GAK SVCs on the transformed paths,
#   * linear SVCs on normalized truncated signatures, for several truncation depths,
#   * SVCs on precomputed signature PDE kernel Gram matrices, for several RBF sigmas.
# Test accuracies are collected in `final` and written to one CSV file per dataset.

# make sure the output directory exists before any expensive computation starts
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

# torch device used for the signature PDE kernel (cuda if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for name in datasets:
    datasets.set_description(f"dataset: {name}")

    # store classification results
    final = {}

    # load data once per dataset: loading, scaling and label encoding do not depend
    # on the transform / scale hyperparameters swept below
    x_train_raw, y_train_raw, x_test_raw, y_test_raw = UCR_UEA_datasets(use_cache=True).load_dataset(name)

    # standard-scale inputs
    x_train_std = TimeSeriesScalerMeanVariance().fit_transform(x_train_raw)
    x_test_std = TimeSeriesScalerMeanVariance().fit_transform(x_test_raw)

    # encode outputs as labels; the encoder is fit on the training labels only and
    # reused for the test labels so both splits share the same class -> integer mapping
    # (a test label unseen during training raises instead of being silently mis-encoded)
    label_encoder = LabelEncoder().fit(y_train_raw)
    y_train = label_encoder.transform(y_train_raw)
    y_test = label_encoder.transform(y_test_raw)

    # grid-search for path-transforms (add-time, lead-lag)
    transforms = tqdm([(True,True), (False,True), (True,False), (False,False)], leave=False)
    for (at,ll) in transforms:
        transforms.set_description(f"add-time: {at}, lead-lag: {ll}")

        # grid-search on scales
        scales = tqdm(_scales, leave=False)
        for scale in scales:
            scales.set_description(f"scale: {scale}")

            # path-transform (applied to copies so the per-dataset arrays stay pristine
            # across iterations regardless of how the transform treats its input)
            x_train = sigkernel.transform(x_train_std.copy(), at=at, ll=ll, scale=scale)
            x_test = sigkernel.transform(x_test_std.copy(), at=at, ll=ll, scale=scale)

            # subsample every `subsample` time steps if a certain length is exceeded
            length = x_train.shape[1]
            if length < 100:
                subsample = 1
            elif length < 500:
                subsample = 10
            elif length < 1000:
                subsample = 20
            else:
                subsample = 30
            x_train = x_train[:,::subsample,:]
            x_test = x_test[:,::subsample,:]

            #==================================================================================
            # Linear, RBF and GAK kernels
            #==================================================================================
            # define standard kernels
            std_kernels = tqdm(['linear', 'rbf', 'gak'], leave=False)
            for ker in std_kernels:
                std_kernels.set_description(f"standard kernel: {ker}")

                # SVC tslearn estimator
                # NOTE(review): this search uses TimeSeriesSplit while the signature-based
                # searches below use cv=5 -- confirm the different CV protocol is intended.
                svc = TimeSeriesSVC(kernel=ker, decision_function_shape='ovo')
                svc_model = GridSearchCV(estimator=svc, param_grid=svc_parameters, cv=TimeSeriesSplit(n_splits=5), n_jobs=-1)
                svc_model.fit(x_train, y_train)
                score = svc_model.score(x_test, y_test)

                # store results
                final[(ker, f'add-time: {at}', f'lead-lag: {ll}', f'scale: {scale}')] = score

            #==================================================================================
            # Truncated signature kernels
            #==================================================================================
            # set max signature truncation (lower for higher-dimensional paths)
            dim  = x_train.shape[-1]
            if dim <= 4:
                max_depth = 7
            elif dim <= 6:
                max_depth = 6
            elif dim <= 8:
                max_depth = 5
            elif dim <= 10:
                max_depth = 4
            else:
                max_depth = 3

            # grid search on truncation levels
            # NOTE(review): range(2, max_depth) stops at max_depth - 1 -- confirm that
            # excluding max_depth itself is intended.
            depths = tqdm(range(2,max_depth), leave=False)
            for d in depths:
                depths.set_description(f"(truncated signature kernel) depth: {d}")

                # truncated signatures
                sig_train = iisignature.sig(x_train, d)
                sig_test = iisignature.sig(x_test, d)

                # normalization
                sig_train = sigkernel.normalize(sig_train, x_train.shape[-1], d)
                sig_test = sigkernel.normalize(sig_test, x_test.shape[-1], d)

                # SVC tslearn estimator
                svc = TimeSeriesSVC(kernel='linear', decision_function_shape='ovo')
                svc_model = GridSearchCV(estimator=svc, param_grid=svc_parameters, cv=5, n_jobs=-1)
                svc_model.fit(sig_train, y_train)
                score = svc_model.score(sig_test, y_test)

                # store results
                final[('truncated signature kernel', f'depth: {d}', f'add-time: {at}', f'lead-lag: {ll}', f'scale: {scale}')] = score

            #==================================================================================
            # Signature PDE kernel
            #==================================================================================
            # move to cuda (if available)
            x_train = torch.tensor(x_train, dtype=torch.float64, device=device)
            x_test = torch.tensor(x_test, dtype=torch.float64, device=device)

            # record the best cross-validation score, and the test score / sigma that go with it
            best_cv_score, best_score, best_sigma = -np.inf, 0., 0.

            # grid search over sigmas
            sigmas = tqdm(_sigmas, leave=False)
            for sigma in sigmas:
                sigmas.set_description(f"(signature PDE kernel) sigma: {sigma}")

                # define static kernel
                static_kernel = sigkernel.RBFKernel(sigma=sigma)

                # initialize corresponding signature PDE kernel
                signature_kernel = sigkernel.SigKernel(static_kernel, dyadic_order=0)

                # compute Gram matrix on train data
                G_train = signature_kernel.compute_Gram(x_train, x_train, sym=True).cpu().numpy()

                # SVC tslearn estimator on the precomputed Gram matrix
                svc = TimeSeriesSVC(kernel='precomputed', decision_function_shape='ovo')
                svc_model = GridSearchCV(estimator=svc, param_grid=svc_parameters, cv=5, n_jobs=-1)
                svc_model.fit(G_train, y_train)

                # choose the best sigma by cross-validated score on the training data only;
                # the test set is used solely to report the accuracy of the selected model
                if svc_model.best_score_ > best_cv_score:
                    # compute the test-vs-train Gram matrix only when it is actually needed
                    G_test = signature_kernel.compute_Gram(x_test, x_train, sym=False).cpu().numpy()
                    best_cv_score = svc_model.best_score_
                    best_score = svc_model.score(G_test, y_test)
                    best_sigma = sigma
                    del G_test

                # free memory before moving on to the next sigma
                del G_train
                torch.cuda.empty_cache()

            # store results
            final[('signature PDE kernel', f'add-time: {at}', f'lead-lag: {ll}', f'scale: {scale}', f'cv best sigma: {best_sigma}')] = best_score

    # one CSV per dataset
    # NOTE(review): the column is labelled 'accuracy (%)' but the stored values come
    # straight from `.score(...)`, presumably fractions in [0, 1] -- confirm with consumers.
    final_csv = pd.DataFrame.from_dict(final, orient='index').rename(columns={0:'accuracy (%)'})
    final_csv.to_csv(results_dir / f'_{name}.csv')

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]