In [1]:
import os

import numpy as np
import pandas as pd

from src.experiments.utils import development_datasets, univariate_equal_length

### 5.1 Hyperparameter optimization

In [2]:
hyperparameter_optimization_results = pd.read_csv(
    os.path.join("hyperparameter_optimization", "WEASELTransformerV2", "clacc_mean.csv"),
    index_col=0,
)

# Rank the configurations (columns) within every row; rank 1 is the highest score.
# The rank table is built once and summarised twice:
#   * "Mean rank"     - average rank of a configuration over all rows
#   * "Winning count" - number of rows in which its rank is exactly 1.0
# NOTE(review): with pandas' default tie handling (method="average") a tie for
# first place yields a fractional rank, so tied rows are not counted as wins.
df_hyperparameter_optimization = (
    hyperparameter_optimization_results
    .rank(axis=1, ascending=False)
    .pipe(
        lambda ranks: pd.DataFrame({
            "Mean rank": ranks.mean(axis=0).round(3),
            "Winning count": ranks.eq(1.0).sum(axis=0),
        })
    )
    .sort_values("Mean rank")
)
df_hyperparameter_optimization

Unnamed: 0,Mean rank,Winning count
PCA(0.2),5.28,6
PCA(0.3),5.293,7
PCA(0.4),5.305,5
PCA(0.5),6.171,1
PCA(0.6),6.537,1
PCA(1.0),6.768,0
PCA(0.1),6.89,2
PCA(0.7),7.122,3
PCA(0.05),7.354,1
PCA(0.8),7.72,2


In [3]:
# LaTeX export of the summary table. float_format keeps the three-decimal
# precision of the rounded mean ranks instead of the default six decimals
# (e.g. 5.280 rather than 5.280000); integer columns are unaffected.
print(df_hyperparameter_optimization.to_latex(float_format="%.3f"))

\begin{tabular}{lrr}
\toprule
 & Mean rank & Winning count \\
\midrule
PCA(0.2) & 5.280000 & 6 \\
PCA(0.3) & 5.293000 & 7 \\
PCA(0.4) & 5.305000 & 5 \\
PCA(0.5) & 6.171000 & 1 \\
PCA(0.6) & 6.537000 & 1 \\
PCA(1.0) & 6.768000 & 0 \\
PCA(0.1) & 6.890000 & 2 \\
PCA(0.7) & 7.122000 & 3 \\
PCA(0.05) & 7.354000 & 1 \\
PCA(0.8) & 7.720000 & 2 \\
PCA(0.99) & 8.232000 & 1 \\
PCA(0.9) & 9.098000 & 2 \\
PCA(0.95) & 9.232000 & 2 \\
\bottomrule
\end{tabular}



### 5.3 Comparisons to other time series clustering algorithms with different setups

#### 5.3.1 RandomNet

In [4]:
# Dataset names that are removed (together with `development_datasets`) from
# `univariate_equal_length` when the evaluation set is built later in this cell.
# NOTE(review): presumably the datasets used for development in the RandomNet
# comparison - confirm against the referenced setup.
development_datasets_randomnet = [
    "Strawberry",
    "SwedishLeaf",
    "Symbols",
    "SyntheticControl",
    "ToeSegmentation1",
    "ToeSegmentation2",
    "Trace",
    "TwoLeadECG",
    "TwoPatterns",
    "UMD",
    "UWaveGestureLibraryAll",
    "UWaveGestureLibraryX",
    "UWaveGestureLibraryY",
    "UWaveGestureLibraryZ",
    "Wafer",
    "Wine",
    "WordSynonyms",
    "Worms",
    "WormsTwoClass",
    "Yoga",
]

# Evaluation set = univariate_equal_length minus both development lists.
# Removing the two lists one after the other gives the same sorted, unique
# array as removing their union in a single step.
evaluation_datasets_clues_weasel_randomnet = np.setdiff1d(
    np.setdiff1d(univariate_equal_length, development_datasets),
    development_datasets_randomnet,
)

# Number of datasets left for evaluation.
evaluation_datasets_clues_weasel_randomnet.size

61

#### 5.3.2 Trained deep learning models

In [5]:
# Results table for the deep learning models. The following cells read the
# columns dataset_name, encoder_architecture, encoder_loss, clustering_loss,
# acc and nmi from it.
df = pd.read_csv(
    os.path.join("comparisons", "train-test", "deep_learning_results.csv")
)

def get_model_name(series):
    """Build the identifier ``<encoder_architecture>_<encoder_loss>_<clustering_loss>``.

    Parameters
    ----------
    series : pd.Series or mapping
        One result row providing the keys 'encoder_architecture',
        'encoder_loss' and 'clustering_loss'.

    Returns
    -------
    str
        The three fields joined by underscores. A missing clustering loss
        (NaN, None or pd.NA) is written as the literal "None".
    """
    clustering_loss = series['clustering_loss']
    # Test for missingness directly. The previous isinstance(..., float) check
    # only recognised NaN by its type, so pd.NA slipped through as "<NA>" and
    # any genuine float value would have been relabelled "None".
    if pd.isna(clustering_loss):
        clustering_loss = "None"
    return f"{series['encoder_architecture']}_{series['encoder_loss']}_{clustering_loss}"

# Row-wise apply (axis=1): one model identifier per result row, stored in a
# new "model" column of df.
df["model"] = df.apply(get_model_name, axis=1)

In [6]:
def _select_best_models(metric_by_dataset, n_best=3, **dropna_kwargs):
    """Return the models that are top-``n_best`` by mean score or by mean rank.

    Parameters
    ----------
    metric_by_dataset : pd.DataFrame
        One row per dataset and one column per model, as produced by the
        pivot tables in this cell. Higher values are ranked first.
    n_best : int, default 3
        How many models to take from each of the two criteria.
    **dropna_kwargs
        Forwarded to ``DataFrame.dropna(axis=1, ...)`` to decide which model
        columns are eligible (e.g. ``thresh=92`` or ``how="any"``).

    Returns
    -------
    list of str
        Model names without duplicates: best by mean score first, followed by
        the best by mean rank that are not already listed.
    """
    eligible = metric_by_dataset.dropna(axis=1, **dropna_kwargs)
    # Column means skip missing values, so a model with a few missing datasets
    # is averaged over the datasets it does have.
    top_by_mean = eligible.mean(axis=0).nlargest(n_best).index.tolist()
    # Rank the models within each dataset; rank 1 is the highest score, hence
    # the smallest mean ranks are the best.
    top_by_rank = (
        eligible.rank(axis=1, ascending=False).mean(axis=0).nsmallest(n_best).index.tolist()
    )
    # dict.fromkeys removes duplicates while keeping insertion order.
    # list(set(...)) would order the names differently from one kernel session
    # to the next because string hashing is randomised per process.
    return list(dict.fromkeys(top_by_mean + top_by_rank))


# Keep only the result rows whose dataset is in univariate_equal_length.
df = df[df["dataset_name"].isin(univariate_equal_length)]

df_acc = pd.pivot_table(df, values="acc", index="dataset_name", columns="model")

# thresh=92: a model column is eligible only with at least 92 non-missing datasets.
# NOTE(review): 92 is a magic number - confirm and document the rationale.
best_deep_algorithms_acc = _select_best_models(df_acc, thresh=92)
# how="any": only models with a result on every dataset are eligible.
best_deep_algorithms_acc_no_na = _select_best_models(df_acc, how="any")

df_nmi = pd.pivot_table(df, values="nmi", index="dataset_name", columns="model")

best_deep_algorithms_nmi = _select_best_models(df_nmi, thresh=92)
best_deep_algorithms_nmi_no_na = _select_best_models(df_nmi, how="any")

In [7]:
# Top-3 models by mean accuracy or by mean rank, among models with at least 92 non-missing datasets.
best_deep_algorithms_acc

['fcnn_tripletK10_None',
 'fcnn_joint_SDCN',
 'dilated_cnn_tripletKcombined_None',
 'res_cnn_tripletKcombined_None']

In [8]:
# Same selection on accuracy, restricted to models without any missing dataset.
best_deep_algorithms_acc_no_na

['fcnn_tripletK10_None',
 'dilated_cnn_tripletK10_None',
 'fcnn_tripletK2_None',
 'dilated_cnn_reconstruction_None']

In [9]:
# Top-3 models by mean NMI or by mean rank, among models with at least 92 non-missing datasets.
best_deep_algorithms_nmi

['fcnn_tripletK10_None',
 'dilated_cnn_reconstruction_None',
 'dilated_cnn_tripletKcombined_None',
 'fcnn_tripletKcombined_None',
 'fcnn_joint_SDCN']

In [10]:
# Same selection on NMI, restricted to models without any missing dataset.
best_deep_algorithms_nmi_no_na

['fcnn_tripletK10_None',
 'dilated_cnn_tripletK10_None',
 'dilated_cnn_reconstruction_None']

### 5.4 Runtimes

In [11]:
df_runtimes_clues_weasel = pd.read_csv(
    os.path.join("runtimes", "train-test", "CLUES-WEASEL", "runtimes.csv"),
    index_col=0,
)
df_runtimes_kasba = pd.read_csv(
    os.path.join("runtimes", "train-test", "KASBA", "runtimes.csv"),
    index_col=0,
)

# For each algorithm: total every column of its runtime table, divide by 3600
# and summarise the column totals with min / median / mean / max.
# NOTE(review): the division presumably converts seconds to hours - confirm the
# unit stored in runtimes.csv.
df_runtimes = pd.concat(
    [
        runtimes.sum(axis=0).div(3600).agg(("min", "median", "mean", "max")).rename(algorithm)
        for algorithm, runtimes in (
            ("CLUES-WEASEL", df_runtimes_clues_weasel),
            ("KASBA", df_runtimes_kasba),
        )
    ],
    axis=1,
).round(3)

df_runtimes

Unnamed: 0,CLUES-WEASEL,KASBA
min,1.326,3.25
median,1.331,3.476
mean,1.331,3.467
max,1.335,3.638


In [12]:
# LaTeX export of the runtime summary. float_format keeps the three-decimal
# precision of the rounded values instead of the default six decimals
# (e.g. 1.326 rather than 1.326000).
print(df_runtimes.to_latex(float_format="%.3f"))

\begin{tabular}{lrr}
\toprule
 & CLUES-WEASEL & KASBA \\
\midrule
min & 1.326000 & 3.250000 \\
median & 1.331000 & 3.476000 \\
mean & 1.331000 & 3.467000 \\
max & 1.335000 & 3.638000 \\
\bottomrule
\end{tabular}



### 5.5 Ablation experiments

#### 5.5.1 Other transformers

In [13]:
# Result directory name -> label used for the table columns.
transformers = {
    "HydraTransformer": "Hydra",
    "MultiRocket": "MultiROCKET",
    "QUANTTransformer": "QUANT",
    "RandomDilatedShapeletTransform": "RDST",
    "TSFresh": "TSFresh",
    "WEASELTransformerV2": "WEASEL 2.0",
}

# One score table per transformer, keyed by its display label.
hyperparameter_optimization_other_transformers = {
    label: pd.read_csv(
        os.path.join("hyperparameter_optimization", directory, "clacc_mean.csv"),
        index_col=0,
    )
    for directory, label in transformers.items()
}

# Mean rank of every configuration (rank 1 = highest score within a row),
# one column per transformer; `keys` supplies the column labels directly.
df_hyperparameter_optimization_other_transformers = pd.concat(
    [
        scores.rank(axis=1, ascending=False).mean(axis=0)
        for scores in hyperparameter_optimization_other_transformers.values()
    ],
    axis=1,
    keys=list(hyperparameter_optimization_other_transformers),
).round(3)

In [14]:
# Mean rank per configuration (rows) and transformer (columns), rounded to 3 decimals.
df_hyperparameter_optimization_other_transformers

Unnamed: 0,Hydra,MultiROCKET,QUANT,RDST,TSFresh,WEASEL 2.0
PCA(1.0),6.341,5.768,6.793,6.268,5.622,6.768
PCA(0.99),6.195,5.878,6.805,6.488,6.854,8.232
PCA(0.95),5.854,5.61,6.524,6.012,6.561,9.232
PCA(0.9),6.463,5.549,6.732,6.183,8.073,9.098
PCA(0.8),6.22,5.768,5.866,6.341,6.463,7.72
PCA(0.7),5.72,6.5,5.476,5.195,6.122,7.122
PCA(0.6),6.561,6.622,6.195,6.012,6.878,6.537
PCA(0.5),7.171,8.451,6.902,6.256,7.317,6.171
PCA(0.4),8.256,8.073,8.146,7.951,6.232,5.305
PCA(0.3),8.146,8.195,7.854,8.5,7.427,5.293


In [15]:
# Configuration with the lowest mean rank for each transformer (rank 1 = highest score).
df_hyperparameter_optimization_other_transformers.idxmin()

Hydra          PCA(0.7)
MultiROCKET    PCA(0.9)
QUANT          PCA(0.7)
RDST           PCA(0.7)
TSFresh        PCA(1.0)
WEASEL 2.0     PCA(0.2)
dtype: object

In [16]:
# The lowest mean rank reached by each transformer.
df_hyperparameter_optimization_other_transformers.min()

Hydra          5.720
MultiROCKET    5.549
QUANT          5.476
RDST           5.195
TSFresh        5.622
WEASEL 2.0     5.280
dtype: float64

#### 5.5.2 Lower maximum feature counts

In [17]:
max_feature_counts = [10000, 5000, 1000, 500, 200]

# Result directory name -> column label, one entry per maximum feature count.
transformers = {
    f"WEASELTransformerV2({count})": f"WEASEL 2.0 ({count})"
    for count in max_feature_counts
}

# One score table per maximum feature count, keyed by its column label.
hyperparameter_optimization_max_feature_counts = {
    label: pd.read_csv(
        os.path.join(
            "ablation_experiments",
            "hyperparameter_optimization",
            directory,
            "clacc_mean.csv",
        ),
        index_col=0,
    )
    for directory, label in transformers.items()
}

# Mean rank of every configuration (rank 1 = highest score within a row).
# The dict keeps the order of max_feature_counts, so the columns appear in
# that order; `keys` supplies the column labels directly.
df_hyperparameter_optimization_max_feature_counts = pd.concat(
    [
        scores.rank(axis=1, ascending=False).mean(axis=0)
        for scores in hyperparameter_optimization_max_feature_counts.values()
    ],
    axis=1,
    keys=list(hyperparameter_optimization_max_feature_counts),
).round(3)

df_hyperparameter_optimization_max_feature_counts

Unnamed: 0,WEASEL 2.0 (10000),WEASEL 2.0 (5000),WEASEL 2.0 (1000),WEASEL 2.0 (500),WEASEL 2.0 (200)
PCA(1.0),7.122,6.695,7.78,8.0,7.72
PCA(0.99),7.683,7.085,7.537,8.098,7.463
PCA(0.95),8.902,9.22,7.866,8.28,7.89
PCA(0.9),9.11,8.524,8.61,7.988,7.878
PCA(0.8),7.622,8.402,7.683,6.963,7.268
PCA(0.7),7.378,6.939,6.232,6.61,6.28
PCA(0.6),6.451,6.585,6.39,6.39,6.134
PCA(0.5),6.0,5.707,5.659,5.415,5.988
PCA(0.4),5.061,5.476,5.305,5.341,5.707
PCA(0.3),5.11,5.098,5.049,4.5,5.159


In [18]:
# Configuration with the lowest mean rank for each maximum feature count (rank 1 = highest score).
df_hyperparameter_optimization_max_feature_counts.idxmin()

WEASEL 2.0 (10000)    PCA(0.4)
WEASEL 2.0 (5000)     PCA(0.3)
WEASEL 2.0 (1000)     PCA(0.3)
WEASEL 2.0 (500)      PCA(0.3)
WEASEL 2.0 (200)      PCA(0.3)
dtype: object

In [19]:
# The lowest mean rank reached for each maximum feature count.
df_hyperparameter_optimization_max_feature_counts.min()

WEASEL 2.0 (10000)    5.061
WEASEL 2.0 (5000)     5.098
WEASEL 2.0 (1000)     5.049
WEASEL 2.0 (500)      4.500
WEASEL 2.0 (200)      5.159
dtype: float64