In [1]:
import pandas as pd

from utils import load_parquet_to_df_list, z_normalize
from approximation.paa import PAA
from discretization.sax.sax import SAX
from discretization.sax.extended_sax import ExtendedSAX
from information_embedding_cost.kullback_leibler_divergence import compute_raw_prob_distribution, compute_symbolic_prob_distribution, EquiWidth
from discretization.sax.symbol_mapping import IntervalNormMedian
from scipy.stats import entropy

In [2]:
# load time series dataset into dataframe and z-normalize it
path = "../../../0_data/UCRArchive_2018/SwedishLeaf/train"
df_list = load_parquet_to_df_list(path)

# concatenate all time series to one dataframe, because they all have the same length
df_norm = pd.concat([z_normalize(df["signal"].to_frame()) for df in df_list], axis=1).iloc[:, :2]
df_norm

Unnamed: 0,signal,signal.1
0,2.223633,1.879365
1,2.058567,1.733153
2,1.849589,1.579823
3,1.642328,1.402389
4,1.438219,1.257130
...,...,...
123,1.345195,1.162620
124,1.547908,1.314745
125,1.760490,1.500432
126,1.970141,1.657326


In [3]:
alphabet_size = 5

sax = SAX(alphabet_size)
e_sax = ExtendedSAX(alphabet_size)



In [4]:
interval_norm_median = IntervalNormMedian(alphabet_size)

In [5]:
window_size = 5

paa = PAA(window_size)
df_paa = paa.transform(df_norm)

In [6]:
df_sax = sax.transform(df_paa)
df_e_sax, df_sax_mean, df_sax_max, df_sax_min = e_sax.transform(df_paa, df_norm, window_size)

In [7]:
df_sax_inv = sax.inv_transform(df_sax, df_norm.shape[0], window_size, interval_norm_median)
df_e_sax_inv = e_sax.inv_transform(df_sax_mean, df_sax_max, df_sax_min, df_norm.shape[0], window_size, interval_norm_median)

In [8]:
equi_width = EquiWidth()

sax_raw = compute_raw_prob_distribution(df_norm, sax, equi_width)
e_sax_raw = compute_raw_prob_distribution(df_norm, e_sax, equi_width)

sax_inv = compute_raw_prob_distribution(df_sax_inv, sax, equi_width)
e_sax_inv = compute_raw_prob_distribution(df_e_sax_inv, sax, equi_width)

In [9]:
sax_entropy = entropy(sax_raw, sax_inv)
e_sax_entropy = entropy(e_sax_raw, e_sax_inv)
sax_entropy

array([0.07604773, 0.13980701])

In [10]:
e_sax_entropy

array([0.08929887, 0.1436536 ])