In [1]:
import pandas as pd

from utils import load_parquet_to_df_list, z_normalize
from approximation.paa import PAA
from discretization.sax.one_d_sax import OneDSAX

In [2]:
# Load the SwedishLeaf training split into a list of dataframes (one per time series).
path = "../../../0_data/UCRArchive_2018/SwedishLeaf/train"
df_list = load_parquet_to_df_list(path)

# Only the first two series are kept for this demo, so z-normalize just those
# instead of normalizing the whole dataset and discarding the rest afterwards.
# They can be concatenated column-wise because all series have the same length.
df_norm = pd.concat([z_normalize(df["signal"].to_frame()) for df in df_list[:2]], axis=1)
df_norm

Unnamed: 0,signal,signal.1
0,2.223633,1.879365
1,2.058567,1.733153
2,1.849589,1.579823
3,1.642328,1.402389
4,1.438219,1.257130
...,...,...
123,1.345195,1.162620
124,1.547908,1.314745
125,1.760490,1.500432
126,1.970141,1.657326


In [3]:
# Apply Piecewise Aggregate Approximation (PAA) to the z-normalized series,
# using a window size of 6.
paa = PAA(window_size=6)
df_paa = paa.transform(df_norm)
df_paa

Unnamed: 0,signal,signal.1
0,1.750459,1.48998
1,0.794762,0.640083
2,-0.035251,0.123632
3,-0.608049,-0.129974
4,-0.858602,-0.398119
5,-1.093425,-1.070109
6,-1.054961,-1.690607
7,-0.671168,-1.148322
8,-0.020219,-0.117851
9,0.680383,0.908803


In [4]:
# Discretize with 1d-SAX, using an alphabet of size 7 for the slope symbols.
# transform receives the PAA result, the z-normalized series it was computed
# from, and the window size that was used for the PAA.
# NOTE(review): no alphabet size for the average symbols is passed here, so
# presumably the class default applies -- confirm its value.
one_d_sax = OneDSAX(alphabet_size_slope=7)
df_one_d_sax = one_d_sax.transform(df_paa, df_norm, paa.window_size)
df_one_d_sax

Unnamed: 0,signal,signal.1
0,ca,ca
1,ca,ca
2,ba,bb
3,ac,bb
4,ab,bb
5,ad,aa
6,ae,ac
7,ag,ag
8,bg,bg
9,cg,cg


In [5]:
# Each 1d-SAX entry is a string of symbols; split it into its first character
# (stored as df_avg) and its second character (stored as df_slope).
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use the new name when it exists and fall back on older pandas versions.
elementwise = df_one_d_sax.map if hasattr(pd.DataFrame, "map") else df_one_d_sax.applymap
df_avg = elementwise(lambda symbols: symbols[0])
df_slope = elementwise(lambda symbols: symbols[1])
df_avg

Unnamed: 0,signal,signal.1
0,c,c
1,c,c
2,b,b
3,a,b
4,a,b
5,a,a
6,a,a
7,a,a
8,b,b
9,c,c


In [6]:
# Display the second character of each 1d-SAX entry (extracted into df_slope above).
df_slope

Unnamed: 0,signal,signal.1
0,a,a
1,a,a
2,a,b
3,c,b
4,b,b
5,d,a
6,e,c
7,g,g
8,g,g
9,g,g
