In [0]:
# Referred: https://www.cs.ucr.edu/~eamonn/SAX.pdf
import statistics
import string
import scipy.stats
import numpy as np

In [0]:
# equi-probable areas for gaussian curve using statistical lookup table
def lookup_for_equi_probable_regions(alphabet_size):
    regions = np.arange(0, alphabet_size, 1) / alphabet_size
    return scipy.stats.norm.ppf(regions)


In [0]:
def z_normalize(time_series):
    tsMean = statistics.mean(time_series)
    tsDeviation = statistics.stdev(time_series)

    normalized_value = (time_series - tsMean) / tsDeviation
    return normalized_value


In [0]:
# piecewise aggregate approximation - ts divided into segments and then each segment replace by avg of its data points
def paa_calculation(timeSeries, word_length):
    
    try:
        assert timeSeries.shape[0] != word_length
    except AssertionError as e:
        return np.copy(timeSeries)
    else:
        # time series can be divided equally
        if timeSeries.shape[0] % word_length == 0:
            words = np.split(timeSeries, word_length)
            res = []
            # find mean of every segment
            for word in words:
                res.append(word.mean())

            res = np.array(
                [mean_value for item in res for mean_value in
                 [item.mean()] * item.shape[0]]
            )

            # cannot be divided equally
        else:
            space_size = np.arange(0, timeSeries.shape[0] * word_length - 1) 
           
            output_indx = space_size // timeSeries.shape[0]
           
            input_indx = space_size // word_length
           
            unique, unique_indices = np.unique(output_indx, return_counts=True)

            #print("Unique", unique)
            #print("Unique_indices", unique_indices)

            res = []
            for indices in np.split(input_indx, unique_indices.cumsum())[:-1]:
                avg_segment = timeSeries[indices].sum() / timeSeries.shape[0]
                res.append(avg_segment)

        return res


In [0]:
def partitioning_ts(paa_transformed_ts, alphabet_size):
    equal_segments = lookup_for_equi_probable_regions(alphabet_size)
    print("\n")
    print("Equal_segments", equal_segments)
    symbols = string.ascii_lowercase
    paa_ts_length = len(paa_transformed_ts)
    print('PAA Transformed time series', paa_transformed_ts)
    sax = list()
    for i in range(0, paa_ts_length):
        val = paa_transformed_ts[i]
        if val >= 0:
            j = len(equal_segments) - 1
            while (j > 0) and (equal_segments[j] >= val):
                j = j - 1
            sym = symbols[j]
        else:
            j = 1
            while j < len(equal_segments) and (equal_segments[j] <= val):
                j = j + 1
            sym = symbols[j-1]
        sax.append(sym)
    print("SAX representation for a given time series is: ", ''.join(sax))

In [0]:
def sax_time_series(timeSeries, alphabet_size, word_length):
    scipy.stats.norm(0, 1)  # probablity distribution
    normalized_ts = z_normalize(timeSeries)
    paa_transformed_ts = paa_calculation(normalized_ts, word_length)  # piecewise aggregate approximation
    partitioning_ts(paa_transformed_ts, alphabet_size)  # equal_area_partitioning


In [29]:
ts1 = np.array([2.02, 2.33, 2.99, 6.85, 9.20, 8.80, 7.50, 6.00, 5.85, 3.85, 4.85, 3.85, 2.22, 1.45, 1.34])
ts2 = np.array([3.12, 2.02, 2.33, 2.99, 6.85, 9.20, 8.80, 7.50, 6.00, 5.85, 3.85, 4.85, 3.85, 2.22, 1.45, 1.34])
ts3 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
ts4 = np.array([0.50, 1.29, 2.58, 3.83, 3.25, 4.25, 3.83, 5.63, 6.44, 6.25, 8.75, 8.83, 3.25, 0.75, 0.72])
ts5 = np.array([-2, 0, 2, 0, -1])

sax_time_series(ts1, 4, 9)
sax_time_series(ts2, 4, 9)
sax_time_series(ts3, 4, 3)
sax_time_series(ts4, 4, 9)
sax_time_series(ts5, 3, 5)




Equal_segments [       -inf -0.67448975  0.          0.67448975]
PAA Transformed time series [-0.93271676745998, -0.36990526917403094, 1.38367295226873, 1.3912478041972087, 0.6299751853851243, 0.016412179178370026, -0.059336340106414524, -0.8387886035468469, -1.1380794197431736]
SAX representation for a given time series is:  abddccbaa


Equal_segments [       -inf -0.67448975  0.          0.67448975]
PAA Transformed time series [-0.7273863763904705, -0.7821828167452193, 0.7028977017186578, 1.6894760902295998, 0.8675294849083679, 0.2310664055667061, -0.039278864325085455, -0.7295685355196418, -1.1356016723739393]
SAX representation for a given time series is:  aadddcbaa


Equal_segments [       -inf -0.67448975  0.          0.67448975]
PAA Transformed time series [-0.9333333333333332, 0.16666666666666666, 1.0999999999999999]
SAX representation for a given time series is:  acd


Equal_segments [       -inf -0.67448975  0.          0.67448975]
PAA Transformed time series [-1.1733474804