In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.python.keras.layers import Input
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.python.keras.layers.merge import concatenate
from tensorflow.keras import Model
from tensorflow.keras import layers

from tensorflow.python.eager import context
from tensorflow.python.framework import ops

print(tf.__version__)
print(tfa.__version__)
print(tfp.__version__)

print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

In [None]:
import os
import random
import numpy as np
import math
import scipy.stats
import pandas as pd
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import seaborn as sns


from os import walk

from functools import reduce

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.utils import shuffle

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

from scipy.stats import norm
from scipy.interpolate import interp1d
from scipy.io import arff

from pyts.approximation import SymbolicAggregateApproximation

from modules import helper
from modules import transformer

Set the random seeds so that repeated runs on the same computer always produce the same models

In [None]:
seed_value = 42

# 1. Export the hash seed.
# NOTE: the hash seed of the already running kernel is fixed at interpreter
# start-up; setting the variable here only affects processes started later.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
np.random.seed(seed_value)

# 4. Set the `tensorflow` seeds: the public global seed plus the seed of the
#    eager context and of the default graph (both set through
#    `tensorflow.python` internals imported at the top of the notebook).
tf.random.set_seed(seed_value)
context.set_global_seed(seed_value)
ops.get_default_graph().seed = seed_value

# 5. Ask TensorFlow / cuDNN for deterministic kernels
#pip install tensorflow-determinism needed
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

Loading and formatting the test and train data

In [None]:
# Select dataset!!!

#source: http://www.timeseriesclassification.com/description.php?Dataset=SyntheticControl
data_path_train = './Datasets/SyntheticControl/SyntheticControl_TRAIN.arff'
data_path_test = './Datasets/SyntheticControl/SyntheticControl_TEST.arff'
num_of_classes = 6
seqSize = 60

#source: http://www.timeseriesclassification.com/description.php?Dataset=ECG5000
#data_path_train = './Datasets/ecg5000/ECG5000_TRAIN.arff'
#data_path_test = './Datasets/ecg5000/ECG5000_TEST.arff'
#num_of_classes = 5
#seqSize = 140

#save preprocessing results and model weights
useSaves = False

#number of symbolics for SAX
n_bins = 5

#Load and format data: every row holds the series values followed by the label
data_train, meta_train = arff.loadarff(data_path_train)
data_test, meta_test = arff.loadarff(data_path_test)

data_train = np.array(data_train.tolist())
data_test = np.array(data_test.tolist())

# Last column = integer class label, everything before it = the time series
X_train = data_train[:,:-1].astype(float)
y_trainy = data_train[:,-1].astype(int)
X_test = data_test[:,:-1].astype(float)
y_testy_full = data_test[:,-1].astype(int)
y_testy = y_testy_full

X_train, y_trainy = shuffle(X_train, y_trainy, random_state = seed_value)

# One-hot encode the labels. Label k is written to column k-1, i.e. the labels
# are treated as 1-based (a label of 0 would wrap around to the last column).
y_train = np.eye(num_of_classes, dtype=float)[y_trainy - 1]
y_test_full = np.eye(num_of_classes, dtype=float)[y_testy - 1]
# independent copy, so later changes to y_test do not touch y_test_full
y_test = y_test_full.copy()

print(X_test.shape)
print(X_train.shape)
print(y_test.shape)
print(y_train.shape)

# Initialize k-folds
kf = StratifiedKFold(5, shuffle=True, random_state=seed_value) # Use for StratifiedKFold classification
fold = 0

# Earlystopping callback
earlystop = EarlyStopping(monitor= 'val_loss', min_delta=0 , patience=50, verbose=0, mode='auto')

print('done')


Plots to examine the data

In [None]:
# Show the first 15 (already shuffled) training series, each followed by its
# one-hot encoded label.
for z in range(15):
    plt.plot(range(seqSize), X_train[z])
    plt.show()
    print(y_train[z])

In [None]:
#create the transformer model with given information
def createModel(splits, x_train, x_val, x_test, batchSize, num_of_classes, doMask= False, rate = 0, numOfAttentionLayers=2):
    """Build and compile the transformer-encoder classifier.

    The three data sets are cut along their last axis into `splits` equal
    parts (`np.dsplit`). Every training part gets its own input layer and its
    own `transformer.Encoder`; the flattened encoder outputs are concatenated
    and fed into one dense sigmoid layer with `num_of_classes` units. The
    model is compiled with Adam (`transformer.CustomSchedule(16)` learning
    rate), mean squared error loss and the accuracy metric.

    Args:
        splits: number of parts the last axis is divided into.
        x_train, x_val, x_test: 3-D arrays (required by `np.dsplit`).
        batchSize: not used by this function; kept so existing calls work.
        num_of_classes: number of output units.
        doMask: if True every part gets a second `attention_mask` input built
            from `createMask`; otherwise a `Masking(mask_value=-2)` layer is
            put in front of the encoder.
        rate: forwarded to `transformer.Encoder` as `rate`.
        numOfAttentionLayers: first positional argument of the encoder.

    Returns:
        (model, list of input tensors, split train data, split test data,
        split validation data).

    Reads the notebook-level `seed_value`.
    """
    print(np.array(x_train).shape)
    x_trains = np.dsplit(x_train, splits)
    x_tests = np.dsplit(x_test, splits)
    x_vals = np.dsplit(x_val, splits)
    print(np.array(x_trains).shape)

    # length of one series inside a split
    maxLen = len(x_trains[0][0])
    print(maxLen)

    flattenArray = []
    inputShapes = []
    for x_split in x_trains:
        x_part = np.array(x_split)
        print(x_part.shape)
        input_shape1 = (x_part.shape[1], x_part.shape[2])

        if doMask:
            # NOTE(review): createMask is not defined in the part of the
            # notebook visible here - confirm it exists before using doMask.
            masky = createMask(x_split, 6)
            left_input1 = tf.keras.layers.Input(input_shape1, name='input_ids')
            mask = tf.keras.layers.Input(shape=masky.shape[1:], name='attention_mask')
            print(masky.shape)
            inputShapes.extend([left_input1, mask])
        else:
            left_input1 = Input(input_shape1)
            inputShapes.append(left_input1)

        #create transformer encoder layer
        # NOTE(review): the positional constants 16, 6, 6, 5000 are passed
        # through unchanged; their meaning is defined in modules/transformer.
        encClass1 = transformer.Encoder(numOfAttentionLayers, 16, 6, 6, 5000, rate=rate, input_vocab_size = 2, maxLen = maxLen, doMask=doMask, seed_value=seed_value)

        if doMask:
            encInput = left_input1, mask
        else:
            # -2 is the value the abstraction step writes for removed points
            encInput = tf.keras.layers.Masking(mask_value=-2)(left_input1)

        enc1, attention, fullAttention = encClass1(encInput)
        flattenArray.append(Flatten()(enc1))

    # Merge nets
    if splits == 1:
        merged = flattenArray[0]
    else:
        merged = concatenate(flattenArray)

    output = Dense(num_of_classes, activation = "sigmoid")(merged)

    # Create combined model
    wdcnnt_multi = Model(inputs=inputShapes,outputs=(output))
    wdcnnt_multi.summary()
    print(wdcnnt_multi.count_params())

    learning_rate = transformer.CustomSchedule(16)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.99,
                                         epsilon=1e-9)

    wdcnnt_multi.compile(optimizer=optimizer,
                loss='mean_squared_error',
                metrics=['accuracy'], run_eagerly=False)

    print('done')

    return wdcnnt_multi, inputShapes, x_trains, x_tests, x_vals

In [None]:
def _resolveUseMax(useMaxThreshold):
    """Return the threshold mode; None falls back to the notebook-level `doMax` flag."""
    if useMaxThreshold is None:
        return doMax
    return useMaxThreshold


def _attentionHeat(data, earlyPredictorZ, takeAvg, heatLayer, limit):
    """Return one 1-D heat vector per sample of `data`.

    `data` is sent through `earlyPredictorZ.predict` in chunks of `limit`
    samples. With `takeAvg` the second model output is used, otherwise layer
    `heatLayer` of the third output. The heat of a sample is the maximum of
    its attention over axis 1, summed over axis 0.
    """
    heats = []
    for start in range(0, len(data), limit):
        chunk = data[start:start + limit]
        outputs = earlyPredictorZ.predict([chunk])
        # the per-layer output is indexed [layer][sample], so the layer has to
        # be picked per chunk before the samples are collected
        attention = outputs[1] if takeAvg else outputs[2][heatLayer]
        for sample in range(len(chunk)):
            heats.append(np.sum(np.max(attention[sample], axis = 1), axis = 0))
    return heats


def _heatBorders(heat, useMax):
    """Return (keep threshold, aggregate threshold) for one heat vector."""
    if useMax:
        maxHeat = np.max(heat)
        return maxHeat / 2, maxHeat / 3
    avgHeat = np.average(heat)
    return avgHeat, avgHeat / 1.2


def _sampleSeries(data, index):
    """Return sample `index` as a flat 1-D series (single-channel data)."""
    # indexing before flattening keeps this correct for a single-sample batch
    return np.asarray(data)[index].reshape(-1)


def _runCentre(end, run):
    """Position used for the median of a run of points that ends just before `end`."""
    return end - math.ceil(len(run) / 2)


#abstracting data with interpolation
def abstractData(data, earlyPredictorZ, takeAvg = True, heatLayer = 0, limit = 300, useMaxThreshold = None):
    """Abstract every series by its attention heat and re-interpolate it.

    Points whose heat exceeds the upper threshold are kept. Consecutive points
    between the two thresholds are replaced by one point carrying their
    median. All other points are dropped. The kept points are linearly
    interpolated back to the original length; positions outside the kept range
    are filled with -2.

    Args:
        data: array of shape (samples, length, 1).
        earlyPredictorZ: object with a `predict` method returning the
            attention as second (and per layer as third) output.
        takeAvg: use the second model output if True, else layer `heatLayer`
            of the third output.
        heatLayer: layer index used when `takeAvg` is False.
        limit: maximum number of samples per `predict` call.
        useMaxThreshold: True -> thresholds max/2 and max/3, False ->
            mean and mean/1.2, None -> use the notebook-level `doMax`.

    Returns:
        (float32 array of shape (samples, length, 1), list with the share of
        removed points per sample).
    """
    useMax = _resolveUseMax(useMaxThreshold)
    heats = _attentionHeat(data, earlyPredictorZ, takeAvg, heatLayer, limit)

    newX = []
    reduction = []
    for index, heat in enumerate(heats):
        X_ori = _sampleSeries(data, index)
        seqLen = len(heat)

        #Do abstraction based on thresholds
        borderHeat, borderHeat2 = _heatBorders(heat, useMax)

        #filter based on heat
        fitleredSet = []
        indexSet = []
        avgSet = []
        for h in range(seqLen):
            if heat[h] > borderHeat:
                if avgSet:
                    fitleredSet.append(np.median(avgSet))
                    indexSet.append(_runCentre(h, avgSet))
                    avgSet = []
                fitleredSet.append(X_ori[h])
                indexSet.append(h)
            elif heat[h] > borderHeat2:
                avgSet.append(X_ori[h])
            elif avgSet:
                fitleredSet.append(np.median(avgSet))
                indexSet.append(_runCentre(h, avgSet))
                avgSet = []

        # A run that reaches the end of the series ends just before seqLen.
        # Using the last loop index instead would shift it one step to the
        # left and could collide with an index that is already stored.
        if avgSet:
            fitleredSet.append(np.median(avgSet))
            indexSet.append(_runCentre(seqLen, avgSet))

        reduction.append(1 - len(fitleredSet)/seqLen)

        if not fitleredSet:
            # nothing passed a threshold (e.g. all-zero heat): everything is masked
            newX.append(np.full((seqLen, 1), -2.0))
            continue

        # interp1d needs two points; a single kept point is paired with (seqLen, 0)
        if len(fitleredSet) < 2:
            fitleredSet.append(0)
            indexSet.append(seqLen)
        newXTemp = interp1d(indexSet, fitleredSet, bounds_error = False, fill_value = -2)
        newX.append(newXTemp(np.arange(seqLen)).reshape(-1, 1))

    newX = np.array(newX, dtype=np.float32)
    print(np.array(newX).shape)
    print(data.shape)
    return newX, reduction

#abstracting data with no interpolation and added mask values
def abstractData2(data, earlyPredictorZ, takeAvg = True, heatLayer = 0, limit = 300, useMaxThreshold = None):
    """Abstract every series by its attention heat and mask removed points with -2.

    Uses the same selection as `abstractData`: points above the upper
    threshold are kept, a run of points between the two thresholds is reduced
    to its median (stored at the centre of the run), everything else is
    removed. Removed positions are set to -2 instead of being interpolated.

    Arguments and return value are the same as for `abstractData`.
    """
    useMax = _resolveUseMax(useMaxThreshold)
    heats = _attentionHeat(data, earlyPredictorZ, takeAvg, heatLayer, limit)

    newX = []
    reduction = []
    for index, heat in enumerate(heats):
        X_ori = _sampleSeries(data, index)
        seqLen = len(heat)
        borderHeat, borderHeat2 = _heatBorders(heat, useMax)

        #filter based on heat and add masked values
        fitleredSet = [-2] * seqLen
        avgSet = []
        for h in range(seqLen):
            if heat[h] > borderHeat:
                if avgSet:
                    fitleredSet[_runCentre(h, avgSet)] = np.median(avgSet)
                    avgSet = []
                fitleredSet[h] = X_ori[h]
            elif heat[h] > borderHeat2:
                avgSet.append(X_ori[h])
            elif avgSet:
                fitleredSet[_runCentre(h, avgSet)] = np.median(avgSet)
                avgSet = []

        # see abstractData: a trailing run ends just before seqLen
        if avgSet:
            fitleredSet[_runCentre(seqLen, avgSet)] = np.median(avgSet)

        reduction.append(1 - len([x for x in fitleredSet if x != -2])/seqLen)
        newX.append([[x] for x in fitleredSet])

    newX = np.array(newX, dtype=np.float32)
    print(np.array(newX).shape)
    print(data.shape)
    return newX, reduction

#preprocess data with StandardScaler and SAX
def preprocessData(x_train1, x_val, X_test, y_train1, y_val, y_test, y_trainy, y_testy, binNr):
    """Standardise the three data sets and symbolise them with SAX.

    A single `StandardScaler` is fitted on all training values and applied to
    train, validation and test data. The scaled data is returned as the
    `*_ori` arrays; a SAX transform (`n_bins` symbols, uniform strategy) fitted
    on the scaled training data produces the symbolised arrays. All returned
    data arrays get a trailing axis of size 1.

    If `useSaves` is set and a file for this dataset / `seqSize` / `binNr`
    exists, the stored result is loaded instead and the passed data is
    ignored. Freshly computed results are always written with
    `helper.save_obj`.

    Args:
        x_train1, x_val, X_test: 2-D arrays (samples, length).
        y_train1, y_val, y_test: one-hot labels, passed through unchanged.
        y_trainy, y_testy: integer labels, passed through unchanged.
        binNr: only used as the '-bin' part of the cache file name (the fold
            loop passes the fold number here).

    Returns:
        x_train1, x_val, x_test, y_train1, y_val, y_test, X_train_ori,
        X_val_ori, X_test_ori, y_trainy, y_testy

    Reads the notebook-level `data_path_train`, `seqSize`, `useSaves`, `n_bins`.
    """
    x_test = X_test.copy()

    processedDataName = "./saves/"+str(data_path_train.split('/')[-1].split('.')[0])+ '-size' + str(seqSize) + '-bin' + str(binNr)
    fileExists = os.path.isfile(processedDataName +'.pkl')

    if(fileExists and useSaves):
        print('found file! Start loading file!')
        res = helper.load_obj(processedDataName)

        # the stored dictionary may come back wrapped in a 0-d array; unwrap it
        for _, v in np.ndenumerate(res):
            res = v

        x_train1 = res['X_train']
        x_test = res['X_test']
        x_val = res['X_val']
        X_train_ori = res['X_train_ori']
        X_test_ori = res['X_test_ori']
        y_trainy = res['y_trainy']
        y_train1 = res['y_train']
        y_test = res['y_test']
        y_testy = res['y_testy']
        y_val = res['y_val']
        X_val_ori = res['X_val_ori']
        print(x_test.shape)
        print(x_train1.shape)
        print(y_test.shape)
        print(y_train1.shape)

    else:
        print(x_test.shape)
        print(x_train1.shape)
        print(x_val.shape)
        print(y_test.shape)
        print(y_train1.shape)
        trainShape = x_train1.shape
        valShape = x_val.shape
        testShape = x_test.shape

        # one global mean / std over all training values, applied to every set
        scaler = StandardScaler()
        scaler = scaler.fit(x_train1.reshape((-1,1)))
        x_train1 = scaler.transform(x_train1.reshape(-1, 1)).reshape(trainShape)
        x_val = scaler.transform(x_val.reshape(-1, 1)).reshape(valShape)
        x_test = scaler.transform(x_test.reshape(-1, 1)).reshape(testShape)

        # keep the scaled, not yet symbolised data
        X_test_ori = x_test.copy()
        X_val_ori = x_val.copy()
        X_train_ori = x_train1.copy()

        #Do SAX
        sax = SymbolicAggregateApproximation(n_bins=n_bins, strategy='uniform')
        sax.fit(x_train1)
        x_train1 = helper.symbolizeTrans(x_train1, sax)
        x_val = helper.symbolizeTrans(x_val, sax)
        x_test = helper.symbolizeTrans(x_test, sax)

        # add the channel axis expected by the model
        x_train1 = np.expand_dims(x_train1, axis=2)
        x_val = np.expand_dims(x_val, axis=2)
        x_test = np.expand_dims(x_test, axis=2)
        X_test_ori = np.expand_dims(X_test_ori, axis=2)
        X_train_ori = np.expand_dims(X_train_ori, axis=2)
        X_val_ori = np.expand_dims(X_val_ori, axis=2)

        print(x_test.shape)
        print(x_train1.shape)

        #save sax results to only calculate them once
        resultsSave = {
            'X_train':x_train1,
            'X_train_ori':X_train_ori,
            'X_test':x_test,
            'X_test_ori':X_test_ori,
            'X_val': x_val,
            'X_val_ori':X_val_ori,
            'y_trainy':y_trainy,
            'y_train':y_train1,
            'y_val': y_val,
            'y_test':y_test,
            'y_testy':y_testy
        }
        helper.save_obj(resultsSave, processedDataName)
    return x_train1, x_val, x_test, y_train1, y_val, y_test, X_train_ori, X_val_ori, X_test_ori, y_trainy, y_testy

In [None]:
#get save weight names! Depends on other set variables
def getWeightName(learning = True):
    """Return the weight file path for the current run configuration.

    The path is assembled from the notebook-level `data_path_train` (file name
    up to its first dot), `seqSize`, `maxString`, `abstractionString` and
    `fold`. With `learning` the name ends in '-learning.tf', otherwise in '.tf'.
    """
    datasetName = str(data_path_train.split('/')[-1].split('.')[0])
    ending = '-learning.tf' if learning else '.tf'
    pieces = [
        "./saves/weights-", datasetName,
        '-size', str(seqSize),
        '-threshold', maxString,
        '-input', abstractionString,
        '-fold', str(fold),
        ending,
    ]
    return "".join(pieces)

def _shiftStatistics(dataset):
    """Return (average number of slope changes, average reduced set size).

    Every series is walked point by point. Values equal to -2 are treated as
    masked and only widen the time gap. For every other value the truncated
    slope towards the previous unmasked value is compared with the last
    recorded slope; each change counts as one shift and one more point in the
    reduced set.
    """
    shiftCounts = []
    setSizes = []
    for series in dataset:
        shifts = -1        # the first slope always differs from the sentinel
        smallerSet = 2
        lastVal = series[0][0]
        rise = -3          # sentinel for "no slope seen yet"
        timeSkip = 1
        for v in series[1:]:
            v = v[0]
            if v == -2:
                timeSkip += 1
                continue
            slope = helper.truncate((v - lastVal) / timeSkip)
            if slope != rise:
                shifts += 1
                smallerSet += 1
                rise = slope
            # the gap is measured from the last unmasked value, so it restarts
            # whenever such a value has been consumed
            lastVal = v
            timeSkip = 1
        shiftCounts.append(shifts)
        setSizes.append(smallerSet)
    return np.average(shiftCounts), np.average(setSizes)


#do training with abstraction level = abstraction
def doAbstractedTraining(trainD, valD, testD, abstraction = 0, earlyPredictorZ = None, takeAvg = True, rate=0, heatLayer = 0, numOfAttentionLayers = 1):
    """Optionally abstract the data, then train and evaluate one model on it.

    Args:
        trainD, valD, testD: arrays of shape (samples, length, 1).
        abstraction: 2 -> `abstractData` (interpolation), 3 -> `abstractData2`
            (masking); any other value, or a missing `earlyPredictorZ`, leaves
            the data unchanged.
        earlyPredictorZ: attention model handed to the abstraction functions.
        takeAvg, heatLayer: forwarded to the abstraction functions.
        rate, numOfAttentionLayers: forwarded to `createModel`.

    Returns:
        A 20-tuple: val_score, test_score, val predictions, test predictions,
        model, model inputs, x_trains, x_tests, x_vals, attention output on
        the validation data, avg reduced val set size, avg reduced test set
        size, avg val shifts, avg test shifts, attention model, newTrain,
        newVal, newTest, valReduction, testReduction.

    Reads the notebook-level `BATCH`, `num_of_classes`, `y_train1`, `y_val`,
    `y_testy`, `earlystop`, `useSaves`, `abstractionString`, `maxString`.
    """
    #Interpolation abstraction
    if abstraction == 2 and earlyPredictorZ is not None:
        abstractor = abstractData
    #Mask abstraction
    elif abstraction == 3 and earlyPredictorZ is not None:
        abstractor = abstractData2
    else:
        abstractor = None

    if abstractor is None:
        newTrain = trainD
        newVal = valD
        newTest = testD
        valReduction = 0
        testReduction = 0
    else:
        newTrain, _ = abstractor(trainD, earlyPredictorZ, takeAvg = takeAvg, heatLayer = heatLayer)
        newVal, valReduction = abstractor(valD, earlyPredictorZ, takeAvg = takeAvg, heatLayer = heatLayer)
        newTest, testReduction = abstractor(testD, earlyPredictorZ, takeAvg = takeAvg, heatLayer = heatLayer)

    #calculate shifts of the data (same measure for validation and test data)
    valShifts, smallerValSet = _shiftStatistics(newVal)
    print(newTest.shape)
    testShifts, smallerTestSet = _shiftStatistics(newTest)

    #create model and train
    n_model2, inputs2, x_trains2, x_tests2, x_vals2 = createModel(1, newTrain, newVal, newTest , BATCH, num_of_classes, rate=rate, doMask=False, numOfAttentionLayers=numOfAttentionLayers)
    weightsName = getWeightName(learning=True)
    saveBest2 = transformer.SaveBest(weightsName)
    print(np.array(x_trains2).shape)
    print(np.array(x_vals2).shape)

    if (os.path.isfile(getWeightName(learning=False) + '.index') and useSaves):
        print('found weights to load! Won\'t train model!')
        n_model2.load_weights(getWeightName(learning=False))
    else:
        print('No weights found! Start training model!')
        n_model2.fit(x_trains2, y_train1, validation_data = (x_vals2, y_val) , epochs = 500, batch_size = BATCH, verbose=1, callbacks =[earlystop, saveBest2], shuffle = True)
        # reload the weights written by the SaveBest callback and store them
        # under the final name
        n_model2.load_weights(getWeightName(learning=True))
        n_model2.save_weights(getWeightName(learning=False), overwrite=True)

    # layers[2] is the layer whose outputs are read as attention elsewhere in
    # this notebook. NOTE(review): with the model built by createModel
    # (input, masking, encoder, ...) this should be the encoder - confirm.
    earlyPredictor2 = tf.keras.Model(n_model2.inputs, n_model2.layers[2].output)

    # Predictions on the validation set
    predictions2 = n_model2.predict(x_vals2)
    attentionQ2 = earlyPredictor2.predict(x_vals2)

    print('############################')
    predictions2 = np.argmax(predictions2,axis=1)

    # Measure this fold's accuracy on validation set compared to actual labels
    y_compare = np.argmax(y_val, axis=1)
    val_score2 = metrics.accuracy_score(y_compare, predictions2)

    print(f"validation fold score with input {abstractionString}-{maxString}(accuracy): {val_score2}")

    # Predictions on the test set, at most `limit` samples per call
    limit = 300
    test_predictions_loop2 = []
    for bor in range(int(math.ceil(len(x_tests2[0])/limit))):
        test_predictions_loop2.extend(n_model2.predict([x_tests2[0][bor*limit:(bor+1)*limit]]))

    # Shift the test labels by one so they line up with the argmax indices
    y_testyy = [y-1 for y in y_testy]
    test_predictions_loop2 = np.argmax(test_predictions_loop2, axis=1)

    # Measure this fold's accuracy on test set compared to actual labels
    test_score2 = metrics.accuracy_score(y_testyy, test_predictions_loop2)

    print(f"test fold score with input {abstractionString}-{maxString}(accuracy): {test_score2}")
    return val_score2, test_score2, predictions2, test_predictions_loop2, n_model2, inputs2, x_trains2, x_tests2, x_vals2, attentionQ2, smallerValSet, smallerTestSet, valShifts, testShifts, earlyPredictor2, newTrain, newVal, newTest, valReduction, testReduction

In [None]:
# Initialize loop for every kth fold
# For every fold, six models are trained one after another: scaled original
# data, SAX data, and four abstractions of the SAX data (interpolation / mask,
# each with the average and the max threshold). The functions called below read
# the globals `fold`, `abstractionString`, `maxString` and `doMax`, so the
# order of the assignments in this cell matters.

doAbstraction = True
#Attention layers count
numOfAttentionLayers = 2
#take Attention average
takeAvg = True
#dropout rate
rate=0.3

# names used in the weight file names; index = abstraction level
usedAbstraction = ['Ori', 'SAX', 'interpol', 'mask']    
usedThresholds = ['None', 'None', 'average', 'max']   
BATCH = 50
fold = 0

maxString = 'None'
# one result list per entry of resultNames; each fold appends one output tuple
accResults = [[],[],[],[],[],[]]
resultNames = ['Ori', 'SAX', 'avgInter', 'maxInter', 'avgMask', 'maxMask' ]

for train, test in kf.split(X_train, y_trainy): # StratifiedKFold needs the labels y to stratify the folds
    fold+=1
    print(f"Fold #{fold}")
    
    #preprocess data
    x_train1 = X_train[train]
    x_val = X_train[test]
    y_train1 = y_train[train]
    y_val = y_train[test]
    
    # the fold number is passed as binNr, i.e. it becomes part of the cache file name
    x_train1, x_val, x_test, y_train1, y_val, y_test, X_train_ori, X_val_ori, X_test_ori, y_trainy, y_testy = preprocessData(x_train1, x_val, X_test, y_train1, y_val, y_test, y_trainy, y_testy, fold)

    abstractionIndex = 0
    resultIndex = 0
    
    #ori data    
    abstractionString = usedAbstraction[abstractionIndex]    
    outOri = doAbstractedTraining(X_train_ori, X_val_ori, X_test_ori, abstractionIndex, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   

    accResults[resultIndex].append(outOri)
    resultIndex+=1
    
    # sax data    
    abstractionIndex += 1
    abstractionString = usedAbstraction[abstractionIndex]  
    outSax = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
    accResults[resultIndex].append(outSax)
    resultIndex+=1
    
    if doAbstraction:
        # outSax[-6] is the attention model (earlyPredictor2) of the SAX run;
        # its attention drives all four abstractions below
        earlyPredictor = outSax[-6]
        # interpolation abstraction (abstraction level 2), average threshold
        abstractionIndex += 1
        abstractionString = usedAbstraction[abstractionIndex]  
        doMax = False
        maxString = 'average'
        outAvgAverage = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, earlyPredictorZ=earlyPredictor, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
        accResults[resultIndex].append(outAvgAverage)
        resultIndex+=1        
        # interpolation abstraction, max threshold
        doMax = True
        maxString = 'max'
        outMaxAverage = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, earlyPredictorZ=earlyPredictor, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
        accResults[resultIndex].append(outMaxAverage)
        resultIndex+=1    
        
        # mask abstraction (abstraction level 3), average threshold
        abstractionIndex += 1
        abstractionString = usedAbstraction[abstractionIndex]  
        doMax = False
        maxString = 'average'
        outAvgMask = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, earlyPredictorZ=earlyPredictor, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
        accResults[resultIndex].append(outAvgMask)
        resultIndex+=1         
        # mask abstraction, max threshold
        doMax = True
        maxString = 'max'
        outMaxMask = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, earlyPredictorZ=earlyPredictor, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
        accResults[resultIndex].append(outMaxMask)
        resultIndex+=1     
    
    
# Print the scores of every configuration, averaged over all folds.
# Positions in a result tuple (see the return of doAbstractedTraining):
#   0 / 1     validation / test accuracy
#   2 / 3     validation / test predictions
#   -10 / -9  average reduced set size of validation / test data
#   -8 / -7   average number of shifts of validation / test data
#   -2 / -1   reduction of validation / test data
for rIndex, (resultName, results) in enumerate(zip(resultNames, accResults), start=1):
    print('#########################################')
    print(resultName + ' Scores:')
    print('#########################################')

    if not results:
        # e.g. the abstraction runs when doAbstraction is False
        print('No results recorded.')
        continue

    print(f"Avg validation score (accuracy): {np.average([r[0] for r in results])}")
    print(f"Avg test score (accuracy): {np.average([r[1] for r in results])}")
    # the reductions are lists with one value per sample (or a plain 0);
    # averaging per fold first also works when the folds differ in size
    print(f"Avg Val reduced by: {np.average([np.average(r[-2]) for r in results])}")
    print(f"Avg Test reduced by: {np.average([np.average(r[-1]) for r in results])}")
    print(f"Avg Interpol Val Size: {np.average([r[-10] for r in results])}")
    print(f"Avg Interpol Test Size: {np.average([r[-9] for r in results])}")
    print(f"Avg Val shifts: {np.average([r[-8] for r in results])}")
    print(f"Avg Test shifts: {np.average([r[-7] for r in results])}")

    # For the abstracted configurations: share of predictions that differ
    # from the predictions of the SAX model (accResults[1])
    if rIndex > 2:
        abstractValPred = np.concatenate([r[2] for r in results])
        saxValPred = np.concatenate([r[2] for r in accResults[1]])
        valSwitch = int(np.count_nonzero(abstractValPred != saxValPred))

        abstractTestPred = np.concatenate([r[3] for r in results])
        saxTestPred = np.concatenate([r[3] for r in accResults[1]])
        testSwitch = int(np.count_nonzero(abstractTestPred != saxTestPred))

        print(f"Val Switched: {valSwitch / len(abstractValPred)}")
        print(f"Test Switched: {testSwitch / len(abstractTestPred)}")

Attention visualisation

In [None]:
# Validation sample to inspect and number of attention heads to draw
index = 3
heads = 6

sns.set()
predictions = outSax[2]   # validation predictions of the SAX model
attentionQ = outSax[9]    # attention model output on the validation data
print('###')
print(y_val[index])
print(predictions[index] + 1)

# One heat map per head: the column labels are the values of the inspected
# series, the colour is the maximum attention over axis 0 of that head.
for head in range(heads):
    data_word = np.array(x_val).squeeze()[index]
    data_att = np.max(attentionQ[1][index][head], axis = 0)
    d = pd.DataFrame(data = data_att, index = data_word, columns=range(1)).transpose()

    f, ax = plt.subplots(figsize=(60,3))
    sns.heatmap(d, vmin=0, vmax=0.1, ax=ax, cmap="OrRd")
    plt.setp(ax.get_yticklabels(), rotation=360, horizontalalignment='right')
    plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
    plt.tick_params(labelsize=26)
    plt.show()

Prepare attention abstraction data for visualisation

In [None]:
# Parameters
ran = 60

predictionsT = outAvgAverage[2]     # validation predictions
attentionQT = outAvgAverage[9]      # attention model output on the validation data
abstractedData = outAvgAverage[-4]  # abstracted validation data (newVal)


def _keepAboveHeat(series, sampleAttention, divisor = 1):
    """Return the points of `series` whose heat exceeds average heat / divisor.

    The heat is the maximum of `sampleAttention` over axis 1, summed over
    axis 0. Every kept value is wrapped in a one-element list.
    """
    heat = np.sum(np.max(sampleAttention, axis = 1), axis = 0)
    borderHeat = np.average(heat) / divisor
    return [[series[h]] for h in range(len(heat)) if heat[h] > borderHeat]


# abstracted data, filtered with the attention of the model trained on it
abstractedSeries = np.array(abstractedData).squeeze()
filteredTrain = [
    _keepAboveHeat(abstractedSeries[indi], attentionQT[1][indi])
    for indi in range(len(abstractedData))
]

print("++++")
# SAX validation data, filtered with the attention of the SAX model and half
# the average heat as threshold; iterate over the samples, not over the
# length of the first series
x_vals = x_val
valSeries = np.array(x_vals).squeeze()
filteredVal = [
    _keepAboveHeat(valSeries[indi], attentionQ[1][indi], divisor = 2)
    for indi in range(len(x_vals))
]

# longest filtered series over both sets
maxLen = max([len(filtered) for filtered in filteredTrain + filteredVal], default = 0)

print("####")
print(maxLen)
# the filtered series differ in length, so report their number instead of an
# array shape
print(len(filteredVal))

Show attention abstraction for each head for a single input

In [None]:


ran = 60
# Parameters
X_sax = np.array(abstractedData).squeeze()[index]
X_ori = X_sax

# Compute gaussian bins
bins = norm.ppf(np.linspace(0, 1, n_bins + 1)[1:-1])

# a text label is drawn above its point when the value is non-zero
bottom_bool = X_sax


def _decorateSaxPlot(xMax):
    """Add the bin lines, the two legends, the axis label and the title."""
    plt.hlines(bins, 0, xMax, color='g', linestyles='--', linewidth=0.5)
    sax_legend = mlines.Line2D([], [], color='#ff7f0e', marker='*',
                               label='SAX - {0} bins'.format(n_bins))
    first_legend = plt.legend(handles=[sax_legend], fontsize=8, loc=(0.76, 0.86))
    plt.gca().add_artist(first_legend)
    plt.legend(loc=(0.81, 0.93), fontsize=8)
    plt.xlabel('Time', fontsize=14)
    plt.title('Symbolic Aggregate approXimation', fontsize=16)


def _plotSeriesWithHeat(heat, alphaScale):
    """Plot the inspected series with `heat` as red background shading."""
    plt.figure(figsize=(12, 2))
    plt.plot(X_ori, 'o--', label='Original')
    for x, y, s, bottom in zip(range(ran), X_ori, X_sax, bottom_bool):
        va = 'bottom' if bottom else 'top'
        plt.text(x, y, s, ha='center', va=va, fontsize=14, color='#ff7f0e')
    _decorateSaxPlot(ran)
    for i in range(len(heat)):
        # matplotlib only accepts alpha values between 0 and 1
        alpha = min(1.0, max(0.0, float(heat[i] * alphaScale)))
        plt.axvspan(i, i+1, color='red', alpha=alpha)
    plt.show()


def _filterByHeat(series, heat):
    """Abstract `series` with the average-heat thresholds.

    Points above the average heat are kept; a run of points between
    average / 1.2 and the average is replaced by its median at the centre of
    the run; all other points are dropped. Returns (positions, values).
    """
    upper = np.average(heat)
    lower = upper / 1.2

    values = []
    times = []
    pending = []
    for h in range(len(heat)):
        if heat[h] > upper:
            if pending:
                values.append(np.median(pending))
                times.append(h - math.ceil(len(pending)/2))
                pending = []
            values.append(series[h])
            times.append(h)
        elif heat[h] > lower:
            pending.append(series[h])
        elif pending:
            values.append(np.median(pending))
            times.append(h - math.ceil(len(pending)/2))
            pending = []
    # a run that reaches the end of the series is kept as well
    if pending:
        values.append(np.median(pending))
        times.append(len(heat) - math.ceil(len(pending)/2))
    return times, values


def _plotAbstractedSeries(times, values, xMax):
    """Plot the abstracted points over the SAX bin lines."""
    plt.figure(figsize=(12, 2))
    plt.plot(times, values, 'o--', label='Abstracted')
    _decorateSaxPlot(xMax)
    plt.show()


# One pair of plots per attention head
for head in range(heads):
    heat = np.max(attentionQ[1][index][head], axis = 0)
    _plotSeriesWithHeat(heat, alphaScale = 6)
    timeSet, fitleredSet = _filterByHeat(X_ori, heat)
    _plotAbstractedSeries(timeSet, fitleredSet, len(heat))

print('#################################################')

# The same pair of plots for the heat summed over all heads
heat = np.sum(np.max(attentionQ[1][index], axis = 1), axis = 0)
maxHeat = np.average(heat)
borderHeat = maxHeat
borderHeat2 = maxHeat/1.2
print(heat)
print(maxHeat)

_plotSeriesWithHeat(heat, alphaScale = 1)
timeSet, fitleredSet = _filterByHeat(X_ori, heat)
_plotAbstractedSeries(timeSet, fitleredSet, len(heat))

Plots the original data, the SAX data and the abstracted data for each label in `goal`.

In [None]:
def abstract_by_attention(values, heat, border_high, border_low):
    """Reduce a series to the points that received enough attention.

    Every position ``h`` is handled according to ``heat[h]``:

    * ``heat[h] > border_high``: the value is kept at its own time step.
    * ``border_low < heat[h] <= border_high``: the value joins the current run
      of consecutive medium-score positions; a finished run is replaced by a
      single point holding the run's median, placed near the middle of the run.
    * otherwise: the value is dropped (and an open run is finished).

    A run that is still open when the series ends is flushed as well, so
    trailing medium-score points are not lost.

    Args:
        values: indexable series, read at the same positions as ``heat``.
        heat: 1-D sequence of per-position scores.
        border_high: threshold above which a point is kept unchanged.
        border_low: threshold above which a point contributes to a run.

    Returns:
        ``(times, kept)``: two lists of equal length with the x positions and
        the corresponding values of the abstracted series.
    """
    times, kept, run = [], [], []

    def flush(end):
        # `end` is the first position after the run, so the run covers
        # end - len(run) .. end - 1.
        if run:
            kept.append(np.median(run))
            times.append(end - math.ceil(len(run) / 2))
            run.clear()

    for h in range(len(heat)):
        if heat[h] > border_high:
            flush(h)
            kept.append(values[h])
            times.append(h)
        elif heat[h] > border_low:
            run.append(values[h])
        else:
            flush(h)
    flush(len(heat))
    return times, kept


#for goal in [1,2,3,4,5]:
# The sample loop below reuses the global name `index`; remember its current
# value so it can be restored once the cell is done.
indexSafe = index

# NOTE(review): the meaning of the individual outSax entries is not visible in
# this cell -- confirm the indices against the cell that builds outSax.
#theSet = outAvgAverage[-4]
theSet = outSax[7]
valSet = y_test #y_val
n_model = outSax[4]
earlyPredictor = outSax[-6]
doMax = False
maxString = 'average'
X_ori = outSax[7]

# Run the predictor in slices of `limit` samples and collect the second
# output of every call (presumably the attention scores -- TODO confirm).
limit = 300
predictionsX = outSax[3]
attentionQX = []
for bor in range(int(math.ceil(len(theSet[0])/limit))):
    attentionQX.extend(earlyPredictor.predict([theSet[0][bor*limit:(bor+1)*limit]])[1])

# Loop-invariant: convert the input set once instead of once per plotted sample.
saxSeries = np.array(theSet).squeeze()

counterLimit = 20  # maximum number of samples plotted per goal
for goal in [6]:
    print(goal)

    counter = 0
    for index in range(len(predictionsX)):
        # Stop scanning once counterLimit samples were plotted (the previous
        # `counter <= counterLimit` check let one extra sample through).
        if counter >= counterLimit:
            break
        # Only samples where prediction and one-hot label both equal the goal
        # (labels are compared 1-based).
        if predictionsX[index] + 1 != goal or np.argmax(valSet[index]) + 1 != goal:
            continue
        counter += 1

        X_sax = saxSeries[index]
        X_ori = X_sax

        # 1) Series taken from X_test_ori.
        plt.figure(figsize=(12, 2))
        plt.plot(X_test_ori[index], 'o--', label='Original')
        plt.title('Original Time Series', fontsize=16)
        plt.xlabel('Time', fontsize=14)
        #plt.savefig('./Bilder/' +  str(data_path_train.split('/')[-1].split('.')[0]) + '/interpolate/' + maxString + '/abstractions/' + str(valSet[index]) + '-p' + str(goal) + '-' + str(index) + 'oriForm.png', dpi = 300)
        plt.show()

        # 2) Model input series on top of the bin edges.
        print('#################################################')
        print(np.argmax(valSet[index], axis=0))
        plt.figure(figsize=(12, 2))
        plt.plot(X_ori, 'o--', label='Original')
        # Span the bin edges over the plotted series itself rather than over
        # the global `ran` left behind by another cell.
        plt.hlines(bins, 0, len(X_ori), color='g', linestyles='--', linewidth=0.5)

        plt.xlabel('Time', fontsize=14)
        plt.title('Symbolic Time Series', fontsize=16)

        # Per-position score: maximum along axis 1, then summed along axis 0.
        # NOTE(review): assumes attentionQX[index] is a 3-D array -- confirm.
        heat = np.sum(np.max(attentionQX[index], axis = 1), axis = 0)
        if doMax:
            maxHeat = np.max(heat)
            borderHeat = maxHeat/2
            borderHeat2 = maxHeat/3
        else:
            # Despite its name, maxHeat holds the average score in this mode.
            maxHeat = np.average(heat)
            borderHeat = maxHeat
            borderHeat2 = maxHeat/1.2

        #plt.savefig('./Bilder/' +  str(data_path_train.split('/')[-1].split('.')[0]) + '/interpolate/' + maxString + '/abstractions/' + str(valSet[index]) + '-p' + str(goal) + '-' + str(index) + 'saxForm.png', dpi = 300)
        plt.show()

        # 3) Scores as a one-row heatmap; `data_word` (defined in another cell)
        #    provides the labels.
        data_att = heat
        #d = pd.DataFrame(data = data_att,index = data_word, columns=data_word)
        d = pd.DataFrame(data = data_att,index = data_word, columns=range(1))
        f, ax = plt.subplots(figsize=(60,3))
        d = d.transpose()
        sns.heatmap(d, vmin=0, vmax=0.3, ax=ax, cmap="OrRd")
        label_y = ax.get_yticklabels()
        plt.setp(label_y, rotation=360, horizontalalignment='right')
        label_x = ax.get_xticklabels()
        plt.setp(label_x, rotation=45, horizontalalignment='right')
        plt.tick_params(labelsize=26)
        #plt.savefig('./Bilder/' +  str(data_path_train.split('/')[-1].split('.')[0]) + '/interpolate/' + maxString + '/abstractions/' + str(valSet[index]) + '-p' + str(goal) + '-' + str(index) + 'heatForm.png', dpi = 300)
        plt.show()

        # 4) Abstracted series: high-score points kept, medium-score runs
        #    collapsed to their median, everything else dropped.
        timeSet, fitleredSet = abstract_by_attention(X_ori, heat, borderHeat, borderHeat2)

        #print(fitleredSet)

        plt.figure(figsize=(12, 2))
        plt.plot(timeSet, fitleredSet, 'o--', label='Original')
        plt.hlines(bins, 0, len(heat), color='g', linestyles='--', linewidth=0.5)
        plt.xlabel('Time', fontsize=14)
        plt.title('Abstracted Time Series', fontsize=16)

        #plt.savefig('./Bilder/' + str(data_path_train.split('/')[-1].split('.')[0]) + '/interpolate/' + maxString + '/abstractions/' + str(valSet[index]) + '-p' + str(goal) + '-' + str(index) + 'abstractForm.png', dpi = 300)
        plt.show()

index = indexSafe