In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.python.keras.layers import Input
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.python.keras.layers.merge import concatenate
from tensorflow.keras import Model
from tensorflow.keras import layers

from tensorflow.python.eager import context
from tensorflow.python.framework import ops

# Report the library versions so results can be tied to a concrete environment.
print(tf.__version__)
print(tfa.__version__)
print(tfp.__version__)


# Report whether TensorFlow lists at least one physical GPU device.
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

In [None]:
import os
import random
import numpy as np
import math
import scipy.stats
import pandas as pd
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import seaborn as sns


from os import walk

from functools import reduce

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.utils import shuffle

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

from scipy.stats import norm
from scipy.interpolate import interp1d
from scipy.io import arff

from pyts.approximation import SymbolicAggregateApproximation

from modules import helper
from modules import transformer

from collections import defaultdict

Set the random seeds so that repeated runs on the same machine always produce the same models

In [None]:
seed_value = 42

# 1. Fix the hash seed.
#    NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
#    here presumably only affects child processes -- confirm if hash order matters.
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Seed Python's built-in pseudo-random generator.
random.seed(seed_value)
# 3. Seed NumPy's global pseudo-random generator (once is enough; the former
#    `np.random.RandomState(seed_value)` call built an object and discarded it).
np.random.seed(seed_value)
# 4. Seed TensorFlow: public API first, then the eager-context and default-graph seeds.
tf.random.set_seed(seed_value)
context.set_global_seed(seed_value)
ops.get_default_graph().seed = seed_value

# 5. Request deterministic TensorFlow / cuDNN kernels.
#    pip install tensorflow-determinism needed
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

Loading and preprocessing the test and train data

Change parameters for different settings

In [None]:
# Dataset selection: exactly one group of the four settings below should be active.
# source: http://www.timeseriesclassification.com/description.php?Dataset=SyntheticControl
data_path_train = './Datasets/SyntheticControl/SyntheticControl_TRAIN.arff'
data_path_test = './Datasets/SyntheticControl/SyntheticControl_TEST.arff'
num_of_classes = 6
seqSize = 60
## source: http://www.timeseriesclassification.com/description.php?Dataset=ECG5000
#data_path_train = './Datasets/ecg5000/ECG5000_TRAIN.arff'
#data_path_test = './Datasets/ecg5000/ECG5000_TEST.arff'
#num_of_classes = 5
#seqSize = 140
## source: http://www.timeseriesclassification.com/description.php?Dataset=Plane
#data_path_train = './Datasets/Plane/Plane_TRAIN.arff'
#data_path_test = './Datasets/Plane/Plane_TEST.arff'
#num_of_classes = 7
#seqSize = 144
## source: http://www.timeseriesclassification.com/description.php?Dataset=PowerCons
#data_path_train = './Datasets/PowerCons/PowerCons_TRAIN.arff'
#data_path_test = './Datasets/PowerCons/PowerCons_TEST.arff'
#num_of_classes = 2
#seqSize = 144


# Reuse saved model weights and cached preprocessed data from ./saves when present
useSaves = True

# Number of symbols (bins) used by SAX
n_bins = 5

# Number of cross-validation folds
nrFolds = 5


# Load and format the data
data_train, meta_train = arff.loadarff(data_path_train)
data_test, meta_test = arff.loadarff(data_path_test)

# Convert the ARFF records into plain 2-D arrays (one row per series)
data_train = np.array(data_train.tolist())
data_test = np.array(data_test.tolist())

# The last column holds the class label, all other columns the series values
y_trainy = data_train[:,-1].astype(int)
y_train = []
X_train = data_train[:,:-1]
y_testy_full = data_test[:,-1].astype(int)
y_testy = y_testy_full
y_test = []
X_test = data_test[:,:-1]

# Shuffle the training samples together with their labels (seeded, hence repeatable)
X_train, y_trainy = shuffle(X_train, y_trainy, random_state = seed_value)

# One-hot encode the labels; label y is written to index y-1, i.e. labels are treated as 1-based
for y in y_trainy:
    y_train_puffer = np.zeros(num_of_classes)
    y_train_puffer[y-1] = 1
    y_train.append(y_train_puffer)

for y in y_testy:
    y_puffer = np.zeros(num_of_classes)
    y_puffer[y-1] = 1
    y_test.append(y_puffer)

y_train = np.array(y_train)
y_train = y_train.astype(float)
y_test_full = np.array(y_test)
y_test_full = y_test_full.astype(float)
# y_test starts out as the full one-hot test label matrix
y_test = y_test_full  
y_test = y_test.astype(float)

print(X_test.shape)
print(X_train.shape)
print(y_test.shape)
print(y_train.shape)

X_test = X_test.astype(float)
X_train = X_train.astype(float)

# Initialize k-folds
kf = StratifiedKFold(nrFolds, shuffle=True, random_state=seed_value) # Use for StratifiedKFold classification
# Fold counter; read by getWeightName() and incremented by the training loop
fold = 0

# Early-stopping callback: stop once val_loss has not improved for 70 epochs
earlystop = EarlyStopping(monitor= 'val_loss', min_delta=0 , patience=70, verbose=0, mode='auto')

print('done')

Plots to examine the data

In [None]:
import datetime

# Plot the training series with indices 8..19 one after another and print the
# one-hot label of each series below its plot.
time_axis = range(seqSize)
for sample_idx in range(8, 20):
    plt.plot(time_axis, X_train[sample_idx])
    plt.show()
    print(y_train[sample_idx])

Methods for attention combination

In [None]:
def doCombiStep(step, field, axis):
    """Reduce ``field`` along ``axis`` using the named combination step.

    Args:
        step: Name of the reduction, either 'max' or 'sum'.
        field: Array-like that is reduced.
        axis: Axis handed to the NumPy reduction.

    Returns:
        ``np.max(field, axis=axis)`` for 'max', ``np.sum(field, axis=axis)`` for 'sum'.

    Raises:
        ValueError: If ``step`` is neither 'max' nor 'sum'. (The function used
            to fall through and return None, which only failed later in the caller.)
    """
    if step == 'max':
        return np.max(field, axis=axis)
    if step == 'sum':
        return np.sum(field, axis=axis)
    raise ValueError(f"unknown combination step {step!r}; expected 'max' or 'sum'")
    

# Create all coherence attention combinations (FCAM, CRCAM and GTM matrices)
def makeAttention(outSax, x_train1, y_train1):
    """Aggregate per-sample attention matrices into class-wise coherence matrices.

    Reads the module-level settings ``order``, ``step1`` and ``step2`` to decide
    how the raw attention stored in ``outSax[9][2]`` is reduced (two successive
    ``doCombiStep`` reductions) into one 2-D matrix per training sample.

    Args:
        outSax: Result tuple of ``doAbstractedTraining``; only item 9 (the
            attention container ``[.., .., raw_attention]``) is used. Slot 1 of
            that container is overwritten in place with the reduced attention.
        x_train1: Symbolised training inputs; ``np.array(x_train1).squeeze()``
            must index as ``[sample][position]``.
        y_train1: One-hot training labels; the class label is ``argmax + 1``.

    Returns:
        Tuple ``(rMA, rMS)`` of nested mappings indexed ``[label][from][to]``:
        * ``rMS`` -- summed attention per symbol pair and position (FCAM sum).
        * ``rMA`` -- relative average per symbol pair and position (FCAM
          r. average), plus the extra first-level keys ``'x'`` / ``'xAvg'``
          (CRCAM sum / r. average, indexed by target symbol) and ``'max'``,
          ``'average'``, ``'median'``, ``'max+'``, ``'average+'``, ``'median+'``
          (GTM reductions over axis 0 of ``'x'`` resp. ``'xAvg'``).

    Raises:
        ValueError: If the module-level ``order`` is neither 'lh' nor 'hl'.
    """
    attentionQ = outSax[9]

    # NOTE(review): for order == 'lh' the second reduction uses axis 2 of an
    # array that has already lost its axis 0 -- confirm this is the intended axis.
    if order == 'lh':
        axis1, axis2 = 0, 2
    elif order == 'hl':
        axis1, axis2 = 2, 0
    else:
        raise ValueError(f"unknown order {order!r}; expected 'lh' or 'hl'")

    # Reduce the raw attention in two steps; slot 1 is overwritten on purpose,
    # slot 2 (the raw attention) stays untouched so the function can be re-run.
    attentionQ[1] = doCombiStep(step1, attentionQ[2], axis1)
    attentionQ[1] = doCombiStep(step2, attentionQ[1], axis2)

    # True labels (1-based), used as first-level keys of all result mappings
    predictions = np.argmax(y_train1, axis=1) + 1

    # Nested defaultdict whose innermost default is ``leaf_factory()``
    def nested_dict(depth, leaf_factory):
        if depth == 1:
            return defaultdict(leaf_factory)
        return defaultdict(lambda: nested_dict(depth - 1, leaf_factory))

    # position counter (number of non-zero attention values seen)
    rM = nested_dict(3, list)
    # attention sum at each point
    rMS = nested_dict(3, list)
    # relative average at each point + the reduced side combinations
    rMA = nested_dict(3, list)

    # Collect all training attention into "from symbol x to symbol y" matrices
    symbol_rows = np.array(x_train1).squeeze()  # loop-invariant, hoisted
    data_att = None
    for index in range(len(attentionQ[1])):
        X_ori = symbol_rows[index]
        data_att = attentionQ[1][index]
        lable = predictions[index]
        n_rows = len(data_att)

        for i in range(n_rows):
            n_cols = len(data_att[i])
            for j in range(n_cols):
                fromL, toL = X_ori[i], X_ori[j]

                # First time this symbol pair shows up for this label: allocate
                if len(rM[lable][fromL][toL]) == 0:
                    rM[lable][fromL][toL] = np.zeros((n_rows, n_cols))
                    rMS[lable][fromL][toL] = np.zeros((n_rows, n_cols))
                    rMA[lable][fromL][toL] = np.zeros((n_rows, n_cols))

                    if len(rMA[lable]['x'][toL]) == 0:
                        rMA[lable]['x'][toL] = np.zeros((n_rows, n_cols))

                    if len(rMA[lable]['xAvg'][toL]) == 0:
                        rMA[lable]['xAvg'][toL] = np.zeros((n_rows, n_cols))

                if data_att[i][j] != 0:
                    rM[lable][fromL][toL][i][j] += 1

                # FCAM sum
                rMS[lable][fromL][toL][i][j] += data_att[i][j]
                # CRCAM sum
                rMA[lable]['x'][toL][i][j] += data_att[i][j]

    # Symbol alphabet. NOTE(review): presumably the five values produced by
    # helper.symbolizeTrans for n_bins == 5 -- confirm before changing n_bins.
    valuesA = [-1, -0.5, 0, 0.5, 1]
    if data_att is not None:
        matrix_shape = (len(data_att), len(data_att[-1]))

    for lable in list(rMA.keys()):
        for toL in valuesA:
            # Symbols / symbol pairs never observed for this label would still be
            # empty lists here; give them all-zero matrices instead of failing.
            for store in (rMA[lable]['x'], rMA[lable]['xAvg']):
                if len(store[toL]) == 0:
                    store[toL] = np.zeros(matrix_shape)

            for fromL in valuesA:
                for store in (rM, rMS):
                    if len(store[lable][fromL][toL]) == 0:
                        store[lable][fromL][toL] = np.zeros(matrix_shape)

                counts = rM[lable][fromL][toL]
                sums = rMS[lable][fromL][toL]
                # sums / counts where a count exists, the plain sum elsewhere
                rel_avg = np.divide(sums, counts, out=np.array(sums, dtype=float), where=counts > 0)

                # FCAM r. average
                rMA[lable][fromL][toL] = rel_avg
                # CRCAM r. average
                rMA[lable]['xAvg'][toL] += rel_avg

            # GTM max of sum
            rMA[lable]['max'][toL] = np.max(rMA[lable]['x'][toL], axis=0)
            # GTM average of sum
            rMA[lable]['average'][toL] = np.mean(rMA[lable]['x'][toL], axis=0)
            # GTM median of sum
            rMA[lable]['median'][toL] = np.median(rMA[lable]['x'][toL], axis=0)
            # GTM max of r. average
            rMA[lable]['max+'][toL] = np.max(rMA[lable]['xAvg'][toL], axis=0)
            # GTM average of r. average
            rMA[lable]['average+'][toL] = np.mean(rMA[lable]['xAvg'][toL], axis=0)
            # GTM median of r. average
            rMA[lable]['median+'][toL] = np.median(rMA[lable]['xAvg'][toL], axis=0)
    print('done')

    return rMA, rMS

In [None]:
# Validate the full coherence attention matrices (FCAM)
def classFullAtt(rMG, x_test, y_testy, inputKeys):
    """Classify each test sequence with full coherence matrices; return accuracy.

    For every label, a sequence is scored by summing
    ``rMG[label][from_symbol][to_symbol][from_pos][to_pos]`` over all position
    pairs and dividing by the label's maximum attainable score. The label with
    the largest score strictly above 0 wins; if no score is above 0 the
    prediction is NaN and counts as wrong.

    Args:
        rMG: Nested mapping ``[label][from_symbol][to_symbol] -> 2-D matrix``.
        x_test: Sequences of symbols; each element must be a scalar or a
            size-1 array (e.g. the rows of an ``(n, seq, 1)`` array).
        y_testy: Expected label per sequence.
        inputKeys: From-symbols used to compute the per-label maximum score.

    Returns:
        Fraction of correctly classified sequences (also printed).
    """
    labels = list(rMG.keys())

    # Maximum score per label: for each position pair take the best value over
    # all symbol pairs, then sum over positions.
    maxScores = dict()
    for lable in labels:
        maxis = [np.max(list(rMG[lable][float(fromV)].values()), axis=0) for fromV in inputKeys]
        maxScores[lable] = np.sum(np.max(maxis, axis=0))

    print('done summing')

    results = []
    for ti in range(len(x_test)):
        # Convert once per sequence; .item() also accepts size-1 arrays, for
        # which a bare float() is deprecated in recent NumPy releases.
        symbols = [float(np.asarray(v).item()) for v in x_test[ti]]

        # Raw score per label
        lableScores = {lable: 0 for lable in labels}
        for fromVi, fromV in enumerate(symbols):
            for toVi, toV in enumerate(symbols):
                for lable in labels:
                    lableScores[lable] += rMG[lable][fromV][toV][fromVi][toVi]

        # Final score, normalised by the label's maximum
        for lable in labels:
            lableScores[lable] = lableScores[lable]/maxScores[lable]

        # Classification: first label with the strictly largest positive score
        biggestValue = 0
        biggestLable = np.nan
        for lable in lableScores.keys():
            if lableScores[lable] > biggestValue:
                biggestValue = lableScores[lable]
                biggestLable = lable
        results.append(biggestLable == y_testy[ti])

    print(sum(results)/len(results))
    return sum(results)/len(results)

In [None]:
# Validate the column reduced coherence attention matrices (CRCAM)
def xAttentionMatch(rMA, x_test, y_testy, inputKeys, key):
    """Classify each test sequence with column reduced matrices; return accuracy.

    For every label, a sequence is scored by summing
    ``rMA[label][key][to_symbol][from_pos][to_pos]`` over all position pairs
    and dividing by the label's maximum attainable score. The label with the
    largest score strictly above 0 wins; otherwise the prediction is NaN and
    counts as wrong.

    Args:
        rMA: Nested mapping ``[label][key][to_symbol] -> 2-D matrix``.
        x_test: Sequences of symbols; each element must be a scalar or a
            size-1 array.
        y_testy: Expected label per sequence.
        inputKeys: Not used by this function; kept for signature parity with
            ``classFullAtt``.
        key: Which reduced matrices to use (the callers pass 'x' or 'xAvg').

    Returns:
        Fraction of correctly classified sequences (also printed).
    """
    labels = list(rMA.keys())

    # Maximum score per label: best value over all target symbols, summed over positions
    maxScores = dict()
    for lable in labels:
        maxScores[lable] = np.sum(np.max(list(rMA[lable][key].values()), axis=0))
    print('done summing')

    results = []
    for ti in range(len(x_test)):
        # Convert once per sequence; .item() also accepts size-1 arrays, for
        # which a bare float() is deprecated in recent NumPy releases.
        symbols = [float(np.asarray(v).item()) for v in x_test[ti]]

        # Raw score per label
        lableScores = {lable: 0 for lable in labels}
        for fromVi in range(len(symbols)):
            for toVi, toV in enumerate(symbols):
                for lable in labels:
                    lableScores[lable] += rMA[lable][key][toV][fromVi][toVi]

        # Final score, normalised by the label's maximum
        for lable in labels:
            lableScores[lable] = lableScores[lable]/maxScores[lable]

        # Classification: first label with the strictly largest positive score
        biggestValue = 0
        biggestLable = np.nan
        for lable in lableScores.keys():
            if lableScores[lable] > biggestValue:
                biggestValue = lableScores[lable]
                biggestLable = lable
        results.append(biggestLable == y_testy[ti])

    print(sum(results)/len(results))
    return sum(results)/len(results)

In [None]:
# Validate the minimal coherence matrix (GTM)
def calcFullAbstractAttention(reductionInt, x_test, rMA, y_true=None):
    """Classify each test sequence with a minimal coherence matrix; return accuracy.

    For every label, a sequence is scored by summing
    ``rMA[label][reduction][symbol][position]`` over its positions and dividing
    by the sum of the per-position maxima over all symbols. The label with the
    largest score strictly above 0 wins; otherwise the prediction is NaN and
    counts as wrong.

    Args:
        reductionInt: Index into
            ``['max', 'max+', 'average', 'average+', 'median', 'median+']``.
        x_test: Sequences of symbols; each element must be a scalar or a
            size-1 array.
        rMA: Nested mapping ``[label][reduction][symbol] -> 1-D profile``.
        y_true: Expected label per sequence. When omitted, the module-level
            ``y_testy`` is used, which is what the function always read before
            this parameter existed.

    Returns:
        Fraction of correctly classified sequences (also printed).
    """
    # all possible implemented reductions
    reduceStrings = ['max','max+','average','average+','median','median+']
    reduceString = reduceStrings[reductionInt]

    expected = y_testy if y_true is None else y_true
    labels = list(rMA.keys())

    # Per-label maximum profile over all symbols; it does not depend on the
    # sequence, so it is computed once per label instead of once per position.
    bestProfiles = dict()

    results = []
    for ti in range(len(x_test)):
        # Convert once per sequence; .item() also accepts size-1 arrays, for
        # which a bare float() is deprecated in recent NumPy releases.
        symbols = [float(np.asarray(v).item()) for v in x_test[ti]]

        # Raw and maximum score per label
        lableScores = dict()
        maxScores = dict()
        for toVi, toV in enumerate(symbols):
            for lable in labels:
                value = rMA[lable][reduceString][toV][toVi]
                if lable not in bestProfiles:
                    bestProfiles[lable] = np.max(list(rMA[lable][reduceString].values()), axis=0)
                ceiling = bestProfiles[lable][toVi]

                if lable in lableScores:
                    lableScores[lable] += value
                    maxScores[lable] += ceiling
                else:
                    lableScores[lable] = value
                    maxScores[lable] = ceiling

        # Final score; iterate the labels that were scored (the labels of rMA),
        # not the keys of an unrelated module-level mapping.
        for lable in lableScores:
            lableScores[lable] = lableScores[lable]/maxScores[lable]

        # Classification: first label with the strictly largest positive score
        biggestValue = 0
        biggestLable = np.nan
        for lable in lableScores.keys():
            if lableScores[lable] > biggestValue:
                biggestValue = lableScores[lable]
                biggestLable = lable
        results.append(biggestLable == expected[ti])

    print(sum(results)/len(results))
    return sum(results)/len(results)

Model and process methods

In [None]:
# Create the transformer model for the given data
def createModel(splits, x_train, x_val, x_test, batchSize, num_of_classes, doMask= False, rate = 0, numOfAttentionLayers=2):
    """Build and compile the transformer-encoder classifier.

    The three data sets are split along their last axis into ``splits`` parts
    (``np.dsplit``). Each part gets its own input and ``transformer.Encoder``;
    the flattened encoder outputs are concatenated and fed to one dense output
    layer with ``num_of_classes`` sigmoid units.

    Args:
        splits: Number of equal parts along axis 2.
        x_train, x_val, x_test: 3-D arrays; only their shapes and splits are
            used here, no training happens.
        batchSize: Currently unused; kept so existing callers keep working.
        num_of_classes: Size of the output layer.
        doMask: If true, additionally create an 'attention_mask' input per
            split. NOTE(review): this path calls ``createMask``, which is not
            defined in the visible part of the notebook.
        rate: Passed to the encoder as ``rate``.
        numOfAttentionLayers: First argument of ``transformer.Encoder``.

    Returns:
        Tuple ``(model, inputs, x_trains, x_tests, x_vals)`` with the compiled
        Keras model, its list of input tensors and the split data sets.
    """
    # Print the shape of the data actually passed in (this used to read a
    # notebook-level variable instead of the parameter).
    print(np.array(x_train).shape)
    x_trains = np.dsplit(x_train, splits)
    print(np.array(x_trains).shape)

    x_tests = np.dsplit(x_test, splits)
    x_vals = np.dsplit(x_val, splits)
    maxLen = len(x_trains[0][0])
    print(maxLen)

    print(np.array(x_trains).shape)
    flattenArray = []
    inputShapes = []
    encClasses = []
    for i in range(len(x_trains)):
        if doMask:
            masky = createMask(x_trains[i], 6)

        x_part = np.array(x_trains[i])
        print(x_part.shape)
        input_shape1 = (x_part.shape[1], x_part.shape[2])

        if doMask:
            left_input1 = tf.keras.layers.Input(input_shape1, name='input_ids')

            mask = tf.keras.layers.Input(shape=masky.shape[1:], name='attention_mask')
            print('masky shape')
            print(masky.shape)
            print(mask)
        else:
            # Placeholder input that is never wired into the model; left in place
            # so that automatic layer naming stays as it was.
            mask = Input(1)
            left_input1 = Input(input_shape1)

        inputShapes.append(left_input1)
        if doMask:
            inputShapes.append(mask)

        encoded = left_input1
        input_vocab_size = 0

        # Create the transformer encoder for this split.
        # NOTE(review): the positional constants (16, 6, 6, 5000) are defined by
        # transformer.Encoder, which is not visible here -- confirm their meaning there.
        encClass1 = transformer.Encoder(numOfAttentionLayers, 16, 6, 6, 5000, rate=rate, input_vocab_size = input_vocab_size + 2, maxLen = maxLen, seed_value=seed_value)

        encClasses.append(encClass1)

        # Time steps whose features all equal -2 are masked
        maskLayer = tf.keras.layers.Masking(mask_value=-2)
        encInput = maskLayer(encoded)
        enc1, attention, fullAttention = encClass1(encInput)
        flatten1 = Flatten()(enc1)
        flattenArray.append(flatten1)

    # Merge nets
    if splits == 1:
        merged = flattenArray[0]
    else:
        merged = concatenate(flattenArray)

    output = Dense(num_of_classes, activation = "sigmoid")(merged)

    # Create combined model
    wdcnnt_multi = Model(inputs=inputShapes,outputs=(output))
    # summary() prints by itself and returns None, so it is not wrapped in print()
    wdcnnt_multi.summary()

    print(wdcnnt_multi.count_params())

    learning_rate = transformer.CustomSchedule(16)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.99,
                                 epsilon=1e-9)

    wdcnnt_multi.compile(optimizer=optimizer,
                loss='mean_squared_error',
                metrics=['accuracy'], run_eagerly=False)

    print('done')

    return wdcnnt_multi, inputShapes, x_trains, x_tests, x_vals

In [None]:
# Build the file name under which the model weights are saved
def getWeightName(learning = True):
    """Return the weight-file path for the current notebook settings.

    The name is assembled from the module-level ``data_path_train`` (file stem),
    ``seqSize``, ``maxString``, ``abstractionString`` and ``fold``. With
    ``learning`` true the name ends in '-learning.tf', otherwise in '.tf'.
    """
    dataset_tag = str(data_path_train.split('/')[-1].split('.')[0])
    name_parts = [
        "./saves/weights-", dataset_tag,
        '-size', str(seqSize),
        '-threshold', maxString,
        '-input', abstractionString,
        '-fold', str(fold),
    ]
    suffix = '-learning.tf' if learning else '.tf'
    return "".join(name_parts) + suffix

# Train (or load) the model for the given data and evaluate it
def doAbstractedTraining(newTrain, newVal, newTest, abstraction = 0, earlyPredictorZ = None, takeAvg = True, rate=0, heatLayer = 0, numOfAttentionLayers = 1):
    """Build the model, train it or load saved weights, and collect scores and attention.

    Besides its arguments the function reads the module-level names BATCH,
    num_of_classes, useSaves, earlystop, y_train1, y_val, y_testy,
    abstractionString and maxString (the latter two also via getWeightName()).
    The parameters abstraction, earlyPredictorZ, takeAvg and heatLayer are not
    used in the body.

    Returns:
        A 15-tuple: (val_score, test_score, val_predictions, test_predictions,
        model, inputs, x_trains, x_tests, x_vals, attention, attention_model,
        newTrain, newVal, newTest, y_train1). ``attention`` (index 9) is the
        list [attentionQ0, attentionQ1, attentionQ2] gathered on the training
        data.
    """
    print('newTrain during abstract training:')
    print(newTrain.shape)
            
    n_model2, inputs2, x_trains2, x_tests2, x_vals2 = createModel(1, newTrain, newVal, newTest , BATCH, num_of_classes, rate=rate, doMask=False, numOfAttentionLayers=numOfAttentionLayers)
    weightsName = getWeightName(learning=True)
    saveBest2 = transformer.SaveBest(weightsName)
    
    # Reuse final weights when they exist and reuse is enabled; otherwise train,
    # reload the checkpoint written under the '-learning' name and store it as final.
    if (os.path.isfile(getWeightName(learning=False) + '.index') and useSaves):
        print('found weights to load! Won\'t train model!')
        n_model2.load_weights(getWeightName(learning=False))
    else:
        print('No weights found! Start training model!')
        n_model2.fit(x_trains2, y_train1, validation_data = (x_vals2, y_val) , epochs = 500, batch_size = BATCH, verbose=1, callbacks =[earlystop, saveBest2], shuffle = True)
        n_model2.load_weights(getWeightName(learning=True))
        n_model2.save_weights(getWeightName(learning=False), overwrite=True)
        

    # Second model that exposes the output of layer index 2 of the trained model.
    # NOTE(review): presumably that layer is the encoder, whose three outputs are
    # consumed below -- confirm against the model summary.
    earlyPredictor2 = tf.keras.Model(n_model2.inputs, n_model2.layers[2].output)

    # Predictions on the validation set
    predictions2 = n_model2.predict(x_vals2)
    
    print('############################')
    predictions2 = np.argmax(predictions2,axis=1)

    # Measure this fold's accuracy on validation set compared to actual labels
    y_compare = np.argmax(y_val, axis=1)
    val_score2 = metrics.accuracy_score(y_compare, predictions2) 
    print(f"validation fold score with input {abstractionString}-{maxString}(accuracy): {val_score2}")

    # Predictions on the test set, in chunks of `limit` samples
    limit = 300
    test_predictions_loop2 = []
    for bor in range(int(math.ceil(len(x_tests2[0])/limit))):
        test_predictions_loop2.extend(n_model2.predict([x_tests2[0][bor*limit:(bor+1)*limit]]))

    attentionQ0 = []
    attentionQ1 = []
    attentionQ2 = []

    # Collect the three outputs of the attention model on the training data, chunk by chunk
    for bor in range(int(math.ceil(len(x_trains2[0])/limit))):
        attOut = earlyPredictor2.predict([x_trains2[0][bor*limit:(bor+1)*limit]])
        attentionQ0.extend(attOut[0]) 
        attentionQ1.extend(attOut[1])

        if len(attentionQ2) == 0:
            attentionQ2 = attOut[2]
        else:
            # NOTE(review): from the second chunk on, `x + y` either concatenates
            # (lists) or adds element-wise (arrays) depending on the element type
            # of attOut[2] -- confirm this merges chunks as intended.
            for k in range(len(attentionQ2)):
                attentionQ2[k] = [x +y for x, y in zip(attentionQ2[k], attOut[2][k])]
    

    attentionQ2 = [attentionQ0, attentionQ1, attentionQ2]
    
    # Shift the actual test labels by one so they match the argmax indices
    y_testyy = [y-1 for y in y_testy]
    test_predictions_loop2 = np.argmax(test_predictions_loop2, axis=1)

    # Measure this fold's accuracy on test set compared to actual labels
    test_score2 = metrics.accuracy_score(y_testyy, test_predictions_loop2)
    #print(test_predictions_loop2)

    print(f"test fold score with input {abstractionString}-(accuracy): {test_score2}")
    return val_score2, test_score2, predictions2, test_predictions_loop2, n_model2, inputs2, x_trains2, x_tests2, x_vals2, attentionQ2, earlyPredictor2, newTrain, newVal, newTest, y_train1

In [None]:
def preprocessData(x_train1, x_val, X_test, y_train1, y_val, y_test, y_trainy, y_testy, binNr):
    """Standardise and SAX-symbolise one data split, caching the result on disk.

    If a cache file for this split exists and the module-level ``useSaves`` is
    true, everything (inputs and labels) is loaded from the cache and the
    arguments are ignored. Otherwise a ``StandardScaler`` is fitted on the
    training values and applied to train, validation and test data, copies of
    the scaled data are kept as the "ori" variants, the data are symbolised
    with SAX (module-level ``n_bins``), a trailing feature axis is added to all
    arrays and the result is written to the cache.

    Note: the scaler result for the training data used to be assigned to an
    unused variable, leaving the training data unscaled while validation and
    test data were scaled. Cache files written before this fix still contain
    the unscaled training data; delete them to regenerate.

    Args:
        x_train1, x_val, X_test: 2-D input arrays (samples x time steps).
        y_train1, y_val, y_test: Label arrays stored alongside the inputs.
        y_trainy, y_testy: Integer label arrays stored alongside the inputs.
        binNr: Tag that is only used in the cache file name ('-bin<binNr>');
            the main loop passes the fold number here. The SAX bin count itself
            is not part of the file name.

    Returns:
        Tuple ``(x_train1, x_val, x_test, y_train1, y_val, y_test,
        X_train_ori, X_val_ori, X_test_ori, y_trainy, y_testy)``.
    """
    x_test = X_test.copy()

    processedDataName = "./saves/"+str(data_path_train.split('/')[-1].split('.')[0])+ '-size' + str(seqSize) + '-bin' + str(binNr)
    fileExists = os.path.isfile(processedDataName +'.pkl')

    if(fileExists and useSaves):
        print('found file! Start loading file!')
        res = helper.load_obj(processedDataName)

        # Unwrap the stored dictionary in case the loader returns it inside an array
        for index, v in np.ndenumerate(res):
            res = v

        x_train1 = res['X_train']
        x_test = res['X_test']
        x_val = res['X_val']
        X_train_ori = res['X_train_ori']
        X_test_ori = res['X_test_ori']
        y_trainy = res['y_trainy']
        y_train1 = res['y_train']
        y_test = res['y_test']
        y_testy = res['y_testy']
        y_val = res['y_val']
        X_val_ori = res['X_val_ori']
        print(x_test.shape)
        print(x_train1.shape)
        print(y_test.shape)
        print(y_train1.shape)
        print('SHAPES loaded')

    else:
        print('SHAPES:')
        print(x_test.shape)
        print(x_train1.shape)
        print(x_val.shape)
        print(y_test.shape)
        print(y_train1.shape)

        trainShape = x_train1.shape
        valShape = x_val.shape
        testShape = x_test.shape

        # One global mean/std over all training values, applied to every split
        scaler = StandardScaler()
        scaler = scaler.fit(x_train1.reshape((-1,1)))
        x_train1 = scaler.transform(x_train1.reshape(-1, 1)).reshape(trainShape)
        x_val = scaler.transform(x_val.reshape(-1, 1)).reshape(valShape)
        x_test = scaler.transform(x_test.reshape(-1, 1)).reshape(testShape)

        # Keep the scaled, not yet symbolised data for the 'Ori' model
        X_test_ori = x_test.copy()
        X_val_ori = x_val.copy()
        X_train_ori = x_train1.copy()

        sax = SymbolicAggregateApproximation(n_bins=n_bins, strategy='uniform')
        sax.fit(x_train1)

        x_train1 = helper.symbolizeTrans(x_train1, sax)
        x_val = helper.symbolizeTrans(x_val, sax)
        x_test = helper.symbolizeTrans(x_test, sax)

        # Add the trailing feature axis expected by the model
        x_train1 = np.expand_dims(x_train1, axis=2)
        x_val = np.expand_dims(x_val, axis=2)
        x_test = np.expand_dims(x_test, axis=2)
        X_test_ori = np.expand_dims(X_test_ori, axis=2)
        X_train_ori = np.expand_dims(X_train_ori, axis=2)
        X_val_ori = np.expand_dims(X_val_ori, axis=2)

        print('saves shapes:')
        print(x_test.shape)
        print(x_train1.shape)

        # Save the SAX results so they are only calculated once
        resultsSave = {
            'X_train':x_train1,
            'X_train_ori':X_train_ori,
            'X_test':x_test,
            'X_test_ori':X_test_ori,
            'X_val': x_val,
            'X_val_ori':X_val_ori,
            'y_trainy':y_trainy,
            'y_train':y_train1,
            'y_val': y_val,
            'y_test':y_test,
            'y_testy':y_testy
        }
        helper.save_obj(resultsSave, processedDataName)
    return x_train1, x_val, x_test, y_train1, y_val, y_test, X_train_ori, X_val_ori, X_test_ori, y_trainy, y_testy

## Start the training and evaluation loop

In [None]:
# Run training and evaluation once per cross-validation fold

# Settings and empty result lists

# Calculate the global coherence matrices and evaluate them
doGlobalAbstraction = True
# Number of attention layers
numOfAttentionLayers = 2
# Take the attention average (passed on to doAbstractedTraining)
takeAvg = True
# Dropout rate
rate=0.3

# maxString and abstractionString become part of the weight file names (see getWeightName)
maxString = 'None'
usedAbstraction = ['Ori', 'SAX']
reduceString = ['max','max+','average','average+','median','median+']
BATCH = 50
fold = 0

# Attention combination read by makeAttention: order of the two reductions and their kind
order = 'hl'
step1 = 'max'
step2 = 'sum'

# One result list per entry of resultNames; the first two hold full training
# output tuples, the remaining ten hold one accuracy per fold
accResults = [[],[],[],[],[],[],[],[],[],[],[],[]]
resultNames = ['Ori', 'SAX', 'full matries sum', 'full matries r. avg', 'colum reduced sum', 'column reduced r avg.', 'min matrix max',  'min matrix max+', 'min matrix avg', 'min matrix avg+', 'min matrix median', 'min matrix median+']

for train, test in kf.split(X_train, y_trainy): 
    fold+=1
    print(f"Fold #{fold}")
    
    # Select this fold's training / validation part and preprocess the data
    x_train1 = X_train[train]
    x_val = X_train[test]
    y_train1 = y_train[train]
    y_trainy2 = y_trainy[train]
    y_val = y_train[test]
    
    # The fold number is passed as the cache tag (last argument)
    x_train1, x_val, x_test, y_train1, y_val, y_test, X_train_ori, X_val_ori, X_test_ori, y_trainy2, y_testy2 = preprocessData(x_train1, x_val, X_test, y_train1, y_val, y_test, y_trainy2, y_testy, fold)

    
    # Model on the scaled original data
    abstractionIndex = 0
    resultIndex = 0
    abstractionString = usedAbstraction[abstractionIndex]    
    outOri = doAbstractedTraining(X_train_ori, X_val_ori, X_test_ori, abstractionIndex, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
    
    accResults[resultIndex].append(outOri)
    resultIndex+=1
    
    # Model on the SAX data
    abstractionIndex += 1
    abstractionString = usedAbstraction[abstractionIndex]  
    outSax = doAbstractedTraining(x_train1, x_val, x_test, abstractionIndex, rate=rate, takeAvg = takeAvg, heatLayer = 0, numOfAttentionLayers = numOfAttentionLayers)   
    accResults[resultIndex].append(outSax)
    resultIndex+=1
    
    # Calculate the coherence matrices from the SAX model's attention and evaluate them
    if doGlobalAbstraction:
        rMA, rMS = makeAttention(outSax, x_train1, y_train1)
        

        # rMS[1].keys() supplies the symbol set; this relies on label 1 being present
        print('full attention coherences')
        accResults[resultIndex].append(classFullAtt(rMS, x_test, y_testy, rMS[1].keys()))
        resultIndex+=1 
        accResults[resultIndex].append(classFullAtt(rMA, x_test, y_testy, rMS[1].keys()))
        resultIndex+=1 
        print('column reduced coherence matrices')
        
        accResults[resultIndex].append(xAttentionMatch(rMA, x_test, y_testy, rMS[1].keys(),'x'))
        resultIndex+=1 
        accResults[resultIndex].append(xAttentionMatch(rMA, x_test, y_testy, rMS[1].keys(),'xAvg'))
        resultIndex+=1 
        
        # One result list per reduction, in the order of reduceString
        print('minimal coherence matrix')
        for r in range(len(reduceString)):
            accResults[resultIndex].append(calcFullAbstractAttention(r, x_test, rMA))
            resultIndex+=1   


In [None]:
banner = '#########################################'

# Transformer models ('Ori', 'SAX'): every stored entry is a full training
# output tuple whose first two items are the validation and test accuracy.
for k, results in enumerate(accResults[:2]):
    resultName = resultNames[k]
    val_scores = [r[0] for r in results]
    test_scores = [r[1] for r in results]
    print(banner)
    print(resultName + ' Scores:')
    print(banner)
    print(f"Avg validation score (accuracy): {np.average(val_scores)}")
    print(val_scores)
    print(f"Avg test score (accuracy): {np.average(test_scores)}")
    print(test_scores)
    print('---------------------')
print('#################')

# Coherence-matrix classifiers: every stored entry is one accuracy per fold.
for k in range(2, len(accResults)):
    resultName = resultNames[k]
    results = accResults[k]
    print(banner)
    print(resultName + ' Scores:')
    print(banner)
    print(results)
    print(np.average(results))
    print('---------')

## Do visualisations

In [None]:
# calculate attention value combinations for visualisations
binNr = 1
rMA, rMS = makeAttention(accResults[1][binNr], accResults[1][binNr][6], accResults[1][binNr][-1])

Plot FCAM

In [None]:
valuesA = [-1, -0.5, 0, 0.5, 1]

# Which combination to display
combination = 'ravg'  #['sum', 'ravg']

# Source of the full coherence matrices for each combination
fcam_sources = {'sum': rMS, 'ravg': rMA}
if combination not in fcam_sources:
    raise ValueError(f"unknown combination {combination!r}; expected 'sum' or 'ravg'")

# One figure per class label (labels are 1-based); a grid of heatmaps with one
# row per from-symbol and one column per to-symbol.
for lable in range(1, num_of_classes + 1):
    print('lable: '+ str(lable))
    fig, ax = plt.subplots(figsize=(60,30), nrows=len(valuesA), ncols=len(valuesA))
    fig.tight_layout()

    for rowI, row in enumerate(ax):
        for colI, col in enumerate(row):
            fromV = valuesA[rowI]
            toV = valuesA[colI]

            # Flip vertically so the first position ends up at the bottom
            data_att = np.flip(np.array(fcam_sources[combination][lable][fromV][toV]), axis=0)
            data_wordF = [fromV] * data_att.shape[0]
            data_wordT = [toV] * data_att.shape[1]
            d = pd.DataFrame(data = data_att,index = data_wordF, columns=data_wordT)
            sns.heatmap(d, vmin=0, vmax=0.22, ax=col, cmap="OrRd")

    #plt.savefig('./Bilder/full/Class' + str(lable) +'fullmatrix.png', dpi = 300)
    plt.show()

Plot CRCAM

In [None]:
valuesA = [-1, -0.5, 0, 0.5, 1]

# Which column reduced matrices to display
combination = 'xAvg'  #['x', 'xAvg']

# One figure per class label (labels are 1-based) with one heatmap per target
# symbol; the axes are walked in reverse so the first symbol is drawn at the bottom.
for lable in range(1, num_of_classes + 1):

    print(lable)
    fig, ax = plt.subplots(figsize=(60,30), nrows=len(valuesA), ncols=1)
    fig.tight_layout()

    for rowI, row in enumerate(reversed(ax)):
        toV = valuesA[rowI]
        # Flip vertically so the first position ends up at the bottom
        data_att = np.flip(np.array(rMA[lable][combination][toV]), axis=0)
        # Tick labels come from the matrix itself; this no longer depends on a
        # `fromV` variable left over from the FCAM cell.
        data_wordF = ['x'] * data_att.shape[0]
        data_wordT = [toV] * data_att.shape[1]
        d = pd.DataFrame(data = data_att,index = data_wordF, columns=data_wordT)

        sns.heatmap(d, vmin=0, vmax=0.68, ax=row, cmap="OrRd")
        # NOTE(review): the plt.* calls act on pyplot's current axes, which is not
        # necessarily `row` -- confirm which subplot they are meant to style.
        plt.tick_params(labelsize=26)
        label_y = row.get_yticklabels()
        plt.setp(label_y, rotation=360, horizontalalignment='right')
        label_x = row.get_xticklabels()
        plt.setp(label_x, rotation=45, horizontalalignment='right')

        plt.xlabel("X axis label")
    #plt.savefig('./Bilder/Red/Class' + str(lable) +'colmMatrix.png', dpi = 300)
    plt.show()

Plot GTM

In [None]:
valuesA = [-1, -0.5, 0, 0.5, 1]
# Reduction to display; reduceString is the list defined in the training-loop cell
reductionWith = reduceString[2] # ['max','max+','average','average+','median','median+']

# One heatmap per class label: rows are the symbols, columns the positions
for lable in rMS.keys():
    print('lable: '+ str(lable))


    # Stack the per-symbol profiles in stored order and flip them, so the row
    # order matches the flipped valuesA used as index below
    data_att = np.flip(np.array(list(rMA[lable][reductionWith].values())), axis=0)
    data_wordF = [np.flip(np.array(valuesA))]
    data_wordT = range(len(data_att[0])) 
    d = pd.DataFrame(data = data_att,index = data_wordF, columns=data_wordT)
    f, ax = plt.subplots(figsize=(60,30))
    sns.heatmap(d, vmin=0, vmax=0.88, ax=ax, cmap="OrRd")
    label_y = ax.get_yticklabels()
    plt.setp(label_y, rotation=360, horizontalalignment='right')
    label_x = ax.get_xticklabels()
    plt.setp(label_x, rotation=45, horizontalalignment='right')
    plt.tick_params(labelsize=26)
    #plt.savefig('./Bilder/min/Class' + str(lable) +'minMatrix.png', dpi = 300)
    plt.show()