### Algorithm: Transformers including normalization vo
### Dataset : HandMovementDirection
### Author: Edgar Acuna 
### Created: April 16, 2025

In [422]:
import tensorflow

In [423]:

import pandas as pd

In [424]:
import pandas as pd
from aeon.datasets import load_classification
from sklearn import preprocessing
from matplotlib import pyplot

def load_data(dataset, split="Train"):
    if split=="Train":
        X, y = load_classification(dataset, split="Train")
    else:
        X, y = load_classification(dataset, split="Test")
    print(" Shape of X = ", X.shape)
    print(" Shape of y = ", y.shape)
    #Swap dimensions so features are the third dimension
    X = X.swapaxes(1, 2)
    print(" New shape of X = ", X.shape)
    #prepare y
    y = pd.DataFrame(y)
    enc = preprocessing.OneHotEncoder(dtype=int)
    enc.fit(y)
    y = enc.transform(y).toarray()
    print(" New shape of y = ", y.shape)
    return X, y


In [425]:
# loading Training data
X, y = load_data("HandMovementDirection",split="Train")

 Shape of X =  (160, 10, 400)
 Shape of y =  (160,)
 New shape of X =  (160, 400, 10)
 New shape of y =  (160, 4)


In [426]:
import numpy as np
yo=np.argmax(y,axis=1)
yo=np.array(yo,dtype=int)
print(yo)

[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0]


In [427]:
# loading Testing data
Xt, yt = load_data("HandMovementDirection",split="Test")

 Shape of X =  (74, 10, 400)
 Shape of y =  (74,)
 New shape of X =  (74, 400, 10)
 New shape of y =  (74, 4)


In [428]:
yto=np.argmax(yt,axis=1)
yto=np.array(yto,dtype=int)
#print(yto)

In [429]:
import numpy as np
Xb= np.concatenate((X, Xt),axis=0)
print(Xb.shape)

(234, 400, 10)


In [430]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
#tempo=X.reshape(X.shape[2],-1).T
#print(tempo.shape)
#X = scaler.fit_transform(X.reshape(X.shape[2], -1)).reshape(X.shape)
Xb = scaler.fit_transform(Xb.reshape(Xb.shape[2], -1)).reshape(Xb.shape)
#Xt = scaler.fit_transform(Xt.reshape(Xt.shape[2], -1)).reshape(Xt.shape)
#X1 = scaler.fit_transform(tempo)
#print(X)
#tempodf=pd.DataFrame(X1)
#print(tempodf.shape)
#pyplot.boxplot(tempodf)
#pyplot.show()

In [431]:
Xn1=Xb[0:40]
print(Xn1.shape)
Xn2=Xb[40:80]
print(Xn2.shape)

(40, 400, 10)
(40, 400, 10)


In [432]:
from tensorflow import keras
from tensorflow.keras import layers
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, roc_auc_score, ConfusionMatrixDisplay
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import to_categorical
from keras.layers import Flatten
from keras.layers import TimeDistributed
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
# Configuration options
#trainX, trainy, testX, testy = trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.3, random_state=42)
trainX=X
trainy=y
testX=Xt
testy=yt
feature_vector_length = trainX.shape[1]
num_classes = trainy.shape[1]
# Convert target classes to categorical ones
print(trainy.shape)
#trainy= to_categorical(trainy, num_classes)
#testy = to_categorical(testy, num_classes)
print('Train dimension:')
print(trainX.shape)
print('Test dimension:')
print(testX.shape)
#X_train=trainX.to_numpy()
#X_test=testX.to_numpy()
X_train = trainX.reshape((trainX.shape[0], trainX.shape[1], trainX.shape[2]))
X_test = testX.reshape((testX.shape[0], testX.shape[1], trainX.shape[2]))

(160, 4)
Train dimension:
(160, 400, 10)
Test dimension:
(74, 400, 10)


In [433]:
X_test.shape, testy.shape

((74, 400, 10), (74, 4))

In [434]:
X_train.shape, trainy.shape

((160, 400, 10), (160, 4))

In [435]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [436]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    n_length=100
    n_features=6
    inputs = keras.Input(shape=input_shape)
    x= Conv1D(filters=16, kernel_size=1, activation='relu')(inputs)
    x=MaxPooling1D(pool_size=2)(x)
    x= Dropout(0.5)(x)
    x= Conv1D(filters=64, kernel_size=1, activation='relu')(x)
    x=MaxPooling1D(pool_size=2)(x)
    x= Dropout(0.5)(x)
    x= Conv1D(filters=256, kernel_size=1, activation='relu')(x)
    x= MaxPooling1D(pool_size=2)(x)
    x= Dropout(0.5)(x)
    #x = layers.LayerNormalization(epsilon=1e-6)(x)
    #x=Flatten()(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

In [437]:
input_shape = X_train.shape[1:]
print('input', input_shape)
import time
model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0,
    dropout=0,
)
start_time = time.time()
model.compile(
    loss="categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"],
)
model.summary()

#callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

model.fit(
    X_train,
    trainy,
    validation_split=0,
    epochs=200,
    batch_size=8,
    #callbacks=callbacks,
)


input (400, 10)


Epoch 1/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 81ms/step - accuracy: 0.2938 - loss: 24.9178
Epoch 2/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 81ms/step - accuracy: 0.1918 - loss: 15.9021
Epoch 3/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 80ms/step - accuracy: 0.2552 - loss: 11.4624
Epoch 4/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 81ms/step - accuracy: 0.3154 - loss: 10.0671
Epoch 5/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 81ms/step - accuracy: 0.2879 - loss: 8.5083
Epoch 6/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 82ms/step - accuracy: 0.2692 - loss: 7.2931
Epoch 7/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 82ms/step - accuracy: 0.2172 - loss: 8.9814
Epoch 8/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 85ms/step - accuracy: 0.2970 - loss: 7.2255
Epoch 9/200
[1m20/20[0m [32m━━━━

<keras.src.callbacks.history.History at 0x1889e882990>

In [438]:
results=model.evaluate(X_test, testy, verbose=1)
print("Training time --- %s seconds ---" % (time.time() - start_time))
print(results)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.1774 - loss: 2.3017
Training time --- 813.5619885921478 seconds ---
[2.2054598331451416, 0.22972972691059113]


In [439]:
#Computing F1-score
import numpy as np
import sklearn
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, roc_auc_score
train_features = np.array(X_train)
test_features = np.array(X_test)
train_labels=np.array(trainy)
#train_labels=pd.DataFrame(trainy)
#n_values = train_labels.idxmax(axis=1)
y_values=np.argmax(train_labels,axis=1)
#print(y_values)
test_labels=np.array(testy)
yt_values=np.argmax(test_labels,axis=1)
#train_predictions_baseline = model.predict_classes(train_features, batch_size=150)
a=model.predict([test_features])
predict_classes=np.argmax(a,axis=1)
th=.8
atempo=a.max(axis=1)
print(atempo.shape)
print('Rate of Probability of classification exceeding a threshold in test',(atempo[atempo>th].shape[0])/atempo.shape[0])
#prob2=pd.DataFrame(model.predict_proba(test_features,batch_size=150))
#a=prob2.max(axis=1)
#print('Probability of classification',(a[a>.80].shape[0])/prob2.shape[0])
#print('Rate of Probability of classification exceding a trhershold',(a[a>.50].shape[0])/a.shape[0])
a1=model.predict([train_features])
predict_classes1=np.argmax(a1,axis=1)
#f1_train=sklearn.metrics.f1_score(ytrain, train_predictions_baseline, average="weighted")
#test_predictions_baseline = model.predict_classes(test_features, batch_size=150)
#print(test_predictions_baseline)
#f1_test=sklearn.metrics.f1_score(test_labels, test_predictions_baseline, average="weighted")
#print('f1_scores in testing set',f1_test)
#Calculating metrics for each class
print("EVALUATION ON TESTING DATA")
print(classification_report(yt_values, predict_classes))


The structure of `inputs` doesn't match the expected structure.
Expected: keras_tensor_500
Received: inputs=('Tensor(shape=(32, 400, 10))',)



[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 89ms/step 


The structure of `inputs` doesn't match the expected structure.
Expected: keras_tensor_500
Received: inputs=('Tensor(shape=(None, 400, 10))',)



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 332ms/step
(74,)
Rate of Probability of classification exceeding a threshold in test 0.20270270270270271
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 105ms/step
EVALUATION ON TESTING DATA
              precision    recall  f1-score   support

           0       0.21      0.87      0.33        15
           1       0.36      0.13      0.20        30
           2       0.00      0.00      0.00        15
           3       0.00      0.00      0.00        14

    accuracy                           0.23        74
   macro avg       0.14      0.25      0.13        74
weighted avg       0.19      0.23      0.15        74




Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [440]:
cm=confusion_matrix(yt_values, predict_classes)
print(cm)
good=np.diag(cm)/np.unique(yt_values,return_counts=True)[1]
print(good)

[[13  2  0  0]
 [26  4  0  0]
 [14  1  0  0]
 [10  4  0  0]]
[0.86666667 0.13333333 0.         0.        ]


In [441]:
import plotly.figure_factory as ff
# invert z idx values

z= cm

# invert z idx values
#z = z[::-1]

x= [0,1,2,3]
y1=x
#y =x[::-1].copy() # invert idx values of x
#print(y)
#y=x
# change each element of z to type string for annotations
z_text = [[str(y1) for y1 in x] for x in z]

# set up figure
fig = ff.create_annotated_heatmap(z, x=x, y=y1, annotation_text=z_text, colorscale='Viridis')

# add title
fig.update_layout(title_text='<i><b>Confusion matrix</b></i>',
                  #xaxis = dict(title='x'),
                  #yaxis = dict(title='x')
                 )

# add custom xaxis title
fig.add_annotation(dict(font=dict(color="black",size=14),
                        x=0.5,
                        y=-0.15,
                        showarrow=False,
                        text="Predicted value",
                        xref="paper",
                        yref="paper"))

# add custom yaxis title
fig.add_annotation(dict(font=dict(color="black",size=14),
                        x=-0.35,
                        y=0.5,
                        showarrow=False,
                        text="Actual value",
                        textangle=-90,
                        xref="paper",
                        yref="paper"))

# adjust margins to make room for yaxis title
fig.update_layout(margin=dict(t=50, l=200))

# add colorbar
fig['data'][0]['showscale'] = True
fig.show()