In [1]:
# importing libraries

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers


2023-07-06 09:20:50.898979: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# reading the data 

def read_data(filename):
    """Load a header-less CSV and split it into features and labels.

    Args:
        filename: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` holds every
        column except the last one; ``labels`` holds the last column minus one.
    """
    table = pd.read_csv(filename, header=None)
    feature_columns = table.iloc[:, :-1]
    label_column = table.iloc[:, -1]
    # Subtract one so the class labels start at 0 (0-25 for this dataset).
    return feature_columns.values, label_column.values - 1

# URL of the CSV file that is handed to read_data() in the next cell
data_url = "https://raw.githubusercontent.com/UMBInal/data/main/data.csv"

In [3]:
# Load the feature array `x` and the shifted label array `y` from the CSV
x, y = read_data(data_url)

In [4]:
# Sanity check: look at the first feature row
x[0]

array([0.01176471, 0.01176471, 0.97647059, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

In [5]:
# Sanity check: look at the first (already shifted) label
y[0]

0

In [6]:
# Split the data into training and testing sets:
# 20% is held out for testing, and the seed is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Shape of the training features: (samples, features per sample)
x_train.shape

(29336, 10)

In [8]:
# Shape of the training labels: one label per training sample
y_train.shape

(29336,)

In [9]:
# First training sample before reshaping (a flat feature vector)
x_train[0]

array([0.03529412, 0.01176471, 0.95294118, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

In [10]:
# Reshape the data: give every sample a trailing axis of size 1,
# i.e. (samples, features) -> (samples, features, 1).
x_train, x_test = (
    split.reshape((split.shape[0], split.shape[1], 1))
    for split in (x_train, x_test)
)

In [11]:
# First training sample after reshaping (one single-element row per feature)
x_train[0]

array([[0.03529412],
       [0.01176471],
       [0.95294118],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ]])

In [12]:
# Determine the input shape and number of classes
input_shape = x_train.shape[1], 1    # (features per sample, 1)
num_classes = np.unique(y).size      # number of distinct label values in the full label array

In [13]:
# Show the per-sample input shape that the model will be built with
input_shape

(10, 1)

In [14]:
# Show how many distinct classes were found in the labels
num_classes

26

In [15]:
# Transformer Encoder function
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Stack one encoder block on top of ``inputs`` and return its output tensor.

    The block has two parts, each followed by layer normalisation and wrapped in
    a residual sum: multi-head self-attention, then a feed-forward part made of
    two kernel-size-1 convolutions.

    Args:
        inputs: Tensor fed to the block; its last dimension sets the number of
            filters of the second convolution.
        head_size: ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first (ReLU) convolution.
        dropout: Rate used by the attention layer and by both Dropout layers.

    Returns:
        The normalised feed-forward output added to the attention residual.
    """
    # --- Attention part: `inputs` is passed as both arguments of the attention layer. ---
    self_attention = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)
    attn_out = self_attention(inputs, inputs)
    attn_out = layers.Dropout(dropout)(attn_out)
    attn_out = layers.LayerNormalization(epsilon=1e-6)(attn_out)
    attn_residual = attn_out + inputs

    # --- Feed-forward part ---
    ff_out = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(attn_residual)
    ff_out = layers.Dropout(dropout)(ff_out)
    # Project back to the channel count of `inputs` so the residual sum lines up.
    channels = inputs.shape[-1]
    ff_out = layers.Conv1D(filters=channels, kernel_size=1)(ff_out)
    ff_out = layers.LayerNormalization(epsilon=1e-6)(ff_out)
    return ff_out + attn_residual

In [None]:
# Transformer Decoder function
def transformer_decoder(inputs, enc_output, head_size, num_heads, ff_dim, dropout=0):
    """Stack one decoder block on top of ``inputs`` and return its output tensor.

    The block applies self-attention over ``inputs``, then attention from that
    result onto ``enc_output``, sums the two normalised attention outputs, and
    finally runs a feed-forward part (two kernel-size-1 convolutions) whose
    normalised output is added to that sum.

    Args:
        inputs: Tensor fed to the block; its last dimension sets the number of
            filters of the second convolution.
        enc_output: Tensor the second attention layer attends to.
        head_size: ``key_dim`` of both attention layers.
        num_heads: Number of attention heads in both attention layers.
        ff_dim: Number of filters of the first (ReLU) convolution.
        dropout: Rate used by the attention layers and by every Dropout layer.

    Returns:
        The normalised feed-forward output added to the attention residual.
    """
    # Self-attention over the decoder input.
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)

    # Attention from the self-attended tensor onto the encoder output.
    x_attended = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, enc_output)
    x_attended = layers.Dropout(dropout)(x_attended)
    x_attended = layers.LayerNormalization(epsilon=1e-6)(x_attended)

    res = x + x_attended

    # Feed-forward part; the second convolution projects back to the channel
    # count of `inputs` so the residual sum below lines up.
    x_ff = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x_ff = layers.Dropout(dropout)(x_ff)
    x_ff = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x_ff)
    x_ff = layers.LayerNormalization(epsilon=1e-6)(x_ff)
    # Return the feed-forward output plus its residual. Returning `x + res`
    # here would silently drop the whole feed-forward branch from the graph.
    return x_ff + res

In [21]:
# Build the model
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units,
                dropout=0, mlp_dropout=0, output_classes=None):
    """Build the classifier: a stack of encoder blocks, pooling, an MLP head, and softmax.

    Args:
        input_shape: Per-sample shape passed to ``keras.Input``.
        head_size: Forwarded to ``transformer_encoder``.
        num_heads: Forwarded to ``transformer_encoder``.
        ff_dim: Forwarded to ``transformer_encoder``.
        num_transformer_blocks: How many encoder blocks are stacked.
        mlp_units: Iterable of Dense layer widths for the head; each Dense layer
            is followed by a Dropout layer.
        dropout: Dropout rate forwarded to every encoder block.
        mlp_dropout: Dropout rate used after every Dense layer of the head.
        output_classes: Width of the final softmax layer. When ``None`` (the
            default) the module-level ``num_classes`` is used, which is what
            this function always did before the parameter existed.

    Returns:
        A ``keras.Model`` mapping the input tensor to the softmax output.

    Raises:
        NameError: If ``output_classes`` is ``None`` and no global
            ``num_classes`` has been defined yet.
    """
    if output_classes is None:
        # Backward-compatible fallback to the notebook-level variable, so existing
        # calls keep working; pass `output_classes` to avoid the hidden dependency.
        output_classes = num_classes

    inputs = keras.Input(shape=input_shape)
    x = inputs

    # Encoder
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)

    # FFN
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(output_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)


In [None]:

# NOTE: the triple-quoted text below is a plain string literal, not live code.
# It holds a disabled decoder stage that would feed the encoder output into
# `transformer_decoder` for `num_transformer_blocks` rounds.
# NOTE(review): it looks like it was cut out of build_model - confirm before re-enabling.
''' # Decoder
    enc_output = x  # Output from the Encoder
    for _ in range(num_transformer_blocks):
        x = transformer_decoder(x, enc_output, head_size, num_heads, ff_dim, dropout)
'''


In [22]:
# Define the hyperparameters

# Attention settings of every encoder block
num_heads = 8
head_size = 2

# Encoder stack: number of blocks and width of the feed-forward convolution
num_transformer_blocks = 8
ff_dim = 64

# Classification head: one Dense layer per entry
mlp_units = [64]

# Dropout rates for the encoder blocks and for the classification head
dropout = 0.25
mlp_dropout = 0.4

In [23]:
# declare the model (arguments spelled out by name so the mapping is explicit)
model = build_model(
    input_shape=input_shape,
    head_size=head_size,
    num_heads=num_heads,
    ff_dim=ff_dim,
    num_transformer_blocks=num_transformer_blocks,
    mlp_units=mlp_units,
    dropout=dropout,
    mlp_dropout=mlp_dropout,
)


In [24]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [25]:
# Train the model
# The run is still capped at 200 epochs, but early stopping ends it once the
# validation loss has not improved for 10 epochs and restores the best weights,
# so the cell no longer has to be interrupted by hand.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)
# Keep the returned History object so the training curves can be inspected later.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test loss: {loss}, Test accuracy: {accuracy}")