In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
X_train = np.load("vectors/X_train_all.npy")
Y_train = np.load("vectors/Y_train_all.npy")

In [3]:
X_valid = np.load("vectors/X_valid_all.npy")
Y_valid = np.load("vectors/Y_valid_all.npy")

In [4]:
X_test = np.load("vectors/X_test_all.npy")
Y_test = np.load("vectors/Y_test_all.npy")

In [5]:
maxlen = X_train.shape[1]
vocab_size = 26000
embeding_dimension = 10
number_of_heads = 6 

In [6]:
norm_layer = layers.LayerNormalization(epsilon=1e-6)

In [7]:
dropout = layers.Dropout(0.15)

In [8]:
feednn = keras.Sequential(
    [
        layers.Dense(10, activation="relu"),
        layers.Dense(embeding_dimension),
    ]
)

2022-01-14 08:15:02.252618: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
mha = layers.MultiHeadAttention(num_heads = number_of_heads, key_dim = embeding_dimension)

In [10]:
token_embending = layers.Embedding(input_dim=vocab_size, output_dim=embeding_dimension)

In [11]:
positional_embending = layers.Embedding(input_dim=maxlen, output_dim=embeding_dimension)

In [12]:
soft_layer = tf.keras.layers.Softmax()

In [13]:
inputs = layers.Input(shape=(maxlen,))

pos = tf.range(start=0, limit=tf.shape(inputs)[-1], delta=1)
pos = positional_embending(pos)
x = token_embending(inputs)
x += pos

a = mha(x, x)
a = dropout(a)
a = norm_layer(x + a)
f = feednn(a)
f = dropout(f)
x = layers.GlobalAveragePooling1D()(f)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dense(12, activation="relu")(x)
x = layers.Reshape((4, 3))(x)
outputs = soft_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile("adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])
# model.summary()

In [14]:
model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), batch_size=8, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x12fda3fa0>

In [15]:
model.evaluate(X_test, Y_test, batch_size=32)



[0.8543951511383057, 0.7235211133956909]

In [16]:
Y_pred = model.predict(X_test)

In [17]:
Y_pred = np.argmax(Y_pred, axis=2)

In [18]:
Y_pred.shape

(1775, 4)

In [19]:
Y_test.shape

(1775, 4)

In [20]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(Y_test[:, 0], Y_pred[:, 0])
cm_df = pd.DataFrame(cm, index=['Actual yes', 'Actual no'], columns=['Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted yes,Predicted no
Actual yes,481,347
Actual no,115,832


In [21]:
cm = confusion_matrix(Y_test[:, 1], Y_pred[:, 1])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Actual unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Actual unknown,Predicted yes,Predicted no
Actual unknown,535,0,307
Actual yes,22,0,49
Actual no,120,0,742


In [22]:
cm = confusion_matrix(Y_test[:, 2], Y_pred[:, 2])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,482,342,0
Actual yes,117,827,0
Actual no,5,2,0


In [23]:
cm = confusion_matrix(Y_test[:, 3], Y_pred[:, 3])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,543,0,284
Actual yes,28,0,100
Actual no,125,0,695
