In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Training split: inputs and their labels, each stored as a .npy file.
X_train, Y_train = (
    np.load(npy_path)
    for npy_path in ("vectors/X_train_q1_aug.npy", "vectors/Y_train_q1.npy")
)

In [3]:
# Validation split: inputs and their labels, each stored as a .npy file.
X_valid, Y_valid = (
    np.load(npy_path)
    for npy_path in ("vectors/X_valid.npy", "vectors/Y_valid_q1.npy")
)

In [4]:
# Quick look at the training labels (NumPy abbreviates long arrays in the repr).
Y_train

array([0, 0, 0, ..., 1, 1, 1])

In [5]:
# Held-out test split: inputs and their labels, each stored as a .npy file.
X_test, Y_test = (
    np.load(npy_path)
    for npy_path in ("vectors/X_test.npy", "vectors/Y_test_q1.npy")
)

In [6]:
# Reproducibility: seed NumPy and TensorFlow before any layer is created so
# that a fresh "Restart & Run All" starts from the same random state.
# (Some low-level ops may still be non-deterministic; this removes the
# seed-dependent part of the run-to-run variation.)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Model hyperparameters.
# Length of the second axis of X_train; used as the model's input length and
# as the size of the positional-embedding table.
maxlen = X_train.shape[1]
# Size of the token-embedding table (its input_dim).
# NOTE(review): presumably every token id in the data is < 67000 — confirm
# against the vectoriser that produced the .npy files.
vocab_size = 67000
# Width of the token/positional embeddings; also used as the attention
# key_dim and as the output width of the feed-forward block.
embeding_dimension = 10
# Number of attention heads in the MultiHeadAttention layer.
number_of_heads = 6

In [7]:
# Layer normalisation applied to the residual sum (input + attention output).
norm_layer = keras.layers.LayerNormalization(epsilon=1e-6)

In [8]:
# One Dropout layer (rate 0.2) that the model applies in two places.
dropout = keras.layers.Dropout(rate=0.2)

In [9]:
# Position-wise feed-forward block: a 10-unit ReLU layer followed by a linear
# projection back to the embedding width.
feednn = keras.Sequential()
feednn.add(layers.Dense(units=10, activation="relu"))
feednn.add(layers.Dense(units=embeding_dimension))

2022-01-15 19:28:36.980727: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# Self-attention layer; key_dim is set to the embedding width.
mha = keras.layers.MultiHeadAttention(
    num_heads=number_of_heads,
    key_dim=embeding_dimension,
)

In [11]:
# Lookup table mapping each token id to an embedding vector.
token_embending = keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embeding_dimension,
)

In [12]:
# Lookup table mapping each position index (0 .. maxlen-1) to an embedding vector.
positional_embending = keras.layers.Embedding(
    input_dim=maxlen,
    output_dim=embeding_dimension,
)

In [13]:
# Assemble the classifier: embeddings -> self-attention (with residual and
# layer norm) -> feed-forward -> average pooling -> 2-way softmax.
inputs = keras.Input(shape=(maxlen,))

# Position indices are derived from the symbolic input's last axis, then
# embedded and added to the token embeddings.
sequence_length = tf.shape(inputs)[-1]
position_ids = tf.range(start=0, limit=sequence_length, delta=1)
position_vectors = positional_embending(position_ids)
token_vectors = token_embending(inputs)
embedded = token_vectors + position_vectors

# Self-attention: the embedded sequence serves as both query and value.
attended = dropout(mha(embedded, embedded))
normalised = norm_layer(embedded + attended)

# Feed-forward block, then mean over the sequence axis before classification.
transformed = dropout(feednn(normalised))
pooled = layers.GlobalAveragePooling1D()(transformed)
outputs = layers.Dense(2, activation="softmax")(pooled)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for a single epoch, reporting metrics on the validation split.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=1,
    validation_data=(X_valid, Y_valid),
)



<keras.callbacks.History at 0x1250960d0>

In [14]:
# Loss and accuracy on the held-out test split.
model.evaluate(x=X_test, y=Y_test, batch_size=32)



[0.7081847786903381, 0.7476056218147278]

In [15]:
# Per-class softmax scores for every test example (one row per example).
Y_pred = model.predict(x=X_test)

In [16]:
# Reduce each row of class scores to the index of its largest entry.
# NOTE: this rebinds Y_pred from the 2-D score matrix to a 1-D label vector,
# so re-running this cell without re-running the predict cell fails (axis 1
# no longer exists).
Y_pred = Y_pred.argmax(axis=1)
Y_pred

array([1, 1, 1, ..., 1, 0, 1])

In [17]:
from sklearn.metrics import confusion_matrix, classification_report

# Rows of the matrix are the true classes, columns the predicted classes.
# Passing `labels` explicitly pins a 2x2 result in [0, 1] order even if one
# class is missing from the data; without it the matrix would shrink and the
# two-name index/columns below would no longer fit.
# NOTE(review): with this ordering the "yes" names are attached to class 0
# and the "no" names to class 1 — confirm this matches the label encoding.
cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1])
cm_df = pd.DataFrame(
    cm,
    index=['Actual yes', 'Actual no'],
    columns=['Predicted yes', 'Predicted no'],
)
cm_df

Unnamed: 0,Predicted yes,Predicted no
Actual yes,560,268
Actual no,180,767


In [18]:
# Per-class precision / recall / F1 / support as a dict, shown as a table.
clf = classification_report(
    y_true=Y_test,
    y_pred=Y_pred,
    output_dict=True,
)
pd.DataFrame(data=clf)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.756757,0.741063,0.747606,0.74891,0.748384
recall,0.676329,0.809926,0.747606,0.743127,0.747606
f1-score,0.714286,0.773966,0.747606,0.744126,0.746126
support,828.0,947.0,0.747606,1775.0,1775.0
