In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
X_train = np.load("vectors/X_train_q2-4.npy")
Y_train = np.load("vectors/Y_train_q2-4.npy")

In [3]:
X_valid = np.load("vectors/X_valid_q2-4.npy")
Y_valid = np.load("vectors/Y_valid_q2-4.npy")

In [4]:
X_test = np.load("vectors/X_test_q2-4.npy")
Y_test = np.load("vectors/Y_test_q2-4.npy")

In [5]:
maxlen = X_train.shape[1]
vocab_size = 67000
embeding_dimension = 10
number_of_heads = 6

In [6]:
norm_layer = layers.LayerNormalization(epsilon=1e-6)

In [7]:
dropout = layers.Dropout(0.1)

In [8]:
feednn = keras.Sequential(
    [
        layers.Dense(10, activation="relu"),
        layers.Dense(embeding_dimension),
    ]
)

2022-01-15 18:56:38.816233: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
mha = layers.MultiHeadAttention(num_heads = number_of_heads, key_dim = embeding_dimension)

In [10]:
token_embending = layers.Embedding(input_dim=vocab_size, output_dim=embeding_dimension)

In [11]:
positional_embending = layers.Embedding(input_dim=maxlen, output_dim=embeding_dimension)

In [12]:
soft_layer = tf.keras.layers.Softmax()

In [13]:
inputs = layers.Input(shape=(maxlen,))

pos = tf.range(start=0, limit=tf.shape(inputs)[-1], delta=1)
pos = positional_embending(pos)
x = token_embending(inputs)
x += pos

a = mha(x, x)
a = dropout(a)
a = norm_layer(x + a)
f = feednn(a)
f = dropout(f)
x = layers.GlobalAveragePooling1D()(f)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dense(9, activation="relu")(x)
x = layers.Reshape((3, 3))(x)
outputs = soft_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile("adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])
# model.summary()

In [14]:
model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), batch_size=64, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x123374850>

In [15]:
model.evaluate(X_test, Y_test, batch_size=32)



[0.8106127977371216, 0.737089216709137]

In [16]:
Y_pred = model.predict(X_test)
Y_pred = np.argmax(Y_pred, axis=2)

### Q2

In [17]:
from sklearn.metrics import confusion_matrix, classification_report


cm = confusion_matrix(Y_test[:, 0], Y_pred[:, 0])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,628,0,214
Actual yes,25,0,46
Actual no,187,0,675


In [18]:
clf = classification_report(Y_test[:, 0], Y_pred[:,0], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.747619,0.0,0.721925,0.734085,0.489848,0.705236
recall,0.745843,0.0,0.783063,0.734085,0.509635,0.734085
f1-score,0.74673,0.0,0.751252,0.734085,0.499327,0.719057
support,842.0,71.0,862.0,0.734085,1775.0,1775.0


### Q3

In [19]:
cm = confusion_matrix(Y_test[:, 1], Y_pred[:, 1])
cm_df = pd.DataFrame(cm, index=['Actual unknown','Actual yes', 'Actual no'], columns=['Predicted unknown','Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,596,228,0
Actual yes,186,758,0
Actual no,6,1,0


In [20]:
clf = classification_report(Y_test[:, 1], Y_pred[:, 1], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.756345,0.767984,0.0,0.762817,0.50811,0.759552
recall,0.723301,0.802966,0.0,0.762817,0.508756,0.762817
f1-score,0.739454,0.785085,0.0,0.762817,0.50818,0.760806
support,824.0,944.0,7.0,0.762817,1775.0,1775.0


### Q4

In [21]:
cm = confusion_matrix(Y_test[:, 2], Y_pred[:, 2])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,626,0,201
Actual yes,43,0,85
Actual no,178,0,642


In [22]:
clf = classification_report(Y_test[:, 2], Y_pred[:,2], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.739079,0.0,0.69181,0.714366,0.476963,0.663945
recall,0.756953,0.0,0.782927,0.714366,0.513293,0.714366
f1-score,0.747909,0.0,0.734554,0.714366,0.494154,0.687806
support,827.0,128.0,820.0,0.714366,1775.0,1775.0
