In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
X_train = np.load("vectors/X_train_all.npy")
Y_train = np.load("vectors/Y_train_all.npy")

In [3]:
X_valid = np.load("vectors/X_valid_all.npy")
Y_valid = np.load("vectors/Y_valid_all.npy")

In [4]:
X_test = np.load("vectors/X_test_all.npy")
Y_test = np.load("vectors/Y_test_all.npy")

In [5]:
maxlen = X_train.shape[1]
vocab_size = 26000
embeding_dimension = 10
number_of_heads = 6 

In [6]:
norm_layer = layers.LayerNormalization(epsilon=1e-6)

In [7]:
dropout = layers.Dropout(0.15)

In [8]:
feednn = keras.Sequential(
    [
        layers.Dense(10, activation="relu"),
        layers.Dense(embeding_dimension),
    ]
)

2022-01-15 22:34:48.534463: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
mha = layers.MultiHeadAttention(num_heads = number_of_heads, key_dim = embeding_dimension)

In [10]:
token_embending = layers.Embedding(input_dim=vocab_size, output_dim=embeding_dimension)

In [11]:
positional_embending = layers.Embedding(input_dim=maxlen, output_dim=embeding_dimension)

In [12]:
soft_layer = tf.keras.layers.Softmax()

In [13]:
inputs = layers.Input(shape=(maxlen,))

pos = tf.range(start=0, limit=tf.shape(inputs)[-1], delta=1)
pos = positional_embending(pos)
x = token_embending(inputs)
x += pos

a = mha(x, x)
a = dropout(a)
a = norm_layer(x + a)
f = feednn(a)
f = dropout(f)
x = layers.GlobalAveragePooling1D()(f)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dense(12, activation="relu")(x)
x = layers.Reshape((4, 3))(x)
outputs = soft_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile("adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])
# model.summary()

In [42]:
model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), batch_size=8, epochs=18)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x12edb96d0>

In [43]:
model.evaluate(X_test, Y_test, batch_size=32)



[0.9421395063400269, 0.6346479058265686]

In [44]:
Y_pred = model.predict(X_test)

In [45]:
Y_pred = np.argmax(Y_pred, axis=2)

In [46]:
Y_pred.shape

(1775, 4)

In [47]:
Y_test.shape

(1775, 4)

### Q1

In [56]:
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(Y_test[:, 0], Y_pred[:, 0])
cm_df = pd.DataFrame(cm, index=['Actual yes', 'Actual no'], columns=['Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted yes,Predicted no
Actual yes,532,296
Actual no,169,778


In [57]:
clf = classification_report(Y_test[:, 0], Y_pred[:,0], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Yes","1":"No"})

Unnamed: 0,Yes,No,accuracy,macro avg,weighted avg
precision,0.758916,0.724395,0.738028,0.741655,0.740498
recall,0.642512,0.821542,0.738028,0.732027,0.738028
f1-score,0.69588,0.769916,0.738028,0.732898,0.73538
support,828.0,947.0,0.738028,1775.0,1775.0


### Q2

In [58]:
cm = confusion_matrix(Y_test[:, 1], Y_pred[:, 1])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Actual unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Actual unknown,Predicted yes,Predicted no
Actual unknown,550,0,292
Actual yes,26,0,45
Actual no,155,0,707


In [64]:
clf = classification_report(Y_test[:, 1], Y_pred[:,1], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.752394,0.0,0.677203,0.708169,0.476532,0.685783
recall,0.653207,0.0,0.820186,0.708169,0.491131,0.708169
f1-score,0.699301,0.0,0.741868,0.708169,0.480389,0.692001
support,842.0,71.0,862.0,0.708169,1775.0,1775.0


### Q3

In [65]:
cm = confusion_matrix(Y_test[:, 2], Y_pred[:, 2])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,824,0,0
Actual yes,944,0,0
Actual no,7,0,0


In [66]:
clf = classification_report(Y_test[:, 2], Y_pred[:,2], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.464225,0.0,0.0,0.464225,0.154742,0.215505
recall,1.0,0.0,0.0,0.464225,0.333333,0.464225
f1-score,0.63409,0.0,0.0,0.464225,0.211363,0.294361
support,824.0,944.0,7.0,0.464225,1775.0,1775.0


### Q4

In [67]:
cm = confusion_matrix(Y_test[:, 3], Y_pred[:, 3])
cm_df = pd.DataFrame(cm, index=['Actual unknown', 'Actual yes', 'Actual no'], columns=['Predicted unknown', 'Predicted yes', 'Predicted no'])
cm_df

Unnamed: 0,Predicted unknown,Predicted yes,Predicted no
Actual unknown,506,136,185
Actual yes,31,31,66
Actual no,123,119,578


In [68]:
clf = classification_report(Y_test[:, 3], Y_pred[:,3], output_dict = True, zero_division = 0)
df = pd.DataFrame(clf)
df.rename(columns={"0":"Unknown","1":"Yes","2":"No"})

Unnamed: 0,Unknown,Yes,No,accuracy,macro avg,weighted avg
precision,0.766667,0.108392,0.697226,0.628169,0.524095,0.687117
recall,0.61185,0.242188,0.704878,0.628169,0.519639,0.628169
f1-score,0.680565,0.149758,0.701031,0.628169,0.510451,0.651742
support,827.0,128.0,820.0,0.628169,1775.0,1775.0
