In [66]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from tensorflow.keras.preprocessing.text import tokenizer_from_json, Tokenizer

In [67]:
path = 'train.csv'
df = pd.read_csv(path)
df = df.drop(columns=['id', 'context'])

In [68]:
x_data_1 = df['anchor']
x_data_2 = df['target']
score = df['score']

In [69]:
x_combined = x_data_1 + " " + x_data_2

In [70]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_combined)

In [71]:
anchor_tokenized = tokenizer.texts_to_sequences(x_data_1)
target_tokenized = tokenizer.texts_to_sequences(x_data_2)

In [72]:
padded_anchor = tf.keras.preprocessing.sequence.pad_sequences(anchor_tokenized, maxlen=7)
padded_target = tf.keras.preprocessing.sequence.pad_sequences(target_tokenized, maxlen=17)

In [73]:
all_inputs = tf.keras.layers.Concatenate(axis=1)([padded_anchor, padded_target])

In [74]:
from sklearn.preprocessing import LabelEncoder
LE = LabelEncoder()
y_score = LE.fit_transform(score)

In [75]:
x_data = all_inputs[:33000]
x_val = all_inputs[33000:]
y_data = y_score[:33000]
y_val = y_score[33000:]

## Build Encoder Decoder Model

In [76]:
class PositionalEmbedding(keras.layers.Layer):
    def __init__(self, vocab_size, output_dim, input_dim):
        super(PositionalEmbedding, self).__init__()
        self.word_embedding = layers.Embedding(vocab_size, output_dim=output_dim, input_length=input_dim)
        self.postional_embedding = layers.Embedding(input_dim, output_dim)
        
    def call(self, inputs):
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding(inputs)
        embedded_indices = self.postional_embedding(position_indices)
        return embedded_words + embedded_indices

In [77]:
class Transformer(keras.layers.Layer):
    def __init__(self,num_heads, embed_dim, ff_dim, rate=0.1):
        super(Transformer,self).__init__()
        self.att = keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = keras.layers.Dropout(rate)
    def call(self, inputs, training):
        out1 = self.att(inputs, inputs)
        out1 = self.dropout1(out1, training=training)
        out1 = self.layernorm1(inputs + out1)
        out2 = self.ffn(out1)
        out2 = self.dropout2(out2, training=training)
        output = self.layernorm2(out1 + out2)
        
        return output

In [78]:
vocab_size = len(tokenizer.word_index)
output_dim = 32
input_dim = 24
num_heads = 3
embed_dim = 32
ff_dim = 64

In [79]:
embedding_layer = PositionalEmbedding(vocab_size, output_dim, input_dim)
transformer = Transformer(num_heads, embed_dim, ff_dim)

In [80]:
inputs = keras.layers.Input(shape=(input_dim,))
x = embedding_layer(inputs)
x = transformer(x)
x = keras.layers.GlobalAveragePooling1D()(x)
x = keras.layers.Dropout(rate=0.1)(x)
x = keras.layers.Dense(20, activation='relu')(x)
x = keras.layers.Dropout(rate=0.1)(x)
outputs = keras.layers.Dense(5, activation='softmax')(x)

In [81]:
model = keras.Model(inputs = inputs, outputs=outputs)

In [82]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [83]:
history = model.fit(x_data, y_data, epochs=100, batch_size=64)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [84]:
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 24)]              0         
                                                                 
 positional_embedding_2 (Pos  (None, 24, 32)           286528    
 itionalEmbedding)                                               
                                                                 
 transformer_2 (Transformer)  (None, 24, 32)           16928     
                                                                 
 global_average_pooling1d_2   (None, 32)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dropout_10 (Dropout)        (None, 32)                0         
                                                                 
 dense_10 (Dense)            (None, 20)                660 

In [85]:
model.evaluate(x_val, y_val)



[8.451582908630371, 0.3976389169692993]

In [86]:
y_score.shape

(36473,)

In [93]:
predicts = model.predict(x_val[-15:])


In [95]:
for x in predicts:
    print(np.argmax(x))

0
2
2
2
1
0
1
1
1
1
1
1
1
1
1


In [96]:
y_val[-15:]

array([2, 1, 3, 3, 2, 0, 4, 2, 3, 2, 4, 2, 2, 3, 2])