In [41]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
# from tensorflow.data.Dataset import from_tensor_slices
from tensorflow.keras.preprocessing.text import tokenizer_from_json, Tokenizer

In [42]:
path = 'train.csv'
df = pd.read_csv(path)
df = df.drop(columns=['id', 'context'])

In [43]:
x_data_1 = df['anchor']
x_data_2 = df['target']
score = df['score']

In [44]:
x_combined = x_data_1 + " " + x_data_2

In [45]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_combined)

In [46]:
anchor_tokenized = tokenizer.texts_to_sequences(x_data_1)
target_tokenized = tokenizer.texts_to_sequences(x_data_2)

In [47]:
padded_anchor = tf.keras.preprocessing.sequence.pad_sequences(anchor_tokenized, maxlen=7)
padded_target = tf.keras.preprocessing.sequence.pad_sequences(target_tokenized, maxlen=17)

In [48]:
all_inputs = tf.keras.layers.Concatenate(axis=1)([padded_anchor, padded_target])

In [49]:
from sklearn.preprocessing import LabelEncoder
LE = LabelEncoder()
y_score = LE.fit_transform(score)

## Build Encoder Decoder Model

In [50]:
class PositionalEmbedding(keras.layers.Layer):
    def __init__(self, vocab_size, output_dim, input_dim):
        super(PositionalEmbedding, self).__init__()
        self.word_embedding = layers.Embedding(vocab_size, output_dim=output_dim, input_length=input_dim)
        self.postional_embedding = layers.Embedding(input_dim, output_dim)
        
    def call(self, inputs):
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding(inputs)
        embedded_indices = self.postional_embedding(position_indices)
        return embedded_words + embedded_indices

In [51]:
class Transformer(keras.layers.Layer):
    def __init__(self,num_heads, embed_dim, ff_dim, rate=0.1):
        super(Transformer,self).__init__()
        self.att = keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = keras.layers.Dropout(rate)
    def call(self, inputs, training):
        out1 = self.att(inputs, inputs)
        out1 = self.dropout1(out1, training=training)
        out1 = self.layernorm1(inputs + out1)
        out2 = self.ffn(out1)
        out2 = self.dropout2(out2, training=training)
        output = self.layernorm2(out1 + out2)
        
        return output

In [52]:
vocab_size = len(tokenizer.word_index)
output_dim = 32
input_dim = 24
num_heads = 3
embed_dim = 32
ff_dim = 64

In [53]:
embedding_layer = PositionalEmbedding(vocab_size, output_dim, input_dim)
transformer = Transformer(num_heads, embed_dim, ff_dim)

In [54]:
inputs = keras.layers.Input(shape=(input_dim,))
x = embedding_layer(inputs)
x = transformer(x)
x = keras.layers.GlobalAveragePooling1D()(x)
x = keras.layers.Dropout(rate=0.1)(x)
x = keras.layers.Dense(20, activation='relu')(x)
x = keras.layers.Dropout(rate=0.1)(x)
outputs = keras.layers.Dense(5, activation='softmax')(x)

In [55]:
model = keras.Model(inputs = inputs, outputs=outputs)

In [56]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [57]:
history = model.fit(all_inputs, y_score, epochs=40, batch_size=64)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [58]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 24)]              0         
                                                                 
 positional_embedding_4 (Pos  (None, 24, 32)           286528    
 itionalEmbedding)                                               
                                                                 
 transformer_2 (Transformer)  (None, 24, 32)           16928     
                                                                 
 global_average_pooling1d_1   (None, 32)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dropout_8 (Dropout)         (None, 32)                0         
                                                                 
 dense_8 (Dense)             (None, 20)                660 

In [68]:
model.evaluate(all_inputs[:1000], y_score[:1000])



[0.1960495114326477, 0.9139999747276306]

In [67]:
y_score.shape

(36473,)