In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, concatenate, Bidirectional, LSTM, Dense, TimeDistributed
from tensorflow.keras.models import Model

In [2]:
MAX_LEN = 10
EMBEDDING_DIM = 10
LSTM_DIM = 50
NUM_CLASSES = 2
EPOCHS = 20
BATCH_SIZE = 16

In [3]:
# INPUTS
texts = ['él vino', 'vino él'] * 100
poses = ['PRON VERB', 'VERB PRON'] * 100

# OUTPUTS
labels = ['SUBJ ROOT', 'ROOT SUBJ'] * 100
dependencies = [[[2], [0]], [[0], [1]]] * 100

In [4]:
texts_tokenizer = tf.keras.preprocessing.text.Tokenizer()
poses_tokenizer = tf.keras.preprocessing.text.Tokenizer()
labels_tokenizer = tf.keras.preprocessing.text.Tokenizer()

texts_tokenizer.fit_on_texts(texts)
poses_tokenizer.fit_on_texts(poses)
labels_tokenizer.fit_on_texts(labels)

X1 = np.array(texts_tokenizer.texts_to_sequences(texts))
X2 = np.array(poses_tokenizer.texts_to_sequences(poses))

y1 = np.reshape(labels_tokenizer.texts_to_sequences(labels), (len(labels), -1, 1))
y2 = np.array(dependencies)

In [5]:
input_text = Input(shape=(None,), dtype='int32')
input_pos = Input(shape=(None,), dtype='int32')

output_text = Embedding(MAX_LEN, EMBEDDING_DIM, trainable=True, mask_zero=True)(input_text)
output_pos = Embedding(MAX_LEN, EMBEDDING_DIM, trainable=True, mask_zero=True)(input_pos)
output = concatenate([output_text, output_pos])
output = Bidirectional(LSTM(LSTM_DIM, return_sequences=True))(output)
output_label = TimeDistributed(Dense(NUM_CLASSES + 1, activation='softmax'))(output)
output_dependency = TimeDistributed(Dense(NUM_CLASSES + 1, activation='softmax'))(output)

model = Model([input_text, input_pos], [output_label, output_dependency])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 10)     100         input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 10)     100         input_2[0][0]                    
______________________________________________________________________________________________

In [6]:
model.fit(x=[X1, X2], y=[y1, y2], epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split = 0.25)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fd0c82b5410>

In [8]:
texts_to_predict = ['él vino']
poses_to_predict = ['PRON NOUN']

X1 = np.array(texts_tokenizer.texts_to_sequences(texts_to_predict))
X2 = np.array(poses_tokenizer.texts_to_sequences(poses_to_predict))
p1, p2 = model.predict([X1, X2])

predicted_labels = labels_tokenizer.sequences_to_texts(
    [[x.index(max(x))] for x in p1[0].tolist()]
    )
predicted_dependencies = [[x.index(max(x))] for x in p2[0].tolist()]

print(predicted_labels)
print(predicted_dependencies)


ValueError: ignored