In [1]:
import nltk
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
import numpy as np

input_texts = ['I love NLP', 'He plays football']
target_texts = [['PRON', 'VERB', 'NOUN'], ['PRON', 'VERB', 'NOUN']]

word_vocab = sorted(set(word for sent in input_texts for word in sent.split()))
tag_vocab = sorted(set(tag for tags in target_texts for tag in tags))

word2idx = {word: i + 1 for i, word in enumerate(word_vocab)}
tag2idx = {tag: i for i, tag in enumerate(tag_vocab)}

max_encoder_seq_length = max(len(sent.split()) for sent in input_texts)
max_decoder_seq_length = max(len(tags) for tags in target_texts)

encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype='int32')
for i, sent in enumerate(input_texts):
    for t, word in enumerate(sent.split()):
        encoder_input_data[i, t] = word2idx[word]

decoder_input_data = np.zeros((len(target_texts), max_decoder_seq_length), dtype='int32')
for i, tags in enumerate(target_texts):
    for t, tag in enumerate(tags):
        decoder_input_data[i, t] = tag2idx[tag]

decoder_output_data = np.zeros((len(target_texts), max_decoder_seq_length, 1), dtype='int32')
for i, tags in enumerate(target_texts):
    for t, tag in enumerate(tags):
        decoder_output_data[i, t, 0] = tag2idx[tag]

encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(input_dim=len(word_vocab) + 1, output_dim=64)(encoder_inputs)
encoder = LSTM(64, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embedding)

decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(input_dim=len(tag_vocab), output_dim=64)(decoder_inputs)
decoder_lstm = LSTM(64, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])
decoder_dense = Dense(len(tag_vocab), activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit([encoder_input_data, decoder_input_data], decoder_output_data, batch_size=2, epochs=10)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3333 - loss: 1.0964
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.8333 - loss: 1.0916
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 1.0000 - loss: 1.0867
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 1.0000 - loss: 1.0818
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 1.0000 - loss: 1.0767
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 1.0000 - loss: 1.0715
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 1.0000 - loss: 1.0660
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 1.0000 - loss: 1.0602
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x1ce4216d130>