In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sizes shared by the data pipeline and the model below.
vocab_size, max_len = 1000, 20

# Load the IMDb training sequences (word indices capped at vocab_size),
# keep the first 1000 reviews and bring each one to exactly max_len tokens.
raw_reviews = imdb.load_data(num_words=vocab_size)[0][0]
X_train = pad_sequences(raw_reviews[:1000], maxlen=max_len)

# Dummy per-token entity labels (0 = Non-Entity, 1 = Entity): one random
# integer class id for every token position of every review.
label_shape = (len(X_train), max_len)
y_train = np.random.randint(2, size=label_shape)

# Build NER model: token embeddings -> bidirectional LSTM -> per-token softmax.
# The sequence length is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which Keras 3 deprecates.
model = Sequential([
    tf.keras.Input(shape=(max_len,)),
    Embedding(vocab_size, 64),
    # return_sequences=True keeps one output vector per token, which is what
    # lets the Dense layer emit a class distribution for every position.
    Bidirectional(LSTM(64, return_sequences=True)),
    Dense(2, activation="softmax"),
])

# The "sparse" loss variant is used because y_train holds integer class ids
# (0 or 1) per token rather than one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

model.fit(X_train, y_train, epochs=2, batch_size=32)

# ---- OUTPUT PART ----
# Run the trained model on the first training review, shaped as a batch of one.
sample = np.reshape(X_train[0], (1, max_len))
prediction = model.predict(sample)

# Pick the highest-scoring class along the last (class) axis for each token.
predicted_labels = prediction.argmax(axis=-1)

print("\nNamed Entity Recognition Output:")
# Token numbering in the report is 1-based.
for position, label in enumerate(predicted_labels[0], start=1):
    if label == 1:
        print(f"Token {position}: Entity")
    else:
        print(f"Token {position}: Non-Entity")

Epoch 1/2
32/32 ━━━━━━━━━━━━━━━━━━━━ 6s 24ms/step - accuracy: 0.4979 - loss: 0.6932
Epoch 2/2
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 25ms/step - accuracy: 0.5331 - loss: 0.6917
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 339ms/step

Named Entity Recognition Output:
Token 1: Entity
Token 2: Entity
Token 3: Entity
Token 4: Entity
Token 5: Non-Entity
Token 6: Non-Entity
Token 7: Non-Entity
Token 8: Non-Entity
Token 9: Entity
Token 10: Entity
Token 11: Entity
Token 12: Entity
Token 13: Entity
Token 14: Entity
Token 15: Entity
Token 16: Entity
Token 17: Entity
Token 18: Entity
Token 19: Entity
Token 20: Entity
