# EXP 12 - NAMED ENTITY RECOGNITION - LSTM

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Sample training data (replace this with your actual data).
# Each sentence is a list of (token, BIO tag) pairs.
training_data = [
    [("John", "B-PER"), ("Smith", "I-PER"), ("lives", "O"), ("in", "O"), ("New", "B-LOC"), ("York", "I-LOC")],
    [("Mary", "B-PER"), ("works", "O"), ("in", "O"), ("San", "B-LOC"), ("Francisco", "I-LOC")]
]

# Seed the Python, NumPy and TensorFlow random generators so repeated runs
# start from the same random state.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Create a vocabulary and labels set
words = {word for sentence in training_data for word, _ in sentence}
labels = {label for sentence in training_data for _, label in sentence}

# Assign indices to words and labels. Word index 0 is left unassigned and is
# used for padding and for out-of-vocabulary words at prediction time.
# The sets are sorted first: the iteration order of a set of strings can change
# between interpreter runs, which would silently renumber the classes.
word2idx = {word: idx + 1 for idx, word in enumerate(sorted(words))}
label2idx = {label: idx for idx, label in enumerate(sorted(labels))}
idx2label = {idx: label for label, idx in label2idx.items()}

# Convert sentences to sequences of indices
X = [[word2idx[word] for word, _ in sentence] for sentence in training_data]
y = [[label2idx[label] for _, label in sentence] for sentence in training_data]

# Pad sequences to ensure equal length (pad_sequences pads at the front by default).
# Label index 0 is a real tag, so padding the labels with the default 0 would
# train the model to emit that tag on padding positions. Padded label positions
# are filled with the "O" (outside) tag instead; 0 is only a fallback for tag
# sets that have no "O".
pad_label = label2idx.get("O", 0)
X = pad_sequences(X)
y = pad_sequences(y, value=pad_label)

# Convert labels to one-hot encoding; the result is a single array with one
# one-hot row per time step of every sentence.
y = np.array([to_categorical(seq, num_classes=len(labels)) for seq in y])

# Build the LSTM model with additional modifications
model = Sequential()
model.add(Embedding(input_dim=len(words) + 1, output_dim=100, input_length=X.shape[1]))
model.add(Bidirectional(LSTM(units=200, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)))
model.add(LSTM(units=100, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=len(labels), activation='softmax'))

# Compile the model with learning rate scheduling
lr_schedule = ExponentialDecay(initial_learning_rate=0.001, decay_steps=10000, decay_rate=0.9)
model.compile(optimizer=Adam(learning_rate=lr_schedule), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on more epochs
model.fit(X, y, epochs=30, batch_size=1)

# Sample test data for prediction (replace this with your actual test data)
test_sentence = [("John",), ("lives",), ("in",), ("New",), ("York",)]

# Convert test sentence to sequence of indices using the same word2idx mapping.
# Every test item is a 1-tuple, so it is unpacked before the lookup: looking up
# the tuple itself never matches a vocabulary key and maps every word to 0.
test_words = [word for (word,) in test_sentence]
X_test = [[word2idx.get(word, 0) for word in test_words]]

# Pad the sequence
X_test = pad_sequences(X_test, maxlen=X.shape[1])

# Make predictions
predictions = model.predict(X_test)

# Convert predictions to labels.
# pad_sequences pads (and truncates) at the front by default, so only the last
# n_kept time steps belong to real tokens; the leading ones are padding.
n_kept = min(len(test_words), X.shape[1])
kept_words = test_words[len(test_words) - n_kept:]
kept_predictions = predictions[0][X.shape[1] - n_kept:]
predicted_labels = [idx2label[int(np.argmax(pred))] for pred in kept_predictions]

# Print the results
for word, label in zip(kept_words, predicted_labels):
    print(f"Word: {word}, Predicted Label: {label}")


In [None]:
#importing all the libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LSTM, Dropout, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


In [None]:
# Training data: a list of sentences, where each sentence is its own list of
# (token, tag) tuples. Two sentences are used here.
first_sentence = [
    ("John", "B-PER"),
    ("Smith", "I-PER"),
    ("lives", "O"),
    ("in", "O"),
    ("New", "B-LOC"),
    ("York", "I-LOC"),
]
second_sentence = [
    ("Mary", "B-PER"),
    ("works", "O"),
    ("in", "O"),
    ("San", "B-LOC"),
    ("Francisco", "I-LOC"),
]
train = [first_sentence, second_sentence]

In [None]:
# Collect the distinct tokens and the distinct tags of the training data
# into two sets.
words = {token for sentence in train for token, _ in sentence}
labels = {tag for sentence in train for _, tag in sentence}


In [None]:
# Show the vocabulary set and the tag set, one per line.
for collection in (words, labels):
    print(collection)

{'York', 'John', 'lives', 'San', 'Francisco', 'in', 'New', 'Mary', 'works', 'Smith'}
{'B-PER', 'O', 'B-LOC', 'I-PER', 'I-LOC'}


In [None]:
# Give every word and every label an integer index, following each set's
# iteration order. Words are numbered from 1 (no word gets index 0);
# labels are numbered from 0.
words2idx = dict(zip(words, range(1, len(words) + 1)))
labels2idx = dict(zip(labels, range(len(labels))))

In [None]:
# Show the word -> index and label -> index mappings, one per line.
print(words2idx, labels2idx, sep="\n")

{'York': 1, 'John': 2, 'lives': 3, 'San': 4, 'Francisco': 5, 'in': 6, 'New': 7, 'Mary': 8, 'works': 9, 'Smith': 10}
{'B-PER': 0, 'O': 1, 'B-LOC': 2, 'I-PER': 3, 'I-LOC': 4}


In [None]:
# Encode each sentence twice: its tokens as vocabulary indices (x) and its
# tags as label indices (y). Both stay lists of per-sentence lists.
x = []
y = []
for sentence in train:
    x.append([words2idx[token] for token, _ in sentence])
    y.append([labels2idx[tag] for _, tag in sentence])

In [None]:
# Show the index-encoded sentences; at this point x is still a list with one
# (unpadded) list of word indices per sentence.
print(x)

[[2, 10, 3, 6, 7, 1], [8, 9, 6, 4, 5]]


In [None]:
# Walk through every sentence and print only the token of each (token, tag) pair.
for sentence in train:
    for token, _tag in sentence:
        print(token)

John
Smith
lives
in
New
York
Mary
works
in
San
Francisco


In [None]:
# Pad every sequence to the length of the longest sentence. pad_sequences adds
# the filler at the front of shorter sequences by default.
x = pad_sequences(x)

# Word index 0 is not assigned to any word, but label index 0 is a real tag.
# Padding the tags with the default 0 would therefore train the model to emit
# that tag on padding positions. Padded tag positions are filled with the "O"
# (outside) tag instead; 0 is only a fallback for tag sets that have no "O".
y = pad_sequences(y, value=labels2idx.get("O", 0))


In [None]:
# One-hot encode the padded tag indices, one array per sentence.
# Run this cell only once per kernel session: it overwrites y, so a second run
# would try to one-hot encode data that is already one-hot.
n_classes = len(labels)
y_onehot = []
for tag_ids in y:
    y_onehot.append(to_categorical(tag_ids, num_classes=n_classes))
y = y_onehot

In [None]:
# Inspect the one-hot encoded tags: a list with one 2-D array per sentence,
# one row per time step.
print(y)

[array([[1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.]], dtype=float32), array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.]], dtype=float32)]


In [None]:
# Network: embedding -> bidirectional LSTM -> LSTM -> per-time-step softmax.
vocab_size = len(words) + 1  # +1 because word indices start at 1
n_classes = len(labels)
seq_len = x.shape[1]

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=100, input_length=seq_len),
    Bidirectional(
        LSTM(units=200, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)
    ),
    LSTM(units=100, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
    Dense(units=n_classes, activation='softmax'),
])

# Adam with an exponentially decaying learning rate.
lr_schedule = ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.9,
)
optimizer = Adam(learning_rate=lr_schedule)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 50 epochs with one sentence per batch (batch_size=1).
# np.array(y) stacks the per-sentence one-hot arrays into a single 3-D array.
model.fit(x,np.array(y), epochs=50, batch_size=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7c9939cae530>

In [None]:
test_sentence = [("John",), ("lives",), ("in",), ("New",), ("York",)]

# Convert the test sentence to a sequence of word indices.
# Every item is a 1-tuple, so it is unpacked before the lookup: looking up the
# tuple itself never matches a key of words2idx and maps every word to 0.
test_words = [word for (word,) in test_sentence]
xtest = [[words2idx.get(word, 0) for word in test_words]]
xtest = pad_sequences(xtest, maxlen=x.shape[1])

predictions = model.predict(xtest)

# Decode through the inverse of labels2idx instead of relying on list(labels)
# happening to iterate in the same order.
idx2label = {idx: label for label, idx in labels2idx.items()}

# pad_sequences pads (and truncates) at the front by default, so only the last
# n_kept time steps belong to real tokens; the leading ones are padding and are
# dropped so that the labels line up one-to-one with the words.
n_kept = min(len(test_words), x.shape[1])
prediction_label = [
    idx2label[int(np.argmax(pred))]
    for pred in predictions[0][x.shape[1] - n_kept:]
]
print(prediction_label)

['B-PER', 'B-PER', 'O', 'O', 'B-LOC', 'I-LOC']


In [None]:
# Raw model output: the softmax scores over the label classes for every
# time step of the test input.
print(predictions)


[[[9.9061841e-01 2.4934110e-04 1.3082847e-04 8.9198891e-03 8.1635539e-05]
  [6.4945531e-01 7.6214159e-03 5.6636095e-04 3.4226242e-01 9.4435360e-05]
  [2.4554125e-04 9.9697709e-01 6.7720964e-04 2.0971613e-03 3.0159317e-06]
  [1.2479983e-04 9.1122401e-01 8.8117555e-02 4.5829162e-04 7.5249030e-05]
  [5.6584147e-05 9.4424631e-04 6.7289615e-01 1.0659680e-04 3.2599637e-01]
  [1.4043136e-06 1.0932634e-06 4.8692862e-04 8.2960378e-07 9.9950969e-01]]]


In [None]:
# Check the output shape: (number of test sentences, time steps, label classes).
print(predictions.shape)

(1, 6, 5)


In [None]:
# Drop the leading batch axis: the per-time-step scores of the single test sentence.
print(predictions[0])

[[9.9061841e-01 2.4934110e-04 1.3082847e-04 8.9198891e-03 8.1635539e-05]
 [6.4945531e-01 7.6214159e-03 5.6636095e-04 3.4226242e-01 9.4435360e-05]
 [2.4554125e-04 9.9697709e-01 6.7720964e-04 2.0971613e-03 3.0159317e-06]
 [1.2479983e-04 9.1122401e-01 8.8117555e-02 4.5829162e-04 7.5249030e-05]
 [5.6584147e-05 9.4424631e-04 6.7289615e-01 1.0659680e-04 3.2599637e-01]
 [1.4043136e-06 1.0932634e-06 4.8692862e-04 8.2960378e-07 9.9950969e-01]]


In [None]:
# Re-print the label set. labels2idx numbered the labels in this set's
# iteration order, so the k-th label shown here is class index k.
print(labels)

{'B-PER', 'O', 'B-LOC', 'I-PER', 'I-LOC'}


In [None]:
# Print the index of the highest-scoring class for every time step.
for class_id in np.argmax(predictions[0], axis=-1):
    print(class_id)

0
0
1
1
2
4
