In [12]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, TimeDistributed, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from seqeval.metrics import classification_report as seqeval_report
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import classification_report
from tests import test_data, test_model, test_validation_accuracy
import random

Write a function to load the dataset and fill missing values.


In [13]:
def load_dataset(filepath):
    """Load the dataset and fill missing values."""
    data = pd.read_csv(filepath, encoding="latin1").fillna(method="ffill")
    test_data(data)
    return data


Write a class to aggregate sentences from the dataset.


In [14]:
class SentenceGetter:
    def __init__(self, data):
        self.sentences = self.aggregate_sentences(data)

    def aggregate_sentences(self, data):
        agg_func = lambda s: [(w, t) for w, t in zip(s["Word"].values, s["Tag"].values)]
        return data.groupby("Sentence #").apply(agg_func).tolist()

Write a function to create vocabulary and tag mappings.


In [15]:
#%%writefile create_mappings.py

def create_mappings(data):
    """Create mappings for words and tags."""
    words = list(set(data["Word"].values))
    words.append("PAD")
    tags = list(set(data["Tag"].values))

    word2idx = {w: i for i, w in enumerate(words)}
    tag2idx = {t: i for i, t in enumerate(tags)}
    idx2tag = {i: t for t, i in tag2idx.items()}

    return words, tags, word2idx, tag2idx, idx2tag

Data Preparation
Write functions to prepare sequences and pad them.


In [16]:
def prepare_sequences(sentences, word2idx, tag2idx, max_len):
    """Prepare padded sequences for words and tags."""
    X = [[word2idx.get(w[0], word2idx["PAD"]) for w in s] for s in sentences]
    y = [[tag2idx.get(w[1], tag2idx["O"]) for w in s] for s in sentences]

    X = pad_sequences(X, maxlen=max_len, padding="post")
    y = pad_sequences(y, maxlen=max_len, padding="post", value=tag2idx["O"])
    
    y = tf.keras.utils.to_categorical(y, num_classes=len(tag2idx))
    return X, y

Model Definition
Write a function to define the model using the Keras functional API.

In [17]:
# Build Model
EMBEDDING_DIM = 128
LSTM_UNITS = 128
DROPOUT_RATE = 0.4
BATCH_SIZE = 64
EPOCHS = 3
LEARNING_RATE = 0.002

In [18]:
def build_model(input_dim, output_dim, input_length, lstm_units, dropout_rate):
    """Build the BiLSTM model."""
    input_layer = Input(shape=(input_length,))
    embedding = Embedding(input_dim=input_dim, output_dim=output_dim, input_length=input_length)(input_layer)
    dropout1 = Dropout(dropout_rate)(embedding)

    lstm1 = Bidirectional(LSTM(lstm_units, return_sequences=True, dropout=0.3, recurrent_dropout=0.3))(dropout1)
    dropout2 = Dropout(dropout_rate)(lstm1)
    lstm2 = Bidirectional(LSTM(lstm_units // 2, return_sequences=True, dropout=0.3, recurrent_dropout=0.3))(dropout2)
    dropout3 = Dropout(dropout_rate)(lstm2)

    output = TimeDistributed(Dense(len(tag2idx), activation="softmax"))(dropout3)

    model = Model(input_layer, output)
    
    test_model(model, input_dim, EMBEDDING_DIM, input_length, LSTM_UNITS, len(tag2idx))

    return model

Model Training
Write a function to compile, train, and evaluate the model.

In [19]:
def train_model(model, X_train, y_train, batch_size, epochs, learning_rate):
    """Compile and train the model."""
    optimizer = Nadam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

    lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1, min_lr=1e-5)

    history = model.fit(
        X_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
        verbose=1,
        callbacks=[lr_scheduler]
    )
    test_validation_accuracy(history)
    return history

Model Evaluation
Write a function to evaluate the model and generate predictions.

In [20]:
def evaluate_model(model, X_test, y_test, idx2tag):
    """Evaluate the model and generate predictions."""
    y_pred = model.predict(X_test)
    y_pred_tags = np.argmax(y_pred, axis=-1)
    y_true_tags = np.argmax(y_test, axis=-1)

    y_pred_flat = [idx2tag[i] for row in y_pred_tags for i in row]
    y_true_flat = [idx2tag[i] for row in y_true_tags for i in row]

    report = classification_report(y_true_flat, y_pred_flat)
    return report, y_pred_tags, y_true_tags

Display Predictions
Write a function to display random sample predictions.

In [21]:
def decode_tags(sentence_idx, y_pred_tags, y_true_tags, idx2tag, X_test, idx2word):
    """Decode tags for a given sentence index."""
    words = [idx2word[idx] for idx in X_test[sentence_idx] if idx != word2idx["PAD"]]
    true_tags = [idx2tag[idx] for idx in y_true_tags[sentence_idx] if idx != tag2idx["O"]]
    pred_tags = [idx2tag[idx] for idx in y_pred_tags[sentence_idx] if idx != tag2idx["O"]]
    return words, true_tags, pred_tags


def display_sample_predictions(X_test, y_pred_tags, y_true_tags, idx2tag, word2idx):
    """Display random sample predictions."""
    idx2word = {i: w for w, i in word2idx.items()}
    num_examples = 5
    print("\nSample Predictions vs Actual Tags:\n")

    for _ in range(num_examples):
        sentence_idx = random.randint(0, len(X_test) - 1)
        words, true_tags, pred_tags = decode_tags(sentence_idx, y_pred_tags, y_true_tags, idx2tag, X_test, idx2word)
        print("Sentence: ", " ".join(words))
        print("Actual Tags: ", " ".join(true_tags))
        print("Predicted Tags: ", " ".join(pred_tags))
        print("-" * 60)

Main Function
Bring everything together in a main function.

In [22]:
# Load Dataset
filepath = "ner_dataset.csv"
data = load_dataset(filepath)

# Prepare Sentences
getter = SentenceGetter(data)
sentences = getter.sentences

# Create Mappings
words, tags, word2idx, tag2idx, idx2tag = create_mappings(data)

# Prepare Data
MAX_LEN = 50
X, y = prepare_sequences(sentences, word2idx, tag2idx, MAX_LEN)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

model = build_model(len(words), EMBEDDING_DIM, MAX_LEN, LSTM_UNITS, DROPOUT_RATE)
model.summary()


# Train Model
train_model(model, X_train, y_train, BATCH_SIZE, EPOCHS, LEARNING_RATE)

# Evaluate Model
report, y_pred_tags, y_true_tags = evaluate_model(model, X_test, y_test, idx2tag)
print("Classification Report:\n", report)

# Display Sample Predictions
display_sample_predictions(X_test, y_pred_tags, y_true_tags, idx2tag, word2idx)

Data shape is correct.
Model test passed with 4932625 trainable parameters!
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 50)]              0         
                                                                 
 embedding_1 (Embedding)     (None, 50, 128)           4502912   
                                                                 
 dropout_3 (Dropout)         (None, 50, 128)           0         
                                                                 
 bidirectional_2 (Bidirecti  (None, 50, 256)           263168    
 onal)                                                           
                                                                 
 dropout_4 (Dropout)         (None, 50, 256)           0         
                                                                 
 bidirectional_3 (Bidirecti  (None, 50, 128)     



Validation accuracy is sufficient: 0.98 (>= 0.25).


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
               precision    recall  f1-score   support

       B-art       0.00      0.00      0.00        46
       B-eve       0.00      0.00      0.00        35
       B-geo       0.82      0.88      0.85      3796
       B-gpe       0.92      0.91      0.92      1592
       B-nat       0.00      0.00      0.00        20
       B-org       0.68      0.64      0.66      2053
       B-per       0.84      0.68      0.75      1666
       B-tim       0.90      0.76      0.83      2032
       I-art       0.00      0.00      0.00        40
       I-eve       0.00      0.00      0.00        39
       I-geo       0.74      0.74      0.74       712
       I-gpe       0.00      0.00      0.00        16
       I-nat       0.00      0.00      0.00         7
       I-org       0.72      0.70      0.71      1697
       I-per       0.78      0.88      0.82      1657
       I-tim       0.86      0.44      0.58       583
           O       0.99      1.00      1.00    223809

  

  _warn_prf(average, modifier, msg_start, len(result))
