## Named Entity Recognition using Bidirectional LSTM

**Bidirectional LSTM (BiLSTM)** is an extension of the traditional LSTM, which is a type of recurrent neural network (RNN) designed to handle sequential data effectively. While standard LSTMs process input data in one direction (typically from past to future), BiLSTMs process data in both forward and backward directions. This dual processing allows BiLSTMs to capture information from both past and future contexts of a sequence, making them particularly powerful for tasks that rely on context.

**Key Features:**
- **Forward and Backward Layers:** A BiLSTM has two LSTM layers:
  - **Forward Layer:** Processes the sequence from the beginning to the end.
  - **Backward Layer:** Processes the sequence from the end to the beginning.
- **Concatenated Outputs:** The outputs of the forward and backward layers are typically concatenated at each time step, providing a richer representation of the input sequence.


Official Keras documentation for the Bidirectional LSTM wrapper: [Bidirectional layer](https://keras.io/api/layers/recurrent_layers/bidirectional/)

## Imports and Libraries

In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, TimeDistributed, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from seqeval.metrics import classification_report as seqeval_report
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import classification_report
from tests import test_data, test_model, test_validation_accuracy
import random

## Loading data

In [None]:
def load_dataset(filepath):
    """Load the NER dataset from a CSV file and forward-fill missing values.

    Args:
        filepath: Path to the CSV file to read.

    Returns:
        pandas.DataFrame: The loaded table, with each missing cell replaced
        by the closest preceding non-missing value in the same column.
    """
    # NOTE(review): latin-1 is assumed because the commonly distributed
    # "ner_dataset.csv" is not valid UTF-8 -- confirm against the actual file.
    data = pd.read_csv(filepath, encoding="latin1")

    # Presumably "Sentence #" is populated only on the first token row of each
    # sentence, so a forward fill propagates it to the rest -- TODO confirm.
    data = data.ffill()

    # Hand the frame to the project's own check before returning it.
    test_data(data)
    return data

## Creating mappings and sequences

In [None]:
class SentenceGetter:
    """Group the token rows of the dataset into per-sentence lists.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, data):
        # List of sentences; each sentence is a list of (word, tag) tuples.
        self.sentences = self.aggregate_sentences(data)

    def aggregate_sentences(self, data):
        """Return one list of ``(word, tag)`` tuples per sentence.

        Rows are grouped on the "Sentence #" column; groups come back in the
        sorted order of that column's values (pandas ``groupby`` default).

        Args:
            data: DataFrame with one row per token.
                Assumes "Word" and "Tag" columns exist -- TODO confirm against
                the dataset schema.

        Returns:
            list[list[tuple]]: Sentences as lists of ``(word, tag)`` pairs.
        """

        def agg_func(group):
            # Pair every word with its tag, preserving row order.
            return list(zip(group["Word"], group["Tag"]))

        # Select only the columns the aggregator needs so the grouping column
        # is not passed into ``apply``.
        grouped = data.groupby("Sentence #")[["Word", "Tag"]]
        return grouped.apply(agg_func).tolist()

In [None]:
def create_mappings(data):
    """Build the vocabulary, the tag set and their index lookup tables.

    Word indices start at 1; index 0 is deliberately left unassigned so it can
    serve as the padding value for word sequences. Tag indices start at 0.

    Args:
        data: DataFrame with one row per token.
            Assumes "Word" and "Tag" columns exist -- TODO confirm against the
            dataset schema.

    Returns:
        tuple: ``(words, tags, word2idx, tag2idx, idx2tag)`` where ``words``
        and ``tags`` are lists of unique values in order of first appearance,
        ``word2idx`` / ``tag2idx`` map value -> index and ``idx2tag`` maps
        index -> tag.
    """
    # ``unique()`` keeps first-appearance order, so the mappings are
    # reproducible across runs (unlike iterating a ``set`` of strings).
    words = data["Word"].dropna().unique().tolist()
    tags = data["Tag"].dropna().unique().tolist()

    word2idx = {word: idx for idx, word in enumerate(words, start=1)}
    tag2idx = {tag: idx for idx, tag in enumerate(tags)}
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}

    return words, tags, word2idx, tag2idx, idx2tag

In [None]:
def prepare_sequences(sentences, word2idx, tag2idx, max_len):
    """Convert sentences into padded index arrays for training.

    Args:
        sentences: Iterable of sentences; each sentence is a sequence of
            token tuples whose first item is the word and whose last item is
            the tag.
        word2idx: Mapping word -> integer index. Index 0 is used as the
            padding value, so it should not be assigned to a real word.
        tag2idx: Mapping tag -> integer index. Must contain the "O" tag,
            which is used to label padded positions.
        max_len: Length every sequence is padded or truncated to.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape ``(n_sentences, max_len)`` and
        holds word indices, and ``y`` has shape
        ``(n_sentences, max_len, len(tag2idx))`` and holds one-hot tags.

    Raises:
        KeyError: If a word or tag is missing from its mapping, or if "O" is
            not a key of ``tag2idx``.
    """
    word_ids = [[word2idx[token[0]] for token in sentence] for sentence in sentences]
    tag_ids = [[tag2idx[token[-1]] for token in sentence] for sentence in sentences]

    # Pad/truncate at the end for both arrays so words and tags stay aligned.
    X = pad_sequences(
        word_ids, maxlen=max_len, padding="post", truncating="post", value=0
    )
    y_ids = pad_sequences(
        tag_ids,
        maxlen=max_len,
        padding="post",
        truncating="post",
        value=tag2idx["O"],
    )

    # One-hot encode the tags to match a per-token softmax output trained
    # with categorical cross-entropy.
    y = tf.keras.utils.to_categorical(y_ids, num_classes=len(tag2idx))
    return X, y

## Model building

In [None]:

# Hyperparameters used to build and train the model.
EMBEDDING_DIM = 128    # size of each word-embedding vector
LSTM_UNITS = 128       # units of the first LSTM layer
DROPOUT_RATE = 0.4     # dropout rate
BATCH_SIZE = 64        # samples per gradient update
EPOCHS = 3             # passes over the training data
LEARNING_RATE = 0.002  # optimizer learning rate


In [None]:

def build_model(input_dim, output_dim, input_length, lstm_units, dropout_rate,
                num_tags=None):
    """Build the BiLSTM sequence-tagging model.

    Architecture: Embedding -> Dropout -> BiLSTM(lstm_units) -> Dropout ->
    BiLSTM(lstm_units // 2) -> Dropout -> per-time-step softmax over tags.

    Args:
        input_dim: Vocabulary size for the embedding layer (largest word
            index + 1).
        output_dim: Dimension of the embedding vectors.
        input_length: Length of every (padded) input sequence.
        lstm_units: Units of the first LSTM; the second uses half as many.
        dropout_rate: Dropout rate applied after the embedding and after each
            recurrent layer.
        num_tags: Number of output classes. Defaults to ``len(tag2idx)``,
            read from the module-level ``tag2idx`` mapping.

    Returns:
        tensorflow.keras.Model: The uncompiled model.
    """
    if num_tags is None:
        # Fall back to the module-level mapping, as the original code did.
        num_tags = len(tag2idx)

    input_layer = Input(shape=(input_length,))
    embedding = Embedding(
        input_dim=input_dim, output_dim=output_dim, input_length=input_length
    )(input_layer)
    dropout1 = Dropout(dropout_rate)(embedding)

    # return_sequences=True keeps one output per time step, which the
    # per-token classifier below requires.
    lstm1 = Bidirectional(LSTM(lstm_units, return_sequences=True))(dropout1)
    dropout2 = Dropout(dropout_rate)(lstm1)
    # NOTE(review): the second recurrent layer is also wrapped in
    # Bidirectional; the exercise text only says "half the neurons" --
    # confirm this matches what test_model expects.
    lstm2 = Bidirectional(LSTM(lstm_units // 2, return_sequences=True))(dropout2)
    dropout3 = Dropout(dropout_rate)(lstm2)

    output = TimeDistributed(Dense(num_tags, activation="softmax"))(dropout3)

    model = Model(input_layer, output)

    # Check against the values this model was actually built with, rather
    # than module-level constants that may differ from the arguments.
    test_model(model, input_dim, output_dim, input_length, lstm_units, num_tags)

    return model

## Training model

In [None]:
def train_model(model, X_train, y_train, batch_size, epochs, learning_rate):
    """Compile and train the model.

    The model is compiled with the Nadam optimizer, categorical
    cross-entropy loss and accuracy as the metric. 10% of the training data
    is held out for validation.

    Args:
        model: Keras model to train (compiled in place).
        X_train: Training inputs.
        y_train: Training targets; categorical cross-entropy expects them
            one-hot encoded.
        batch_size: Samples per gradient update.
        epochs: Number of passes over the training data.
        learning_rate: Initial learning rate for Nadam.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    optimizer = Nadam(learning_rate=learning_rate)

    model.compile(
        optimizer=optimizer,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    # Halve the learning rate when validation loss has not improved for 3
    # epochs, never going below 1e-5.
    lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1, min_lr=1e-5)

    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
        callbacks=[lr_scheduler],
    )
    test_validation_accuracy(history)
    return history

## Model evaluation

In [None]:
def evaluate_model(model, X_test, y_test, idx2tag):
    """Evaluate the model and generate predictions.

    Args:
        model: Trained model exposing ``predict``.
        X_test: Test inputs passed to ``model.predict``.
        y_test: Test targets with class scores on the last axis
            (presumably one-hot encoded -- verify against the caller).
        idx2tag: Mapping tag index -> tag string.

    Returns:
        tuple: ``(report, y_pred_tags, y_true_tags)`` where ``report`` is the
        text classification report and the other two are arrays of tag
        indices obtained by taking the argmax over the last axis.
    """
    y_pred = model.predict(X_test)
    y_pred_tags = np.argmax(y_pred, axis=-1)
    y_true_tags = np.argmax(y_test, axis=-1)

    # Flatten to one tag string per time step. Every position of every
    # sequence is included, so any padded positions count toward the report.
    y_pred_flat = [idx2tag[i] for row in y_pred_tags for i in row]
    y_true_flat = [idx2tag[i] for row in y_true_tags for i in row]

    # zero_division=0 reports 0 for tags that are never predicted, without
    # emitting a warning for each of them.
    report = classification_report(y_true_flat, y_pred_flat, zero_division=0)
    return report, y_pred_tags, y_true_tags

## Decoding tags

In [None]:
def decode_tags(sentence_idx, y_pred_tags, y_true_tags, idx2tag, X_test, idx2word):
    """Decode one test sentence into words, true tags and predicted tags.

    Args:
        sentence_idx: Position of the sentence in ``X_test``.
        y_pred_tags: Predicted tag indices, one row per sentence.
        y_true_tags: True tag indices, one row per sentence.
        idx2tag: Mapping tag index -> tag string.
        X_test: Word indices, one row per sentence. Index 0 is treated as
            padding.
        idx2word: Mapping word index -> word string.

    Returns:
        tuple: ``(words, true_tags, pred_tags)`` as lists of strings. Padding
        is removed from ``words``; "O" tags are removed from both tag lists,
        so the tag lists are not positionally aligned with ``words``.
    """
    # Index 0 is the padding value, so it never maps to a real word.
    words = [idx2word[i] for i in X_test[sentence_idx] if i != 0]

    true_tags = [
        idx2tag[i] for i in y_true_tags[sentence_idx] if idx2tag[i] != "O"
    ]

    pred_tags = [
        idx2tag[i] for i in y_pred_tags[sentence_idx] if idx2tag[i] != "O"
    ]

    return words, true_tags, pred_tags

def display_sample_predictions(X_test, y_pred_tags, y_true_tags, idx2tag, word2idx):
    """Display random sample predictions.

    Prints up to five distinct, randomly chosen test sentences together with
    their actual and predicted tags.

    Args:
        X_test: Word indices, one row per sentence.
        y_pred_tags: Predicted tag indices, one row per sentence.
        y_true_tags: True tag indices, one row per sentence.
        idx2tag: Mapping tag index -> tag string.
        word2idx: Mapping word -> word index; inverted here for decoding.
    """
    idx2word = {i: w for w, i in word2idx.items()}
    # Cap at the number of available sentences so small (or empty) test sets
    # do not raise.
    num_examples = min(5, len(X_test))
    print("\nSample Predictions vs Actual Tags:\n")

    # Sample without replacement so the same sentence is not shown twice.
    for sentence_idx in random.sample(range(len(X_test)), num_examples):
        words, true_tags, pred_tags = decode_tags(
            sentence_idx, y_pred_tags, y_true_tags, idx2tag, X_test, idx2word
        )

        print("Sentence: ", " ".join(words))
        print("Actual Tags: ", " ".join(true_tags))
        print("Predicted Tags: ", " ".join(pred_tags))
        print("-" * 60)

## Putting it all together

In [None]:
# Load Dataset
filepath = "ner_dataset.csv"
data = load_dataset(filepath)

# Prepare Sentences
getter = SentenceGetter(data)
sentences = getter.sentences

words, tags, word2idx, tag2idx, idx2tag = create_mappings(data)

# Prepare Data
MAX_LEN = 50
X, y = prepare_sequences(sentences, word2idx, tag2idx, MAX_LEN)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Build Model
# The embedding needs one row per index from 0 up to the largest word index,
# hence max + 1 (this also covers an index reserved for padding).
model = build_model(
    input_dim=max(word2idx.values()) + 1,
    output_dim=EMBEDDING_DIM,
    input_length=MAX_LEN,
    lstm_units=LSTM_UNITS,
    dropout_rate=DROPOUT_RATE,
)
model.summary()

# Train Model
history = train_model(model, X_train, y_train, BATCH_SIZE, EPOCHS, LEARNING_RATE)

# Evaluate Model
report, y_pred_tags, y_true_tags = evaluate_model(model, X_test, y_test, idx2tag)
print("Classification Report:\n", report)

# Display Sample Predictions
display_sample_predictions(X_test, y_pred_tags, y_true_tags, idx2tag, word2idx)
