In [2]:
import tensorflow as tf
from transformers import BertTokenizer
import numpy as np
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Attention, Input, TimeDistributed
from tensorflow.keras.models import Model

2024-12-15 21:51:16.803646: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734292276.856730  147381 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734292276.872919  147381 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-15 21:51:16.997291: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def intialize_model(VOCAB_SIZE,EMBEDDING_DIM,MAX_LEN,NUM_CLASSES):
    """Build and compile the BiLSTM encoder / LSTM decoder tagger with attention.

    The graph is: token ids -> embedding -> bidirectional LSTM encoder ->
    LSTM decoder (initialised with the encoder's final states) -> attention
    of decoder outputs over encoder outputs -> per-timestep softmax.

    NOTE: the function name keeps its original spelling ("intialize") because
    other cells call it by that name.

    Args:
        VOCAB_SIZE: Number of rows in the embedding table.
        EMBEDDING_DIM: Size of each embedding vector.
        MAX_LEN: Fixed length of every input id sequence.
        NUM_CLASSES: Number of output classes predicted per timestep.

    Returns:
        A compiled ``tf.keras`` ``Model`` whose output has one softmax
        distribution over ``NUM_CLASSES`` for each of the ``MAX_LEN`` steps.
        The model summary is printed as a side effect.
    """
    l2_penalty = 0.01
    drop_rate = 0.2

    input_seq = Input(shape=(MAX_LEN,), dtype='int32', name="Input_Sequence")

    # Embedding followed by dropout. `input_length` is intentionally not
    # passed: it is deprecated in Keras 3 (it only triggers a warning) and the
    # sequence length is already fixed by the Input layer above.
    embedding = Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM, name="Embedding_Layer")(input_seq)
    embedding = tf.keras.layers.Dropout(drop_rate)(embedding)

    # Bidirectional encoder; return_state=True also yields the final hidden
    # and cell state of each direction.
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = Bidirectional(
        LSTM(128, return_sequences=True, return_state=True,
             dropout=drop_rate, recurrent_dropout=drop_rate,
             kernel_regularizer=tf.keras.regularizers.l2(l2_penalty),
             name="Encoder_LSTM"),
        name="Bidirectional_LSTM"
    )(embedding)

    # Forward and backward states (128 each) are concatenated to match the
    # 256-unit decoder they initialise.
    state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
    state_c = tf.keras.layers.Concatenate()([forward_c, backward_c])

    # The decoder reads the encoder output sequence and starts from the
    # encoder's final states.
    decoder_lstm = LSTM(256, return_sequences=True,
                       dropout=drop_rate, recurrent_dropout=drop_rate,
                       kernel_regularizer=tf.keras.regularizers.l2(l2_penalty),
                       name="Decoder_LSTM")
    decoder_outputs = decoder_lstm(encoder_outputs, initial_state=[state_h, state_c])

    # Attention with the decoder outputs as query and encoder outputs as value.
    attention = Attention(name="Attention_Layer")([decoder_outputs, encoder_outputs])
    attention = tf.keras.layers.Dropout(drop_rate)(attention)

    combined = tf.keras.layers.Concatenate()([decoder_outputs, attention])

    # Batch normalisation and dropout before the classification head.
    combined = tf.keras.layers.BatchNormalization()(combined)
    combined = tf.keras.layers.Dropout(drop_rate)(combined)

    output = TimeDistributed(Dense(NUM_CLASSES,
                                 activation="softmax",
                                 kernel_regularizer=tf.keras.regularizers.l2(l2_penalty)),
                           name="Output_Layer")(combined)

    model = Model(inputs=input_seq, outputs=output, name="Encoder_Decoder_NER")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                 loss="sparse_categorical_crossentropy",
                 metrics=["accuracy"])

    model.summary()

    return model

In [4]:
def load_model_from_checkpoint(checkpoint_path, VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN, NUM_CLASSES):
    """Rebuild the tagger architecture and restore its weights from a checkpoint.

    Args:
        checkpoint_path: Path of the weights file passed to ``load_weights``.
        VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN, NUM_CLASSES: Forwarded unchanged to
            ``intialize_model``.

    Returns:
        The model with the checkpoint weights loaded, compiled with Adam
        (learning rate 0.001), sparse categorical cross-entropy and accuracy.
    """
    restored = intialize_model(VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN, NUM_CLASSES)
    restored.load_weights(checkpoint_path)

    # Compile again after the weights are in place, with a fresh optimizer.
    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    restored.compile(
        optimizer=adam,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return restored

In [5]:
# Restore the trained tagger; the four sizes are passed through to the
# architecture builder by load_model_from_checkpoint.
model = load_model_from_checkpoint(
    checkpoint_path="checkpoints/model_2_01-0.01.weights.h5",
    VOCAB_SIZE=30522,
    EMBEDDING_DIM=128,
    MAX_LEN=30,
    NUM_CLASSES=5,
)

I0000 00:00:1734292281.445239  147381 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4273 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


  saveable.load_own_variables(weights_store.get(inner_path))


In [6]:
# Tokenizer used by get_prediction to turn words into input ids.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


In [7]:
# Load the entity tag names. The stored array is a plain unicode string array
# (dtype '<U12'), so pickle support is not needed; keeping it disabled prevents
# arbitrary code execution if the .npy file is ever replaced or tampered with.
entity_labels = np.load('entity_labels2.npy', allow_pickle=False)
entity_labels

array(['B-PIZZAORDER', 'I-PIZZAORDER', 'B-DRINKORDER', 'I-DRINKORDER'],
      dtype='<U12')

In [8]:
# Tag name -> class id. Entity tags get ids 1..N in file order; id 0 is shared
# by the two spellings of the "outside" tag: '0' (digit) and 'O' (letter).
entities_id = {
    tag.item(): class_id
    for class_id, tag in enumerate(entity_labels, start=1)
}
entities_id.update({'0': 0, 'O': 0})
entities_id

{'B-PIZZAORDER': 1,
 'I-PIZZAORDER': 2,
 'B-DRINKORDER': 3,
 'I-DRINKORDER': 4,
 '0': 0,
 'O': 0}

In [9]:
# Class id -> tag name. Id 0 has two spellings in entities_id ('0' and 'O'),
# so it is skipped here (filtering on the id, not on the string key) and then
# mapped explicitly to the canonical 'O'.
reversed_entities_id = {v: k for k, v in entities_id.items() if v != 0}
reversed_entities_id[0] = 'O'

In [10]:
def _first_subtoken_positions(words, encoded_input, max_length):
    """Return, per word, the index of its first sub-token in the encoded sequence.

    A word gets ``None`` when it produced no tokens or was cut off by
    truncation.
    """
    # Fast tokenizers expose the token -> word mapping directly.
    if getattr(encoded_input, "is_fast", False):
        positions = [None] * len(words)
        for token_idx, word_idx in enumerate(encoded_input.word_ids()):
            if word_idx is not None and positions[word_idx] is None:
                positions[word_idx] = token_idx
        return positions

    # Slow tokenizers: rebuild the mapping by counting each word's pieces.
    # Assumes the BERT layout [CLS] tokens... [SEP] [PAD]..., i.e. content
    # occupies slots 1 .. max_length - 2 after truncation.
    positions = []
    cursor = 1
    last_content_slot = max_length - 2
    for word in words:
        n_pieces = len(tokenizer.tokenize(word))
        if n_pieces and cursor <= last_content_slot:
            positions.append(cursor)
        else:
            positions.append(None)
        cursor += n_pieces
    return positions


def get_prediction(sentence, max_length=30):
    """Predict one entity tag for every whitespace-separated word of ``sentence``.

    Each word is labelled with the prediction made at its first sub-token, so
    words that the tokenizer splits into several pieces no longer shift the
    labels of the words that follow.
    NOTE(review): this assumes training labels were aligned to first
    sub-tokens as well -- confirm against the training notebook.

    Args:
        sentence: Raw input text; it is split on whitespace.
        max_length: Padded/truncated sequence length fed to the model. It must
            match the MAX_LEN the model was built with (30 in this notebook).

    Returns:
        A list with exactly one tag string per word. Words that fall outside
        the truncated window (or yield no tokens) are tagged 'O'. An empty or
        whitespace-only sentence returns an empty list.
    """
    words = sentence.split()
    if not words:
        return []

    encoded_input = tokenizer(words,
                          truncation=True,
                          padding="max_length",
                          max_length=max_length,
                          is_split_into_words=True)

    # Batch of one sequence.
    input_ids = np.array([encoded_input["input_ids"]])

    # verbose=0: no progress bar for a single-sentence prediction.
    raw_predictions = model.predict(input_ids, verbose=0)

    # Most likely class per token for the only sequence in the batch.
    token_label_ids = np.argmax(raw_predictions, axis=-1)[0]

    positions = _first_subtoken_positions(words, encoded_input, max_length)
    return [
        reversed_entities_id[int(token_label_ids[pos])] if pos is not None else "O"
        for pos in positions
    ]

In [14]:
# Interactive demo: read a sentence and print the predicted tag next to each word.
sentence = input("Enter a sentence: ")
preds = get_prediction(sentence)

for position, word in enumerate(sentence.split()):
    if position >= len(preds):
        # No prediction available for the remaining words.
        break
    label = preds[position]
    print(f"{word}--> {label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
i--> O
would--> O
like--> O
to--> B-PIZZAORDER
have--> I-PIZZAORDER
one--> I-PIZZAORDER
pie--> I-PIZZAORDER
along--> I-PIZZAORDER
with--> I-PIZZAORDER
ham--> I-PIZZAORDER
and--> I-PIZZAORDER
olives--> I-PIZZAORDER
without--> I-PIZZAORDER
pepperoni--> O
