In [2]:
import tensorflow as tf
from transformers import BertTokenizer
import numpy as np
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Attention, Input, TimeDistributed
from tensorflow.keras.models import Model

In [4]:
# from tensorflow.keras.utils import get_custom_objects

# # Add any custom objects if necessary
# custom_objects = get_custom_objects()

# model = tf.keras.models.load_model('encoder_decoder_with_positions.keras', custom_objects=custom_objects)
# model.summary()

I0000 00:00:1734261934.782070   64392 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4273 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [3]:
def intialize_model(VOCAB_SIZE,EMBEDDING_DIM,MAX_LEN,NUM_CLASSES):
    input_seq = Input(shape=(MAX_LEN,), dtype='int32', name="Input_Sequence")

    # Add dropout to embedding layer
    embedding = Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM, input_length=MAX_LEN, name="Embedding_Layer")(input_seq)
    embedding = tf.keras.layers.Dropout(0.2)(embedding)

    # Add recurrent dropout to LSTM
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = Bidirectional(
        LSTM(128, return_sequences=True, return_state=True, 
             dropout=0.2, recurrent_dropout=0.2,
             kernel_regularizer=tf.keras.regularizers.l2(0.01),
             name="Encoder_LSTM"),
        name="Bidirectional_LSTM"
    )(embedding)

    state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
    state_c = tf.keras.layers.Concatenate()([forward_c, backward_c])

    # Add dropout and regularization to decoder LSTM
    decoder_lstm = LSTM(256, return_sequences=True,
                       dropout=0.2, recurrent_dropout=0.2,
                       kernel_regularizer=tf.keras.regularizers.l2(0.01),
                       name="Decoder_LSTM")
    decoder_outputs = decoder_lstm(encoder_outputs, initial_state=[state_h, state_c])

    attention = Attention(name="Attention_Layer")([decoder_outputs, encoder_outputs])
    
    # Add dropout after attention
    attention = tf.keras.layers.Dropout(0.2)(attention)

    combined = tf.keras.layers.Concatenate()([decoder_outputs, attention])

    # Add batch normalization and dropout before final layer
    combined = tf.keras.layers.BatchNormalization()(combined)
    combined = tf.keras.layers.Dropout(0.2)(combined)

    output = TimeDistributed(Dense(NUM_CLASSES, 
                                 activation="softmax",
                                 kernel_regularizer=tf.keras.regularizers.l2(0.01)),
                           name="Output_Layer")(combined)

    model = Model(inputs=input_seq, outputs=output, name="Encoder_Decoder_NER")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                 loss="sparse_categorical_crossentropy",
                 metrics=["accuracy"])

    model.summary()

    return model

In [4]:
def load_model_from_checkpoint(checkpoint_path, VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN, NUM_CLASSES):
    # Initialize the model architecture
    model = intialize_model(VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN, NUM_CLASSES)
    
    # Load weights from checkpoint
    model.load_weights(checkpoint_path)
    
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                 loss="sparse_categorical_crossentropy",
                 metrics=["accuracy"])
    
    return model

In [5]:
model = load_model_from_checkpoint("model_checkpoint1.weights.h5", 30522, 128, 30, 33)

I0000 00:00:1734292745.378642  149374 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4273 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


  saveable.load_own_variables(weights_store.get(inner_path))


In [6]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


In [7]:
entity_labels=np.load('entity_labels.npy',allow_pickle=True)
entity_labels

array(['B-QUANTITY', 'I-QUANTITY', 'B-STYLE', 'I-STYLE', 'B-SIZE',
       'I-SIZE', 'B-TOPPING', 'I-TOPPING', 'B-NOT_VOLUME', 'I-NOT_VOLUME',
       'B-NOT_TOPPING', 'I-NOT_TOPPING', 'B-NOT_SIZE', 'I-NOT_SIZE',
       'B-NUMBER', 'I-NUMBER', 'B-NOT_STYLE', 'I-NOT_STYLE', 'B-VOLUME',
       'I-VOLUME', 'B-CONTAINERTYPE', 'I-CONTAINERTYPE',
       'B-NOT_CONTAINERTYPE', 'I-NOT_CONTAINERTYPE', 'B-NOT_NUMBER',
       'I-NOT_NUMBER', 'B-NOT_DRINKTYPE', 'I-NOT_DRINKTYPE',
       'B-DRINKTYPE', 'I-DRINKTYPE', 'B-NOT_QUANTITY', 'I-NOT_QUANTITY'],
      dtype='<U19')

In [9]:
entities_id = {e.item(): i+1 for i, e in enumerate(entity_labels)}
entities_id['0']=0
entities_id['O']=0
entities_id

{'B-QUANTITY': 1,
 'I-QUANTITY': 2,
 'B-STYLE': 3,
 'I-STYLE': 4,
 'B-SIZE': 5,
 'I-SIZE': 6,
 'B-TOPPING': 7,
 'I-TOPPING': 8,
 'B-NOT_VOLUME': 9,
 'I-NOT_VOLUME': 10,
 'B-NOT_TOPPING': 11,
 'I-NOT_TOPPING': 12,
 'B-NOT_SIZE': 13,
 'I-NOT_SIZE': 14,
 'B-NUMBER': 15,
 'I-NUMBER': 16,
 'B-NOT_STYLE': 17,
 'I-NOT_STYLE': 18,
 'B-VOLUME': 19,
 'I-VOLUME': 20,
 'B-CONTAINERTYPE': 21,
 'I-CONTAINERTYPE': 22,
 'B-NOT_CONTAINERTYPE': 23,
 'I-NOT_CONTAINERTYPE': 24,
 'B-NOT_NUMBER': 25,
 'I-NOT_NUMBER': 26,
 'B-NOT_DRINKTYPE': 27,
 'I-NOT_DRINKTYPE': 28,
 'B-DRINKTYPE': 29,
 'I-DRINKTYPE': 30,
 'B-NOT_QUANTITY': 31,
 'I-NOT_QUANTITY': 32,
 '0': 0,
 'O': 0}

In [10]:
reversed_entities_id = {v: k for k, v in entities_id.items() if k != 0}
reversed_entities_id[0]='O'

In [11]:
def get_prediction(sentence):
    sentence = sentence.split()
    encoded_input = tokenizer(sentence, 
                          truncation=True, 
                          padding="max_length", 
                          max_length=30, 
                          is_split_into_words=True)

    input_ids = np.array([encoded_input["input_ids"]]) 

    raw_predictions = model.predict(input_ids)

    predicted_label_indices = tf.argmax(raw_predictions, axis=-1).numpy()  # Shape: (batch_size, seq_len)

    predicted_label_indices = predicted_label_indices[0]
    output=predicted_label_indices[1:len(sentence)+1]
    output_entities=[reversed_entities_id[i] for i in output]
    return output_entities

In [12]:
sentence = input("Enter a sentence: ")

preds=get_prediction(sentence)
for word, label in zip(sentence.split(), preds):
    print(f"{word}--> {label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
i--> O
want--> O
a--> B-NUMBER
pizza--> O
with--> O
pesto--> B-TOPPING
and--> B-TOPPING
mushrooms--> O
but--> B-TOPPING
no--> O
pineapple--> O
