In [90]:
import pandas as pd

In [91]:
df = pd.read_csv('test_set.csv')

In [92]:
df.head()

Unnamed: 0,id,order
0,0,let me prefer a extra large sauteed spinach an...
1,1,could you give me a exta large pizza with roas...
2,2,can you handle this order a pizza make it etra...
3,3,let me try 5 lnch pizzas with garlic onions an...
4,4,i need you to order me a pie in med size along...


In [93]:
from transformers import BertTokenizer

In [94]:
import tensorflow as tf
from transformers import BertTokenizer
import numpy as np
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Attention, Input, TimeDistributed
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

In [95]:
from transformers import BertTokenizer, TFBertModel


In [96]:
def create_encoder_decoder_model(bert_model, hidden_dim, num_labels_pizza, num_labels_drinks, max_length):
    # Define BERT input layers
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    attention_mask_pizza = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="attention_mask_pizza")
    attention_mask_drinks = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name="attention_mask_drinks")

    for layer in bert_model.layers:
        layer.trainable = False
    # BERT output
    bert_output = bert_model(input_ids=input_ids, attention_mask=attention_mask_pizza)
    bert_embeddings = bert_output.last_hidden_state  # Shape: (batch_size, seq_len, hidden_dim)

    # Add Gaussian noise layer after BERT embeddings
    x = tf.keras.layers.GaussianNoise(0.1)(bert_embeddings)

    # Add Bidirectional LSTM layers
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(hidden_dim, return_sequences=True))(x)
    x = tf.keras.layers.LSTM(hidden_dim, return_sequences=True)(x)

    # Add dropout and regularization to decoder LSTM
    decoder_lstm = tf.keras.layers.LSTM(256, return_sequences=True,
                       dropout=0.2, recurrent_dropout=0.2,
                       kernel_regularizer=tf.keras.regularizers.l2(0.01),
                       name="Decoder_LSTM")
    decoder_outputs = decoder_lstm(x)

    # Project both tensors to the same dimension (256)
    x_projected = tf.keras.layers.Dense(256)(x)
    attention_pizza = tf.keras.layers.Attention(name="Attention_Layer_Pizza")([decoder_outputs, x_projected])
    attention_drinks = tf.keras.layers.Attention(name="Attention_Layer_Drinks")([decoder_outputs, x_projected])
    
    # Add dropout after attention
    attention_pizza = tf.keras.layers.Dropout(0.2)(attention_pizza)
    attention_drinks = tf.keras.layers.Dropout(0.2)(attention_drinks)

    combined_pizza = tf.keras.layers.Concatenate()([decoder_outputs, attention_pizza])
    combined_drinks = tf.keras.layers.Concatenate()([decoder_outputs, attention_drinks])

    # Add batch normalization and dropout before final layers
    combined_pizza = tf.keras.layers.BatchNormalization()(combined_pizza)
    combined_pizza = tf.keras.layers.Dropout(0.2)(combined_pizza)
    combined_drinks = tf.keras.layers.BatchNormalization()(combined_drinks)
    combined_drinks = tf.keras.layers.Dropout(0.2)(combined_drinks)

    # Pizza Output
    pizza_output = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(num_labels_pizza, 
                              activation="softmax", 
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        name="Pizza_Output_Layer"
    )(combined_pizza)

    # Drinks Output
    drinks_output = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(num_labels_drinks, 
                              activation="softmax", 
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        name="Drinks_Output_Layer"
    )(combined_drinks)

    # Define the model with two outputs
    model = tf.keras.Model(inputs=[input_ids, attention_mask_pizza, attention_mask_drinks], outputs=[pizza_output, drinks_output], name="Hybrid_Encoder_Decoder_NER")

    return model

In [97]:
bert_model = TFBertModel.from_pretrained("bert-base-uncased")

model = create_encoder_decoder_model(bert_model, 64, num_labels_pizza=21,num_labels_drinks=21, max_length=30)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


checkpoint_path = "./model_checkpoint.h5"
model.load_weights(checkpoint_path)

model.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Exception encountered when calling layer 'tf_bert_model' (type TFBertModel).

Data of type <class 'keras.src.backend.common.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for attention_mask.

Call arguments received by layer 'tf_bert_model' (type TFBertModel):
  • input_ids=<KerasTensor shape=(None, 30), dtype=int32, sparse=False, name=input_ids>
  • attention_mask=<KerasTensor shape=(None, 30), dtype=int32, sparse=False, name=attention_mask_pizza>
  • token_type_ids=None
  • position_ids=None
  • head_mask=None
  • inputs_embeds=None
  • encoder_hidden_states=None
  • encoder_attention_mask=None
  • past_key_values=None
  • use_cache=None
  • output_attentions=None
  • output_hidden_states=None
  • return_dict=None
  • training=False

In [None]:
model1 = load_model('models/shared_encoder_decoder02_.keras')

In [7]:
model2 = load_model('models/shared_encoder_decoder2.keras')

In [8]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [9]:
entity_labels_drink = np.load('data/processed/entity_labels_drink.npy')
entity_labels_pizza = np.load('data/processed/entity_labels_pizza.npy')
enitity_labels_second = np.load('data/processed/entity_labels_second.npy')

In [10]:
entities_id_drink = {i+1: str(e) for i, e in enumerate(entity_labels_drink)}
entities_id_drink[0] = 'O'
entities_id_drink

{1: 'B-SIZE',
 2: 'I-SIZE',
 3: 'B-VOLUME',
 4: 'I-VOLUME',
 5: 'B-NOT_DRINKTYPE',
 6: 'I-NOT_DRINKTYPE',
 7: 'B-NOT_NUMBER',
 8: 'I-NOT_NUMBER',
 9: 'B-NOT_CONTAINERTYPE',
 10: 'I-NOT_CONTAINERTYPE',
 11: 'B-CONTAINERTYPE',
 12: 'I-CONTAINERTYPE',
 13: 'B-NOT_VOLUME',
 14: 'I-NOT_VOLUME',
 15: 'B-NUMBER',
 16: 'I-NUMBER',
 17: 'B-DRINKTYPE',
 18: 'I-DRINKTYPE',
 19: 'B-NOT_SIZE',
 20: 'I-NOT_SIZE',
 0: 'O'}

In [11]:
entities_id_pizza = {i+1: str(e) for i, e in enumerate(entity_labels_pizza)}
entities_id_pizza[0] = 'O'
entities_id_pizza

{1: 'B-SIZE',
 2: 'I-SIZE',
 3: 'B-NOT_STYLE',
 4: 'I-NOT_STYLE',
 5: 'B-STYLE',
 6: 'I-STYLE',
 7: 'B-NOT_NUMBER',
 8: 'I-NOT_NUMBER',
 9: 'B-QUANTITY',
 10: 'I-QUANTITY',
 11: 'B-NOT_TOPPING',
 12: 'I-NOT_TOPPING',
 13: 'B-NUMBER',
 14: 'I-NUMBER',
 15: 'B-TOPPING',
 16: 'I-TOPPING',
 17: 'B-NOT_QUANTITY',
 18: 'I-NOT_QUANTITY',
 19: 'B-NOT_SIZE',
 20: 'I-NOT_SIZE',
 0: 'O'}

In [12]:
enitities_id_second = {i+1: str(e)
                       for i, e in enumerate(enitity_labels_second)}
enitities_id_second[0] = 'O'
enitities_id_second

{1: 'B-PIZZAORDER',
 2: 'I-PIZZAORDER',
 3: 'B-DRINKORDER',
 4: 'I-DRINKORDER',
 0: 'O'}

In [13]:
def get_TOP(words, predictions1, predictions2, predictions3):
    words = words.split()
    result = "(ORDER "
    i = 0
    not_flag = False
    c = min(len(words), len(predictions1), len(
        predictions2), len(predictions3))
    while i < c:
        if i < c and predictions3[i].startswith('B-'):
            tag = predictions3[i][2:]
            result += f"({tag} "
            while i < c and (predictions3[i].startswith('B-') or predictions3[i].startswith('I-')):
                if predictions1[i].startswith('B-'):
                    if (predictions1[i].startswith('B-NOT_')):
                        result += f"(NOT ({predictions1[i][6:]} {words[i]} ) "
                        not_flag = True
                    else:
                        result += f"({predictions1[i][2:]} {words[i]} ) "
                elif predictions2[i].startswith('B-'):
                    # Handle multi-word drink types
                    if i+1 < c and predictions2[i+1].startswith('I-'):
                        result += f"({predictions2[i][2:]} {words[i]} {words[i+1]} ) "
                        i += 1
                    else:
                        result += f"({predictions2[i][2:]} {words[i]} ) "
                else:
                    result += f"{words[i]} "
                i += 1
                if i < c and predictions3[i] == 'O':
                    break
            if not_flag:
                result = result.rstrip() + " ) ) "
                not_flag = False
            else:
                result = result.rstrip() + " ) "
        else:
            result += f"{words[i]} "
            i += 1
    result = result.rstrip() + " )"
    return result

In [15]:
def get_prediction_entities1(processed_sentences, predictions, entities_id):
    pred_entities = []
    for i in range(predictions.shape[0]):
        sen = [entities_id[np.argmax(predictions[i][j])]
               for j in range(predictions.shape[1])]
        pred_entities.append(sen[1:len(processed_sentences)+1])
    return pred_entities

def get_prediction(sentence,model1,model2):
    print(sentence)
    sentence = sentence.split()
    encoded_input = tokenizer(sentence,
                              truncation=True,
                              padding="max_length",
                              max_length=30,
                              is_split_into_words=True)

    input_ids = np.array([encoded_input["input_ids"]])
    raw_predictions1 = np.array(model1.predict(input_ids))
    raw_predictions2 = np.array(model2.predict(input_ids))
    

    
    
    entities_pizza = np.array(get_prediction_entities1(
        sentence, raw_predictions1[0], entities_id_pizza), dtype=object)
    entities_drink = np.array(get_prediction_entities1(
        sentence, raw_predictions1[1], entities_id_drink), dtype=object)
    entities_second = np.array(get_prediction_entities1(sentence, raw_predictions2, enitities_id_second),dtype=object)
    return sentence, entities_pizza[0], entities_drink[0], entities_second[0]


# ws,p1,p2,p3=get_prediction('I want to order one pizza with mashroom', model1, model2)
# get_TOP(ws,p1,p2,p3)

In [65]:
df.head()

Unnamed: 0,id,order
0,0,let me prefer a extra large sauteed spinach an...
1,1,could you give me a exta large pizza with roas...
2,2,can you handle this order a pizza make it etra...
3,3,let me try 5 lnch pizzas with garlic onions an...
4,4,i need you to order me a pie in med size along...


In [39]:
# results = []

# for order in df['order']:
#     ws, p1, p2, p3 = get_prediction(order, model1, model2)
#     top_result = get_TOP(ws, p1, p2, p3)
#     results.append(top_result)

# df['TOP'] = results
# df.head()

In [56]:
from spellchecker import SpellChecker

spell = SpellChecker()


def spell_check(sentence):
    words = sentence.split()
    corrected_words = [spell.correction(word) or word for word in words]
    return ' '.join(corrected_words)

In [58]:
df['order'] = df['order'].apply(lambda x: spell_check(x))

In [59]:
input_ids = []

for sentence in df['order']:
    encoding = tokenizer(sentence,
                            truncation=True,
                            padding='max_length',
                            max_length=30,
                            return_tensors="tf")  # Use "tf" to return TensorFlow tensors

    input_ids.append(encoding["input_ids"])  # TensorFlow tensor
    
input_ids = tf.concat(input_ids, axis=0)

In [60]:
predictions1 = np.array(model1.predict(input_ids))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step


In [61]:
predictions2 = np.array(model2.predict(input_ids))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step


In [62]:
def get_prediction_entities(processed_sentences, predictions, entities_id):
    pred_entities = []
    for i in range(predictions.shape[0]):
        sen = [entities_id[np.argmax(predictions[i][j])]
               for j in range(predictions.shape[1])]
        pred_entities.append(sen[1:len(processed_sentences[i].split())+1])
    return pred_entities

In [63]:
entities_pizza = np.array(get_prediction_entities(
    df['order'], predictions1[0], entities_id_pizza), dtype=object)
entities_drink = np.array(get_prediction_entities(
    df['order'], predictions1[1], entities_id_drink), dtype=object)
entities_second = np.array(get_prediction_entities(df['order'], predictions2, enitities_id_second), dtype=object)

In [64]:
def process_second_entities(entities_second):
    current_entity = 'O'
    processed_entities = []
    for entity in entities_second:
        if entity == 'O' and current_entity == 'O':
            processed_entities.append('O')
        elif entity == 'O' and current_entity != 'O':
            if current_entity.startswith('B-'):
                current_entity = 'I-'+current_entity[2:]
            processed_entities.append(current_entity)
        elif entity.startswith('I-') and current_entity == 'O':
            current_entity = 'B-'+entity[2:]
            processed_entities.append(current_entity)
        else:
            current_entity = entity
            processed_entities.append(current_entity)

    return processed_entities

In [65]:
processed_entities_second = [
    process_second_entities(es) for es in entities_second]

In [66]:
df['order'][0]

'let me prefer a extra large sauted spinach and tomatoes pizza without any sausage'

In [82]:
spell_check('gtillid')

'grilled'

In [72]:
df.head(10)

Unnamed: 0,id,order
0,0,let me prefer a extra large sauted spinach and...
1,1,could you give me a extra large pizza with roa...
2,2,can you handle this order a pizza make it extr...
3,3,let me try 5 lunch pizzas with garlic onions a...
4,4,i need you to order me a pie in me size along ...
5,5,i'd ry a large-size pie with gilled chicken an...
6,6,let's put i for a me pet sauce and ham pizza w...
7,7,fetch me three extra large chard pizza and thr...
8,8,i'm go for a total of one me shrimp pizza and ...
9,9,i prefer to have eleven pizzas with a lot f me...


In [83]:
tops = [get_TOP(df['order'][i], entities_pizza[i], entities_drink[i],
                processed_entities_second[i]) for i in range(len(df['order']))]

In [84]:
tops

['(ORDER let me (PIZZAORDER prefer (NUMBER a ) (SIZE extra ) large sauted spinach (TOPPING and ) tomatoes (TOPPING pizza ) without any sausage ) )',
 '(ORDER (PIZZAORDER could you give me (NUMBER a ) (SIZE extra ) large pizza with (TOPPING roasted ) (TOPPING red ) peppers and (TOPPING more ) (TOPPING cheese ) but no (NOT (TOPPING sausage ) ) ) )',
 "(ORDER (PIZZAORDER can you handle this order (NUMBER a ) pizza make it (SIZE extra ) large please i want (STYLE new ) york style (TOPPING tomato ) (TOPPING sauce ) but absolutely no (NOT (TOPPING onions ) that's important that's all ) ) )",
 '(ORDER let me (PIZZAORDER try (NUMBER 5 ) (SIZE lunch ) pizzas with (TOPPING garlic ) (TOPPING onions ) and (TOPPING pesto ) ) )',
 '(ORDER i need (PIZZAORDER you to order me (NUMBER a ) pie in me (SIZE size ) along with (TOPPING pesto ) (TOPPING pan ) and (TOPPING ham ) ) )',
 "(ORDER i'd ry (PIZZAORDER a large-size pie (SIZE with ) gilled chicken and please add (TOPPING some ) a tiny bit of cheese (S

In [88]:
dfff = pd.DataFrame({'order': df['id'], 'output': tops})
dfff.head()

Unnamed: 0,order,output
0,0,(ORDER let me (PIZZAORDER prefer (NUMBER a ) (...
1,1,(ORDER (PIZZAORDER could you give me (NUMBER a...
2,2,(ORDER (PIZZAORDER can you handle this order (...
3,3,(ORDER let me (PIZZAORDER try (NUMBER 5 ) (SIZ...
4,4,(ORDER i need (PIZZAORDER you to order me (NUM...


In [89]:
dfff.to_csv('team_6_output3.csv', index=False)

In [85]:
dfff=pd.DataFrame()
dfff['id'] = df['id']
dfff['tops'] = tops
dfff['order'] = df['order']

In [30]:
dfff.to_csv('team_6_output3.csv',index=False)

In [35]:
dfff.head(20)

dfff.to_csv('team_6_output_dummy2.csv', index=False)