# In [3]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Bidirectional
from tensorflow.keras.optimizers import Adam
import numpy as np
import random
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import re

# Preprocessing function
def preprocess_text(text):
    """Normalise *text* for matching.

    The text is lowercased, every character that is neither a word
    character nor whitespace is removed, and surrounding whitespace is
    trimmed.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)  # Remove punctuation
    return without_punctuation.strip()

# Load intent configuration (the list stored under the top-level 'intents' key)
with open('/Users/yash/MagilHub/Project 1/OfflineChatBot/OfflineChatBotCode/intent1.json', 'r', encoding='utf-8') as f:
    intents_config = json.load(f)['intents']

# Load menu data (the list stored under the top-level 'menu' key)
with open('/Users/yash/MagilHub/Project 1/OfflineChatBot/OfflineChatBotCode/mockMenu 1.json', 'r', encoding='utf-8') as f:
    menu_data = json.load(f)['menu']

# Prepare the corpus for BM25 by combining 'itemName' and 'description' fields.
# A missing or empty description contributes nothing to the document text.
corpus = [
    item['itemName'] + " " + (item.get('description') or "")
    for item in menu_data
]

# Queries are normalised with preprocess_text() before scoring, so the indexed
# documents must be normalised the same way; otherwise tokens such as
# "Chicken" or "spicy," in the corpus never match the query tokens.
bm25 = BM25Okapi([preprocess_text(doc).split() for doc in corpus])

# Data preprocessing for intent classification
intents = []               # label for each training sentence (parallel to text_input)
unique_intents = []        # intent names in first-seen order
text_input = []            # preprocessed training sentences
response_for_intent = {}   # intent name -> list of configured responses
query_for_intent = {}      # intent name -> 'query' value ('' when absent)

for intent in intents_config:
    intent_name = intent['intent']
    if intent_name not in unique_intents:
        unique_intents.append(intent_name)

    # Every example sentence becomes one (text, label) training pair.
    cleaned_examples = [preprocess_text(example) for example in intent['text']]
    text_input.extend(cleaned_examples)
    intents.extend([intent_name] * len(cleaned_examples))

    # Responses accumulate across repeated entries for the same intent name.
    response_for_intent.setdefault(intent_name, []).extend(intent['responses'])

    # The first entry seen for an intent name decides its query value.
    query_for_intent.setdefault(intent_name, intent.get('query', ''))

# Build the vocabulary from the training sentences. No characters are filtered
# here, and unknown words map to the '<unk>' token.
tokenizer = Tokenizer(filters='', oov_token='<unk>')
tokenizer.fit_on_texts(text_input)
sequences = tokenizer.texts_to_sequences(text_input)
padded_sequences = pad_sequences(sequences, padding='pre')

# Assign each intent name a class index in order of first appearance.
intent_to_index = {}
for intent in intents:
    if intent not in intent_to_index:
        intent_to_index[intent] = len(intent_to_index)
categorical_target = [intent_to_index[intent] for intent in intents]

num_classes = len(intent_to_index)
index_to_intent = {class_id: name for name, class_id in intent_to_index.items()}

# One-hot encode the class indices as int32.
categorical_vec = tf.keras.utils.to_categorical(
    categorical_target, num_classes=num_classes
).astype('int32')

# Model parameters
epochs = 100
embed_dim = 300
lstm_num = 50
output_dim = categorical_vec.shape[1]  # one output unit per intent class

# Define model: embedding -> bidirectional LSTM -> dense -> dropout -> softmax
vocab_size = len(tokenizer.word_index) + 1  # +1 because word indices start at 1
model = Sequential()
model.add(Embedding(vocab_size, embed_dim))
model.add(Bidirectional(LSTM(lstm_num, dropout=0.1)))
model.add(Dense(lstm_num, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(Dense(output_dim, activation='softmax'))

# Train on the padded sentences against the one-hot intent targets.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(padded_sequences, categorical_vec, epochs=epochs, verbose=1)

# Predict intent with FuzzyWuzzy integration
def predict_intent(sentence, threshold=90):
    """Predict the intent name for *sentence*.

    The trained classifier produces a first guess. The sentence is then
    compared with every configured example phrase using ``fuzz.ratio``; if
    the best phrase scores at least *threshold*, that phrase's intent
    overrides the classifier's guess.

    Args:
        sentence: Raw user input.
        threshold: Minimum fuzzy score (0-100) needed to override the model.

    Returns:
        The chosen intent name.
    """
    preprocessed_sentence = preprocess_text(sentence)

    # Map words to vocabulary ids; unknown words use the '<unk>' id.
    oov_id = tokenizer.word_index.get('<unk>', 0)
    token_ids = [
        tokenizer.word_index.get(word, oov_id)
        for word in preprocessed_sentence.split()
    ]
    if not token_ids:
        # Empty or punctuation-only input would otherwise produce a
        # zero-length float tensor; feed a single unknown token instead.
        token_ids = [oov_id]

    # Shape (1, sequence_length), explicitly integer for the embedding lookup.
    sent_tokens = tf.constant([token_ids], dtype=tf.int32)
    pred = model(sent_tokens)
    pred_class = np.argmax(pred.numpy(), axis=1)
    intent_name = index_to_intent[int(pred_class[0])]

    # Use FuzzyWuzzy to verify or adjust the intent. Every configured phrase is
    # scored, including those of repeated intent names, which matches the data
    # the classifier is trained on. On ties the earliest phrase wins.
    best_intent = None
    highest_score = 0
    for intent in intents_config:
        for text in intent['text']:
            score = fuzz.ratio(preprocessed_sentence, preprocess_text(text))
            if score > highest_score:
                highest_score = score
                best_intent = intent['intent']

    # Only override when a phrase actually matched well enough.
    if best_intent is not None and highest_score >= threshold:
        intent_name = best_intent

    return intent_name

# BM25 Document Retrieval with FuzzyWuzzy integration
def retrieve_document(query, top_n=1):
    """Return the menu item that best matches *query*, or ``None``.

    BM25 ranks the menu first. When the best BM25 score is below 1, the
    query is also fuzzy-matched against the item names, and a match scoring
    at least 80 replaces the BM25 result.

    Args:
        query: Raw user query.
        top_n: Number of BM25 candidates to collect; only the best one is
            returned. Values larger than the menu are clamped.

    Returns:
        The matching menu item dict, or ``None`` when nothing matches.
    """
    preprocessed_query = preprocess_text(query)

    # Use BM25 to get initial scores
    doc_scores = bm25.get_scores(preprocessed_query.split())
    if len(doc_scores) == 0:
        return None

    best_score = float(max(doc_scores))
    retrieved_items = []
    # A best score of 0 means no query term occurs in any document, so the
    # "top" entry would just be an arbitrary menu item; do not return it.
    if best_score > 0:
        k = max(0, min(top_n, len(doc_scores)))  # topk raises if k > size
        top_indices = torch.topk(torch.tensor(doc_scores), k=k).indices.tolist()
        retrieved_items = [menu_data[idx] for idx in top_indices]

    # If BM25 confidence is low, use FuzzyWuzzy to find better matches
    if best_score < 1:  # Threshold can be adjusted
        item_names = [preprocess_text(item['itemName']) for item in menu_data]
        best_match = process.extractOne(
            preprocessed_query, item_names, scorer=fuzz.token_set_ratio
        )
        if best_match and best_match[1] >= 80:
            # item_names is parallel to menu_data, so the first position of
            # the matched name identifies the item.
            retrieved_items = [menu_data[item_names.index(best_match[0])]]

    return retrieved_items[0] if retrieved_items else None

# Function Definitions (unchanged)
def get_spicy_dishes_for_fever():
    """Return the names of menu items whose description mentions 'spicy'."""
    spicy_names = []
    for item in menu_data:
        # A missing or null description is treated as empty text.
        description = item.get('description', '') or ''
        if 'spicy' in description.lower():
            spicy_names.append(item['itemName'])
    return spicy_names

def get_kids_friendly_dishes():
    """Return the names of menu items whose 'kidsFriendly' field equals True."""
    kid_friendly_names = []
    for item in menu_data:
        flag = item.get('kidsFriendly')
        # Equality comparison is kept as-is so the accepted values do not change.
        if flag == True:  # noqa: E712
            kid_friendly_names.append(item['itemName'])
    return kid_friendly_names

def get_vegan_dishes():
    """Return the names of menu items that carry a 'vegan' item filter.

    An item qualifies when it has an 'itemFilter' list containing at least
    one entry whose 'name' equals 'vegan', ignoring case.
    """
    return [
        item['itemName']
        for item in menu_data
        if 'itemFilter' in item
        and any(entry['name'].lower() == 'vegan' for entry in item['itemFilter'])
    ]

def get_nut_free_dishes():
    """Return the names of menu items that do not mention 'nuts'.

    The check is a case-insensitive substring search over the item's
    description and allergic info combined.
    """
    nut_free_names = []
    for item in menu_data:
        searchable = f"{item.get('description', '')} {item.get('allergicInfo', '')}".lower()
        if 'nuts' in searchable:
            continue
        nut_free_names.append(item['itemName'])
    return nut_free_names

def get_fish_free_dishes():
    """Return the names of menu items that do not mention 'fish'.

    The check is a case-insensitive substring search over the item's
    description and allergic info combined.
    """
    fish_free_names = []
    for item in menu_data:
        searchable = f"{item.get('description', '')} {item.get('allergicInfo', '')}".lower()
        if 'fish' in searchable:
            continue
        fish_free_names.append(item['itemName'])
    return fish_free_names

def find_min_prep_time_dish_by_subcategory(subcategory):
    """Return the name of the quickest dish in *subcategory*, or ``None``.

    Subcategories are compared case-insensitively, and items without a
    subcategory are ignored. Among the matching items, the one with the
    smallest integer 'prepTimeInMins' wins; the first such item is returned
    on ties.
    """
    def prep_minutes(item):
        return int(item['prepTimeInMins'])

    candidates = []
    for item in menu_data:
        item_subcategory = item.get('subCategory')
        if not item_subcategory:
            continue
        if item.get('subCategory', '').lower() == subcategory.lower():
            candidates.append(item)

    if not candidates:
        return None
    quickest = min(candidates, key=prep_minutes)
    return quickest['itemName']

def retrieve_dish_description(query):
    """Return the description of the dish matching *query*.

    Falls back to 'Description not available.' when no dish matches or
    the matched dish has no (or an empty) description.
    """
    item = retrieve_document(query)
    if not item:
        return 'Description not available.'
    # The description may be missing or null in the menu data.
    return item.get('description') or 'Description not available.'

def retrieve_dish_allergic_info(query):
    """Return the allergic info of the dish matching *query*.

    Falls back to 'Allergic info not available.' when no dish matches or
    the matched dish has no (or empty) allergic info.
    """
    item = retrieve_document(query)
    if not item:
        return 'Allergic info not available.'
    # 'allergicInfo' is optional in the menu data, so avoid a KeyError.
    return item.get('allergicInfo') or 'Allergic info not available.'

def retrieve_dish_price(query):
    """Return the price of the dish matching *query*.

    Falls back to 'Price not available.' when no dish matches or the
    matched dish has no price. A price of 0 is returned unchanged.
    """
    item = retrieve_document(query)
    price = item.get('price') if item else None
    if price is None or price == '':
        return 'Price not available.'
    return price

# Modified handle_intent to return natural language responses
def handle_intent(intent, query):
    """Run the handler for *intent* and phrase its result as a sentence.

    Args:
        intent: Intent name used to select a handler.
        query: The user's sentence. It is used to extract a subcategory for
            "FindDishWithLeastPrepTime" and as the search text for the
            "RetrieveDish*" intents.

    Returns:
        A ``(reply, intent)`` tuple. For an intent without a handler the
        reply is "I'm not sure how to help with that.".
    """
    switcher = {
        "GetSpicyDishesForFever": get_spicy_dishes_for_fever,
        "GetKidsFriendlyDishes": get_kids_friendly_dishes,
        "GetVeganDishes": get_vegan_dishes,
        "GetNutFreeDishes": get_nut_free_dishes,
        "GetFishFreeDishes": get_fish_free_dishes,
        "FindDishWithLeastPrepTime": find_min_prep_time_dish_by_subcategory,
        "RetrieveDishDescription": retrieve_dish_description,
        "RetrieveDishAllergicInfo": retrieve_dish_allergic_info,
        "RetrieveDishPrice": retrieve_dish_price,
    }
    # Reply templates for the handlers that look up a single dish.
    lookup_templates = {
        "RetrieveDishDescription": "The description of the dish is: {}.",
        "RetrieveDishAllergicInfo": "The allergic information for the dish is: {}.",
        "RetrieveDishPrice": "The price of the dish is {}.",
    }

    func = switcher.get(intent)
    if not func:
        return "I'm not sure how to help with that.", intent

    # Handle cases where subcategory is needed
    if intent == "FindDishWithLeastPrepTime":
        # Take the text after the last standalone word "for", in any letter
        # case. A plain substring test would also split inside words such as
        # "before" or "comfort". Trailing punctuation is dropped so that
        # "for appetizers?" still matches the "appetizers" subcategory.
        parts = re.split(r'\bfor\b', query, flags=re.IGNORECASE)
        subcategory = parts[-1].strip(" \t\r\n?.!,;:") if len(parts) > 1 else None
        if not subcategory:
            return "Please specify a subcategory, so I can help you find the dish with the least preparation time.", intent
        dish = func(subcategory)
        if dish:
            return f"The dish in the {subcategory} category that takes the least time to prepare is {dish}.", intent
        return f"Sorry, I couldn't find any dish in the {subcategory} category.", intent

    # Pass the query for functions that need it (like RetrieveDishPrice)
    if intent in lookup_templates:
        return lookup_templates[intent].format(func(query)), intent

    # For other functions like GetNutFreeDishes and GetVeganDishes
    items = func()
    if isinstance(items, list) and items:
        item_list = ', '.join(items)
        return f"Here are some options: {item_list}.", intent
    return "Sorry, I couldn't find any relevant items.", intent

# Modify the response generation to be more conversational
def response(sentence):
    """Build a conversational reply for *sentence*.

    Resolution order:
      1. If the predicted intent's configuration has a 'query' key, the
         intent is executed through ``execute_query``.
      2. Otherwise, sentences containing 'description' or 'price' are
         answered by a direct dish lookup.
      3. Otherwise the intent handler is run.
      4. If no handler exists for the intent, the closest intent name is
         suggested, or the user is asked to clarify.

    Returns:
        A ``(reply, intent_name)`` tuple.
    """
    intent_name = predict_intent(sentence)
    intent_config = next((i for i in intents_config if i['intent'] == intent_name), None)

    if intent_config and 'query' in intent_config:
        result, _ = execute_query(intent_name, sentence)
        return f"I think you're asking about {intent_name}. {result}", intent_name

    preprocessed_sentence = preprocess_text(sentence)
    if 'description' in preprocessed_sentence:
        result = retrieve_dish_description(preprocessed_sentence)
        return f"Let me tell you about this dish. {result}", intent_name
    if 'price' in preprocessed_sentence:
        result = retrieve_dish_price(preprocessed_sentence)
        return f"You might be wondering about the cost. The price is {result}.", intent_name

    # Handle specific intent cases
    result, _ = handle_intent(intent_name, sentence)

    # handle_intent always returns a non-empty reply, so testing `result`
    # alone can never reach the suggestion fallback below. Its exact
    # "no handler" reply is therefore treated as "nothing useful found".
    unhandled_reply = "I'm not sure how to help with that."
    if result and result != unhandled_reply:
        return f"It seems like you're asking about {intent_name}. {result}", intent_name

    # Use FuzzyWuzzy to suggest possible intents
    possible_intents = [config['intent'] for config in intents_config]
    best_match = process.extractOne(sentence, possible_intents, scorer=fuzz.partial_ratio)
    if best_match and best_match[1] >= 60:
        suggested_intent = best_match[0]
        return f"I'm not sure I understand. Did you mean {suggested_intent}?", suggested_intent
    return "I'm not sure I understand. Could you clarify?", intent_name

# Execute query for specific intent
def execute_query(intent_name, query):
    """Run the handler for *intent_name* if it is a configured intent.

    Args:
        intent_name: Intent name to execute.
        query: The user's sentence, forwarded to the handler.

    Returns:
        The ``(reply, intent)`` tuple from ``handle_intent``, or a fallback
        reply paired with *intent_name* when the intent is not present in
        ``query_for_intent``.
    """
    # The stored query value is not needed here; dispatch is by intent name.
    if intent_name in query_for_intent:
        return handle_intent(intent_name, query)
    return "It seems like I don't have an answer for that right now.", intent_name

# Test the system with user input.
# The loop ends on 'quit' (any letter case, surrounding spaces ignored) or
# when the input stream is closed or interrupted.
print("Note: Enter 'quit' to exit the conversation.")
while True:
    try:
        query = input('You: ')
    except (EOFError, KeyboardInterrupt):
        # End the session cleanly instead of showing a traceback.
        print()
        print("Goodbye! If you need help again, just ask.")
        break

    query = query.strip()
    if not query:
        # Nothing to classify; ask again.
        continue
    if query.lower() == 'quit':
        print("Goodbye! If you need help again, just ask.")
        break

    bot_response, intent_type = response(query)
    print(f'Bot: {bot_response} -- Intent: {intent_type}')
    print()


Epoch 1/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.1678 - loss: 2.1901
Epoch 2/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.2020 - loss: 2.1678
Epoch 3/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.2104 - loss: 2.1554
Epoch 4/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.2277 - loss: 2.1223
Epoch 5/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.1736 - loss: 2.1038
Epoch 6/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.2859 - loss: 2.0669
Epoch 7/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.2742 - loss: 2.0309
Epoch 8/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.3184 - loss: 2.0000
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37