In [1]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch

# Load T5 model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Load menu data.
# The encoding is stated explicitly so that non-ASCII item names and
# descriptions decode the same way on every platform instead of depending
# on the locale's default encoding.
with open('mockMenu 1.json', 'r', encoding='utf-8') as f:
    menu_data = json.load(f)['menu']

# Load intent data (same explicit encoding as the menu file)
with open('intent.json', 'r', encoding='utf-8') as f:
    intent_data = json.load(f)['intents']

# Step 1: Prepare the corpus for BM25 (combine 'itemName' and 'description' fields).
# 'description' is treated as optional: a missing key, None, or an empty
# string all contribute nothing to the document text instead of raising.
corpus = [
    item['itemName'] + " " + (item.get('description') or "")
    for item in menu_data
]

# Initialize BM25 with the corpus.
# Documents are tokenized by plain whitespace splitting (case-sensitive),
# so queries scored against this index must be tokenized the same way.
bm25 = BM25Okapi([doc.split() for doc in corpus])

# Step 2: Intent Recognition using T5
def classify_intent_with_t5(query):
    """Ask the T5 model to label the intent of *query*.

    The query is wrapped in a ``classify intent:`` prompt, tokenized with
    truncation at 512 tokens, and passed to ``model.generate`` with two
    beams and ``max_length=20``.

    Returns the first generated sequence decoded to text, with special
    tokens removed and surrounding whitespace stripped.
    """
    prompt = f"classify intent: {query}"
    token_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = model.generate(token_ids, max_length=20, num_beams=2)
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True).strip()

# Step 3: Document Retrieval with BM25
def retrieve_document(query):
    """Return the menu item that best matches *query*.

    Lookup happens in two stages:

    1. Name match: every item whose (non-empty) name occurs in the query,
       compared case-insensitively, is a candidate. The candidate with the
       longest name wins, so a specific name such as "Masala Dosa" is not
       shadowed by a shorter name it contains ("Dosa"). Ties keep menu order.
    2. BM25 fallback: when no name occurs in the query, the query is split
       on whitespace and the highest-scoring document in ``bm25`` is used.

    Args:
        query: The user's free-text question.

    Returns:
        The matching item dict from ``menu_data``. The fallback always
        returns the top-scoring item, even when every score is zero.
    """
    lowered_query = query.lower()

    # Items with an empty name are skipped: "" is a substring of every
    # string and would otherwise match any query.
    named_matches = [
        item
        for item in menu_data
        if item['itemName'] and item['itemName'].lower() in lowered_query
    ]
    if named_matches:
        # max() returns the first maximal element, preserving menu order
        # between names of equal length.
        return max(named_matches, key=lambda item: len(item['itemName']))

    # If no name match, use BM25 for ranking.
    # NOTE: tokenization here must mirror how the index was built
    # (plain, case-sensitive whitespace split).
    tokenized_query = query.split()
    doc_scores = bm25.get_scores(tokenized_query)
    # Index of the first highest score; no tensor conversion is needed
    # just to locate a maximum.
    top_doc_index = max(range(len(doc_scores)), key=doc_scores.__getitem__)
    return menu_data[top_doc_index]

# Step 4: Find Minimum Prep Time Dish
def find_min_prep_time_dish():
    """Return the item in ``menu_data`` with the smallest preparation time.

    Each item's ``prepTimeInMins`` value is converted with ``int`` before
    comparison. When several items share the minimum, the first one in
    menu order is returned.
    """
    def prep_minutes(dish):
        return int(dish['prepTimeInMins'])

    return min(menu_data, key=prep_minutes)

# Step 5: Handle Query Based on Intent and Menu Data
def handle_query(query):
    """Answer a free-text *query* about the menu.

    The intent is resolved by phrase matching: the first entry in
    ``intent_data`` that has a non-empty example phrase occurring in the
    query (case-insensitive) decides the intent. Routing needs no model
    inference; T5 is only invoked to generate text when no intent matches.

    Handled intents:
        MenuItemDetails  -> description of the retrieved item
        PricingInquiry   -> price of the retrieved item
        AllergenInquiry  -> allergen info of the retrieved item
        WaitTimeInquiry  -> the dish with the shortest prep time
        anything else    -> T5-generated text conditioned on the query and
                            the retrieved item's description

    Args:
        query: The user's question.

    Returns:
        The response string.
    """
    lowered_query = query.lower()

    # Match the query against the example phrases of each known intent.
    # Empty examples are ignored: "" is contained in every query and would
    # make the first intent in the list match unconditionally.
    matching_intent = None
    for intent_info in intent_data:
        if any(
            example and example.lower() in lowered_query
            for example in intent_info['examples']
        ):
            matching_intent = intent_info['intent']
            break

    # The wait-time answer is menu-wide and needs no item retrieval.
    if matching_intent == "WaitTimeInquiry":
        min_dish = find_min_prep_time_dish()
        return f"The dish with the least prep time is {min_dish['itemName']} with a prep time of {min_dish['prepTimeInMins']} minutes."

    # Every other path talks about the single best-matching item.
    retrieved_item = retrieve_document(query)

    if matching_intent == "PricingInquiry":
        return f"The price of {retrieved_item['itemName']} is ${retrieved_item['price']}."

    if matching_intent == "AllergenInquiry":
        # `or ''` also covers a key that is present but None, which would
        # otherwise fail on .upper().
        allergens = (retrieved_item.get('allergicInfo') or '').upper()
        if allergens:
            return f"Allergens for {retrieved_item['itemName']}: {allergens}"
        return f"No allergen information for {retrieved_item['itemName']}."

    # `or` rather than a .get default: the description may be present but
    # None or empty, and the placeholder should be used then as well.
    description = retrieved_item.get('description') or 'No description available'

    if matching_intent == "MenuItemDetails":
        return f"{retrieved_item['itemName']} is described as: {description}."

    # Fallback to a general response using T5 generation
    input_text = f"generate response: {query} Context: {description}."
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=50, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage: run one sample question through the full pipeline
# (intent matching -> item retrieval -> response) and print the answer.
query = "Tell me about the Rava Kichadi"
response = handle_query(query)
print(response)


  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Tell me about the Rava Kichadi Context: A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection.
