In [1]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch

# Load T5 model and tokenizer
# Both objects are created from the same checkpoint name so they stay in sync.
model_name = "t5-small"  # You can use 't5-base' or 't5-large' if needed
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Example menu JSON data (use your full menu here)
# Each entry carries itemId, itemName, description, price, prepTimeInMins
# (stored as a string) and allergicInfo.
menu_json = '''
{
    "menu": [
        {
            "itemId": "008341a8-e73c-4400-9143-4521f9e1befd",
            "itemName": "Rava Kichadi",
            "description": "A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection",
            "price": 10.49,
            "prepTimeInMins": "75",
            "allergicInfo": "NUTS and FISH."
        },
        {
            "itemId": "01cb3741-3755-4c98-a5ea-0262d1948d59",
            "itemName": "South Indian Thali",
            "description": "Steamed rice, sambar, rasam, kootu, poriyal, kuzhambu, yogurt, appalam, chapati, kurma, pickle & Sweet",
            "price": 19,
            "prepTimeInMins": "15",
            "allergicInfo": "The item has an allergic content."
        }
    ]
}
'''

# Load menu data
# `corpus` holds one description per menu entry, in menu order, so position i
# in the BM25 index refers to menu_data['menu'][i].
menu_data = json.loads(menu_json)
corpus = [item['description'] for item in menu_data['menu']]

# Setup BM25 for document retrieval
# Documents are split on whitespace only: tokens keep their original case and
# any attached punctuation. Item names are not part of this index.
bm25 = BM25Okapi([doc.split() for doc in corpus])

# Example user query
input_json = '''
{
    "query": "Does Rava Kichadi have nuts?"
}
'''

# Parse input JSON
# `query` is the raw question text used by every step below.
input_data = json.loads(input_json)
query = input_data['query']

### Step 1: Intent Recognition using T5 ###

def classify_intent_with_t5(query):
    """Ask the loaded T5 model for an intent label describing ``query``.

    The query is wrapped in a ``classify intent:`` prompt, encoded (truncated
    to 512 tokens), decoded with a 2-beam search capped at 10 tokens, and
    returned as stripped text.

    NOTE(review): nothing here constrains the output to a fixed label set;
    verify the returned text before branching on it.
    """
    prompt = f"classify intent: {query}"
    prompt_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = model.generate(prompt_ids, max_length=10, num_beams=2)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip()

### Step 2: Document Retrieval using BM25 ###

def retrieve_document(query):
    """Return the menu entry whose BM25 score for ``query`` is the highest.

    The query is split on whitespace, scored against the module-level ``bm25``
    index, and the index reported by ``torch.argmax`` is used to look up the
    entry in ``menu_data['menu']``.
    """
    query_terms = query.split()
    scores = torch.tensor(bm25.get_scores(query_terms))
    best_index = torch.argmax(scores).item()
    return menu_data['menu'][best_index]

def generate_response_with_t5(query, retrieved_item, intent):
    """Build the reply to ``query`` about ``retrieved_item``.

    Routing, in order:
      * ``'allergy'`` in the intent or ``'nuts'`` in the query -> nut answer
      * ``'price'`` in the intent -> price sentence
      * ``'description'`` in the intent -> description sentence
      * otherwise -> free-form text generated by the T5 model

    Args:
        query: The user's question.
        retrieved_item: Menu entry (mapping) the answer is about.
        intent: Intent text produced for the query.

    Returns:
        The response string.
    """
    intent_lc = intent.lower()
    query_lc = query.lower()

    if 'allergy' in intent_lc or 'nuts' in query_lc:
        # The allergen text is read only on this branch, so price/description
        # questions no longer fail for items that carry no allergen field.
        allergens = (retrieved_item.get('allergicInfo') or '').upper()
        if not allergens.strip():
            # Never answer "no nuts" when there is simply no data to check.
            return f"Allergen information for {retrieved_item['itemName']} is not available."
        if 'NUTS' in allergens:
            return f"Yes, {retrieved_item['itemName']} contains nuts."
        return f"No, {retrieved_item['itemName']} does not contain nuts."

    if 'price' in intent_lc:
        return f"The price of {retrieved_item['itemName']} is ${retrieved_item['price']}."

    if 'description' in intent_lc:
        return f"{retrieved_item['itemName']} is described as: {retrieved_item['description']}."

    # Generate a general response based on the description if the intent is not recognized
    input_text = f"generate response: {query} Context: {retrieved_item['description']}."
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=50, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


### Main Flow ###

# Run the pipeline for the sample query: intent -> retrieval -> response
intent = classify_intent_with_t5(query)
retrieved_item = retrieve_document(query)
response = generate_response_with_t5(query, retrieved_item, intent)

# Assemble the result payload (the retrieved item itself is deliberately
# left out; add it here when debugging retrieval)
output_json = dict(intent=intent, response=response)

# Serialise with 4-space indentation and show it
output = json.dumps(output_json, indent=4)
print(output)

  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


{
    "intent": "Ist Rava Kichadi",
    "response": "Yes, Rava Kichadi contains nuts."
}


In [6]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch

# Load T5 model and tokenizer
# Both objects are created from the same checkpoint name so they stay in sync.
model_name = "t5-small"  # You can use 't5-base' or 't5-large' if needed
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Example menu JSON data
# Each entry carries itemId, itemName, description, price, prepTimeInMins
# (stored as a string), allergicInfo and subCategory.
menu_json = '''
{
    "menu": [
        {
            "itemId": "008341a8-e73c-4400-9143-4521f9e1befd",
            "itemName": "Rava Kichadi",
            "description": "A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection",
            "price": 10.49,
            "prepTimeInMins": "75",
            "allergicInfo": "NUTS and FISH.",
            "subCategory": "South Indian Favorites"
        },
        {
            "itemId": "01cb3741-3755-4c98-a5ea-0262d1948d59",
            "itemName": "South Indian Thali",
            "description": "Steamed rice, sambar, rasam, kootu, poriyal, kuzhambu, yogurt, appalam, chapati, kurma, pickle & Sweet",
            "price": 19,
            "prepTimeInMins": "15",
            "allergicInfo": "The item has an allergic content.",
            "subCategory": "Thali's"
        }
    ]
}
'''

# Load menu data
# `corpus` holds one description per menu entry, in menu order, so position i
# in the BM25 index refers to menu_data['menu'][i].
menu_data = json.loads(menu_json)
corpus = [item['description'] for item in menu_data['menu']]

# Setup BM25 for document retrieval
# Documents are split on whitespace only: tokens keep their original case and
# any attached punctuation. Item names are not part of this index.
bm25 = BM25Okapi([doc.split() for doc in corpus])

### Step 1: Intent Recognition using T5 ###

def classify_intent_with_t5(query):
    """Generate a short intent string for ``query`` with the loaded T5 model.

    The prompt is ``classify intent: <query>``; it is truncated to 512 tokens
    on input and decoded with 2 beams up to 20 tokens. Special tokens are
    dropped and surrounding whitespace is stripped from the result.
    """
    prompt = f"classify intent: {query}"
    encoded_prompt = tokenizer.encode(
        prompt, return_tensors="pt", max_length=512, truncation=True
    )
    beams = model.generate(encoded_prompt, max_length=20, num_beams=2)
    text = tokenizer.decode(beams[0], skip_special_tokens=True)
    return text.strip()

### Step 2: Document Retrieval using BM25 ###

def retrieve_document(query):
    """Pick the menu entry that BM25 ranks highest for ``query``.

    Scores come from the module-level ``bm25`` index using the
    whitespace-split query; the winning position is taken with
    ``torch.argmax`` and mapped back onto ``menu_data['menu']``.
    """
    scores = bm25.get_scores(query.split())
    winner = torch.argmax(torch.tensor(scores)).item()
    menu_entries = menu_data['menu']
    return menu_entries[winner]

### Step 3: Find Minimum Prep Time Dish ###

def find_min_prep_time_dish():
    """Return the entry of ``menu_data['menu']`` with the smallest prep time.

    ``prepTimeInMins`` is converted with ``int`` before comparison, so values
    stored as numeric strings are compared as numbers.
    """
    def prep_minutes(entry):
        return int(entry['prepTimeInMins'])

    return min(menu_data['menu'], key=prep_minutes)

def generate_response_with_t5(query):
    """Answer ``query`` from the menu, falling back to T5 text generation.

    Keywords in the lower-cased query are checked in this order:
    ``description``, ``price``, ``allergy``/``nuts``,
    ``least prep time``/``min prep time``. The first hit produces a templated
    sentence; with no hit, the retrieved item's description is passed to the
    T5 model as context and its decoded output is returned.
    """
    lowered = query.lower()

    if 'description' in lowered:
        item = retrieve_document(query)
        return f"{item['itemName']} is described as: {item['description']}."

    if 'price' in lowered:
        item = retrieve_document(query)
        return f"The price of {item['itemName']} is ${item['price']}."

    if 'allergy' in lowered or 'nuts' in lowered:
        item = retrieve_document(query)
        # Compare in upper case so the check ignores the stored casing.
        allergen_text = item['allergicInfo'].upper()
        if 'NUTS' in allergen_text:
            return f"Yes, {item['itemName']} contains nuts."
        return f"No, {item['itemName']} does not contain nuts."

    if 'least prep time' in lowered or 'min prep time' in lowered:
        fastest = find_min_prep_time_dish()
        return f"The dish with the least prep time is {fastest['itemName']} with a prep time of {fastest['prepTimeInMins']} minutes."

    # No keyword matched: let the model produce a free-form reply
    item = retrieve_document(query)
    prompt = f"generate response: {query} Context: {item['description']}."
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    generated = model.generate(prompt_ids, max_length=50, num_beams=5, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

### Main Flow ###

# Sample request, supplied as a JSON payload
input_json = '''
{
    "query": "Does Rava Kichadi have nuts?"
}
'''

# Decode the payload and pull out the question text
input_data = json.loads(input_json)
query = input_data['query']

# Intent label from T5, answer from the keyword/retrieval pipeline
intent = classify_intent_with_t5(query)
response = generate_response_with_t5(query)

# Assemble the result, serialise it with 4-space indentation and show it
output_json = dict(intent=intent, response=response)
output = json.dumps(output_json, indent=4)
print(output)

{
    "intent": "Ist Rava Kichadi nut?",
    "response": "Yes, Rava Kichadi contains nuts."
}


In [3]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch

# Load T5 model and tokenizer
# Both objects are created from the same checkpoint name so they stay in sync.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Example menu JSON data
# Each entry carries itemId, itemName, description, price, prepTimeInMins
# (stored as a string), allergicInfo and subCategory.
menu_json = '''
{
    "menu": [
        {
            "itemId": "008341a8-e73c-4400-9143-4521f9e1befd",
            "itemName": "Rava Kichadi",
            "description": "A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection",
            "price": 10.49,
            "prepTimeInMins": "75",
            "allergicInfo": "NUTS and FISH.",
            "subCategory": "South Indian Favorites"
        },
        {
            "itemId": "01cb3741-3755-4c98-a5ea-0262d1948d59",
            "itemName": "South Indian Thali",
            "description": "Steamed rice, sambar, rasam, kootu, poriyal, kuzhambu, yogurt, appalam, chapati, kurma, pickle & Sweet",
            "price": 19,
            "prepTimeInMins": "15",
            "allergicInfo": "The item has an allergic content.",
            "subCategory": "Thali's"
        }
    ]
}
'''

# Load menu data
menu_data = json.loads(menu_json)
# Combine itemName and description for BM25 corpus
# One "<name> <description>" string per entry, in menu order, so position i
# in the BM25 index refers to menu_data['menu'][i].
corpus = [item['itemName'] + " " + item['description'] for item in menu_data['menu']]

# Setup BM25 for document retrieval
# Documents are split on whitespace only: tokens keep their original case and
# any attached punctuation.
bm25 = BM25Okapi([doc.split() for doc in corpus])

### Step 1: Intent Recognition using T5 ###

def classify_intent_with_t5(query):
    """Return the T5 model's text output for a ``classify intent:`` prompt.

    Input is truncated to 512 tokens; generation uses 2 beams and stops at
    20 tokens. The first generated sequence is decoded without special
    tokens and stripped of surrounding whitespace.
    """
    token_ids = tokenizer.encode(
        f"classify intent: {query}",
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    candidates = model.generate(token_ids, max_length=20, num_beams=2)
    return tokenizer.decode(candidates[0], skip_special_tokens=True).strip()

### Step 2: Improved Document Retrieval with BM25 and Exact Match ###

def retrieve_document(query):
    """Find the menu entry that ``query`` refers to.

    The first entry whose lower-cased ``itemName`` occurs inside the
    lower-cased query is returned directly. When no name occurs, the entry
    with the highest BM25 score for the whitespace-split query is returned.
    """
    lowered_query = query.lower()
    name_hit = next(
        (entry for entry in menu_data['menu'] if entry['itemName'].lower() in lowered_query),
        None,
    )
    if name_hit is not None:
        return name_hit

    # No item name appears in the query: rank by BM25 over name + description
    scores = torch.tensor(bm25.get_scores(query.split()))
    best_index = torch.argmax(scores).item()
    return menu_data['menu'][best_index]

### Step 3: Find Minimum Prep Time Dish ###

def find_min_prep_time_dish():
    """Return the menu entry whose ``prepTimeInMins`` is numerically smallest."""
    def as_minutes(dish):
        # Prep times are stored as strings; compare them as integers.
        return int(dish['prepTimeInMins'])

    quickest = min(menu_data['menu'], key=as_minutes)
    return quickest

def generate_response_with_t5(query):
    """Produce the reply for ``query``.

    The lower-cased query is matched against keyword groups in a fixed order
    (description, price, allergy/nuts, prep time). A match yields a templated
    sentence about the retrieved item (or the quickest dish); otherwise the
    T5 model generates text from the query plus the item's description.
    """
    text = query.lower()
    asks_allergy = any(word in text for word in ('allergy', 'nuts'))
    asks_prep_time = any(phrase in text for phrase in ('least prep time', 'min prep time'))

    if 'description' in text:
        dish = retrieve_document(query)
        response = f"{dish['itemName']} is described as: {dish['description']}."
    elif 'price' in text:
        dish = retrieve_document(query)
        response = f"The price of {dish['itemName']} is ${dish['price']}."
    elif asks_allergy:
        dish = retrieve_document(query)
        allergen_text = dish['allergicInfo'].upper()
        response = (
            f"Yes, {dish['itemName']} contains nuts."
            if 'NUTS' in allergen_text
            else f"No, {dish['itemName']} does not contain nuts."
        )
    elif asks_prep_time:
        quickest = find_min_prep_time_dish()
        response = f"The dish with the least prep time is {quickest['itemName']} with a prep time of {quickest['prepTimeInMins']} minutes."
    else:
        # Nothing matched: hand the question and the item's description to T5
        dish = retrieve_document(query)
        prompt = f"generate response: {query} Context: {dish['description']}."
        prompt_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
        generated = model.generate(prompt_ids, max_length=50, num_beams=5, early_stopping=True)
        response = tokenizer.decode(generated[0], skip_special_tokens=True)
    return response

### Main Flow ###

# Sample request, supplied as a JSON payload
input_json = '''
{
    "query": "least prep time?"
}
'''

# Decode the payload and pull out the question text
input_data = json.loads(input_json)
query = input_data['query']

# Intent label from T5, answer from the keyword/retrieval pipeline
intent = classify_intent_with_t5(query)
response = generate_response_with_t5(query)

# Assemble the result, serialise it with 4-space indentation and show it
output_json = dict(intent=intent, response=response)
output = json.dumps(output_json, indent=4)
print(output)

{
    "intent": "classify intent: least prep time?",
    "response": "The dish with the least prep time is South Indian Thali with a prep time of 15 minutes."
}


In [7]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rank_bm25 import BM25Okapi
import torch

# Load T5 model and tokenizer
# Both objects are created from the same checkpoint name so they stay in sync.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Load menu data from the JSON file
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open('mockMenu 1.json', 'r', encoding='utf-8') as file:
    menu_data = json.load(file)

def _tokenize(text):
    """Lower-case ``text`` and split it into alphanumeric word tokens.

    Every non-alphanumeric character acts as a separator, so ``"sambar,"``
    and ``"Sambar"`` both yield ``"sambar"``. ``None`` yields no tokens.
    """
    normalized = "".join(ch if ch.isalnum() else " " for ch in (text or "").lower())
    return normalized.split()


# Combine itemName and description for BM25 corpus, handling None values
# One string per entry, in menu order, so position i in the BM25 index refers
# to menu_data['menu'][i].
corpus = [
    (item.get('itemName') or '') + " " + (item.get('description') or '')
    for item in menu_data['menu']
]

# Setup BM25 for document retrieval
# Documents and queries go through the same _tokenize() so that case and
# attached punctuation cannot prevent a term from matching.
bm25 = BM25Okapi([_tokenize(doc) for doc in corpus])

### Step 1: Intent Recognition using T5 ###

def classify_intent_with_t5(query):
    """Return the T5 model's text output for a ``classify intent:`` prompt.

    Input is truncated to 512 tokens; generation uses 2 beams and stops at
    20 tokens. The decoded text is stripped of surrounding whitespace.
    """
    input_text = f"classify intent: {query}"
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=20, num_beams=2)
    intent = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    return intent

### Step 2: Improved Document Retrieval with BM25 and Exact Match ###

def retrieve_document(query):
    """Find the menu entry that ``query`` refers to.

    An entry whose name occurs inside the query (case-insensitively) wins
    outright; when several names occur, the longest one is chosen so that a
    short name cannot shadow a more specific one. Entries with a missing or
    empty name are never name-matched. With no name match, the entry with the
    highest BM25 score is returned.

    NOTE(review): when no query term occurs in the corpus every score is
    presumably equal and the first entry is returned; callers get no signal
    that nothing relevant was found.
    """
    query_lc = query.lower()

    # First check if any menu item name directly matches the query
    best_match = None
    best_length = 0  # starting at 0 makes empty names unable to match
    for item in menu_data['menu']:
        name = (item.get('itemName') or '').strip().lower()
        if len(name) > best_length and name in query_lc:
            best_match, best_length = item, len(name)
    if best_match is not None:
        return best_match

    # If no exact match, use BM25 for ranking based on itemName + description
    doc_scores = bm25.get_scores(_tokenize(query))
    top_doc_index = torch.argmax(torch.tensor(doc_scores)).item()
    return menu_data['menu'][top_doc_index]

### Step 3: Find Minimum Prep Time Dish ###

def find_min_prep_time_dish(menu=None):
    """Return the menu entry with the smallest preparation time.

    Args:
        menu: Optional list of menu entries to search. Defaults to
            ``menu_data['menu']``.

    Returns:
        The entry whose ``prepTimeInMins`` is numerically smallest; the
        earliest entry wins a tie.

    Raises:
        ValueError: If no entry has a usable ``prepTimeInMins`` value.
    """
    items = menu_data['menu'] if menu is None else menu

    best_item = None
    best_minutes = None
    for item in items:
        try:
            minutes = int(item.get('prepTimeInMins'))
        except (TypeError, ValueError):
            # Missing, null or non-numeric prep time: the entry cannot be ranked.
            continue
        if best_minutes is None or minutes < best_minutes:
            best_item, best_minutes = item, minutes

    if best_item is None:
        raise ValueError("no menu item has a usable prepTimeInMins value")
    return best_item

def generate_response_with_t5(query):
    """Answer ``query`` from the menu, falling back to T5 text generation.

    Keywords in the lower-cased query are checked in this order:
    ``description``, ``price``, ``allergy``/``nuts``,
    ``least prep time``/``min prep time``. The first hit produces a templated
    sentence; with no hit, the retrieved item's description is passed to the
    T5 model as context and its decoded output is returned.

    Menu entries loaded from the file may have null or missing fields, so
    each field is read defensively and an explicit "not available" sentence
    is returned instead of raising or printing ``None``.
    """
    unnamed = "Unnamed item"
    query_lc = query.lower()

    if 'description' in query_lc:
        # Retrieve the correct item based on the query
        item = retrieve_document(query)
        name = item.get('itemName') or unnamed
        description = item.get('description')
        if not description:
            return f"No description is available for {name}."
        return f"{name} is described as: {description}."

    if 'price' in query_lc:
        item = retrieve_document(query)
        name = item.get('itemName') or unnamed
        price = item.get('price')
        if price is None:  # a price of 0 is still a real price
            return f"The price of {name} is not available."
        return f"The price of {name} is ${price}."

    if 'allergy' in query_lc or 'nuts' in query_lc:
        item = retrieve_document(query)
        name = item.get('itemName') or unnamed
        allergens = (item.get('allergicInfo') or '').upper()
        if not allergens.strip():
            # Never answer "no nuts" when there is simply no data to check.
            return f"Allergen information for {name} is not available."
        if 'NUTS' in allergens:
            return f"Yes, {name} contains nuts."
        return f"No, {name} does not contain nuts."

    if 'least prep time' in query_lc or 'min prep time' in query_lc:
        min_dish = find_min_prep_time_dish()
        name = min_dish.get('itemName') or unnamed
        return f"The dish with the least prep time is {name} with a prep time of {min_dish['prepTimeInMins']} minutes."

    # Fallback to a general response
    item = retrieve_document(query)
    context = item.get('description') or ''
    input_text = f"generate response: {query} Context: {context}."
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=50, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

### Main Flow ###

# Sample request, supplied as a JSON payload
input_json = '''
{
    "query": "Fever Recommendations??"
}
'''

# Decode the payload and pull out the question text
input_data = json.loads(input_json)
query = input_data['query']

# Intent label from T5, answer from the keyword/retrieval pipeline
intent = classify_intent_with_t5(query)
response = generate_response_with_t5(query)

# Assemble the result, serialise it with 4-space indentation and show it
output_json = dict(intent=intent, response=response)
output = json.dumps(output_json, indent=4)
print(output)


{
    "intent": "- Classify intent intent: Fever Recommendations??",
    "response": "Fever Recommendations?? Context: A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection."
}
