In [1]:
# Sample menu catalogue used as the retrieval corpus: one dict per menu item.
# In the cells shown here, only "itemName", "description" and "subCategory" are
# indexed for BM25 search; "itemName" and "description" are also what gets
# printed and passed to T5 as context. The remaining keys are carried along
# in the retrieved dicts but not otherwise read.
menu_data = [
    {
        "itemId": "008341a8-e73c-4400-9143-4521f9e1befd",
        "itemName": "Rava Kichadi",
        "description": "A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection",
        "subCategory": "South Indian Favorites",
        "specialInstructions": "MAKE IT LITTLE SPICY",
        "allergicInfo": "NUTS and FISH."
    },
    {
        "itemId": "01cb3741-3755-4c98-a5ea-0262d1948d59",
        "itemName": "South Indian Thali",
        "description": "Steamed rice, sambar, rasam, kootu, poriyal, kuzhambu, yogurt, appalam, chapati, kurma, pickle & Sweet",
        "subCategory": "Thali's",
        "specialInstructions": "This is spl instruction",
        "allergicInfo": "The item has allergic content."
    }
]


In [2]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [3]:
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import nltk

# Download NLTK tokenization data.
# Older NLTK releases load the 'punkt' models; newer releases (3.9+) make
# word_tokenize load 'punkt_tab' instead and raise LookupError without it.
# Fetching both keeps this cell working on a fresh kernel with either version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Preprocess menu data
def preprocess_menu(menu_data):
    """Flatten every menu item into a single searchable text string.

    Args:
        menu_data: Iterable of dicts, each providing the keys 'itemName',
            'description' and 'subCategory'.

    Returns:
        A list with one string per item, in input order, formed as
        "<itemName> <description> <subCategory>".

    Raises:
        KeyError: If an item lacks one of the three keys.
    """
    documents = []
    for entry in menu_data:
        documents.append(
            f"{entry['itemName']} {entry['description']} {entry['subCategory']}"
        )
    return documents

# Tokenize the menu data: lower-case each flattened menu string, then split it
# into word tokens (one token list per menu item, in menu order).
tokenized_menu = list(map(word_tokenize, map(str.lower, preprocess_menu(menu_data))))

# Create BM25 object over the tokenized corpus
bm25 = BM25Okapi(tokenized_menu)

# Function to retrieve relevant menu items
def bm25_retrieve(query, top_n=3):
    """Return the menu items that BM25 scores highest for `query`.

    The query is lower-cased and word-tokenized before scoring against the
    module-level `bm25` index.

    Args:
        query: Free-text search string.
        top_n: Maximum number of items to return (default 3).

    Returns:
        A list of dicts taken from `menu_data`, ordered by descending score.
        Items with equal scores keep their original menu order.
    """
    query_tokens = word_tokenize(query.lower())
    scores = bm25.get_scores(query_tokens)
    # Pair each corpus position with its score; sorted() is stable, so ties
    # stay in menu order even with reverse=True.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [menu_data[position] for position, _ in ranked[:top_n]]


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [4]:
# Try the BM25 retriever on a sample query and list what it returns.
query = "spicy South Indian food"
retrieved_items = bm25_retrieve(query)

for hit in retrieved_items:
    name, description = hit['itemName'], hit['description']
    print(f"Item: {name}, Description: {description}")


Item: South Indian Thali, Description: Steamed rice, sambar, rasam, kootu, poriyal, kuzhambu, yogurt, appalam, chapati, kurma, pickle & Sweet
Item: Rava Kichadi, Description: A healthy breakfast option made with semolina and vegetables lightly tempered with spices cooked to perfection


In [5]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the T5 model and tokenizer.
# Both are loaded from the same checkpoint name, so it is spelled out once.
T5_CHECKPOINT = "t5-small"
model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)
tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)

# Generate a response using T5
def generate_response(context, query):
    """Generate an answer to `query` from `context` with the loaded T5 model.

    The prompt is built as "question: <query> context: <context>", encoded
    with truncation at 512 tokens, and decoded from the first sequence that
    beam search (4 beams, at most 150 tokens) returns.

    Args:
        context: Text the answer should be drawn from.
        query: The user's question.

    Returns:
        The decoded output string with special tokens removed.
    """
    prompt = f"question: {query} context: {context}"

    encode_options = {"return_tensors": "pt", "max_length": 512, "truncation": True}
    generation_options = {"max_length": 150, "num_beams": 4, "early_stopping": True}

    token_ids = tokenizer.encode(prompt, **encode_options)
    candidates = model.generate(token_ids, **generation_options)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(candidates[0], skip_special_tokens=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [6]:
def conversational_bot(query):
    """Answer a menu question by retrieving items and generating from them.

    Args:
        query: The user's free-text question.

    Returns:
        Whatever `generate_response` returns for the assembled context.
    """
    # Step 1: Retrieve relevant menu items using BM25, and
    # Step 2: turn each one into a "<itemName>: <description>" snippet.
    snippets = []
    for hit in bm25_retrieve(query):
        snippets.append(f"{hit['itemName']}: {hit['description']}")

    # Step 3: Generate the response from the space-joined snippets.
    return generate_response(" ".join(snippets), query)

In [7]:
# End-to-end demo: retrieve menu items for the question with BM25, then let T5
# generate an answer from their names and descriptions.
query = "Tell me about spicy South Indian dishes."
response = conversational_bot(query)
print(response)

Steamed rice, sambar, rasam, kootu, poriyal
