In [1]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [6]:
from collections import defaultdict
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from rank_bm25 import BM25Okapi
import nltk
import re

# Download NLTK data for tokenization, stopwords, and lemmatization
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# Initialize lemmatizer and stopwords
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Sample menu data (same as before)
menu_data = [
    {
        "itemId": "1",
        "itemName": "Chicken Curry",
        "description": "Hot and spicy chicken dish",
        "specialInstructions": "Extra spicy",
        "allergicInfo": "Contains nuts",
        "price": "15"
    },
    {
        "itemId": "2",
        "itemName": "Mild Chicken Pasta",
        "description": "Creamy pasta with chicken",
        "specialInstructions": "No garlic",
        "allergicInfo": "Dairy",
        "price": "12"
    },
    {
        "itemId": "3",
        "itemName": "Spicy Pasta",
        "description": "Pasta with a spicy tomato sauce",
        "specialInstructions": "",
        "allergicInfo": "",
        "price": "10"
    },
    {
        "itemId": "4",
        "itemName": "Beef Stroganoff",
        "description": "Creamy beef with mushrooms and onions",
        "specialInstructions": "Less sauce",
        "allergicInfo": "Dairy, Mushrooms",
        "price": "18"
    },
    {
        "itemId": "5",
        "itemName": "Vegetarian Pizza",
        "description": "Wood-fired pizza topped with vegetables",
        "specialInstructions": "Extra cheese",
        "allergicInfo": "Gluten, Dairy",
        "price": "14"
    },
    {
        "itemId": "6",
        "itemName": "Grilled Salmon",
        "description": "Freshly grilled salmon with herbs",
        "specialInstructions": "No oil",
        "allergicInfo": "Fish",
        "price": "20"
    },
    {
        "itemId": "7",
        "itemName": "Caesar Salad",
        "description": "Crisp romaine lettuce with Caesar dressing",
        "specialInstructions": "No croutons",
        "allergicInfo": "Dairy, Gluten, Anchovies",
        "price": "8"
    },
    {
        "itemId": "8",
        "itemName": "Chocolate Lava Cake",
        "description": "Rich chocolate cake with molten center",
        "specialInstructions": "No whipped cream",
        "allergicInfo": "Dairy, Eggs",
        "price": "7"
    },
    {
        "itemId": "9",
        "itemName": "Lamb Biryani",
        "description": "Fragrant rice with tender lamb and spices",
        "specialInstructions": "No cilantro",
        "allergicInfo": "Contains nuts",
        "price": "17"
    },
    {
        "itemId": "10",
        "itemName": "Tofu Stir-Fry",
        "description": "Stir-fried tofu with mixed vegetables",
        "specialInstructions": "No soy sauce",
        "allergicInfo": "Soy",
        "price": "13"
    },
    {
        "itemId": "11",
        "itemName": "Garlic Bread",
        "description": "Crispy garlic bread slices",
        "specialInstructions": "No butter",
        "allergicInfo": "Gluten, Dairy",
        "price": "5"
    },
    {
        "itemId": "12",
        "itemName": "Fruit Salad",
        "description": "Fresh seasonal fruits",
        "specialInstructions": "No bananas",
        "allergicInfo": "",
        "price": "6"
    },
    {
        "itemId": "13",
        "itemName": "Butter Chicken",
        "description": "Chicken cooked in a creamy tomato sauce",
        "specialInstructions": "Extra gravy",
        "allergicInfo": "Dairy",
        "price": "16"
    },
    {
        "itemId": "14",
        "itemName": "Margherita Pizza",
        "description": "Classic pizza with mozzarella and basil",
        "specialInstructions": "Add olives",
        "allergicInfo": "Gluten, Dairy",
        "price": "12"
    },
    {
        "itemId": "15",
        "itemName": "Greek Salad",
        "description": "Salad with cucumbers, tomatoes, and feta cheese",
        "specialInstructions": "No onions",
        "allergicInfo": "Dairy",
        "price": "9"
    }
]

# Step 1: Tokenize and preprocess the document (menu items)
def tokenize_and_process(text):
    tokens = word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
    return tokens

# Step 2: Preprocess menu data into a list of combined item details
def preprocess_menu(menu_data):
    return [
        f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')} {item.get('price', '')}"
        for item in menu_data
    ]

# Step 3: Build an inverted index
def build_inverted_index(menu_data):
    inverted_index = defaultdict(list)
    for doc_id, item in enumerate(menu_data):
        text = f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')}"
        tokens = tokenize_and_process(text)
        for token in set(tokens):  # Use set to avoid duplicate entries for the same word
            inverted_index[token].append(doc_id)
    return inverted_index

# Step 4: Retrieve relevant documents using inverted index
def retrieve_documents(query, inverted_index):
    query_tokens = tokenize_and_process(query)
    relevant_docs = set()

    # Collect documents that contain at least one query term
    for token in query_tokens:
        if token in inverted_index:
            relevant_docs.update(inverted_index[token])

    return list(relevant_docs)

# Step 5: Rank the retrieved documents using BM25
def rank_documents(query, menu_data, relevant_doc_ids):
    preprocessed_menu = preprocess_menu(menu_data)
    tokenized_menu = [tokenize_and_process(doc) for doc in preprocessed_menu]
    bm25 = BM25Okapi(tokenized_menu)
    tokenized_query = tokenize_and_process(query)
    scores = bm25.get_scores(tokenized_query)
    relevant_scores = [(doc_id, scores[doc_id]) for doc_id in relevant_doc_ids]
    ranked_docs = sorted(relevant_scores, key=lambda x: x[1], reverse=True)
    return ranked_docs

# Step 6: Build an inverted index from the ranked documents
def build_inverted_index_from_ranked_docs(ranked_docs, menu_data):
    inverted_index = defaultdict(list)
    for doc_id, score in ranked_docs:
        item = menu_data[doc_id]
        text = f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')}"
        tokens = tokenize_and_process(text)
        for token in set(tokens):
            inverted_index[token].append(doc_id)
    return inverted_index

# Step 7: Define and detect intent based on query patterns
def detect_intent(query):
    query = query.lower()

    # Basic intent detection patterns
    if re.search(r"\bprice\b", query):
        return "price"
    elif re.search(r"\ballergi(?:es|c|info)\b", query):
        return "allergic_info"
    elif re.search(r"\binstruction\b|\bpreparation\b", query):
        return "special_instructions"
    else:
        return "general"

# Step 8: Generate a response based on detected intent and retrieved documents
def generate_response(query, ranked_docs, menu_data):
    intent = detect_intent(query)

    if not ranked_docs:
        return "Sorry, we couldn't find any matching items for your query."

    response = ""
    for doc_id, score in ranked_docs:
        item = menu_data[doc_id]

        if intent == "price":
            response += f"Item: {item['itemName']} | Price: ${item['price']}\n"
        elif intent == "allergic_info":
            response += f"Item: {item['itemName']} | Allergic Info: {item.get('allergicInfo', 'No specific allergic info')}\n"
        elif intent == "special_instructions":
            response += f"Item: {item['itemName']} | Special Instructions: {item.get('specialInstructions', 'None')}\n"
        else:
            response += f"Item: {item['itemName']} | Description: {item['description']}\n"

    return response.strip()

# Build the inverted index
inverted_index = build_inverted_index(menu_data)

# Example query
query = "Fever Recommendations?"

# Retrieve relevant documents based on the query
relevant_docs = retrieve_documents(query, inverted_index)
print(f"Relevant document IDs for query '{query}':", relevant_docs)

# Rank the retrieved documents using BM25
ranked_docs = rank_documents(query, menu_data, relevant_docs)
print(f"Ranked document IDs and scores for query '{query}':", ranked_docs)

# Build the inverted index from ranked documents
ranked_inverted_index = build_inverted_index_from_ranked_docs(ranked_docs, menu_data)

# Generate a response based on the ranked documents and detected intent
response = generate_response(query, ranked_docs, menu_data)
print("//////////////////////////")
print("\nResponse based on detected intent:")
print(response)
print("//////////////////////////")

Relevant document IDs for query 'Fever Recommendations?': []
Ranked document IDs and scores for query 'Fever Recommendations?': []
//////////////////////////

Response based on detected intent:
Sorry, we couldn't find any matching items for your query.
//////////////////////////


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
from collections import defaultdict
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from rank_bm25 import BM25Okapi
import nltk
import re

# Download NLTK data for tokenization, stopwords, and lemmatization
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# Initialize lemmatizer and stopwords
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Sample menu data (same as before)
menu_data = [
    {
        "itemId": "1",
        "itemName": "Chicken Curry",
        "description": "Hot and spicy chicken dish",
        "specialInstructions": "Extra spicy",
        "allergicInfo": "Contains nuts",
        "price": "15"
    },
    {
        "itemId": "2",
        "itemName": "Mild Chicken Pasta",
        "description": "Creamy pasta with chicken",
        "specialInstructions": "No garlic",
        "allergicInfo": "Dairy",
        "price": "12"
    },
    {
        "itemId": "3",
        "itemName": "Spicy Pasta",
        "description": "Pasta with a spicy tomato sauce",
        "specialInstructions": "",
        "allergicInfo": "",
        "price": "10"
    },
    {
        "itemId": "4",
        "itemName": "Beef Stroganoff",
        "description": "Creamy beef with mushrooms and onions",
        "specialInstructions": "Less sauce",
        "allergicInfo": "Dairy, Mushrooms",
        "price": "18"
    },
    {
        "itemId": "5",
        "itemName": "Vegetarian Pizza",
        "description": "Wood-fired pizza topped with vegetables",
        "specialInstructions": "Extra cheese",
        "allergicInfo": "Gluten, Dairy",
        "price": "14"
    },
    {
        "itemId": "6",
        "itemName": "Grilled Salmon",
        "description": "Freshly grilled salmon with herbs",
        "specialInstructions": "No oil",
        "allergicInfo": "Fish",
        "price": "20"
    },
    {
        "itemId": "7",
        "itemName": "Caesar Salad",
        "description": "Crisp romaine lettuce with Caesar dressing",
        "specialInstructions": "No croutons",
        "allergicInfo": "Dairy, Gluten, Anchovies",
        "price": "8"
    },
    {
        "itemId": "8",
        "itemName": "Chocolate Lava Cake",
        "description": "Rich chocolate cake with molten center",
        "specialInstructions": "No whipped cream",
        "allergicInfo": "Dairy, Eggs",
        "price": "7"
    },
    {
        "itemId": "9",
        "itemName": "Lamb Biryani",
        "description": "Fragrant rice with tender lamb and spices",
        "specialInstructions": "No cilantro",
        "allergicInfo": "Contains nuts",
        "price": "17"
    },
    {
        "itemId": "10",
        "itemName": "Tofu Stir-Fry",
        "description": "Stir-fried tofu with mixed vegetables",
        "specialInstructions": "No soy sauce",
        "allergicInfo": "Soy",
        "price": "13"
    },
    {
        "itemId": "11",
        "itemName": "Garlic Bread",
        "description": "Crispy garlic bread slices",
        "specialInstructions": "No butter",
        "allergicInfo": "Gluten, Dairy",
        "price": "5"
    },
    {
        "itemId": "12",
        "itemName": "Fruit Salad",
        "description": "Fresh seasonal fruits",
        "specialInstructions": "No bananas",
        "allergicInfo": "",
        "price": "6"
    },
    {
        "itemId": "13",
        "itemName": "Butter Chicken",
        "description": "Chicken cooked in a creamy tomato sauce",
        "specialInstructions": "Extra gravy",
        "allergicInfo": "Dairy",
        "price": "16"
    },
    {
        "itemId": "14",
        "itemName": "Margherita Pizza",
        "description": "Classic pizza with mozzarella and basil",
        "specialInstructions": "Add olives",
        "allergicInfo": "Gluten, Dairy",
        "price": "12"
    },
    {
        "itemId": "15",
        "itemName": "Greek Salad",
        "description": "Salad with cucumbers, tomatoes, and feta cheese",
        "specialInstructions": "No onions",
        "allergicInfo": "Dairy",
        "price": "9"
    }
]

# Step 1: Tokenize and preprocess the document (menu items)
def tokenize_and_process(text):
    tokens = word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
    return tokens

# Step 2: Preprocess menu data into a list of combined item details
def preprocess_menu(menu_data):
    return [
        f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')} {item.get('price', '')}"
        for item in menu_data
    ]

# Step 3: Build an inverted index
def build_inverted_index(menu_data):
    inverted_index = defaultdict(list)
    for doc_id, item in enumerate(menu_data):
        text = f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')}"
        tokens = tokenize_and_process(text)
        for token in set(tokens):  # Use set to avoid duplicate entries for the same word
            inverted_index[token].append(doc_id)
    return inverted_index

# Step 4: Retrieve relevant documents using inverted index
def retrieve_documents(query, inverted_index):
    query_tokens = tokenize_and_process(query)
    relevant_docs = set()

    # Collect documents that contain at least one query term
    for token in query_tokens:
        if token in inverted_index:
            relevant_docs.update(inverted_index[token])

    return list(relevant_docs)

# Step 5: Rank the retrieved documents using BM25
def rank_documents(query, menu_data, relevant_doc_ids):
    preprocessed_menu = preprocess_menu(menu_data)
    tokenized_menu = [tokenize_and_process(doc) for doc in preprocessed_menu]
    bm25 = BM25Okapi(tokenized_menu)
    tokenized_query = tokenize_and_process(query)
    scores = bm25.get_scores(tokenized_query)
    relevant_scores = [(doc_id, scores[doc_id]) for doc_id in relevant_doc_ids]
    ranked_docs = sorted(relevant_scores, key=lambda x: x[1], reverse=True)
    return ranked_docs

# Step 6: Build an inverted index from the ranked documents
def build_inverted_index_from_ranked_docs(ranked_docs, menu_data):
    inverted_index = defaultdict(list)
    for doc_id, score in ranked_docs:
        item = menu_data[doc_id]
        text = f"{item['itemName']} {item['description']} {item.get('specialInstructions', '')} {item.get('allergicInfo', '')}"
        tokens = tokenize_and_process(text)
        for token in set(tokens):
            inverted_index[token].append(doc_id)
    return inverted_index

# Step 7: Define and detect intent based on query patterns
def detect_intent(query):
    query = query.lower()

    # Basic intent detection patterns
    if re.search(r"\bfever\b", query):
        return "fever_recommendation"
    elif re.search(r"\bspicy\b", query):
        return "spicy_food"
    elif re.search(r"\bprice\b", query):
        return "price"
    elif re.search(r"\ballergi(?:es|c|info)\b", query):
        return "allergic_info"
    elif re.search(r"\binstruction\b|\bpreparation\b", query):
        return "special_instructions"
    else:
        return "general"

# Step 8: Recommend menu items based on query intent
def recommend_based_on_intent(intent, menu_data):
    recommendations = []

    if intent == "fever_recommendation":
        # Recommend non-spicy food for fever
        recommendations = [item for item in menu_data if "spicy" in item['description'].lower()]
    elif intent == "spicy_food":
        # Return spicy food items
        recommendations = [item for item in menu_data if "spicy" in item['description'].lower()]

    return recommendations

# Step 9: Generate a response based on detected intent and retrieved documents
# Modified: Generate a response based on detected intent and apply rule-based filtering for fever
def generate_response(query, ranked_docs, menu_data):
    intent = detect_intent(query)

    # Special case: If intent is for a fever recommendation, filter the recommendations
    if intent == "fever_recommendation":
        recommendations = recommend_based_on_intent(intent, menu_data)
        if recommendations:
            response = "Here are some recommended items for fever (non-spicy):\n"
            for item in recommendations:
                response += f"Item: {item['itemName']} | Description: {item['description']}\n"
            return response
        else:
            return "Sorry, no non-spicy items found in the menu."

    # Default fallback: Use BM25 ranked results
    if not ranked_docs:
        return "Sorry, we couldn't find any matching items for your query."

    # Otherwise, return the items based on BM25 ranking and other intents
    response = ""
    for doc_id, score in ranked_docs:
        item = menu_data[doc_id]

        if intent == "price":
            response += f"Item: {item['itemName']} | Price: ${item['price']}\n"
        elif intent == "allergic_info":
            response += f"Item: {item['itemName']} | Allergic Info: {item.get('allergicInfo', 'No specific allergic info')}\n"
        elif intent == "special_instructions":
            response += f"Item: {item['itemName']} | Special Instructions: {item.get('specialInstructions', 'None')}\n"
        else:
            response += f"Item: {item['itemName']} | Description: {item['description']}\n"

    return response.strip()

# Step 8: Recommend menu items based on query intent (updated to ensure non-spicy for fever)
def recommend_based_on_intent(intent, menu_data):
    recommendations = []

    if intent == "fever_recommendation":
        # Recommend non-spicy food for fever
        recommendations = [item for item in menu_data if "spicy" in item['description'].lower()]
    elif intent == "spicy_food":
        # Return spicy food items
        recommendations = [item for item in menu_data if "spicy" in item['description'].lower()]

    return recommendations

# Example query
query = "Can you recommend something for fever?"

# Detect intent and filter based on fever recommendation directly
intent = detect_intent(query)
response = generate_response(query, [], menu_data)  # Ranked docs are skipped for fever intent
print(response)


Here are some recommended items for fever (non-spicy):
Item: Chicken Curry | Description: Hot and spicy chicken dish
Item: Spicy Pasta | Description: Pasta with a spicy tomato sauce



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
