#**Part 1: Setup Vector Database**

# **Create an In-Memory Vector Database**

In [2]:
!pip install annoy

Collecting annoy
  Downloading annoy-1.17.3.tar.gz (647 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/647.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/647.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━[0m [32m532.5/647.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.3-cp310-cp310-linux_x86_64.whl size=552452 sha256=62d7efc84a911453f00e951fc96fd21f4509492887b0859311f97913a84ca57c
  Stored in directory: /root/.cache/pip/wheels/64/8a/da/f714bcf46c5efdcfcac0559e63370c21abe961c48

In [3]:
from annoy import AnnoyIndex

dimension = 100  # Vector dimensionality
n_trees = 10  # More trees, better precision, slower build
index = AnnoyIndex(dimension, 'angular')  # Using angular distance

# Example: Add item with ID 0
# Replace "vector" with actual 300-dimensional vector
index.add_item(0, [0.5]*dimension)

# Build the index
index.build(n_trees)

True

In [4]:
# Saving the index to disk for reuse
index.save('menu_index.ann')
print("Index is built and saved.")

Index is built and saved.


In [5]:
# Load the index back
index.load('menu_index.ann')
print("Index is loaded.")

Index is loaded.


In [6]:
# Example query: Find 3 nearest neighbors of item 0
print(index.get_nns_by_item(0, 3))

[0]


# **End of Setting Up In-Memory Vector Database**

# **Convert Menu to Vectors**

In [7]:
import json

file_path = 'menu.json'  # Change this path if your file is located elsewhere

# Open the file and load its content into a variable
with open(file_path, 'r') as file:
    menu_data = json.load(file)

# Now, 'menu_data' contains the content of 'menu.json'
print(menu_data)  # This will print the content of the menu data

{'Location': {'name': 'KFC'}, 'Chicken': {'C1': ['Original Recipe', 3.5, {'nutritionalInfo': {'kcal': 400, 'fat': 22, 'protein': 28, 'itemId': 4, 'allergens': ['wheat', 'soy']}, 'available': False}], 'C2': ['Popcorn Chicken', 4, {'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False}], 'C4': ['Hot Wings', 3, {'nutritionalInfo': {'kcal': 270, 'fat': 18, 'protein': 19, 'itemId': 5, 'allergens': ['wheat']}, 'available': False}], 'C5': ['Snackbox', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 69, 'allergens': ['']}, 'available': False}], 'C6': ['Crispy Tenders', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 70, 'allergens': ['']}, 'available': False}], 'C7': ['Original Piece', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 76, 'allergens': ['']}, 'available': False}], 'C8': ['Tender Chicken', 15, {'nutritionalInfo': {'kcal': 150, 'fat':

In [8]:
restructured_chicken = []

for item_id, details in menu_data["Chicken"].items():
    item_name, price, item_details = details
    nutritional_info = item_details['nutritionalInfo']
    available = item_details['available']
    allergens = nutritional_info.get('allergens', [])

    restructured_item = {
        "id": item_id,
        "name": item_name,
        "type": "Chicken",
        "price": price,
        "nutritionalInfo": nutritional_info,
        "available": available,
        "allergens": allergens
    }

    restructured_chicken.append(restructured_item)

# Now, restructured_items contains all items in the new format

In [9]:
print(restructured_chicken)

[{'id': 'C1', 'name': 'Original Recipe', 'type': 'Chicken', 'price': 3.5, 'nutritionalInfo': {'kcal': 400, 'fat': 22, 'protein': 28, 'itemId': 4, 'allergens': ['wheat', 'soy']}, 'available': False, 'allergens': ['wheat', 'soy']}, {'id': 'C2', 'name': 'Popcorn Chicken', 'type': 'Chicken', 'price': 4, 'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False, 'allergens': ['wheat', 'soy']}, {'id': 'C4', 'name': 'Hot Wings', 'type': 'Chicken', 'price': 3, 'nutritionalInfo': {'kcal': 270, 'fat': 18, 'protein': 19, 'itemId': 5, 'allergens': ['wheat']}, 'available': False, 'allergens': ['wheat']}, {'id': 'C5', 'name': 'Snackbox', 'type': 'Chicken', 'price': 15, 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 69, 'allergens': ['']}, 'available': False, 'allergens': ['']}, {'id': 'C6', 'name': 'Crispy Tenders', 'type': 'Chicken', 'price': 15, 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemI

In [10]:
# Function to concatenate item information into a text string
def process_text(chicken_items):
    processed_items = []
    for item in chicken_items:
        # Combine name, type, and allergens into a string
        name_type = f"{item['name']} {item['type']}"
        allergens = " ".join(item['allergens']) if item['allergens'] else "No allergens"
        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text,
            'nutritionalInfo': item['nutritionalInfo'],
            'available': item['available']
        })
    return processed_items

# Process the chicken items to get text representation
processed_chicken_items = process_text(restructured_chicken)

In [11]:
menu = []

In [12]:
menu

[]

In [13]:
# Example output
for item in processed_chicken_items:  # Showing first 2 items for brevity
    print(item)

{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy', 'nutritionalInfo': {'kcal': 400, 'fat': 22, 'protein': 28, 'itemId': 4, 'allergens': ['wheat', 'soy']}, 'available': False}
{'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy', 'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False}
{'id': 'C4', 'text': 'Hot Wings Chicken wheat', 'nutritionalInfo': {'kcal': 270, 'fat': 18, 'protein': 19, 'itemId': 5, 'allergens': ['wheat']}, 'available': False}
{'id': 'C5', 'text': 'Snackbox Chicken ', 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 69, 'allergens': ['']}, 'available': False}
{'id': 'C6', 'text': 'Crispy Tenders Chicken ', 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 70, 'allergens': ['']}, 'available': False}
{'id': 'C7', 'text': 'Original Piece Chicken ', 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 76, 'allergens': ['']}, 'available'

In [14]:
# Function to concatenate item information into a text string
def process_text(chicken_items):
    processed_items = []
    for item in chicken_items:
        # Combine name, type, and allergens into a string
        name_type = f"{item['name']} {item['type']}"
        allergens = " ".join(item['allergens']) if item['allergens'] else "No allergens"
        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the chicken items to get text representation
processed_chicken_items = process_text(restructured_chicken)

# Example output
for item in processed_chicken_items:  # Showing first 2 items for brevity
    print(item)

{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'}
{'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'}
{'id': 'C4', 'text': 'Hot Wings Chicken wheat'}
{'id': 'C5', 'text': 'Snackbox Chicken '}
{'id': 'C6', 'text': 'Crispy Tenders Chicken '}
{'id': 'C7', 'text': 'Original Piece Chicken '}
{'id': 'C8', 'text': 'Tender Chicken Chicken '}


In [15]:
menu.extend(processed_chicken_items)

In [16]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '}]

In [17]:
# Assuming 'menu_data' contains your JSON data

# Initialize a list to hold restructured drink items
restructured_drinks = []

# Iterate over the "Drinks" category in your original JSON structure
for drink_id, details in menu_data["Drinks"].items():
    drink_name = details[0]
    price = details[1]
    # Some drinks might not have 'nutritionalInfo', so we use .get() method to safely extract it
    nutritional_info = details[2].get('nutritionalInfo', {})
    available = details[2].get('available', False)

    # Create a new dictionary for each drink including all its details and a 'type' field
    restructured_drink = {
        "id": drink_id,
        "name": drink_name,
        "type": "Drink",
        "price": price,
        "nutritionalInfo": nutritional_info,
        "available": available
    }

    # Append this dictionary to the list
    restructured_drinks.append(restructured_drink)

# The 'restructured_drinks' list now contains all drinks in the new format

In [18]:
print(restructured_drinks)

[{'id': 'D1', 'name': 'Iced Tea', 'type': 'Drink', 'price': 2.8, 'nutritionalInfo': {'kcal': 340, 'fat': 17, 'protein': 4, 'itemId': 12, 'allergens': ['wheat']}, 'available': False}, {'id': 'D2', 'name': 'Pepsi', 'type': 'Drink', 'price': 2.8, 'nutritionalInfo': {'kcal': 170, 'fat': 10, 'protein': 2, 'itemId': 9, 'allergens': ['dairy']}, 'available': False}, {'id': 'D3', 'name': '7Up', 'type': 'Drink', 'price': 2.8, 'nutritionalInfo': {'kcal': 340, 'fat': 17, 'protein': 4, 'itemId': 10, 'allergens': ['wheat']}, 'available': False}, {'id': 'D4', 'name': 'Fanta', 'type': 'Drink', 'price': 2.8, 'nutritionalInfo': {'kcal': 170, 'fat': 10, 'protein': 2, 'itemId': 11, 'allergens': ['dairy']}, 'available': False}, {'id': 'D5', 'name': 'Sourcy', 'type': 'Drink', 'price': 15, 'nutritionalInfo': {}, 'available': False}, {'id': 'D6', 'name': 'Tropicana Apple', 'type': 'Drink', 'price': 15, 'nutritionalInfo': {}, 'available': False}, {'id': 'D7', 'name': 'Guava', 'type': 'Drink', 'price': 15, 'nut

In [19]:
def process_drink_text(drink_items):
    processed_items = []
    for item in drink_items:
        # Combine name and type
        name_type = f"{item['name']} {item['type']}"

        # Handle allergens, defaulting to "No allergens" if none are present
        allergens = " ".join(item['nutritionalInfo'].get('allergens', ['No allergens'])) if 'nutritionalInfo' in item and 'allergens' in item['nutritionalInfo'] else "No allergens"

        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the drinks items to get text representation
processed_drinks_items = process_drink_text(restructured_drinks)

In [20]:
# Example output
for item in processed_drinks_items:
    print(item)

{'id': 'D1', 'text': 'Iced Tea Drink wheat'}
{'id': 'D2', 'text': 'Pepsi Drink dairy'}
{'id': 'D3', 'text': '7Up Drink wheat'}
{'id': 'D4', 'text': 'Fanta Drink dairy'}
{'id': 'D5', 'text': 'Sourcy Drink No allergens'}
{'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'}
{'id': 'D7', 'text': 'Guava Drink No allergens'}
{'id': 'D8', 'text': 'Tea Drink No allergens'}
{'id': 'D9', 'text': 'Latte Drink No allergens'}
{'id': 'D10', 'text': 'Espresso Drink No allergens'}
{'id': 'D11', 'text': 'Coffee Drink No allergens'}
{'id': 'D12', 'text': 'Sisi Drink No allergens'}
{'id': 'D13', 'text': 'Fernandes Drink No allergens'}
{'id': 'D14', 'text': 'Lipton Drink No allergens'}


In [21]:
menu.extend(processed_drinks_items)

In [22]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [23]:
# New structure for items including nutritional information handling
restructured_burgers = []

# Iterate over Burgers
for burger_key, burger_value in menu_data["Burgers"].items():
    # Initialize a dictionary for each burger
    burger_dict = {
        "id": burger_key,
        "name": burger_value[0],
        "price": burger_value[1],
        "type": "Burger",
        "available": burger_value[2].get("available", True)  # Default to True if not specified
    }

    # Check if nutritionalInfo exists, then include it
    if "nutritionalInfo" in burger_value[2]:
        burger_dict["nutritionalInfo"] = burger_value[2]["nutritionalInfo"]

    # Append the burger dictionary to the items list
    restructured_burgers.append(burger_dict)

In [24]:
print(restructured_burgers)

[{'id': 'B1', 'name': 'Crunch Burger', 'price': 15, 'type': 'Burger', 'available': False, 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 77, 'allergens': ['']}}, {'id': 'B2', 'name': 'Original Fillet Breaded', 'price': 15, 'type': 'Burger', 'available': False, 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 84, 'allergens': ['']}}, {'id': 'B3', 'name': 'Original Fillet', 'price': 15, 'type': 'Burger', 'available': False}, {'id': 'B4', 'name': 'Filet Burger', 'price': 4.7, 'type': 'Burger', 'available': False, 'nutritionalInfo': {'kcal': 400, 'fat': 20, 'protein': 27, 'itemId': 3, 'allergens': ['wheat', 'soy']}}, {'id': 'B5', 'name': 'Zinger Burger', 'price': 4.5, 'type': 'Burger', 'available': False, 'nutritionalInfo': {'kcal': 451, 'fat': 25, 'protein': 28, 'itemId': 1, 'allergens': ['wheat', 'soy']}}, {'id': 'B6', 'name': 'Tower Burger', 'price': 5.2, 'type': 'Burger', 'available': False, 'nutritionalInfo': {'kcal': 500, 'fat': 30, 'protei

In [25]:
def process_burger_text(burger_items):
    processed_items = []
    for item in burger_items:
        # Combine name and type
        name_type = f"{item['name']} {item['type']}"

        # Check for allergens in nutritionalInfo, defaulting to "No allergens" if none are present
        allergens = " ".join(item['nutritionalInfo'].get('allergens', ['No allergens'])) if 'nutritionalInfo' in item and 'allergens' in item['nutritionalInfo'] else "No allergens"

        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Assuming restructured_burgers is your list of burgers with the new structure
processed_burgers_items = process_burger_text(restructured_burgers)

In [26]:
# Example output
for item in processed_burgers_items:
    print(item)

{'id': 'B1', 'text': 'Crunch Burger Burger '}
{'id': 'B2', 'text': 'Original Fillet Breaded Burger '}
{'id': 'B3', 'text': 'Original Fillet Burger No allergens'}
{'id': 'B4', 'text': 'Filet Burger Burger wheat soy'}
{'id': 'B5', 'text': 'Zinger Burger Burger wheat soy'}
{'id': 'B6', 'text': 'Tower Burger Burger wheat soy'}
{'id': 'B7', 'text': 'Veggie Burger Burger whey soy'}
{'id': 'B8', 'text': 'Fire Zinger Stacker Burger '}
{'id': 'B9', 'text': 'Fire Zinger Burger '}
{'id': 'B10', 'text': 'Colonel Stacker Burger '}
{'id': 'B11', 'text': 'Colonel Burger Burger '}
{'id': 'B12', 'text': 'Veggie Tender Burger '}
{'id': 'B13', 'text': 'Filet Bites Burger '}
{'id': 'B14', 'text': 'Original Burger Burger '}
{'id': 'B15', 'text': 'Zinger Burger Burger '}
{'id': 'B16', 'text': 'Cheeseburger Burger '}
{'id': 'B17', 'text': 'Zinger Filet Burger '}


In [27]:
menu.extend(processed_burgers_items)

In [28]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [29]:
# New structure for sauces
restructured_sauces = []

# Iterate over the "Sauces" section
for sauce_id, details in menu_data["Sauces"].items():
    sauce_dict = {
        "id": sauce_id,
        "name": details[0],
        "price": details[1],
        "type": "Sauce",
        # Default availability to True if not specified
        "available": details[2].get("available", True)
    }
    # Add the structured sauce dictionary to the sauces list
    restructured_sauces.append(sauce_dict)

In [30]:
print(restructured_sauces)

[{'id': 'S1', 'name': 'Apple Sauce', 'price': 15, 'type': 'Sauce', 'available': False}]


In [31]:
def process_sauce_text(sauce_items):
    processed_items = []
    for item in sauce_items:
        # Since there's no nutritional info or allergens provided, just combine name and type
        name_type = f"{item['name']} {item['type']}"

        processed_text = name_type  # For sauces, we might not have allergens or other detailed info

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the sauces items to get text representation
processed_sauces_items = process_sauce_text(restructured_sauces)

In [32]:
# Example output
for item in processed_sauces_items:
    print(item)

{'id': 'S1', 'text': 'Apple Sauce Sauce'}


In [33]:
menu.extend(processed_sauces_items)

In [34]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [35]:
# Initialize a list to hold restructured drink items
restructured_sd = []

# Iterate over the "Drinks" category in your original JSON structure
for sd_id, details in menu_data["Side dishes"].items():
    sd_name = details[0]
    price = details[1]
    # Some drinks might not have 'nutritionalInfo', so we use .get() method to safely extract it
    nutritional_info = details[2].get('nutritionalInfo', {})
    available = details[2].get('available', False)

    # Create a new dictionary for each drink including all its details and a 'type' field
    restructured_dict = {
        "id": sd_id,
        "name": sd_name,
        "type": "Side dish",
        "price": price,
        "nutritionalInfo": nutritional_info,
        "available": available
    }

    # Append this dictionary to the list
    restructured_sd.append(restructured_dict)

In [36]:
restructured_sd

[{'id': 'SD1',
  'name': 'Coleslaw',
  'type': 'Side dish',
  'price': 1.7,
  'nutritionalInfo': {'kcal': 170,
   'fat': 10,
   'protein': 2,
   'itemId': 7,
   'allergens': ['dairy']},
  'available': False},
 {'id': 'SD2',
  'name': 'Fries',
  'type': 'Side dish',
  'price': 2.5,
  'nutritionalInfo': {'kcal': 340,
   'fat': 17,
   'protein': 4,
   'itemId': 8,
   'allergens': ['wheat']},
  'available': False},
 {'id': 'SD3',
  'name': 'Corn',
  'type': 'Side dish',
  'price': 15,
  'nutritionalInfo': {'kcal': 150,
   'fat': 100,
   'protein': 10,
   'itemId': 73,
   'allergens': ['']},
  'available': False}]

In [37]:
def process_side_dish_text(side_dish_items):
    processed_items = []
    for item in side_dish_items:
        # Combine name and type
        name_type = f"{item['name']} {item['type']}"

        # Handle allergens, defaulting to "No allergens" if none are present
        allergens = " ".join(item['nutritionalInfo'].get('allergens', ['No allergens'])) if 'nutritionalInfo' in item and 'allergens' in item['nutritionalInfo'] and item['nutritionalInfo']['allergens'] else "No allergens"

        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the side dishes items to get text representation
processed_side_dishes_items = process_side_dish_text(restructured_sd)

In [38]:
# Example output
for item in processed_side_dishes_items:
    print(item)

{'id': 'SD1', 'text': 'Coleslaw Side dish dairy'}
{'id': 'SD2', 'text': 'Fries Side dish wheat'}
{'id': 'SD3', 'text': 'Corn Side dish '}


In [39]:
menu.extend(processed_side_dishes_items)

In [40]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [41]:
# Initialize a list to hold restructured drink items
restructured_dessert = []

# Iterate over the "Drinks" category in your original JSON structure
for dessert_id, details in menu_data["Desserts"].items():
    dessert_name = details[0]
    price = details[1]
    # Some drinks might not have 'nutritionalInfo', so we use .get() method to safely extract it
    nutritional_info = details[2].get('nutritionalInfo', {})
    available = details[2].get('available', False)

    # Create a new dictionary for each drink including all its details and a 'type' field
    restructured_dessert_dict = {
        "id": dessert_id,
        "name": dessert_name,
        "type": "Desserts",
        "price": price,
        "nutritionalInfo": nutritional_info,
        "available": available
    }

    # Append this dictionary to the list
    restructured_dessert.append(restructured_dessert_dict)

In [42]:
restructured_dessert

[{'id': 'DE1',
  'name': 'Chocolate Sundae',
  'type': 'Desserts',
  'price': 2.5,
  'nutritionalInfo': {'kcal': 220,
   'fat': 9,
   'protein': 3,
   'itemId': 13,
   'allergens': ['dairy', 'soy']},
  'available': False},
 {'id': 'DE2',
  'name': 'Apple Pie',
  'type': 'Desserts',
  'price': 2.2,
  'nutritionalInfo': {'kcal': 250,
   'fat': 12,
   'protein': 2,
   'itemId': 14,
   'allergens': ['wheat', 'soy']},
  'available': False},
 {'id': 'DE3',
  'name': 'Ice Cream',
  'type': 'Desserts',
  'price': 15,
  'nutritionalInfo': {'kcal': 150,
   'fat': 100,
   'protein': 10,
   'itemId': 95,
   'allergens': ['']},
  'available': False}]

In [43]:
def process_dessert_text(dessert_items):
    processed_items = []
    for item in dessert_items:
        # Combine name and type
        name_type = f"{item['name']} {item['type']}"

        # Handle allergens, defaulting to "No allergens" if none are present
        allergens = " ".join(item['nutritionalInfo'].get('allergens', ['No allergens'])) if 'nutritionalInfo' in item and 'allergens' in item['nutritionalInfo'] and item['nutritionalInfo']['allergens'] else "No allergens"

        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the desserts items to get text representation
processed_desserts_items = process_dessert_text(restructured_dessert)

In [44]:
# Example output
for item in processed_desserts_items:
    print(item)

{'id': 'DE1', 'text': 'Chocolate Sundae Desserts dairy soy'}
{'id': 'DE2', 'text': 'Apple Pie Desserts wheat soy'}
{'id': 'DE3', 'text': 'Ice Cream Desserts '}


In [45]:
menu.extend(processed_desserts_items)

In [46]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [47]:
# Initialize a list to hold restructured drink items
restructured_vegetarian = []

# Iterate over the "Drinks" category in your original JSON structure
for vegetarian_id, details in menu_data["Vegetarian"].items():
    vegetarian_name = details[0]
    price = details[1]
    # Some drinks might not have 'nutritionalInfo', so we use .get() method to safely extract it
    nutritional_info = details[2].get('nutritionalInfo', {})
    available = details[2].get('available', False)

    # Create a new dictionary for each drink including all its details and a 'type' field
    restructured_vegetarian_dict = {
        "id": vegetarian_id,
        "name": vegetarian_name,
        "type": "Vegetarian",
        "price": price,
        "nutritionalInfo": nutritional_info,
        "available": available
    }

    # Append this dictionary to the list
    restructured_vegetarian.append(restructured_vegetarian_dict)

In [48]:
restructured_vegetarian

[{'id': 'V1',
  'name': 'Veggie',
  'type': 'Vegetarian',
  'price': 15,
  'nutritionalInfo': {'kcal': 150,
   'fat': 100,
   'protein': 10,
   'itemId': 72,
   'allergens': ['']},
  'available': False},
 {'id': 'V2',
  'name': 'Veggie Tenders',
  'type': 'Vegetarian',
  'price': 15,
  'nutritionalInfo': {'kcal': 150,
   'fat': 100,
   'protein': 10,
   'itemId': 78,
   'allergens': ['']},
  'available': False}]

In [49]:
def process_vegetarian_text(vegetarian_items):
    processed_items = []
    for item in vegetarian_items:
        # Combine name and type
        name_type = f"{item['name']} {item['type']}"

        # Handle allergens, defaulting to "No allergens" if none are present
        allergens = " ".join(item['nutritionalInfo'].get('allergens', ['No allergens'])) if 'nutritionalInfo' in item and 'allergens' in item['nutritionalInfo'] and item['nutritionalInfo']['allergens'] else "No allergens"

        processed_text = f"{name_type} {allergens}"

        processed_items.append({
            'id': item['id'],
            'text': processed_text
        })
    return processed_items

# Process the vegetarian items to get text representation
processed_vegetarian_items = process_vegetarian_text(restructured_vegetarian)

In [50]:
# Example output
for item in processed_vegetarian_items:
    print(item)

{'id': 'V1', 'text': 'Veggie Vegetarian '}
{'id': 'V2', 'text': 'Veggie Tenders Vegetarian '}


In [51]:
menu.extend(processed_vegetarian_items)

In [52]:
menu

[{'id': 'C1', 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 'C2', 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 'C4', 'text': 'Hot Wings Chicken wheat'},
 {'id': 'C5', 'text': 'Snackbox Chicken '},
 {'id': 'C6', 'text': 'Crispy Tenders Chicken '},
 {'id': 'C7', 'text': 'Original Piece Chicken '},
 {'id': 'C8', 'text': 'Tender Chicken Chicken '},
 {'id': 'D1', 'text': 'Iced Tea Drink wheat'},
 {'id': 'D2', 'text': 'Pepsi Drink dairy'},
 {'id': 'D3', 'text': '7Up Drink wheat'},
 {'id': 'D4', 'text': 'Fanta Drink dairy'},
 {'id': 'D5', 'text': 'Sourcy Drink No allergens'},
 {'id': 'D6', 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 'D7', 'text': 'Guava Drink No allergens'},
 {'id': 'D8', 'text': 'Tea Drink No allergens'},
 {'id': 'D9', 'text': 'Latte Drink No allergens'},
 {'id': 'D10', 'text': 'Espresso Drink No allergens'},
 {'id': 'D11', 'text': 'Coffee Drink No allergens'},
 {'id': 'D12', 'text': 'Sisi Drink No allergens'},
 {'id': 'D13', 'text': 'Fernandes Dr

In [53]:
# Initialize variables
id_to_number = {}
number_to_id = {}
current_number = 1  # Start numbering from 1

# Iterate over each item in the dataset and assign numbers to IDs
for item in menu:
    item_id = item['id']
    # Check if the ID already has a number assigned
    if item_id not in id_to_number:
        # Assign a new number to the ID
        id_to_number[item_id] = current_number
        number_to_id[current_number] = item_id
        current_number += 1  # Increment the number for the next ID

# Update IDs in the menu to their corresponding numbers
for item in menu:
    item['id'] = id_to_number[item['id']]

# Example output
print("ID to Number Mapping:", id_to_number)
print("Number to ID Mapping:", number_to_id)
print("Updated Menu:", menu)  # Print first 5 items for brevity

ID to Number Mapping: {'C1': 1, 'C2': 2, 'C4': 3, 'C5': 4, 'C6': 5, 'C7': 6, 'C8': 7, 'D1': 8, 'D2': 9, 'D3': 10, 'D4': 11, 'D5': 12, 'D6': 13, 'D7': 14, 'D8': 15, 'D9': 16, 'D10': 17, 'D11': 18, 'D12': 19, 'D13': 20, 'D14': 21, 'B1': 22, 'B2': 23, 'B3': 24, 'B4': 25, 'B5': 26, 'B6': 27, 'B7': 28, 'B8': 29, 'B9': 30, 'B10': 31, 'B11': 32, 'B12': 33, 'B13': 34, 'B14': 35, 'B15': 36, 'B16': 37, 'B17': 38, 'S1': 39, 'SD1': 40, 'SD2': 41, 'SD3': 42, 'DE1': 43, 'DE2': 44, 'DE3': 45, 'V1': 46, 'V2': 47}
Number to ID Mapping: {1: 'C1', 2: 'C2', 3: 'C4', 4: 'C5', 5: 'C6', 6: 'C7', 7: 'C8', 8: 'D1', 9: 'D2', 10: 'D3', 11: 'D4', 12: 'D5', 13: 'D6', 14: 'D7', 15: 'D8', 16: 'D9', 17: 'D10', 18: 'D11', 19: 'D12', 20: 'D13', 21: 'D14', 22: 'B1', 23: 'B2', 24: 'B3', 25: 'B4', 26: 'B5', 27: 'B6', 28: 'B7', 29: 'B8', 30: 'B9', 31: 'B10', 32: 'B11', 33: 'B12', 34: 'B13', 35: 'B14', 36: 'B15', 37: 'B16', 38: 'B17', 39: 'S1', 40: 'SD1', 41: 'SD2', 42: 'SD3', 43: 'DE1', 44: 'DE2', 45: 'DE3', 46: 'V1', 47: 

In [54]:
id_to_number

{'C1': 1,
 'C2': 2,
 'C4': 3,
 'C5': 4,
 'C6': 5,
 'C7': 6,
 'C8': 7,
 'D1': 8,
 'D2': 9,
 'D3': 10,
 'D4': 11,
 'D5': 12,
 'D6': 13,
 'D7': 14,
 'D8': 15,
 'D9': 16,
 'D10': 17,
 'D11': 18,
 'D12': 19,
 'D13': 20,
 'D14': 21,
 'B1': 22,
 'B2': 23,
 'B3': 24,
 'B4': 25,
 'B5': 26,
 'B6': 27,
 'B7': 28,
 'B8': 29,
 'B9': 30,
 'B10': 31,
 'B11': 32,
 'B12': 33,
 'B13': 34,
 'B14': 35,
 'B15': 36,
 'B16': 37,
 'B17': 38,
 'S1': 39,
 'SD1': 40,
 'SD2': 41,
 'SD3': 42,
 'DE1': 43,
 'DE2': 44,
 'DE3': 45,
 'V1': 46,
 'V2': 47}

In [55]:
number_to_id

{1: 'C1',
 2: 'C2',
 3: 'C4',
 4: 'C5',
 5: 'C6',
 6: 'C7',
 7: 'C8',
 8: 'D1',
 9: 'D2',
 10: 'D3',
 11: 'D4',
 12: 'D5',
 13: 'D6',
 14: 'D7',
 15: 'D8',
 16: 'D9',
 17: 'D10',
 18: 'D11',
 19: 'D12',
 20: 'D13',
 21: 'D14',
 22: 'B1',
 23: 'B2',
 24: 'B3',
 25: 'B4',
 26: 'B5',
 27: 'B6',
 28: 'B7',
 29: 'B8',
 30: 'B9',
 31: 'B10',
 32: 'B11',
 33: 'B12',
 34: 'B13',
 35: 'B14',
 36: 'B15',
 37: 'B16',
 38: 'B17',
 39: 'S1',
 40: 'SD1',
 41: 'SD2',
 42: 'SD3',
 43: 'DE1',
 44: 'DE2',
 45: 'DE3',
 46: 'V1',
 47: 'V2'}

In [56]:
menu

[{'id': 1, 'text': 'Original Recipe Chicken wheat soy'},
 {'id': 2, 'text': 'Popcorn Chicken Chicken wheat soy'},
 {'id': 3, 'text': 'Hot Wings Chicken wheat'},
 {'id': 4, 'text': 'Snackbox Chicken '},
 {'id': 5, 'text': 'Crispy Tenders Chicken '},
 {'id': 6, 'text': 'Original Piece Chicken '},
 {'id': 7, 'text': 'Tender Chicken Chicken '},
 {'id': 8, 'text': 'Iced Tea Drink wheat'},
 {'id': 9, 'text': 'Pepsi Drink dairy'},
 {'id': 10, 'text': '7Up Drink wheat'},
 {'id': 11, 'text': 'Fanta Drink dairy'},
 {'id': 12, 'text': 'Sourcy Drink No allergens'},
 {'id': 13, 'text': 'Tropicana Apple Drink No allergens'},
 {'id': 14, 'text': 'Guava Drink No allergens'},
 {'id': 15, 'text': 'Tea Drink No allergens'},
 {'id': 16, 'text': 'Latte Drink No allergens'},
 {'id': 17, 'text': 'Espresso Drink No allergens'},
 {'id': 18, 'text': 'Coffee Drink No allergens'},
 {'id': 19, 'text': 'Sisi Drink No allergens'},
 {'id': 20, 'text': 'Fernandes Drink No allergens'},
 {'id': 21, 'text': 'Lipton Drink

#Convert Text to Vector

In [57]:
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
from io import StringIO
import numpy as np

# Simulate a file-like object for gensim LineSentence
text_data = "\n".join([item['text'] for item in menu])
file_like_text_data = StringIO(text_data)

# Training the Word2Vec model
model = Word2Vec(LineSentence(file_like_text_data), vector_size=100, window=5, min_count=1, workers=4)

# Converting text to vectors
vector_data = []
for item in menu:
    words = item['text'].split()
    vector = np.mean([model.wv[word] for word in words if word in model.wv], axis=0)
    vector_data.append({'id': item['id'], 'vector': vector})

In [58]:
# Now vector_data contains the vectors for each item
for item in vector_data:  # Example: showing vectors for the first two items
    print(item['id'], item['vector'])

1 [-8.8615454e-04 -1.9558282e-03 -9.8371343e-04 -2.4144743e-03
 -2.2195640e-03 -4.0555885e-04  2.2489713e-03 -1.0841541e-03
 -1.9156088e-03 -1.8200704e-03 -2.7639654e-03  1.7959175e-03
 -3.9712959e-03 -5.0929002e-04 -2.0093061e-03 -3.6150091e-03
 -7.5639301e-04  3.3101813e-05  1.2081666e-03 -3.1747431e-03
 -6.0038816e-04 -4.9682020e-04  6.2101260e-03  1.0091830e-03
 -1.3176150e-03  2.5987781e-03  4.5471545e-04  1.2289261e-03
 -4.4092704e-03  2.9023106e-03  4.3408684e-03  9.4132358e-04
 -2.3570745e-03 -4.7339844e-03 -7.2896417e-04  2.2024414e-04
 -2.2364906e-03  1.2068751e-03 -1.8911154e-03 -7.8552344e-04
  4.7924011e-03 -2.8405930e-03 -3.2602870e-03 -1.2520101e-03
  2.6713200e-03 -2.7545064e-03 -3.3542509e-03 -9.3297922e-04
  2.2663889e-03 -1.7566124e-03  5.6913320e-04 -3.1498582e-03
  4.1890023e-03 -5.8451435e-05 -3.3443850e-03  6.8749563e-04
 -2.6003618e-03 -4.0705614e-03 -4.0887492e-03 -4.1368203e-03
  2.8595745e-03 -2.8496883e-03  1.8868301e-03 -2.1208248e-04
  1.2002033e-04  3.341

In [59]:
vector_data

[{'id': 1,
  'vector': array([-8.8615454e-04, -1.9558282e-03, -9.8371343e-04, -2.4144743e-03,
         -2.2195640e-03, -4.0555885e-04,  2.2489713e-03, -1.0841541e-03,
         -1.9156088e-03, -1.8200704e-03, -2.7639654e-03,  1.7959175e-03,
         -3.9712959e-03, -5.0929002e-04, -2.0093061e-03, -3.6150091e-03,
         -7.5639301e-04,  3.3101813e-05,  1.2081666e-03, -3.1747431e-03,
         -6.0038816e-04, -4.9682020e-04,  6.2101260e-03,  1.0091830e-03,
         -1.3176150e-03,  2.5987781e-03,  4.5471545e-04,  1.2289261e-03,
         -4.4092704e-03,  2.9023106e-03,  4.3408684e-03,  9.4132358e-04,
         -2.3570745e-03, -4.7339844e-03, -7.2896417e-04,  2.2024414e-04,
         -2.2364906e-03,  1.2068751e-03, -1.8911154e-03, -7.8552344e-04,
          4.7924011e-03, -2.8405930e-03, -3.2602870e-03, -1.2520101e-03,
          2.6713200e-03, -2.7545064e-03, -3.3542509e-03, -9.3297922e-04,
          2.2663889e-03, -1.7566124e-03,  5.6913320e-04, -3.1498582e-03,
          4.1890023e-03, -5.84

#**End of Converting Menu to Vector**

#**Save the Vector to in-memory Vector Database**

In [60]:
from annoy import AnnoyIndex

# Assuming each vector is 100-dimensional
dimension = 100
index = AnnoyIndex(dimension, 'angular')  # Initialize Annoy index

# Insert vectors into the index, assuming vector_data is a list of dictionaries as shown in your output
for item in vector_data:
    item_id = item['id']  # This should already be an integer
    vector = item['vector']
    index.add_item(item_id, vector)  # Directly use item_id without conversion

# Build the index
index.build(10)  # 10 trees

# Save the index to disk (optional but recommended for persistence)
index.save('menu_index.ann')

True

In [62]:
# Assuming 'vector_data' list's 'id' values have been converted to integers as per the mapping
first_item_id = vector_data[0]['id']  # This is already an integer now
first_vector = index.get_item_vector(first_item_id)

print(f"Vector for ID {first_item_id}: {first_vector}")

Vector for ID 1: [-0.0008861545356921852, -0.0019558281637728214, -0.0009837134275585413, -0.0024144742637872696, -0.0022195640485733747, -0.00040555885061621666, 0.0022489712573587894, -0.0010841541225090623, -0.0019156087655574083, -0.0018200704362243414, -0.0027639653999358416, 0.0017959175165742636, -0.0039712958969175816, -0.000509290024638176, -0.002009306102991104, -0.003615009132772684, -0.0007563930121250451, 3.310181273263879e-05, 0.001208166591823101, -0.0031747431494295597, -0.0006003881571814418, -0.0004968201974406838, 0.006210125982761383, 0.0010091830044984818, -0.001317615038715303, 0.002598778111860156, 0.00045471545308828354, 0.0012289261212572455, -0.0044092703610658646, 0.002902310574427247, 0.004340868443250656, 0.0009413235820829868, -0.002357074525207281, -0.004733984358608723, -0.0007289641653187573, 0.00022024413920007646, -0.0022364906035363674, 0.001206875080242753, -0.0018911154475063086, -0.0007855234434828162, 0.004792401101440191, -0.0028405929915606976,

#**End of Save the Vector to in-memory Vector Database**

#**Part 2: Query Processing**

##**1. Query Interpretation**
##**2. Vector Representation**
##**3. Vector Matching**
##**4. Retrieving Matched Items**

##**1. Query Interpretation**

In [63]:
import spacy

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

def process_query(query):
    # Process the query with spaCy
    doc = nlp(query)

    # Extract relevant parts of the query
    items = [token.lemma_ for token in doc if token.pos_ in ['NOUN', 'PROPN']]
    attributes = [token.lemma_ for token in doc if token.pos_ == 'ADJ']

    # Construct a representation similar to menu item entries
    query_representation = ' '.join(items + attributes)

    return query_representation

In [64]:
# Example query
query = "Can I get a Popcorn Chicken?"
query_representation = process_query(query)

print(f"Processed Query: {query_representation}")

Processed Query: Popcorn Chicken


##**2. Vector Representation**

In [65]:
from gensim.models import Word2Vec
import numpy as np

# Assuming `model` is your Word2Vec model trained on the menu data
# and `query_representation` is the output from your query processing script

# Function to convert query representation to vector using the Word2Vec model
def query_to_vector(query_representation, model):
    words = query_representation.split()
    # Ensure all words are in the model's vocabulary
    vector = np.mean([model.wv[word] for word in words if word in model.wv], axis=0)
    return vector

# Example usage
query = "Can I get a Popcorn Chicken?"
processed_query = process_query(query)  # Assuming process_query is your function from the previous step
query_vector_representation = query_to_vector(processed_query, model)

print(f"Query Vector: {query_vector_representation}")

Query Vector: [-1.6897775e-03  5.9821708e-03 -3.9663780e-03 -4.9451645e-03
 -7.2448514e-03  2.4940274e-03  7.0514772e-03  1.7011807e-03
 -6.9909520e-04 -1.8164353e-04 -2.9890332e-04 -5.1790080e-03
 -2.0843945e-03 -5.5355714e-03 -2.4453532e-03 -1.4858029e-03
 -3.6648740e-03  2.1479584e-03 -1.4984350e-03 -7.4453058e-04
 -5.6214854e-03 -4.6265968e-03 -8.0996007e-04  6.3106162e-03
 -6.9002132e-03 -3.4808309e-04  2.9311092e-03 -8.2843695e-03
 -8.4795803e-04  3.6333152e-04 -1.3847430e-03 -4.3722312e-03
  2.6445615e-03 -7.4350564e-03 -2.1576688e-03  8.8066412e-03
  7.6469500e-03  7.4616252e-03 -2.4068365e-03  8.9616107e-04
  1.4633485e-03 -2.5891652e-04 -5.8422191e-04 -2.8132708e-03
  3.0519324e-03 -5.5508828e-03  1.9680478e-03  6.0382979e-03
  2.6296992e-03  2.1309941e-03  4.4436776e-05 -2.2417293e-03
  8.7447860e-04  1.0438042e-03 -3.4253644e-03 -3.5037952e-03
  4.8179771e-03 -3.3011578e-03  5.0479872e-04 -7.3569845e-03
  3.0437210e-03 -2.3900131e-03  1.9728600e-03 -4.3165605e-03
  4.178919

##**3. Vector Matching**

In [66]:
from annoy import AnnoyIndex

# Assuming dimension is the dimensionality of your vectors
dimension = 100
index = AnnoyIndex(dimension, 'angular')

# Assuming you've already built and saved your index
index.load('menu_index.ann')  # Load the saved index

def find_similar_items(query_vector, n=1):
    """
    Find `n` most similar items in the vector database for the given query vector.

    Parameters:
    - query_vector: The vector representation of the user's query.
    - n: The number of similar items to retrieve.

    Returns:
    A list of tuples containing the index of the similar items and their similarity scores.
    """
    similar_items = index.get_nns_by_vector(query_vector, n, include_distances = True)
    return similar_items

In [67]:
# Example usage
query_vector = query_vector_representation # Your query vector here
similar_items = find_similar_items(query_vector_representation, n = 5)  # Adjust `n` as needed

print(f"Similar Items: {similar_items}")

Similar Items: ([2, 7, 4, 6, 3], [0.5936204195022583, 0.7856465578079224, 0.9567821025848389, 0.97920823097229, 1.059191107749939])


In [68]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Assuming `vector_data` is your list of dictionaries with 'id' and 'vector' from the in-memory database
# And `query_vector` is the vector representation of your query

def find_most_similar(vector, vector_data, top_n=1):
    # Convert vector_data to a list of vectors for comparison
    vectors = [item['vector'] for item in vector_data]
    # Calculate cosine similarity between the query vector and all vectors in the database
    similarities = cosine_similarity([vector], vectors)[0]
    # Get the top N indices of the most similar vectors
    top_indices = np.argsort(similarities)[-top_n:][::-1]
    # Retrieve IDs and similarities for the most similar items
    similar_items = [(vector_data[i]['id'], similarities[i]) for i in top_indices]
    return similar_items

# Example usage
top_similar_items = find_most_similar(query_vector_representation, vector_data, top_n=5)
print(f"Top Similar Items: {top_similar_items}")

Top Similar Items: [(2, 0.82380736), (7, 0.6913797), (4, 0.54228395), (6, 0.52057564), (3, 0.43905717)]


In [69]:
top_similar_items

[(2, 0.82380736),
 (7, 0.6913797),
 (4, 0.54228395),
 (6, 0.52057564),
 (3, 0.43905717)]

In [70]:
similarity_index = top_similar_items[0][0]

In [71]:
similarity_index

2

##**4. Retrieving Matched Items**

In [72]:
import json

file_path = 'menu.json'  # Change this path if your file is located elsewhere

# Open the file and load its content into a variable
with open(file_path, 'r') as file:
    menu_data = json.load(file)

# Now, 'menu_data' contains the content of 'menu.json'
print(menu_data)  # This will print the content of the menu data

{'Location': {'name': 'KFC'}, 'Chicken': {'C1': ['Original Recipe', 3.5, {'nutritionalInfo': {'kcal': 400, 'fat': 22, 'protein': 28, 'itemId': 4, 'allergens': ['wheat', 'soy']}, 'available': False}], 'C2': ['Popcorn Chicken', 4, {'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False}], 'C4': ['Hot Wings', 3, {'nutritionalInfo': {'kcal': 270, 'fat': 18, 'protein': 19, 'itemId': 5, 'allergens': ['wheat']}, 'available': False}], 'C5': ['Snackbox', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 69, 'allergens': ['']}, 'available': False}], 'C6': ['Crispy Tenders', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 70, 'allergens': ['']}, 'available': False}], 'C7': ['Original Piece', 15, {'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10, 'itemId': 76, 'allergens': ['']}, 'available': False}], 'C8': ['Tender Chicken', 15, {'nutritionalInfo': {'kcal': 150, 'fat':

In [73]:
# Function to retrieve menu item details using vector ID
def retrieve_matched_items(vector_id, menu_data, mapping):
    # Translate vector ID to menu ID
    menu_id = mapping[vector_id]

    # Initialize result variable
    matched_item_details = {}

    # Iterate through menu categories and items
    for category, items in menu_data.items():
        if menu_id in items:
            matched_item_details = items[menu_id]
            break

    return matched_item_details

In [74]:
# Example usage
# vector_id = 36  # Example vector ID
vector_id = similarity_index

mapping = number_to_id

matched_item_details = retrieve_matched_items(vector_id, menu_data, mapping)

print(f"Matched Item Details: {matched_item_details}")

Matched Item Details: ['Popcorn Chicken', 4, {'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False}]


#**End of Query Processing**

#**Part 3: Response Generation**

In [75]:
def generate_response(matched_item):
    # Extracting item details from the matched item
    item_name, price, item_info = matched_item
    response = f"You have ordered {item_name}, priced at ${price}."
    return response

# Input example
# matched_item = ["Popcorn Chicken", 4, {'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25, 'itemId': 6, 'allergens': ['wheat', 'soy']}, 'available': False}]
matched_item = matched_item_details

In [76]:
# Generating the response
response = generate_response(matched_item)
print(response)

You have ordered Popcorn Chicken, priced at $4.


#**Final Testing**

In [77]:
import time

In [101]:
# Start the timer
start_time = time.time()

# query = "Can I get a Popcorn Chicken?"
query = "Can you please order an Apple Pie for me?"

# RAG based Processing
processed_query = process_query(query)  # Assuming process_query is your function from the previous step
query_vector_representation = query_to_vector(processed_query, model)
query_vector = query_vector_representation # Your query vector here
similar_items = find_similar_items(query_vector_representation, n = 3)  # Adjust `n` as needed
top_similar_items = find_most_similar(query_vector_representation, vector_data, top_n=3)
similarity_index = top_similar_items[0][0]
matched_item_details = retrieve_matched_items(similarity_index, menu_data, mapping)
response = generate_response(matched_item_details)

# End the timer
end_time = time.time()

# Calculate the duration in milliseconds
duration_ms = (end_time - start_time) * 1000

# Print the response and the time taken
print(response)
print(f"Response generated in {duration_ms:.2f} milliseconds.")

You have ordered Apple Pie, priced at $2.2.
Response generated in 35.19 milliseconds.


#**The End**