In [None]:
!python -m pip install -q openai

In [None]:
import os
import pandas as pd
import sqlite3
import json
from openai import OpenAI

In [38]:
# All data files are resolved relative to the current working directory.
current_dir = os.getcwd()
# Pass every path component separately so os.path.join picks the separator,
# matching how the menus CSV path is built later in this notebook.
db_path = os.path.join(current_dir, "data", "mydb.sqlite")

In [5]:

# sqlite3.connect() creates a brand-new, empty database when the file does not
# exist, which would only surface later as a confusing "no such table" error.
# Fail fast with an explicit message instead.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# One connection and one cursor are shared by the exploration cells below.
conn = sqlite3.connect(db_path)
cur = conn.cursor()

In [12]:
# List the tables currently defined in the database (execute() returns the
# cursor, so the rows can be fetched in the same expression).
cur.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

[('restaurants',)]

In [9]:
# Column metadata of the `restaurants` table, one row per column.
cur.execute("PRAGMA table_info(restaurants);").fetchall()

[(0, 'id', 'INTEGER', 0, None, 0),
 (1, 'position', 'INTEGER', 0, None, 0),
 (2, 'name', 'TEXT', 0, None, 0),
 (3, 'score', 'REAL', 0, None, 0),
 (4, 'ratings', 'REAL', 0, None, 0),
 (5, 'category', 'TEXT', 0, None, 0),
 (6, 'price_range', 'TEXT', 0, None, 0),
 (7, 'full_address', 'TEXT', 0, None, 0),
 (8, 'zip_code', 'TEXT', 0, None, 0),
 (9, 'lat', 'REAL', 0, None, 0),
 (10, 'lng', 'REAL', 0, None, 0)]

In [7]:
# Peek at the first five restaurant rows.
cur.execute("SELECT * from restaurants limit 5").fetchall()

[(1,
  19,
  'PJ Fresh (224 Daniel Payne Drive)',
  None,
  None,
  'Burgers, American, Sandwiches',
  '$',
  '224 Daniel Payne Drive, Birmingham, AL, 35207',
  '35207',
  33.5623653,
  -86.8307025),
 (2,
  9,
  "J' ti`'z Smoothie-N-Coffee Bar",
  None,
  None,
  'Coffee and Tea, Breakfast and Brunch, Bubble Tea',
  None,
  '1521 Pinson Valley Parkway, Birmingham, AL, 35217',
  '35217',
  33.58364,
  -86.77333),
 (3,
  6,
  'Philly Fresh Cheesesteaks (541-B Graymont Ave)',
  None,
  None,
  'American, Cheesesteak, Sandwiches, Alcohol',
  '$',
  '541-B Graymont Ave, Birmingham, AL, 35204',
  '35204',
  33.5098,
  -86.85464),
 (4,
  17,
  "Papa Murphy's (1580 Montgomery Highway)",
  None,
  None,
  'Pizza',
  '$',
  '1580 Montgomery Highway, Hoover, AL, 35226',
  '35226',
  33.4044388,
  -86.8066142),
 (5,
  162,
  'Nelson Brothers Cafe (17th St N)',
  4.7,
  22.0,
  'Breakfast and Brunch, Burgers, Sandwiches',
  None,
  '314 17th St N, Birmingham, AL, 35203',
  '35203',
  33.51473,
  -8

In [13]:
# Read the menu CSV into a DataFrame and mirror it into SQLite as `menus`.
# if_exists="replace" swaps out any existing `menus` table; rows are written
# in batches of 1000. The value of the final expression is shown as cell output.
menus_csv_path = os.path.join(current_dir, "data", "restaurant-menus.csv")
menus_df = pd.read_csv(menus_csv_path)
menus_df.to_sql(
    name="menus",
    con=conn,
    if_exists="replace",
    index=False,
    chunksize=1000,
)

5117217

In [14]:
# Re-list the tables to confirm that `menus` now exists next to `restaurants`.
cur.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

[('restaurants',), ('menus',)]

In [15]:
# Column metadata of the freshly written `menus` table.
cur.execute("PRAGMA table_info(menus);").fetchall()

[(0, 'restaurant_id', 'INTEGER', 0, None, 0),
 (1, 'category', 'TEXT', 0, None, 0),
 (2, 'name', 'TEXT', 0, None, 0),
 (3, 'description', 'TEXT', 0, None, 0),
 (4, 'price', 'TEXT', 0, None, 0)]

In [16]:
# Peek at the first five menu rows.
cur.execute("SELECT * from menus limit 5").fetchall()

[(1,
  'Extra Large Pizza',
  'Extra Large Meat Lovers',
  'Whole pie.',
  '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large Supreme', 'Whole pie.', '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large Pepperoni', 'Whole pie.', '14.99 USD'),
 (1,
  'Extra Large Pizza',
  'Extra Large BBQ Chicken &amp; Bacon',
  'Whole Pie',
  '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large 5 Cheese', 'Whole pie.', '14.99 USD')]

In [17]:
# Release the SQLite connection opened earlier. `cur` was created from this
# connection, so it should not be used after this point.
conn.close()

In [31]:
# ChatGPT (gpt-5), reached through the Stanford-hosted endpoint using the
# OpenAI client.
# The key is read from a local "API_Key" file so it never appears in the notebook.
with open(os.path.join(current_dir, "API_Key")) as key_file:
    chatgpt_api_key = key_file.read().strip()

chatgpt_model = "gpt-5"
chatgpt_base_url = "https://aiapi-prod.stanford.edu/v1"
chatgpt_api_version = "2025-03-01-preview"

# The API version is passed as a default `api-version` query parameter.
chatgpt_client = OpenAI(
    default_query={"api-version": chatgpt_api_version},
    base_url=chatgpt_base_url,
    api_key=chatgpt_api_key,
)
# Three sample menu items, flattened into a single numbered string.
chatgpt_menu_items = (
    "1 item_id: 1; restaurant_id: 1; Extra Large Meat Lovers Whole Pie "
    "2 item_id: 10; restaurant_id: 1; Jumbo Chicken Wings Five Pieces "
    "3 item_id: 19; restaurant_id: 1; Bon Appetit Cheese and Berry Danish Single"
)

# Structured-output format: a top-level object whose "results" array holds one
# object per menu item. Every field is required and extra keys are rejected.
chatgpt_json_schema = dict(
    format=dict(
        type="json_schema",
        name="results",
        strict=True,
        schema=dict(
            type="object",
            additionalProperties=False,
            properties=dict(
                results=dict(
                    type="array",
                    items=dict(
                        type="object",
                        additionalProperties=False,
                        properties=dict(
                            item_id=dict(type="string"),
                            item_name=dict(type="string"),
                            restaurant_id=dict(type="string"),
                            ingredients=dict(
                                type="array",
                                items=dict(type="string"),
                            ),
                            reasoning=dict(type="string"),
                        ),
                        required=[
                            "item_id",
                            "item_name",
                            "restaurant_id",
                            "ingredients",
                            "reasoning",
                        ],
                    ),
                ),
            ),
            required=["results"],
        ),
    ),
)

# System-style instructions for the ingredient-decomposition task.
# Plain (non-f) string on purpose: the prompt has no placeholders, and an
# f-string would misinterpret any literal braces added to the text later.
# NOTE(review): the prompt asks for an "exact numbered format shown below" and
# "plain text", yet no format is shown and the request enforces a JSON schema
# via `text=`. The wording is kept unchanged so that all three models in this
# notebook keep receiving the same prompt -- confirm before rewording.
chatgpt_instruction = '''
You are a food menu analysis assistant.
Task:
Decompose each restaurant menu item into its likely ingredients.

Input:
You will be given a list of menu items. Each item contains:
- item_id
- item_name
- restaurant_id
- optional description

Output requirements (STRICT):
- Output MUST be machine-readable.
- Output MUST follow the exact numbered format shown below.
- Do NOT include extra commentary, explanations, or markdown.
- Do NOT invent fields that are not requested.
- Use only plain text.
- Each ingredient must be a short noun phrase.
- Ingredients must be ordered from most prominent to least prominent.

For each menu item, infer:
1) The most likely ingredients commonly used in this dish
2) The reasoning explaining WHY those ingredients are present, based on:
   - culinary conventions
   - dish name semantics
   - regional cooking practices
   - typical restaurant preparation methods

Constraints:
- If an ingredient is uncertain, still include it but reflect uncertainty in the reasoning.
- Do NOT include cooking utensils, heat, or generic terms like seasoning(salt, pepper, etc) unless necessary.
- Avoid overly granular items (e.g., “Himalayan pink salt” → “salt”).
'''

# One Responses API call: the menu items are the input, the prompt goes in
# `instructions`, and `text` carries the JSON-schema output format.
chatgpt_response = chatgpt_client.responses.create(
    text=chatgpt_json_schema,
    instructions=chatgpt_instruction,
    input=chatgpt_menu_items,
    model=chatgpt_model,
)

# `chatgpt_response.output_text` holds the model's JSON text; it is decoded
# with json.loads in a later cell.

In [32]:
# Dump the full Response object (id, timestamps, echoed instructions, output
# messages) for inspection.
print(chatgpt_response)

Response(id='resp_d8Ttb5Ep0jktS3QC8Bi95aVzWBNFsJJiAtDP4MRAxiIeybo4cFRm2MUOlVM7GpGmXL-dRRhD16a1RQ0vKM9TJvUMO3Ob3Qt1DLuMBP2QaYALWbYbbio5QjnMuF17C6YYqMXBu6CZ4lFqCNPfJTtcfB3Vv4Ctj_9E6q7H7dGPWsjwFkNHFhpr_AmXptRhHvvRkHS6Md0CrLDZbI_enmy-nosgFgPzI7LRkoDAnvAZNO4l84Rh8Q_729DgkP0_V6ZjHUw-oYltp9cUtkVeOSc2qnOQZJHMlumP2N_VN8eSZnPbRHPzxNlxU0Hhce3hFqM40eOdqYY0vBWJ6tntTtNiioIBC77uuMY-9dkYIE5qA672_6-3DTknuO332hFLEIdFl0ugKgJrC8Y2VYkjTtztft4XW_0kTXJUneQJdX0agQWWy5PP3CzxKf24lzAckgjxi_x_xBW1gbzm41CgJwO_MnP3nX33PFCFMrFDOYrkqifjwVRrhnUqxB9y6gABrobdof3ubo6yu2lb01BR-9_Y3vqLw5YDmUVu7-zp', created_at=1769593190.0, error=None, incomplete_details=None, instructions='\nYou are a food menu analysis assistant.\nTask:\nDecompose each restaurant menu item into its likely ingredients.\n\nInput:\nYou will be given a list of menu items. Each item contains:\n- item_id\n- item_name\n- restaurant_id\n- optional description\n\nOutput requirements (STRICT):\n- Output MUST be machine-readable.\n- Output MUST follow the exact num

In [None]:
# Decode the model's JSON text into Python objects. The raw Response repr is
# already printed by the previous cell, so it is not dumped a second time here.
chatgpt_data = json.loads(chatgpt_response.output_text)
print(type(chatgpt_data))
# Index 1 is the second entry of "results".
print(chatgpt_data["results"][1])  # individual item result

<class 'dict'>
{'item_id': '10', 'item_name': 'Jumbo Chicken Wings Five Pieces', 'restaurant_id': '1', 'ingredients': ['chicken wings', 'Buffalo wing sauce', 'blue cheese dressing', 'celery sticks', 'ranch dressing'], 'reasoning': 'Jumbo wings typically refer to large fried or baked chicken wings served sauced. In many US pizza-shop menus, the default or most common sauce is Buffalo wing sauce, and they are commonly accompanied by blue cheese or ranch dressing and celery sticks. Exact sauce and dip can vary by restaurant, so the accompaniments are somewhat uncertain.'}


In [40]:
# More detailed variant of the prompt: asks for a comprehensive ingredient list
# and a 3-6 sentence justification, and explicitly requests schema-valid JSON.
# Plain (non-f) string on purpose: the prompt has no placeholders, and an
# f-string would misinterpret any literal braces added to the text later.
chatgpt_instruction_detailed = '''
You are a food menu analysis assistant.

Goal:
Given menu items, infer ingredients as detailed as possible while staying realistic.

Input:
Each menu item contains:
- item_id
- item_name
- restaurant_id
- optional description

Output requirements (STRICT):
- Output MUST be valid JSON matching the provided JSON schema.
- Do NOT include any extra keys beyond the schema.
- Do NOT include any markdown or commentary.

Ingredient inference guidelines:
- Provide a comprehensive ingredient list for each item.
- Include likely sub-components where appropriate (e.g., crust/dough, sauce, cheese, protein, vegetables, toppings, fillings, garnish).
- Include likely condiments/sauces/dips and typical sides when the dish commonly comes with them (only if plausible).
- Prefer common, restaurant-standard ingredients over niche/brand-specific ones.
- Avoid tools/heat words; avoid generic placeholders like "seasoning" unless necessary.
- Do not overfit: if uncertain, still include plausible ingredients but explain uncertainty in the reasoning.

Reasoning requirements:
- 3-6 sentences.
- Explicitly justify major ingredients (proteins, starch base, key sauces, signature toppings).
- Mention common variations briefly when relevant.

Now produce the JSON for the input.
'''

# Same model, input and output schema as the first request; only the prompt
# is swapped for the detailed variant.
chatgpt_response_detailed = chatgpt_client.responses.create(
    text=chatgpt_json_schema,
    instructions=chatgpt_instruction_detailed,
    input=chatgpt_menu_items,
    model=chatgpt_model,
)

# Decode the JSON text and show the second entry of "results".
chatgpt_data_detailed = json.loads(chatgpt_response_detailed.output_text)
print(chatgpt_data_detailed["results"][1])

{'item_id': '10', 'item_name': 'Jumbo Chicken Wings Five Pieces', 'restaurant_id': '1', 'ingredients': ['chicken wings (drumettes and flats)', 'salt', 'black pepper', 'garlic powder', 'paprika', 'canola oil', 'Buffalo wing sauce (hot sauce, butter, vinegar, salt)', 'celery sticks', 'carrot sticks', 'ranch dressing'], 'reasoning': 'Jumbo wings typically mean large, unbreaded chicken wings seasoned simply with salt, pepper, and a few common spices, then served in a set count. Buffalo sauce is a prevalent default for wings, so listing a hot sauce–butter–vinegar style sauce is realistic. Celery and sometimes carrot sticks plus a creamy dip are customary accompaniments; ranch is a common default, though blue cheese is also widely offered. Some venues serve them plain or with alternative sauces (BBQ, garlic parmesan), but Buffalo with ranch and vegetable sticks is a plausible standard.'}


In [None]:
# Gemini (gemini-2.5-pro), reached through the same Stanford-hosted endpoint
# using the OpenAI client.
# The key is read from a local "API_Key" file so it never appears in the notebook.
with open(os.path.join(current_dir, "API_Key")) as key_file:
    gemini_api_key = key_file.read().strip()

gemini_model = "gemini-2.5-pro"
gemini_base_url = "https://aiapi-prod.stanford.edu/v1"

gemini_client = OpenAI(
    base_url=gemini_base_url,
    api_key=gemini_api_key,
)
# Three sample menu items, flattened into a single numbered string.
# NOTE(review): same items and schema as the ChatGPT cell above; consider
# defining them once and sharing them across models.
gemini_menu_items = (
    "1 item_id: 1; restaurant_id: 1; Extra Large Meat Lovers Whole Pie "
    "2 item_id: 10; restaurant_id: 1; Jumbo Chicken Wings Five Pieces "
    "3 item_id: 19; restaurant_id: 1; Bon Appetit Cheese and Berry Danish Single"
)

# Structured-output format: a top-level object whose "results" array holds one
# object per menu item. Every field is required and extra keys are rejected.
gemini_json_schema = dict(
    format=dict(
        type="json_schema",
        name="results",
        strict=True,
        schema=dict(
            type="object",
            additionalProperties=False,
            properties=dict(
                results=dict(
                    type="array",
                    items=dict(
                        type="object",
                        additionalProperties=False,
                        properties=dict(
                            item_id=dict(type="string"),
                            item_name=dict(type="string"),
                            restaurant_id=dict(type="string"),
                            ingredients=dict(
                                type="array",
                                items=dict(type="string"),
                            ),
                            reasoning=dict(type="string"),
                        ),
                        required=[
                            "item_id",
                            "item_name",
                            "restaurant_id",
                            "ingredients",
                            "reasoning",
                        ],
                    ),
                ),
            ),
            required=["results"],
        ),
    ),
)

# System-style instructions for the ingredient-decomposition task.
# Plain (non-f) string on purpose: the prompt has no placeholders, and an
# f-string would misinterpret any literal braces added to the text later.
# NOTE(review): the prompt asks for an "exact numbered format shown below" and
# "plain text", yet no format is shown and the request enforces a JSON schema
# via `text=`. The wording is kept unchanged so that all three models in this
# notebook keep receiving the same prompt -- confirm before rewording.
gemini_instruction = '''
You are a food menu analysis assistant.
Task:
Decompose each restaurant menu item into its likely ingredients.

Input:
You will be given a list of menu items. Each item contains:
- item_id
- item_name
- restaurant_id
- optional description

Output requirements (STRICT):
- Output MUST be machine-readable.
- Output MUST follow the exact numbered format shown below.
- Do NOT include extra commentary, explanations, or markdown.
- Do NOT invent fields that are not requested.
- Use only plain text.
- Each ingredient must be a short noun phrase.
- Ingredients must be ordered from most prominent to least prominent.

For each menu item, infer:
1) The most likely ingredients commonly used in this dish
2) The reasoning explaining WHY those ingredients are present, based on:
   - culinary conventions
   - dish name semantics
   - regional cooking practices
   - typical restaurant preparation methods

Constraints:
- If an ingredient is uncertain, still include it but reflect uncertainty in the reasoning.
- Do NOT include cooking utensils, heat, or generic terms like seasoning(salt, pepper, etc) unless necessary.
- Avoid overly granular items (e.g., “Himalayan pink salt” → “salt”).
'''

# One Responses API call: the menu items are the input, the prompt goes in
# `instructions`, and `text` carries the JSON-schema output format.
gemini_response = gemini_client.responses.create(
    text=gemini_json_schema,
    instructions=gemini_instruction,
    input=gemini_menu_items,
    model=gemini_model,
)

# `gemini_response.output_text` holds the model's JSON text; it is decoded
# with json.loads in a later cell.

In [28]:
# Dump the full Response object (id, timestamps, model name, output messages)
# for inspection.
print(gemini_response)

Response(id='resp_SBydTsmkdrDQLsZbUrtvKkLO1WNjUwsYK0Yej3hZMokJqoeEc14h0ieagIw6A9Mz5AMuDAFSaHJxBHygMKGz7rV-31r63x7h3eXBKjNZY21RCk8wFUqZcwtM4zoCn4XH6TmwzPjwoFh1bkxh0Y8BFQWjU4M5HQM_eJGPSS_gdD3428410hA2k143QT-pvhRtnXTU91i-VCOiBrMBmnTT2t5wivmf3zjkI7pXOo2ktPC1UfFEw1Exr0LVlkjFUQZDAPCcuI5Un969qolSZjh3lCG_WOmDkyevPtKe4iVzcBclgy3Nl4KZfJKI2xhRCrOA8XZujnj8pFpWlps3N06aiyADK9nwFNVwly883FA24oSmIjFd3O3ypmElEtvgmXwmNAJVnA8xtd2hn5W4WxXuCEuc3-shEL8rYpFiyHu-SgRGPqeaTRjReTxXS-VL9KZyORzXgrCJ7Mp6JOR1NG-npzc6bHKgzUiJNTHc-U2gRTVks6o=', created_at=1769592156.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gemini-2.5-pro', object='chat.completion', output=[ResponseOutputMessage(id='XdV5abCoHMi8gLUP8euNoAk', content=[ResponseOutputText(annotations=[], text='{\n  "results": [\n    {\n      "item_id": "1",\n      "item_name": "Extra Large Meat Lovers Whole Pie",\n      "restaurant_id": "1",\n      "ingredients": [\n        "Pizza dough",\n        "Tomato sauce",\n        "Mozzarella c

In [29]:
# Decode the JSON text returned by Gemini into Python objects.
gemini_data = json.loads(gemini_response.output_text)
print(type(gemini_data))
# Index 1 is the second entry of "results" (the chicken-wings item in the
# recorded run).
print(gemini_data["results"][1])  # individual item result

<class 'dict'>
{'item_id': '10', 'item_name': 'Jumbo Chicken Wings Five Pieces', 'restaurant_id': '1', 'ingredients': ['Chicken wings', 'Flour coating', 'Wing sauce', 'Vegetable oil'], 'reasoning': "The primary ingredient is 'Chicken wings' as specified in the item name. Restaurant-style wings are typically coated in flour for a crispy texture when fried in vegetable oil. After frying, they are tossed in a 'wing sauce,' which could be Buffalo, BBQ, or another variety."}


In [34]:
# Claude (claude-4-sonnet), reached through the same Stanford-hosted endpoint
# using the OpenAI client.
# The key is read from a local "API_Key" file so it never appears in the notebook.
with open(os.path.join(current_dir, "API_Key")) as key_file:
    claude_api_key = key_file.read().strip()

claude_model = "claude-4-sonnet"
claude_base_url = "https://aiapi-prod.stanford.edu/v1"

claude_client = OpenAI(
    base_url=claude_base_url,
    api_key=claude_api_key,
)
# Three sample menu items, flattened into a single numbered string.
# NOTE(review): same items and schema as the ChatGPT and Gemini cells above;
# consider defining them once and sharing them across models.
claude_menu_items = (
    "1 item_id: 1; restaurant_id: 1; Extra Large Meat Lovers Whole Pie "
    "2 item_id: 10; restaurant_id: 1; Jumbo Chicken Wings Five Pieces "
    "3 item_id: 19; restaurant_id: 1; Bon Appetit Cheese and Berry Danish Single"
)

# Structured-output format: a top-level object whose "results" array holds one
# object per menu item. Every field is required and extra keys are rejected.
claude_json_schema = dict(
    format=dict(
        type="json_schema",
        name="results",
        strict=True,
        schema=dict(
            type="object",
            additionalProperties=False,
            properties=dict(
                results=dict(
                    type="array",
                    items=dict(
                        type="object",
                        additionalProperties=False,
                        properties=dict(
                            item_id=dict(type="string"),
                            item_name=dict(type="string"),
                            restaurant_id=dict(type="string"),
                            ingredients=dict(
                                type="array",
                                items=dict(type="string"),
                            ),
                            reasoning=dict(type="string"),
                        ),
                        required=[
                            "item_id",
                            "item_name",
                            "restaurant_id",
                            "ingredients",
                            "reasoning",
                        ],
                    ),
                ),
            ),
            required=["results"],
        ),
    ),
)

# System-style instructions for the ingredient-decomposition task.
# Plain (non-f) string on purpose: the prompt has no placeholders, and an
# f-string would misinterpret any literal braces added to the text later.
# NOTE(review): the prompt asks for an "exact numbered format shown below" and
# "plain text", yet no format is shown and the request enforces a JSON schema
# via `text=`. The wording is kept unchanged so that all three models in this
# notebook keep receiving the same prompt -- confirm before rewording.
claude_instruction = '''
You are a food menu analysis assistant.
Task:
Decompose each restaurant menu item into its likely ingredients.

Input:
You will be given a list of menu items. Each item contains:
- item_id
- item_name
- restaurant_id
- optional description

Output requirements (STRICT):
- Output MUST be machine-readable.
- Output MUST follow the exact numbered format shown below.
- Do NOT include extra commentary, explanations, or markdown.
- Do NOT invent fields that are not requested.
- Use only plain text.
- Each ingredient must be a short noun phrase.
- Ingredients must be ordered from most prominent to least prominent.

For each menu item, infer:
1) The most likely ingredients commonly used in this dish
2) The reasoning explaining WHY those ingredients are present, based on:
   - culinary conventions
   - dish name semantics
   - regional cooking practices
   - typical restaurant preparation methods

Constraints:
- If an ingredient is uncertain, still include it but reflect uncertainty in the reasoning.
- Do NOT include cooking utensils, heat, or generic terms like seasoning(salt, pepper, etc) unless necessary.
- Avoid overly granular items (e.g., “Himalayan pink salt” → “salt”).
'''

# One Responses API call: the menu items are the input, the prompt goes in
# `instructions`, and `text` carries the JSON-schema output format.
claude_response = claude_client.responses.create(
    text=claude_json_schema,
    instructions=claude_instruction,
    input=claude_menu_items,
    model=claude_model,
)

In [36]:
# Dump the full Response object (id, timestamps, model name, output messages)
# for inspection.
print(claude_response)

Response(id='resp_kcf-juCg2Wt-agkZF_sG8CgVkx-RY9JniDOhOiSZWMXGPYLgg7dcdZIQX7r0_4k8oM_pdqcgnAYRB4GUMNmXhxftDEPtwkqamTBUNvHydWTvpMxCmysgyX73k3ONyqkEi5vLpdne67irOZIZ9Hu812EZCfVDR9JYoZmUUru6GnA6iX5Q6LZ0Ru8nerji3ELc_gKOKy1sXwHGPJahXfn_o8h9FIv_Wt0dr3iy61XhN9mEKpjOB8UikFKvwWy_ZO3bPbaS2k2FrDi8gfjkyi5l2bgGVtrZwGl5eiXHMfFnOQwOMYcw2XxFTHb5XtqA9HddzLI_NTuK8cR25SNaD66s7MbJJTEA7s-6jcFV_2gPHyFcoDkPeRXm-8qvOPbueylWfUTRWdjdXRi9xTlu8DEeCPWb7ZIj6Q6VD6Blq1R-XBJ6P4soLdvAVdCBe8BcrlMwHlvAZ2QIpC7Hpk-N2quzP9Jte4LsONCtCo3SLDfBGlbUitsy5w0LQ61BaTB-8uDxJHF1hxOR1HeWPJmDkCFv79x63A==', created_at=1769593615.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='claude-sonnet-4@20250514', object='chat.completion', output=[ResponseOutputMessage(id='chatcmpl-70f5f58a-70a1-4202-9c34-537b98591e56', content=[ResponseOutputText(annotations=[], text='{"results": [{"item_id": "1", "item_name": "Extra Large Meat Lovers Whole Pie", "restaurant_id": "1", "ingredients": ["pizza dough", "tomato sauce", "moz

In [37]:
# Decode the JSON text returned by Claude into Python objects.
claude_data = json.loads(claude_response.output_text)
print(type(claude_data))
# Index 1 is the second entry of "results" (the chicken-wings item in the
# recorded run).
print(claude_data["results"][1])  # individual item result

<class 'dict'>
{'item_id': '10', 'item_name': 'Jumbo Chicken Wings Five Pieces', 'restaurant_id': '1', 'ingredients': ['chicken wings', 'flour', 'hot sauce', 'butter', 'garlic powder', 'paprika'], 'reasoning': 'Chicken wings are typically breaded with flour and deep-fried, then tossed in a sauce. Buffalo-style wings commonly use hot sauce and butter as the base sauce, with garlic powder and paprika as common seasonings for flavor and color.'}
