In [None]:
!python -m pip install -q openai

Mounted at /content/drive


In [1]:
import os
import pandas as pd
import sqlite3
import json
from openai import OpenAI

In [2]:
# Resolve the SQLite database file relative to the directory the kernel was
# started in, so the notebook must be launched from the project root.
current_dir = os.getcwd()
db_path = os.path.join(current_dir, "data/mydb.sqlite")

In [3]:
# Open the SQLite database at `db_path` and create the cursor that the
# inspection cells use to run their queries.
conn = sqlite3.connect(database=db_path)
cur = conn.cursor()

In [4]:
# Column layout of the `restaurants` table (cid, name, type, notnull, default, pk).
cur.execute("PRAGMA table_info(restaurants);").fetchall()

[(0, 'id', 'INTEGER', 0, None, 0),
 (1, 'position', 'INTEGER', 0, None, 0),
 (2, 'name', 'TEXT', 0, None, 0),
 (3, 'score', 'REAL', 0, None, 0),
 (4, 'ratings', 'REAL', 0, None, 0),
 (5, 'category', 'TEXT', 0, None, 0),
 (6, 'price_range', 'TEXT', 0, None, 0),
 (7, 'full_address', 'TEXT', 0, None, 0),
 (8, 'zip_code', 'TEXT', 0, None, 0),
 (9, 'lat', 'REAL', 0, None, 0),
 (10, 'lng', 'REAL', 0, None, 0)]

In [5]:
# Peek at the first five rows of `restaurants`.
cur.execute("SELECT * from restaurants limit 5").fetchall()

[(1,
  19,
  'PJ Fresh (224 Daniel Payne Drive)',
  None,
  None,
  'Burgers, American, Sandwiches',
  '$',
  '224 Daniel Payne Drive, Birmingham, AL, 35207',
  '35207',
  33.5623653,
  -86.8307025),
 (2,
  9,
  "J' ti`'z Smoothie-N-Coffee Bar",
  None,
  None,
  'Coffee and Tea, Breakfast and Brunch, Bubble Tea',
  None,
  '1521 Pinson Valley Parkway, Birmingham, AL, 35217',
  '35217',
  33.58364,
  -86.77333),
 (3,
  6,
  'Philly Fresh Cheesesteaks (541-B Graymont Ave)',
  None,
  None,
  'American, Cheesesteak, Sandwiches, Alcohol',
  '$',
  '541-B Graymont Ave, Birmingham, AL, 35204',
  '35204',
  33.5098,
  -86.85464),
 (4,
  17,
  "Papa Murphy's (1580 Montgomery Highway)",
  None,
  None,
  'Pizza',
  '$',
  '1580 Montgomery Highway, Hoover, AL, 35226',
  '35226',
  33.4044388,
  -86.8066142),
 (5,
  162,
  'Nelson Brothers Cafe (17th St N)',
  4.7,
  22.0,
  'Breakfast and Brunch, Burgers, Sandwiches',
  None,
  '314 17th St N, Birmingham, AL, 35203',
  '35203',
  33.51473,
  -8

In [7]:
# Load the menu CSV and write it to the `menus` table, replacing any existing
# table of that name. Rows are written in batches of 1000; the cell displays
# the value returned by `to_sql`.
table = "menus"
menu_df = pd.read_csv("data/restaurant-menus.csv")
menu_df.to_sql(
    name=table,
    con=conn,
    if_exists="replace",
    index=False,
    chunksize=1000,
)

5117217

In [10]:
# Column layout of the freshly written `menus` table.
cur.execute("PRAGMA table_info(menus);").fetchall()

[(0, 'restaurant_id', 'INTEGER', 0, None, 0),
 (1, 'category', 'TEXT', 0, None, 0),
 (2, 'name', 'TEXT', 0, None, 0),
 (3, 'description', 'TEXT', 0, None, 0),
 (4, 'price', 'TEXT', 0, None, 0)]

In [11]:
# Peek at the first five rows of `menus`.
cur.execute("SELECT * from menus limit 5").fetchall()

[(1,
  'Extra Large Pizza',
  'Extra Large Meat Lovers',
  'Whole pie.',
  '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large Supreme', 'Whole pie.', '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large Pepperoni', 'Whole pie.', '14.99 USD'),
 (1,
  'Extra Large Pizza',
  'Extra Large BBQ Chicken &amp; Bacon',
  'Whole Pie',
  '15.99 USD'),
 (1, 'Extra Large Pizza', 'Extra Large 5 Cheese', 'Whole pie.', '14.99 USD')]

In [12]:
# Done inspecting the database: release the SQLite connection.
conn.close()

In [13]:
# --- Client ------------------------------------------------------------------
# Read the API key from the environment rather than embedding it in the
# notebook: a key saved in a cell is exposed to anyone who can read the file
# or its version history.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )
client = OpenAI(api_key=api_key, base_url="https://aiapi-prod.stanford.edu/v1")

# --- Sample input ------------------------------------------------------------
# Three hand-written menu items used to exercise the prompt; each entry carries
# an item_id, a restaurant_id and the item's name/description text.
menu_items = "1 item_id: 1; restaurant_id: 1; Extra Large Meat Lovers Whole Pie 2 item_id: 10; restaurant_id: 1; Jumbo Chicken Wings Five Pieces 3 item_id: 19; restaurant_id: 1; Bon Appetit Cheese and Berry Danish Single"

# --- Structured-output format ------------------------------------------------
# Passed as the `text` argument of `client.responses.create`. The schema is an
# object with a single "results" array; each element describes one menu item
# (item_id, item_name, restaurant_id, reasoning) together with a list of
# ingredients, each a {name, confidence} pair with confidence bounded to [0, 1].
# Every property is listed as required and additional properties are rejected.
json_schema = {
    "format": {
        "type": "json_schema",
        "name": "results",
        "strict": True,
        "schema": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "results": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "additionalProperties": False,
                        "properties": {
                            "item_id": {"type": "string"},
                            "item_name": {"type": "string"},
                            "restaurant_id": {"type": "string"},
                            "ingredients": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "additionalProperties": False,
                                    "properties": {
                                        "name": {"type": "string"},
                                        "confidence": {
                                            "type": "number",
                                            "minimum": 0.0,
                                            "maximum": 1.0
                                        }
                                    },
                                    "required": ["name", "confidence"]
                                }
                            },
                            "reasoning": {"type": "string"}
                        },
                        "required": [
                            "item_id",
                            "item_name",
                            "restaurant_id",
                            "ingredients",
                            "reasoning"
                        ]
                    }
                }
            },
            "required": ["results"]
        }
    }
}

# --- Instructions ------------------------------------------------------------
# Plain string (no interpolation is needed), sent as the `instructions`
# argument of the request.
assistant_instruction = '''
You are a food menu analysis assistant.
Task:
Decompose each restaurant menu item into its likely ingredients.

Input:
You will be given a list of menu items. Each item contains:
- item_id
- item_name
- restaurant_id
- optional description

Output requirements (STRICT):
- Output MUST be machine-readable.
- Output MUST follow the provided JSON schema exactly.
- Do NOT include extra commentary, explanations, or markdown.
- Do NOT invent fields that are not requested.
- Each ingredient must be a short noun phrase.
- Each ingredient name must be in singular form (e.g., "chicken wing" not "chicken wings").
- Ingredients must be ordered from most prominent to least prominent.
- Each ingredient must include a confidence score between 0 and 1.
- The confidence represents how likely the ingredient is to be present in the dish.

For each menu item, infer:
1) The most likely ingredients commonly used in this dish
2) A confidence score (0–1) for each ingredient
3) The reasoning explaining WHY those ingredients are present, based on:
   - culinary conventions
   - dish name semantics
   - regional cooking practices
   - typical restaurant preparation methods

Constraints:
- If an ingredient is uncertain, still include it but give it a lower confidence.
- Do NOT include cooking utensils or heat
- Avoid overly granular items (e.g., “Himalayan pink salt” → “salt”).
'''

# --- Request -----------------------------------------------------------------
# One Responses API call: the sample items are the input, the prompt above is
# the instruction text, and `json_schema` constrains the output format.
response = client.responses.create(
    model="gpt-4o",
    input=menu_items,
    instructions=assistant_instruction,
    text=json_schema,
)

In [14]:
# Debug aid: prints the entire response object's repr (very long); the JSON
# payload itself is available as `response.output_text`.
print(response)

Response(id='resp_SCsw_TD9LzJYNkTucsuIYh4RSseS9iN9bLx2VS3ZOkV0auzsJ_JP5msrB8H7yX7s6RPil0E0-zVyWnUhofWZAGKYhGpiBfqi71hc3gn8nz2zR50dftNY35MDFdYcPfhO3eV-nKQwiTDx2FJBANxtn1H56ika-ext_KW6PkhkEueba5ay2rQId0UBDchAQIyDAQliMQpxSnkv_VLKmaAX3BbJA4KClCavBVVLZdj7GLYnyKcfKSM46Uqm9RDy1Hp3S8g422VVokCaE0jm1-7OHu7Ti7yMeLKyuwHlQX92hH1oYF_I3uHR3D0zWsABunn2wTg4-nxPv3C8rW-QCbJVbVYcyHxJIERc7Hy9BUxKEf1GJqyBwbMqFCkYJQsuzvvxeBkol1UOAhcnnXlT1tsm6zEewRnepH8aTnIlY6L6b3LD-HTB7Sh2EObxFnIlLApLUJHcEwg8k7vZUi8XB7Rx59VUIs5ag-gXvEejDJCCSHyp2bsWLKtvTH1WuYOolRw8gh6jsgyKDH9fvHKsR7n5Sl_RGj6VMGssBsqa', created_at=1770019990.0, error=None, incomplete_details=None, instructions='\nYou are a food menu analysis assistant.\nTask:\nDecompose each restaurant menu item into its likely ingredients.\n\nInput:\nYou will be given a list of menu items. Each item contains:\n- item_id\n- item_name\n- restaurant_id\n- optional description\n\nOutput requirements (STRICT):\n- Output MUST be machine-readable.\n- Output MUST follow the provided 

In [15]:
# Parse the model's JSON text into Python objects (`json` is imported in the
# notebook's import cell, so it is not re-imported here).
data = json.loads(response.output_text)
print(type(data))
# Show the second entry of the "results" array as a spot check of one item.
print(data["results"][1])  # individual item result



<class 'dict'>
{'item_id': '10', 'item_name': 'Jumbo Chicken Wings Five Pieces', 'restaurant_id': '1', 'ingredients': [{'name': 'chicken wings', 'confidence': 0.9}, {'name': 'barbecue sauce', 'confidence': 0.8}, {'name': 'butter', 'confidence': 0.65}, {'name': 'spices', 'confidence': 0.7}], 'reasoning': 'This dish consists of chicken wings, potentially tossed in a barbecue or hot sauce concoction, prepared in a style common in American cuisine.'}
