In [17]:
import pandas as pd

order = {
    "items": [],
    "cost": 0.0
}
item = {
    "name": "",
    "size": "",
    "quantity": 1,
    "modifiers": []
}

df = pd.read_csv('train.csv')
df['words'] = df.iloc[:, 1].str.split()

print(df['words'])

0      [Lemme, get, one, tall, Strawberry, Smoothie, ...
1      [I'd, like, to, order, single, trenta, espress...
2      [Could, I, have, single, trenta, mocha, plus, ...
3                     [Grab, me, four, avocado, toasts.]
4      [I'm, craving, couple, of, VENTI, frappe, (moc...
                             ...                        
495    [May, I, get, two, Tall, iced, coffees, includ...
496    [I'm, feeling, like, four, trenta, flat, white...
497    [Start, me, off, with, couple, of, tall, Flat,...
498    [Grab, me, two, short, like, Cold, Brews..., w...
499    [I'm, feeling, like, a, Blueberry, Muffin..., ...
Name: words, Length: 500, dtype: object


In [5]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyA_e0KQ-mTBsJ73_qCuLnuXvpVuv7V-C5k")

model = genai.GenerativeModel('gemini-2.5-flash-lite')
response = model.generate_content("Say hello")
print(response.text)

Hello! How can I help you today?


# Barista Bench: LLM-based Order Parser
### Uses Google Gemini to parse natural language coffee orders.

In [18]:
import os
import json
import time
import pandas as pd
import google.generativeai as genai
from tqdm import tqdm

### 1. Load Data

In [6]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)

Train shape: (500, 3)
Test shape: (3500, 2)

Train columns: ['id', 'order', 'expected_json']
Test columns: ['id', 'order']


### 2. Analyze Tracking Data To Build Menu And Pricing

In [12]:
all_items = []
for _, row in train_df.iterrows():
    try:
        data = json.loads(row['expected_json'])
        for item in data.get('items', []):
            all_items.append(item)
    except:
        continue

items_df = pd.json_normalize(all_items)
print(f"Total item entries in training: {len(items_df)}")
print(f"Unique item names: {sorted(items_df['name'].unique())}")
print(f"Unique sizes: {sorted(items_df['size'].dropna().unique())}")

all_modifiers = set()
for mods in items_df['modifiers']:
    if isinstance(mods, list):
        all_modifiers.update(mods)
print(f"Unique modifiers: {sorted(all_modifiers)}")

Total item entries in training: 1144
Unique item names: ['Americano', 'Avocado Toast', 'Bacon Gouda Sandwich', 'Bagel', 'Blueberry Muffin', 'Butter Croissant', 'Cappuccino', 'Caramel Macchiato', 'Chai Latte', 'Cold Brew', 'Drip Coffee', 'Earl Grey Tea', 'Espresso', 'Flat White', 'Frappe (Coffee)', 'Frappe (Mocha)', 'Green Tea', 'Hot Chocolate', 'Iced Coffee', 'Latte', 'Matcha Latte', 'Mocha', 'Strawberry Smoothie']
Unique sizes: ['Grande', 'Short', 'Tall', 'Trenta', 'Venti']
Unique modifiers: ['Almond Milk', 'Breve', 'Caramel Drizzle', 'Caramel Syrup', 'Classic Syrup', 'Coconut Milk', 'Cold Foam', 'Extra Hot', 'Extra Shot', 'Hazelnut Syrup', 'Light Ice', 'No Ice', 'No Whip', 'Oat Milk', 'Peppermint Syrup', 'Skim Milk', 'Soy Milk', 'Sugar Free Vanilla', 'Vanilla Syrup', 'Whip Cream']


### 3. Build Price Lookup From Training Data

In [14]:
price_data = []
for _, row in train_df.iterrows():
    try:
        data = json.loads(row['expected_json'])
        items = data.get('items', [])
        total = data.get('total_price', 0)

        if len(items) == 1 and items[0]['quantity'] == 1:
            item = items[0]
            price_data.append({
                'name': item['name'],
                'size': item.get('size'),
                'modifiers': tuple(sorted(item.get('modifiers', []))),
                'n_modifiers': len(item.get('modifiers', [])),
                'price': total
            })
    except:
        continue

price_df = pd.DataFrame(price_data)

# Base prices (no modifiers)
base_prices = price_df[price_df['n_modifiers'] == 0].groupby(['name', 'size'])['price'].median().to_dict()

# Modifier costs (relaxed threshold)
with_mod = price_df[price_df['n_modifiers'] == 1]
modifier_costs = {}
for _, row in with_mod.iterrows():
    base_key = (row['name'], row['size'])
    if base_key in base_prices:
        mod_cost = row['price'] - base_prices[base_key]
        mod_name = row['modifiers'][0] if row['modifiers'] else None
        if mod_name:
            modifier_costs.setdefault(mod_name, []).append(mod_cost)

avg_modifier_costs = {k: round(np.median(v), 2) for k, v in modifier_costs.items() if len(v) >= 1}

# Also infer prices from multi-item orders to fill gaps
for _, row in train_df.iterrows():
    try:
        data = json.loads(row['expected_json'])
        items = data.get('items', [])
        if len(items) >= 2:
            known_total = 0
            unknown = []
            for item in items:
                key = (item['name'], item.get('size'))
                n_mods = len(item.get('modifiers', []))
                mod_add = sum(avg_modifier_costs.get(m, 0.50) for m in item.get('modifiers', []))
                if key in base_prices:
                    known_total += (base_prices[key] + mod_add) * item['quantity']
                else:
                    unknown.append(item)
            if len(unknown) == 1:
                item = unknown[0]
                mod_add = sum(avg_modifier_costs.get(m, 0.50) for m in item.get('modifiers', []))
                inferred = (data['total_price'] - known_total) / item['quantity'] - mod_add
                key = (item['name'], item.get('size'))
                if 0.50 < inferred < 15.0:
                    if key not in base_prices:
                        base_prices[key] = round(inferred, 2)
    except:
        continue

print(f"Base prices found: {len(base_prices)}")
for k, v in sorted(base_prices.items()):
    print(f"  {k}: ${v:.2f}")
print(f"\nModifier costs found: {len(avg_modifier_costs)}")
for k, v in sorted(avg_modifier_costs.items()):
    print(f"  {k}: ${v:.2f}")

Base prices found: 93
  ('Americano', 'Grande'): $4.60
  ('Americano', 'Short'): $4.20
  ('Americano', 'Tall'): $4.00
  ('Americano', 'Trenta'): $1.00
  ('Americano', 'Venti'): $4.50
  ('Avocado Toast', None): $7.00
  ('Bacon Gouda Sandwich', None): $10.50
  ('Bagel', None): $2.50
  ('Blueberry Muffin', None): $7.75
  ('Butter Croissant', None): $3.40
  ('Cappuccino', 'Grande'): $7.25
  ('Cappuccino', 'Short'): $3.43
  ('Cappuccino', 'Tall'): $4.60
  ('Cappuccino', 'Trenta'): $6.00
  ('Cappuccino', 'Venti'): $5.50
  ('Caramel Macchiato', 'Grande'): $5.75
  ('Caramel Macchiato', 'Short'): $0.55
  ('Caramel Macchiato', 'Tall'): $5.25
  ('Caramel Macchiato', 'Trenta'): $7.25
  ('Caramel Macchiato', 'Venti'): $6.25
  ('Chai Latte', 'Grande'): $5.25
  ('Chai Latte', 'Short'): $4.25
  ('Chai Latte', 'Tall'): $4.78
  ('Chai Latte', 'Trenta'): $6.25
  ('Chai Latte', 'Venti'): $5.75
  ('Cold Brew', 'Grande'): $4.75
  ('Cold Brew', 'Short'): $4.28
  ('Cold Brew', 'Tall'): $4.25
  ('Cold Brew', '

### 4. Build System Prompt With Menu Knowledge

In [15]:
menu_str = "BASE PRICES (name, size): price\n"
for (name, size), price in sorted(base_prices.items()):
    menu_str += f"  {name}, {size}: ${price:.2f}\n"

modifier_str = "MODIFIER COSTS:\n"
for mod, cost in sorted(avg_modifier_costs.items()):
    modifier_str += f"  {mod}: ${cost:.2f}\n"

# Few-shot examples from training
example_indices = train_df.sample(n=min(10, len(train_df)), random_state=42).index
examples_str = ""
text_col = [c for c in train_df.columns if c not in ('id', 'expected_json')][0]
for i, idx in enumerate(example_indices):
    row = train_df.loc[idx]
    examples_str += f"\nExample {i+1}:\nOrder: {row[text_col]}\nOutput: {row['expected_json']}\n"

SYSTEM_PROMPT = f"""You are a barista order parser. Given a natural language coffee shop order, extract the structured order as JSON.

RULES:
- Parse each item with: name, size (null for food items), quantity, modifiers (list, empty if none)
- Handle corrections: "actually", "no make it", "cancel that", "remove that", "nevermind" mean to undo the previous modifier or item
- Ignore filler words: "literally", "like", "um", "actually" (when not used for corrections)
- Sizes: Short, Tall, Grande, Venti, Trenta (normalize to title case)
- Quantity words: single/a/one=1, pair/couple/double/two=2, triple/three=3, four=4, five=5
- Calculate total_price based on the menu prices below
- Food items (Toast, Bagel, Croissant, Sandwich, Muffin, Cookie, Cake Pop, etc.) have size: null
- Modifier names should be in Title Case

{menu_str}
{modifier_str}

IMPORTANT: Respond with ONLY valid JSON. No markdown, no explanation, no code fences. Format:
{{"items": [{{"name": "...", "size": "..." or null, "quantity": N, "modifiers": [...]}}], "total_price": X.XX}}

{examples_str}"""

### 5. Parse Test Orders Using Gemini

In [20]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyA_e0KQ-mTBsJ73_qCuLnuXvpVuv7V-C5k")  # your actual key

MAX_WORKERS = 10
MAX_RETRIES = 3
RATE_LIMIT_DELAY = 0.2

thread_local = threading.local()
rate_lock = threading.Lock()
last_request_time = [0.0]

def get_model():
    if not hasattr(thread_local, 'model'):
        thread_local.model = genai.GenerativeModel(
            'gemini-2.5-flash-lite',          # updated model
            system_instruction=SYSTEM_PROMPT,
            generation_config=genai.GenerationConfig(
                temperature=0.0,
                max_output_tokens=1024,
            )
        )
    return thread_local.model

def rate_limited_wait():
    """Ensure minimum delay between API calls globally."""
    with rate_lock:
        now = time.time()
        wait = RATE_LIMIT_DELAY - (now - last_request_time[0])
        if wait > 0:
            time.sleep(wait)
        last_request_time[0] = time.time()

def parse_order(order_text):
    model = get_model()
    for attempt in range(MAX_RETRIES):
        try:
            rate_limited_wait()
            response = model.generate_content(f"Parse this order:\n{order_text}")
            result_text = response.text.strip()
            result_text = result_text.replace("```json", "").replace("```", "").strip()

            parsed = json.loads(result_text)
            if 'items' not in parsed or 'total_price' not in parsed:
                raise ValueError("Missing required keys")
            return parsed

        except (json.JSONDecodeError, ValueError):
            if attempt < MAX_RETRIES - 1:
                time.sleep(2)
            continue
        except Exception as e:
            err_str = str(e).lower()
            if 'rate' in err_str or '429' in err_str:
                time.sleep(10 * (attempt + 1))  # back off hard on rate limits
            elif attempt < MAX_RETRIES - 1:
                time.sleep(3 ** (attempt + 1))
            continue

    return {"items": [], "total_price": 0.0}

def parse_order_with_index(args):
    idx, order_text = args
    parsed = parse_order(order_text)
    return idx, json.dumps(parsed)

print(f"\nParsing {len(test_df)} test orders with {MAX_WORKERS} workers...")

tasks = [(idx, row[text_col]) for idx, row in test_df.iterrows()]
results_dict = {}
failed = 0
start_time = time.time()

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = {executor.submit(parse_order_with_index, task): task[0] for task in tasks}

    with tqdm(total=len(tasks), desc="Parsing orders", unit="order",
              bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') as pbar:
        for future in as_completed(futures):
            idx, result_json = future.result()
            results_dict[idx] = result_json

            parsed = json.loads(result_json)
            if len(parsed.get('items', [])) == 0:
                failed += 1

            pbar.set_postfix(failed=failed)
            pbar.update(1)

results = [results_dict[idx] for idx in test_df.index]
elapsed = time.time() - start_time
print(f"\nDone! {len(results)} orders in {elapsed:.1f}s ({failed} failed)")


Parsing 3500 test orders with 10 workers...


Parsing orders: 100%|██████████| 3500/3500 [5:51:38<00:00,  6.03s/order]  


Done! 3500 orders in 21098.1s (3498 failed)





### 6. Generate Submission

In [21]:
submission = pd.DataFrame({
    'id': test_df['id'],
    'predicted_json': results
})

submission.to_csv('submission.csv', index=False)
print(f"\nSubmission saved! Shape: {submission.shape}")
print("\nSample predictions:")
for i in range(min(5, len(submission))):
    print(f"\n  ID {submission.iloc[i]['id']}:")
    print(f"  {submission.iloc[i]['predicted_json']}")


Submission saved! Shape: (3500, 2)

Sample predictions:

  ID 500:
  {"items": [], "total_price": 0.0}

  ID 501:
  {"items": [], "total_price": 0.0}

  ID 502:
  {"items": [], "total_price": 0.0}

  ID 503:
  {"items": [], "total_price": 0.0}

  ID 504:
  {"items": [], "total_price": 0.0}
