---
title: "Structured outputs: putting the cart before the horse"
date: "11/08/2024"
date-modified: last-modified
description-meta: "A common mistake I've seen people make when using LLMs to generate structured outputs, and how to avoid it."
toc: true
toc-depth: 3
lightbox: true
fig-cap-location: margin
categories:
  - llm
  - openai
  - pydantic
  - python
author:
  - name: Dylan Castillo
    url: https://dylancastillo.co
    affiliation: Iwana Labs
    affiliation-url: https://iwanalabs.com
citation: true
comments:
  utterances:
    repo: dylanjcastillo/blog_comments
    theme: dark-blue
    issue-term: pathname
---

In [1]:
# | output: false
# | echo: false

import nest_asyncio

# Patch asyncio to allow nested event loops; a later cell calls
# `asyncio.run(...)`, which presumably runs inside the notebook's own loop.
nest_asyncio.apply()

In [2]:
from pathlib import Path

import pandas as pd

from dotenv import load_dotenv
from openai import AsyncOpenAI

# Load environment variables from a local .env file before the client is
# built (presumably this is where the OpenAI API key comes from — confirm).
load_dotenv()

# Async OpenAI client shared by the request coroutines defined further down.
client = AsyncOpenAI()

In [3]:
# Resolve the LiveBench reasoning question file relative to the parent of the
# current working directory.
data_dir = Path().absolute().parent.joinpath("data", "live_bench")
reasoning_dir = data_dir.joinpath("reasoning")
live_bench_json = reasoning_dir.joinpath("question.jsonl")

# Read the JSON-lines file and keep only the rows of the 2024-07-26 release.
all_questions = pd.read_json(live_bench_json, lines=True)
release_questions = all_questions.query("livebench_release_date == '2024-07-26'")

# `turns_str` is the first element of each `turns` entry; `expects_integer`
# flags rows whose first turn mentions "integer" (case-insensitive).
first_turn = release_questions.turns.str[0]
df = release_questions.assign(
    turns_str=first_turn,
    expects_integer=first_turn.str.contains("integer", case=False),
)

In [4]:
from pydantic import BaseModel

# Response schema variant A: `reasoning` is declared before `answer`, the
# reverse of ResponseFormatB's field order.
# NOTE: documented with `#` comments on purpose — a class docstring would be
# emitted as "description" by `model_json_schema()` and change the prompt.
class ResponseFormatA(BaseModel):
    reasoning: str  # free-text reasoning
    answer: int  # the answer, typed as an integer

# Response schema variant B: `answer` is declared before `reasoning`, the
# reverse of ResponseFormatA's field order.
# NOTE: documented with `#` comments on purpose — a class docstring would be
# emitted as "description" by `model_json_schema()` and change the prompt.
class ResponseFormatB(BaseModel):
    answer: int  # the answer, typed as an integer
    reasoning: str  # free-text reasoning

In [5]:
# System-prompt template. Placeholders:
#   {specific_instructions} - how the final answer must be formatted
#   {response_format}       - the JSON schema the reply must follow
# Fix: the last sentence was missing its verb ("You will always using ...").
response_formatting_prompt = (
    "You're a helpful assistant. You will help me answer a question.\n"
    "{specific_instructions}\n"
    "You will always respond using this JSON schema: \n"
    "{response_format}."
)


In [6]:
def create_system_prompt(expects_integer, response_format):
    """Build the system prompt for one question.

    Args:
        expects_integer: Truthy when the answer must be a single integer;
            otherwise a bolded string is requested.
        response_format: The schema to embed in the prompt. A dict or list
            (e.g. the result of ``model_json_schema()``) is serialised as
            JSON; any other value is interpolated as-is.

    Returns:
        The filled-in ``response_formatting_prompt`` template.
    """
    import json  # local import so this cell does not depend on a later one

    if expects_integer:
        specific_instructions = "Provide your answer as a single integer."
    else:
        # Fixed the duplicated article ("as a a bolded string").
        specific_instructions = "Provide your answer as a bolded string (**)."

    # str.format would render a dict as a Python repr (single quotes), which
    # is not the JSON the prompt promises; serialise it properly instead.
    if isinstance(response_format, (dict, list)):
        response_format = json.dumps(response_format)

    return response_formatting_prompt.format(
        specific_instructions=specific_instructions,
        response_format=response_format,
    )


In [None]:
import json
import asyncio
from asyncio import Semaphore

# NOTE(review): `client` is already created in an earlier cell; this
# re-creates it, presumably so this cell can be re-run on its own.
client = AsyncOpenAI()

async def process_row(row, response_format, semaphore):
    """Ask the model one question and return its reply parsed from JSON.

    Args:
        row: A record exposing ``expects_integer`` and ``turns_str``.
        response_format: Schema forwarded to ``create_system_prompt``; it is
            only described in the prompt, while the API call itself just
            requests a JSON object.
        semaphore: Async context manager limiting concurrent requests.

    Returns:
        The decoded JSON content of the first choice.
    """
    async with semaphore:
        system_message = {
            "role": "system",
            "content": create_system_prompt(row.expects_integer, response_format),
        }
        user_message = {"role": "user", "content": f"Question:\n{row.turns_str}"}
        completion = await client.chat.completions.create(
            model="gpt-4o",
            messages=[system_message, user_message],
            response_format={"type": "json_object"},
        )
        raw_content = completion.choices[0].message.content
        return json.loads(raw_content)

async def main(response_format, max_concurrency=30):
    """Query the model for every row of ``df`` concurrently.

    Args:
        response_format: Schema forwarded to ``process_row`` for each row.
        max_concurrency: Maximum number of requests in flight at once.
            Defaults to 30, the limit that was previously hard-coded.

    Returns:
        A list with one parsed response per row of ``df``.
    """
    semaphore = Semaphore(max_concurrency)
    tasks = [process_row(row, response_format, semaphore) for _, row in df.iterrows()]
    # gather() returns results in the order the awaitables were passed, so the
    # list lines up positionally with the rows of `df`.
    return await asyncio.gather(*tasks)

# Use variant A's JSON schema as the format described in the system prompt.
response_format = ResponseFormatA.model_json_schema()
# Run all requests to completion and collect one parsed reply per row.
responses = asyncio.run(main(response_format))
# Bare expression so the notebook displays the collected replies.
responses

In [13]:
def extract_answer(expects_integer, answer):
    """Normalise a model answer to a plain string for comparison.

    Args:
        expects_integer: Truthy when the answer is an integer and needs no
            bold-marker removal.
        answer: The raw answer value; it is converted with ``str``.

    Returns:
        The answer as a string with surrounding whitespace removed and, for
        non-integer answers, leading/trailing ``*`` bold markers removed.
    """
    # Drop padding first: trailing whitespace would otherwise shield the
    # closing "**" from strip() and break the later equality check.
    text = str(answer).strip()
    if expects_integer:
        return text
    # str.strip takes a set of characters, so "*" says what "**" only implied.
    return text.strip("*").strip()

# Store the model's raw "answer" field next to each question; `responses` is
# assigned positionally, one entry per row.
raw_answers = [reply["answer"] for reply in responses]
df["raw_answer"] = raw_answers


def _normalise_row(row):
    """Return the cleaned-up answer string for one row of ``df``."""
    return extract_answer(row.expects_integer, row.raw_answer)


# Row-wise normalisation of the raw answers into the comparison column.
df["response"] = df.apply(_normalise_row, axis=1)


In [None]:
# Element-wise comparison of the extracted answers with the `ground_truth`
# column (presumably loaded from the question file — confirm); the resulting
# boolean Series is displayed as the cell output.
df["response"] == df["ground_truth"]