In [None]:
from daft import col
from daft.functions import llm_generate, format

# Work on a three-row sample of `df` so every demo cell below stays small.
df_mini = df.limit(3)

# First demo: ask for a completion per "user" value, restricted to one of
# four answer letters through the `guided_choice` entry of `extra_body`.
# NOTE(review): presumably the server behind `base_url` implements guided
# decoding for this key -- confirm against the deployment.
answer_letters = ["A", "B", "C", "D"]
choice_expr = llm_generate(
    df_mini["user"],
    model=model_id,
    provider="openai",
    extra_body={"guided_choice": answer_letters},
    base_url=base_url,
    api_key=api_key,
)

# Materialize the new "result" column, then display the collected frame.
df_mini_result = df_mini.with_column("result", choice_expr).collect()
df_mini_result.show()

In [None]:
# Guided JSON
# The constraint is a JSON Schema describing a string limited to the four
# answer letters; it travels in `extra_body` under the "guided_json" key.
letter_schema = {
    "type": "string",
    "enum": ["A", "B", "C", "D"],
}
json_expr = llm_generate(
    df_mini["user"],
    model=model_id,
    provider="openai",
    extra_body={"guided_json": letter_schema},
    base_url=base_url,
    api_key=api_key,
)

# Attach the generation as "result", collect, and show the columns of interest.
df_mini_result = df_mini.with_column("result", json_expr).collect()
df_mini_result.select(
    "question",
    "choices_string",
    "answer",
    "image_png",
    "result",
).show()

In [None]:
# Guided Choice
# Same request shape as the other demos; only the `extra_body` constraint
# differs (an explicit list of allowed outputs).
choice_request = dict(
    model=model_id,
    provider="openai",
    extra_body={"guided_choice": ["A", "B", "C", "D"]},
    base_url=base_url,
    api_key=api_key,
)
df_mini_result = df_mini.with_column(
    "result",
    llm_generate(df_mini["user"], **choice_request),
).collect()

# Show the inputs next to the generated "result" column.
choice_columns = ("question", "choices_string", "answer", "image_png", "result")
df_mini_result.select(*choice_columns).show()

In [None]:
# Guided Regex
# Constrain the completion with a regular expression that admits exactly one
# of the letters A-D, passed in `extra_body` under the "guided_regex" key.
#
# The collected DataFrame is bound to `df_mini_result` *before* anything is
# displayed: `.show()` only renders and does not return the DataFrame, so
# chaining it onto the assignment would leave `df_mini_result` unusable for
# the `.select(...)` below (and for any later cell that reads it).
df_mini_result = df_mini.with_column("result", llm_generate(
        df_mini["user"],
        model=model_id,
        provider="openai",
        extra_body={"guided_regex": r"^[A-D]$"},
        base_url=base_url,
        api_key=api_key
    )
).collect()
df_mini_result.select("question","choices_string","answer","image_png","result").show()

In [None]:
## Guided Grammar
# The grammar text is sent verbatim (including its surrounding whitespace)
# in `extra_body` under the "guided_grammar" key; it defines `root` as a
# single `choice`, and `choice` as one of the four quoted letters.
letter_grammar = r"""
            root ::= choice
            choice ::= "A" | "B" | "C" | "D"
        """
grammar_expr = llm_generate(
    df_mini["user"],
    model=model_id,
    provider="openai",
    extra_body={"guided_grammar": letter_grammar},
    base_url=base_url,
    api_key=api_key,
)

# Collect the frame with the generated "result" column and display it
# alongside the question, choices, reference answer, and image columns.
df_mini_result = df_mini.with_column("result", grammar_expr).collect()
df_mini_result.select(
    "question", "choices_string", "answer", "image_png", "result"
).show()

In [None]:
# Pydantic Json Schema
from pydantic import BaseModel, Field

# Response model with a single string field, `answer`, whose pattern admits
# exactly one of the letters A-D. Documented with comments rather than a class
# docstring on purpose: a docstring would be picked up by
# `model_json_schema()` and change the schema sent with the request.
class Choices(BaseModel):
    answer: str = Field(
        description="Answer with the letter." ,
        pattern=r"^[A-D]$",
    )

# Build the structured-output request from the pydantic model: the JSON
# Schema generated by `Choices` is wrapped in a "json_schema" response format.
choices_response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "choices",
        "schema": Choices.model_json_schema(),
    },
}
schema_expr = llm_generate(
    df_mini["user"],
    model=model_id,
    provider="openai",
    response_format=choices_response_format,
    base_url=base_url,
    api_key=api_key,
)

# Collect the frame with the generated "result" column and show it next to
# the question, choices, reference answer, and image columns.
df_mini_result = df_mini.with_column("result", schema_expr).collect()
df_mini_result.select(
    "question", "choices_string", "answer", "image_png", "result"
).show()


## NOTE! With Pydantic we have to convert the result back to the Pydantic model if we need to run our validations.
## It's definitely best practice to return the inference results immediately so that we can vectorize the validation afterwards (more efficient).

def _parse_choices(raw_json):
    """Parse one "result" value into a `Choices` instance via `model_validate_json`."""
    return Choices.model_validate_json(raw_json)


# Apply the parser to every value of the "result" column and store the
# resulting Python objects in a new "pydantic_model_validated" column.
# NOTE(review): `daft` must already be imported as a module earlier in the
# notebook for `daft.DataType.python()` to resolve -- confirm.
validated_expr = df_mini_result["result"].apply(
    _parse_choices,
    return_dtype=daft.DataType.python(),
)
df_mini_result_validated = df_mini_result.with_column(
    "pydantic_model_validated",
    validated_expr,
).collect()

# Wahoo, a Validated Pydantic Model in Frame!
df_mini_result_validated.select("answer", "pydantic_model_validated").show()