# Stream-validate LLM responses


In [1]:
# Few imports and global variables
from rich import print
import guardrails as gd
import openai
from IPython.display import clear_output
import time

### Setup

Install the necessary validators from the Guardrails Hub. Run the commands below as written in a notebook cell, or drop the leading `!` to run them in your CLI.

```bash
!guardrails hub install hub://guardrails/valid_range
!guardrails hub install hub://guardrails/uppercase
!guardrails hub install hub://guardrails/lowercase
!guardrails hub install hub://guardrails/one_line
```

### 1. For structured JSON output


#### Define the prompt and output schema


In [2]:
from pydantic import BaseModel, Field
from guardrails.hub import LowerCase, UpperCase, ValidRange, OneLine
from typing import List

# Free-text clinical note used as the extraction input. It is passed to the
# guard at call time through `prompt_params={"doctors_notes": doctors_notes}`.
# NOTE(review): the stated age (152) is outside the 0-100 range that
# `PatientInfo.age` validates -- presumably on purpose, to show the fix.
doctors_notes = """152 y/o female with chronic macular rash to face and hair, worse in beard, eyebrows and nares.
The rash is itchy, flaky and slightly scaly. Moderate response to OTC steroid cream. Patient has been using cream for 2 weeks and also suffers from diabetes."""

# Prompt template for the structured example. `${doctors_notes}` is the
# placeholder filled from `prompt_params`; `${gr.complete_json_suffix_v2}`
# looks like a Guardrails-provided suffix describing the JSON output format
# (TODO confirm against the Guardrails docs).
prompt = """
Given the following doctor's notes about a patient, please extract a dictionary that contains the patient's information.

${doctors_notes}

${gr.complete_json_suffix_v2}
"""


# Schema for a single symptom entry: what the symptom is and where it occurs.
# (Documented with `#` comments rather than a docstring so nothing extra can
# end up in the schema generated from this model.)
class Symptom(BaseModel):
    symptom: str = Field(description="Symptom that a patient is experiencing")
    # LowerCase with on_fail="fix": presumably a non-lowercase value is
    # corrected rather than rejected -- confirm against the validator docs.
    affected_area: str = Field(
        validators=[LowerCase(on_fail="fix")],
        description="What part of the body the symptom is affecting",
    )


# Schema for a single medication entry: the drug name and how the patient is
# responding to it. (`#` comments are used instead of a docstring so nothing
# extra can end up in the schema generated from this model.)
class Medication(BaseModel):
    # UpperCase with on_fail="fix": presumably a non-uppercase name is
    # corrected rather than rejected -- confirm against the validator docs.
    medication: str = Field(
        validators=[UpperCase(on_fail="fix")],
        description="Name of the medication the patient is taking",
    )
    response: str = Field(
        description="How the patient is responding to the medication"
    )


# Top-level output schema handed to `Guard.from_pydantic`: patient demographics,
# a list of symptoms, a list of medications and a free-text catch-all field.
# (`#` comments are used instead of a docstring so nothing extra can end up in
# the schema generated from this model.)
class PatientInfo(BaseModel):
    gender: str = Field(description="Patient's gender")
    # Age is constrained to 0-100; with on_fail="fix" an out-of-range value is
    # presumably adjusted rather than rejected -- confirm against ValidRange docs.
    age: int = Field(
        validators=[ValidRange(min=0, max=100, on_fail="fix")],
        description="Patient's age",
    )
    # Nested models: each list item is validated against its own schema.
    symptoms: List[Symptom] = Field(
        description="Symptoms that the patient is currently experiencing. Each symptom should be classified into  separate item in the list."
    )
    current_meds: List[Medication] = Field(
        description="Medications the patient is currently taking and their response"
    )
    # Two validators are attached to the catch-all field: lowercase and single-line.
    miscellaneous: str = Field(
        validators=[LowerCase(on_fail="fix"), OneLine(on_fail="fix")],
        description="Any other information that is relevant to the patient's health; something that doesn't fit into the other categories.",
    )

    Importing validators from `guardrails.validators` is deprecated.
    All validators are now available in the Guardrails Hub. Please install
    and import them from the hub instead. All validators will be
    removed from this module in the next major release.

    Install with: `guardrails hub install hub://<namespace>/<validator_name>`
    Import as: from guardrails.hub import `ValidatorName`
    
  warn(

`from guardrails.validators import LowerCase` is deprecated and
support will be removed after version 0.5.x. Please switch to the Guardrails Hub syntax:
`from guardrails.hub import LowerCase` for future updates and support.
For additional details, please visit: https://hub.guardrailsai.com/validator/guardrails/lowercase.

  warn(

`from guardrails.validators import UpperCase` is deprecated and
support will be removed after version 0.5.x. Please switch to the Guardrails Hub syntax:
`from guardrails.hub import UpperCase` for future updates and support.
For additional details, ple

#### Create the Guard object


In [3]:
# Build the guard for the structured example: `PatientInfo` is the output
# schema and `prompt` is the template sent to the LLM.
guard = gd.Guard.from_pydantic(
    prompt=prompt,
    output_class=PatientInfo,
)

##### Example 1: No streaming

By default, the `stream` parameter is set to `False`


In [4]:
# Non-streaming call: wrap the OpenAI chat-completions request with the `guard`
# object and unpack the raw LLM text and the validated output from the result.
# `temperature=0` matches the streaming example that follows, so the two runs
# use identical sampling settings and their outputs can be compared directly.
raw_llm_output, validated_output, *rest = guard(
    openai.chat.completions.create,
    prompt_params={"doctors_notes": doctors_notes},
    max_tokens=1024,
    temperature=0,
)

# Print the validated output from the LLM
print(validated_output)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [5]:
# Inspect the logs for the non-streaming call: print the `tree` of the last
# entry in the guard's history (`print` is rich's, imported in the first cell).
print(guard.history.last.tree)

##### Example 2: Streaming

Set the `stream` parameter to `True`


In [6]:
# Streaming call: with `stream=True` the guard hands back an iterable of
# fragments instead of a single result.
fragment_generator = guard(
    openai.chat.completions.create,
    stream=True,
    temperature=0,
    max_tokens=1024,
    prompt_params={"doctors_notes": doctors_notes},
)


# Redraw the cell output for every fragment so the display updates in place.
for fragment in fragment_generator:
    clear_output(wait=True)
    print(fragment)
    time.sleep(0.5)  # brief pause so each intermediate state stays visible

In [7]:
# Inspect the logs for the streaming call: print the `tree` of the last entry
# in the guard's history, for comparison with the non-streaming logs.
print(guard.history.last.tree)

As you can see here, the outputs in both examples match. The only difference is that, in the streaming example, the outputs are returned as soon as they are received and validated by Guardrails. In the non-streaming example, the outputs are returned only after the entire request has been processed by the API. In other words, when streaming is enabled, the API returns the outputs as soon as they are ready, rather than waiting for the entire request to be processed.


### 2. For unstructured text output


#### Define the prompt and Guard object with validators


In [8]:
from guardrails.hub import UpperCase, OneLine

# NOTE: `prompt` and `guard` are rebound below, replacing the objects used by
# the structured-output section. Re-run that section's setup cells before
# executing its examples again.
# The prompt asks for one sentence per line -- presumably so that the OneLine
# validator has something to fix.
prompt = """
Generate a short description of large language models. Each new sentence should be on another line.
"""

# Guard for plain-string output with two validators attached.
# NOTE(review): `description="testmeout"` looks like a leftover placeholder --
# confirm whether a meaningful description is expected here.
guard = gd.Guard.from_string(
    prompt=prompt,
    description="testmeout",
    validators=[UpperCase(on_fail="fix"), OneLine(on_fail="fix")],
)

`from guardrails.validators import UpperCase` is deprecated and
support will be removed after version 0.5.x. Please switch to the Guardrails Hub syntax:
`from guardrails.hub import UpperCase` for future updates and support.
For additional details, please visit: https://hub.guardrailsai.com/validator/guardrails/uppercase.

  warn(

`from guardrails.validators import OneLine` is deprecated and
support will be removed after version 0.5.x. Please switch to the Guardrails Hub syntax:
`from guardrails.hub import OneLine` for future updates and support.
For additional details, please visit: https://hub.guardrailsai.com/validator/guardrails/one_line.

  warn(



#### Example 1: No streaming

By default, the `stream` parameter is set to `False`


In [9]:
# Non-streaming run of the text guard: unpack the raw LLM text and the
# validated text from the guarded OpenAI call.
raw, validated, *rest = guard(
    openai.chat.completions.create,
    temperature=0.1,
    max_tokens=50,
)

# Show the raw text next to its validated counterpart
print(f"Raw output:\n{raw}")
print(f"Validated output:\n{validated}")

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


#### Example 2: With streaming

Set the `stream` parameter to `True`


In [10]:
# Streaming run of the text guard: same settings as the non-streaming call,
# plus `stream=True`, which yields fragments instead of a single result.
fragment_generator = guard(
    openai.chat.completions.create,
    stream=True,
    temperature=0.1,
    max_tokens=50,
)


# Redraw the cell output for every fragment so the display updates in place.
for fragment in fragment_generator:
    clear_output(wait=True)
    print(fragment)
    time.sleep(0.1)  # brief pause so each intermediate state stays visible

In [11]:
# See guard history: print the `tree` of the last entry recorded by the text
# guard (`print` is rich's, imported in the first cell).
print(guard.history.last.tree)

As you can see, the outputs in both examples match. The only difference is that, in the streaming example, the outputs are returned as soon as they are received and validated by Guardrails. In the non-streaming example, the outputs are returned only after the entire request has been processed by the API. In other words, when streaming is enabled, the API returns the outputs as soon as they are ready, rather than waiting for the entire request to be processed.

Note that all of the examples above already use OpenAI's chat completions API (`openai.chat.completions.create`).
