From eb126574dfb0c80d60cc7f0029e4ac326329e2cf Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 13:58:35 -0700 Subject: [PATCH 1/6] add experimental helper to get us a much cleaner interface to openai json schema structured output --- guardrails/guard.py | 8 +- guardrails/utils/structured_data_utils.py | 68 ++++++++++++++ guardrails/utils/tools_utils.py | 27 ------ ...utils.py => test_structured_data_utils.py} | 92 ++++++++++++++++++- 4 files changed, 166 insertions(+), 29 deletions(-) create mode 100644 guardrails/utils/structured_data_utils.py delete mode 100644 guardrails/utils/tools_utils.py rename tests/unit_tests/utils/{test_tools_utils.py => test_structured_data_utils.py} (60%) diff --git a/guardrails/guard.py b/guardrails/guard.py index 75d1644b3..064f160fd 100644 --- a/guardrails/guard.py +++ b/guardrails/guard.py @@ -86,10 +86,12 @@ ValidatorMap, ) -from guardrails.utils.tools_utils import ( +from guardrails.utils.structured_data_utils import ( # Prevent duplicate declaration in the docs json_function_calling_tool as json_function_calling_tool_util, + output_format_json_schema as output_format_json_schema, ) +from guardrails.decorators.experimental import experimental from guardrails.settings import settings @@ -1331,6 +1333,10 @@ def to_dict(self) -> Dict[str, Any]: return i_guard.to_dict() + @experimental + def response_format_json_schema(self) -> Dict[str, Any]: + return output_format_json_schema(self._base_model) + def json_function_calling_tool( self, tools: Optional[list] = None, diff --git a/guardrails/utils/structured_data_utils.py b/guardrails/utils/structured_data_utils.py new file mode 100644 index 000000000..0f6384365 --- /dev/null +++ b/guardrails/utils/structured_data_utils.py @@ -0,0 +1,68 @@ +from typing import List, Optional + +from guardrails.classes.schema.processed_schema import ProcessedSchema + + +# takes processed schema and converts it to a openai tool object +def schema_to_tool(schema) -> dict: + tool = { + "type": "function", + "function": { + "name": "gd_response_tool", + "description": "A tool for generating responses to guardrails." + " It must be called last in every response.", + "parameters": schema, + "required": schema["required"] or [], + }, + } + return tool + + +def set_additional_properties_false_iteratively(schema): + stack = [schema] + while stack: + current = stack.pop() + if isinstance(current, dict): + if "properties" in current: + current["required"] = list( + current["properties"].keys() + ) # this has to be set + if "maximum" in current: + current.pop("maximum") # the api does not like these set + if "minimum" in current: + current.pop("minimum") # the api does not like these set + if "default" in current: + current.pop("default") # the api does not like these set + for prop in current.values(): + stack.append(prop) + elif isinstance(current, list): + for prop in current: + stack.append(prop) + if ( + isinstance(current, dict) + and "additionalProperties" not in current + and "type" in current + and current["type"] == "object" + ): + current["additionalProperties"] = False # the api needs these set + + +def json_function_calling_tool( + schema: ProcessedSchema, + tools: Optional[List] = None, +) -> List: + tools = tools or [] + tools.append(schema_to_tool(schema)) # type: ignore + return tools + + +def output_format_json_schema(schema: ProcessedSchema) -> dict: + print("====schema", schema) + schema = schema.model_json_schema() # this is a pydantic model + + set_additional_properties_false_iteratively(schema) + + return { + "type": "json_schema", + "json_schema": {"name": schema["title"], "schema": schema, "strict": True}, + } diff --git a/guardrails/utils/tools_utils.py b/guardrails/utils/tools_utils.py deleted file mode 100644 index da0386a8a..000000000 --- a/guardrails/utils/tools_utils.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import List, Optional - -from guardrails.classes.schema.processed_schema import ProcessedSchema - - -# takes processed schema and converts it to a openai tool object -def schema_to_tool(schema) -> dict: - tool = { - "type": "function", - "function": { - "name": "gd_response_tool", - "description": "A tool for generating responses to guardrails." - " It must be called last in every response.", - "parameters": schema, - "required": schema["required"] or [], - }, - } - return tool - - -def json_function_calling_tool( - schema: ProcessedSchema, - tools: Optional[List] = None, -) -> List: - tools = tools or [] - tools.append(schema_to_tool(schema)) # type: ignore - return tools diff --git a/tests/unit_tests/utils/test_tools_utils.py b/tests/unit_tests/utils/test_structured_data_utils.py similarity index 60% rename from tests/unit_tests/utils/test_tools_utils.py rename to tests/unit_tests/utils/test_structured_data_utils.py index 4bdd7fc38..3397e50c4 100644 --- a/tests/unit_tests/utils/test_tools_utils.py +++ b/tests/unit_tests/utils/test_structured_data_utils.py @@ -3,7 +3,11 @@ from guardrails.schema.pydantic_schema import pydantic_model_to_schema -from guardrails.utils.tools_utils import json_function_calling_tool, schema_to_tool +from guardrails.utils.structured_data_utils import ( + json_function_calling_tool, + schema_to_tool, + output_format_json_schema, +) class Delivery(BaseModel): @@ -141,3 +145,89 @@ def test_json_function_calling_tool(): }, } ] + + +def test_output_format_json_schema(): + schema = output_format_json_schema(Schedule) + assert schema == { + "type": "json_schema", + "json_schema": { + "name": "Schedule", + "schema": { + "additionalProperties": False, + "$defs": { + "Delivery": { + "additionalProperties": False, + "properties": { + "customer": { + "description": "customer name", + "title": "Customer", + "type": "string", + }, + "pickup_time": { + "description": "date and time of pickup", + "title": "Pickup Time", + "type": "string", + }, + "pickup_location": { + "description": "address of pickup", + "title": "Pickup Location", + "type": "string", + }, + "dropoff_time": { + "description": "date and time of dropoff", + "title": "Dropoff Time", + "type": "string", + }, + "dropoff_location": { + "description": "address of dropoff", + "title": "Dropoff Location", + "type": "string", + }, + "price": { + "description": "price of delivery with" + " currency symbol included", + "title": "Price", + "type": "string", + }, + "items": { + "description": "items for pickup/delivery typically" + " something a single person can carry on a bike", + "title": "Items", + "type": "string", + }, + "number_items": { + "description": "number of items", + "title": "Number Items", + "type": "integer", + }, + }, + "required": [ + "customer", + "pickup_time", + "pickup_location", + "dropoff_time", + "dropoff_location", + "price", + "items", + "number_items", + ], + "title": "Delivery", + "type": "object", + } + }, + "properties": { + "deliveries": { + "description": "deliveries for messenger", + "items": {"$ref": "#/$defs/Delivery"}, + "title": "Deliveries", + "type": "array", + } + }, + "required": ["deliveries"], + "title": "Schedule", + "type": "object", + }, + "strict": True, + }, + } From 6f6dddff4bb8b771b32103907343279bdc6fe080 Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 14:05:27 -0700 Subject: [PATCH 2/6] add some informative logs --- guardrails/utils/structured_data_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/guardrails/utils/structured_data_utils.py b/guardrails/utils/structured_data_utils.py index 0f6384365..80cd96961 100644 --- a/guardrails/utils/structured_data_utils.py +++ b/guardrails/utils/structured_data_utils.py @@ -1,5 +1,5 @@ from typing import List, Optional - +from guardrails.logger import logger from guardrails.classes.schema.processed_schema import ProcessedSchema @@ -28,10 +28,15 @@ def set_additional_properties_false_iteratively(schema): current["properties"].keys() ) # this has to be set if "maximum" in current: + logger.warn("Property maximum is not supported." " Dropping") current.pop("maximum") # the api does not like these set if "minimum" in current: + logger.warn("Property maximum is not supported." " Dropping") current.pop("minimum") # the api does not like these set if "default" in current: + logger.warn( + "Property default is not supported. " "Marking field Required" + ) current.pop("default") # the api does not like these set for prop in current.values(): stack.append(prop) @@ -57,7 +62,6 @@ def json_function_calling_tool( def output_format_json_schema(schema: ProcessedSchema) -> dict: - print("====schema", schema) schema = schema.model_json_schema() # this is a pydantic model set_additional_properties_false_iteratively(schema) From 649d2ad93c9a8455ee74d9519dee9f99d6c99077 Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 14:05:56 -0700 Subject: [PATCH 3/6] add some informative logs --- guardrails/utils/structured_data_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/guardrails/utils/structured_data_utils.py b/guardrails/utils/structured_data_utils.py index 80cd96961..244bbc1c2 100644 --- a/guardrails/utils/structured_data_utils.py +++ b/guardrails/utils/structured_data_utils.py @@ -34,9 +34,7 @@ def set_additional_properties_false_iteratively(schema): logger.warn("Property maximum is not supported." " Dropping") current.pop("minimum") # the api does not like these set if "default" in current: - logger.warn( - "Property default is not supported. " "Marking field Required" - ) + logger.warn("Property default is not supported. Marking field Required") current.pop("default") # the api does not like these set for prop in current.values(): stack.append(prop) From db6e0e8f7a3adf773ba72cc58231eab74a8a9488 Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 14:16:40 -0700 Subject: [PATCH 4/6] fix typings --- guardrails/guard.py | 2 +- guardrails/utils/structured_data_utils.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/guardrails/guard.py b/guardrails/guard.py index 064f160fd..0009b36ff 100644 --- a/guardrails/guard.py +++ b/guardrails/guard.py @@ -1335,7 +1335,7 @@ def to_dict(self) -> Dict[str, Any]: @experimental def response_format_json_schema(self) -> Dict[str, Any]: - return output_format_json_schema(self._base_model) + return output_format_json_schema(schema=self._base_model) def json_function_calling_tool( self, diff --git a/guardrails/utils/structured_data_utils.py b/guardrails/utils/structured_data_utils.py index 244bbc1c2..d63d0ee9a 100644 --- a/guardrails/utils/structured_data_utils.py +++ b/guardrails/utils/structured_data_utils.py @@ -1,6 +1,7 @@ from typing import List, Optional from guardrails.logger import logger from guardrails.classes.schema.processed_schema import ProcessedSchema +from guardrails.types.pydantic import ModelOrListOfModels # takes processed schema and converts it to a openai tool object @@ -59,12 +60,16 @@ def json_function_calling_tool( return tools -def output_format_json_schema(schema: ProcessedSchema) -> dict: - schema = schema.model_json_schema() # this is a pydantic model +def output_format_json_schema(schema: ModelOrListOfModels) -> dict: + parsed_schema = schema.model_json_schema() # type: ignore - set_additional_properties_false_iteratively(schema) + set_additional_properties_false_iteratively(parsed_schema) return { "type": "json_schema", - "json_schema": {"name": schema["title"], "schema": schema, "strict": True}, + "json_schema": { + "name": parsed_schema["title"], + "schema": parsed_schema, + "strict": True, + }, # type: ignore } From db2a3317ffdd88b7c027e1e2d71c03ac62b29dcf Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 14:32:34 -0700 Subject: [PATCH 5/6] actually check in the experimental decorator --- guardrails/decorators/experimental.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 guardrails/decorators/experimental.py diff --git a/guardrails/decorators/experimental.py b/guardrails/decorators/experimental.py new file mode 100644 index 000000000..440d27f9a --- /dev/null +++ b/guardrails/decorators/experimental.py @@ -0,0 +1,15 @@ +import functools +from guardrails.logger import logger + + +def experimental(func): + """Decorator to mark a function as experimental.""" + + @functools.wraps(func) + def wrapper(*args, **kwargs): + logger.warn( + f"The function '{func.__name__}' is experimental and subject to change." + ) + return func(*args, **kwargs) + + return wrapper From 5c149d63ef5a7ec8cb69d859920b33392a9028d1 Mon Sep 17 00:00:00 2001 From: David Tam Date: Thu, 8 Aug 2024 14:34:37 -0700 Subject: [PATCH 6/6] one last typing fix --- guardrails/guard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/guardrails/guard.py b/guardrails/guard.py index 0009b36ff..1560ba2e1 100644 --- a/guardrails/guard.py +++ b/guardrails/guard.py @@ -92,6 +92,7 @@ output_format_json_schema as output_format_json_schema, ) from guardrails.decorators.experimental import experimental + from guardrails.settings import settings @@ -1335,7 +1336,7 @@ def to_dict(self) -> Dict[str, Any]: @experimental def response_format_json_schema(self) -> Dict[str, Any]: - return output_format_json_schema(schema=self._base_model) + return output_format_json_schema(schema=self._base_model) # type: ignore def json_function_calling_tool( self,