From 1d8f130df2ac558001f217a8bc29daaf9380acd4 Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Mon, 11 Mar 2024 19:51:35 -0700
Subject: [PATCH] core[patch]: support labeled json schema as tools (#18935)

---
 .../langchain_core/utils/function_calling.py  | 29 +++++++++++++++----
 .../unit_tests/utils/test_function_calling.py | 27 +++++++++++++++--
 .../langchain_openai/chat_models/base.py      |  3 +-
 3 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py
index a4449b655b84525..a0cadca4f330b0e 100644
--- a/libs/core/langchain_core/utils/function_calling.py
+++ b/libs/core/langchain_core/utils/function_calling.py
@@ -270,7 +270,8 @@ def convert_to_openai_function(
     Args:
         function: Either a dictionary, a pydantic.BaseModel class, or a Python function.
             If a dictionary is passed in, it is assumed to already be a valid OpenAI
-            function.
+            function or a JSON schema with top-level 'title' and 'description' keys
+            specified.
 
     Returns:
         A dict version of the passed in function which is compatible with the
@@ -278,8 +279,21 @@ def convert_to_openai_function(
     """
     from langchain_core.tools import BaseTool
 
-    if isinstance(function, dict):
+    # already in OpenAI function format
+    if isinstance(function, dict) and all(
+        k in function for k in ("name", "description", "parameters")
+    ):
         return function
+    # a JSON schema with title and description
+    elif isinstance(function, dict) and all(
+        k in function for k in ("title", "description", "properties")
+    ):
+        function = function.copy()
+        return {
+            "name": function.pop("title"),
+            "description": function.pop("description"),
+            "parameters": function,
+        }
     elif isinstance(function, type) and issubclass(function, BaseModel):
         return cast(Dict, convert_pydantic_to_openai_function(function))
     elif isinstance(function, BaseTool):
@@ -288,8 +302,10 @@ def convert_to_openai_function(
         return convert_python_function_to_openai_function(function)
     else:
         raise ValueError(
-            f"Unsupported function type {type(function)}. Functions must be passed in"
-            f" as Dict, pydantic.BaseModel, or Callable."
+            f"Unsupported function\n\n{function}\n\nFunctions must be passed in"
+            " as Dict, pydantic.BaseModel, or Callable. If they're a dict they must"
+            " either be in OpenAI function format or valid JSON schema with top-level"
+            " 'title' and 'description' keys."
         )
 
 
@@ -301,13 +317,14 @@ def convert_to_openai_tool(
     Args:
         tool: Either a dictionary, a pydantic.BaseModel class, Python function, or
             BaseTool. If a dictionary is passed in, it is assumed to already be a valid
-            OpenAI tool or OpenAI function.
+            OpenAI tool, OpenAI function, or a JSON schema with top-level 'title' and
+            'description' keys specified.
 
     Returns:
         A dict version of the passed in tool which is compatible with the
             OpenAI tool-calling API.
     """
-    if isinstance(tool, dict) and "type" in tool:
+    if isinstance(tool, dict) and tool.get("type") == "function" and "function" in tool:
         return tool
     function = convert_to_openai_function(tool)
     return {"type": "function", "function": function}
diff --git a/libs/core/tests/unit_tests/utils/test_function_calling.py b/libs/core/tests/unit_tests/utils/test_function_calling.py
index bd03abe27579c17..629cf769c558739 100644
--- a/libs/core/tests/unit_tests/utils/test_function_calling.py
+++ b/libs/core/tests/unit_tests/utils/test_function_calling.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, List, Literal, Optional, Type
+from typing import Any, Callable, Dict, List, Literal, Optional, Type
 
 import pytest
 
@@ -49,8 +49,29 @@ def _run(self, *args: Any, **kwargs: Any) -> Any:
     return DummyFunction()
 
 
+@pytest.fixture()
+def json_schema() -> Dict:
+    return {
+        "title": "dummy_function",
+        "description": "dummy function",
+        "type": "object",
+        "properties": {
+            "arg1": {"description": "foo", "type": "integer"},
+            "arg2": {
+                "description": "one of 'bar', 'baz'",
+                "enum": ["bar", "baz"],
+                "type": "string",
+            },
+        },
+        "required": ["arg1", "arg2"],
+    }
+
+
 def test_convert_to_openai_function(
-    pydantic: Type[BaseModel], function: Callable, dummy_tool: BaseTool
+    pydantic: Type[BaseModel],
+    function: Callable,
+    dummy_tool: BaseTool,
+    json_schema: Dict,
 ) -> None:
     expected = {
         "name": "dummy_function",
@@ -69,7 +90,7 @@ def test_convert_to_openai_function(
         },
     }
 
-    for fn in (pydantic, function, dummy_tool, expected):
+    for fn in (pydantic, function, dummy_tool, json_schema, expected):
         actual = convert_to_openai_function(fn)  # type: ignore
         assert actual == expected
 
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index a777c424130b480..46f0abc7e5f2773 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -799,7 +799,8 @@ def with_structured_output(
                 the model output will be a dict. With a Pydantic class the returned
                 attributes will be validated, whereas with a dict they will not be. If
                 `method` is "function_calling" and `schema` is a dict, then the dict
-                must match the OpenAI function-calling spec.
+                must match the OpenAI function-calling spec or be a valid JSON schema
+                with top-level 'title' and 'description' keys specified.
             method: The method for steering model generation, either "function_calling"
                 or "json_mode". If "function_calling" then the schema will be converted
                 to an OpenAI function and the returned model will make use of the