In [1]:
from langchain_community.document_loaders import JSONLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import re

# Function to remove non-ASCII characters
def remove_non_ascii(file_path, output_file):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Remove all non-ASCII characters
        cleaned_content = ''.join(char for char in content if ord(char) < 128)

        with open(output_file, 'w', encoding='utf-8') as output:
            output.write(cleaned_content)

        print(f"File cleaned successfully. Output saved to: {output_file}")
    except Exception as e:
        print(f"Error: {e}")

remove_non_ascii("WaapiSwagger.json", "WaapiSwaggerCleaned.json")

path_docs = JSONLoader(file_path="WaapiSwagger.json",jq_schema='.paths[] | to_entries[]',text_content=False).load()
#securitySchemes_docs = JSONLoader(file_path="WaapiSwagger.json",jq_schema='.components.securitySchemes[]',text_content=False).load()
#schema_docs = JSONLoader(file_path="WaapiSwagger.json",jq_schema='.components.schemas[]',text_content=False).load()
#docs=path_docs + securitySchemes_docs + schema_docs
docs=path_docs

File cleaned successfully. Output saved to: WaapiSwaggerCleaned.json


In [2]:
# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=docs,
    collection_name="WaapiSwagger3",
    embedding=OpenAIEmbeddings(),
)

In [4]:
vectorstore.similarity_search("sign_up", k=10)

[Document(metadata={'seq_num': 73, 'source': 'D:\\code\\python\\langchain_tutorial\\WaapiSwagger.json'}, page_content='{"key": "get", "value": {"operationId": "ContactsController_checkExists", "summary": "Check phone number is registered in WhatsApp.", "parameters": [{"name": "phone", "required": true, "in": "query", "description": "The phone number to check", "example": "1213213213", "schema": {"type": "string"}}, {"name": "session", "required": true, "in": "query", "schema": {"default": "default", "type": "string"}}], "responses": {"200": {"description": "", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/WANumberExistResult"}}}}}, "tags": ["\\ud83d\\udc64 Contacts"], "security": [{"api_key": []}]}}'),
 Document(metadata={'seq_num': 5, 'source': 'D:\\code\\python\\langchain_tutorial\\WaapiSwagger.json'}, page_content='{"key": "post", "value": {"operationId": "AuthController_saveCaptcha", "summary": "Enter captcha code.", "parameters": [{"name": "session", "r

In [65]:
from langchain.tools.retriever import create_retriever_tool
retriever = vectorstore.as_retriever()

retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_api_docs",
    "Search and return the APIs endpoints that should be called based on input query and their corresponding parameters or request body",
)

tools = [retriever_tool]

In [66]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
import os

# Load environment variables
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0, api_key=OPENAI_API_KEY)
llm_with_tools = llm.bind_tools(tools)
response=llm_with_tools.invoke("I want to sign up for this service. What APIs should I call?")
print(response)

content='' additional_kwargs={'tool_calls': [{'id': 'call_oikugPxRhYjhxUYWMUhWEgtL', 'function': {'arguments': '{"query":"sign up"}', 'name': 'retrieve_api_docs'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 86, 'total_tokens': 103, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None} id='run-32e048fd-9156-4175-880e-25dd33fc164b-0' tool_calls=[{'name': 'retrieve_api_docs', 'args': {'query': 'sign up'}, 'id': 'call_oikugPxRhYjhxUYWMUhWEgtL', 'type': 'tool_call'}] usage_metadata={'input_tokens': 86, 'output_tokens': 17, 'total_tokens': 103, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [67]:
from langgraph.prebuilt import ToolNode

tool_node = ToolNode(tools)

tool_node.invoke({"messages": [response]})


{'messages': [ToolMessage(content='{"key": "get", "value": {"operationId": "GroupsController_getParticipants", "summary": "Get participants", "parameters": [{"name": "session", "required": true, "in": "path", "schema": {"default": "default"}, "description": "Session name"}, {"name": "id", "required": true, "in": "path", "description": "Group ID", "example": "123123123@g.us", "schema": {"type": "string"}}], "responses": {"200": {"description": ""}}, "tags": ["\\ud83d\\udc65 Groups"], "security": [{"api_key": []}]}}\n\n{"key": "post", "value": {"operationId": "GroupsController_joinGroup", "summary": "Join group via code", "parameters": [{"name": "session", "required": true, "in": "path", "schema": {"default": "default"}, "description": "Session name"}], "requestBody": {"required": true, "content": {"application/json": {"schema": {"$ref": "#/components/schemas/JoinGroupRequest"}}}}, "responses": {"201": {"description": "", "content": {"application/json": {"schema": {"$ref": "#/components/