#### Recursive JSON Text Splitter
Reference: <https://docs.langchain.com/oss/python/integrations/splitters/recursive_json_splitter>

In [2]:
import json

import requests

# Download the LangSmith OpenAPI spec; .json() parses the body into a large nested Python dict.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of parsing an error payload as the spec.
response = requests.get("https://api.smith.langchain.com/openapi.json", timeout=30)
response.raise_for_status()
json_data = response.json()

In [6]:
from langchain_text_splitters import RecursiveJsonSplitter

# Chunk-size ceiling handed to the splitter; named so it is easy to find and tune.
MAX_CHUNK_SIZE = 300
splitter = RecursiveJsonSplitter(max_chunk_size=MAX_CHUNK_SIZE)

In [7]:
# .split_json breaks the nested JSON into smaller chunks (they print as dicts below).
# Use it when the individual pieces need to be accessed or manipulated afterwards.
json_chunks = splitter.split_json(json_data=json_data)

# Preview only the first three chunks to keep the output short.
preview_chunks = json_chunks[:3]
for piece in preview_chunks:
    print(piece)

{'openapi': '3.1.0', 'info': {'title': 'LangSmith', 'description': 'The LangSmith API is used to programmatically create and manage LangSmith resources.\n\n## Host\nhttps://api.smith.langchain.com\n\n## Authentication\nTo authenticate with the LangSmith API, set the `X-Api-Key` header\nto a valid [LangSmith API key](https://docs.langchain.com/langsmith/create-account-api-key#create-an-api-key).\n\n'}}
{'info': {'version': '0.1.0'}, 'paths': {'/api/v1/audit-logs': {'get': {'tags': ['audit-logs'], 'summary': 'Get Audit Logs'}}}}
{'paths': {'/api/v1/audit-logs': {'get': {'description': "Retrieve audit log records for the authenticated user's organization.\n\nRequires both start_time and end_time parameters to filter logs within a date range.\nSupports cursor-based pagination."}}}}


In [8]:
# .create_documents returns LangChain Document objects instead of raw chunks.
# The JSON data is passed wrapped in a one-element list.
docs = splitter.create_documents(texts=[json_data])

# Preview only the first three documents.
preview_docs = docs[:3]
for document in preview_docs:
    print(document)

page_content='{"openapi": "3.1.0", "info": {"title": "LangSmith", "description": "The LangSmith API is used to programmatically create and manage LangSmith resources.\n\n## Host\nhttps://api.smith.langchain.com\n\n## Authentication\nTo authenticate with the LangSmith API, set the `X-Api-Key` header\nto a valid [LangSmith API key](https://docs.langchain.com/langsmith/create-account-api-key#create-an-api-key).\n\n"}}'
page_content='{"info": {"version": "0.1.0"}, "paths": {"/api/v1/audit-logs": {"get": {"tags": ["audit-logs"], "summary": "Get Audit Logs"}}}}'
page_content='{"paths": {"/api/v1/audit-logs": {"get": {"description": "Retrieve audit log records for the authenticated user's organization.\n\nRequires both start_time and end_time parameters to filter logs within a date range.\nSupports cursor-based pagination."}}}}'


In [10]:
# .split_text yields the chunks directly as string content.
texts = splitter.split_text(json_data=json_data)

# Show the first two chunks.
for position in (0, 1):
    print(texts[position])

{"openapi": "3.1.0", "info": {"title": "LangSmith", "description": "The LangSmith API is used to programmatically create and manage LangSmith resources.\n\n## Host\nhttps://api.smith.langchain.com\n\n## Authentication\nTo authenticate with the LangSmith API, set the `X-Api-Key` header\nto a valid [LangSmith API key](https://docs.langchain.com/langsmith/create-account-api-key#create-an-api-key).\n\n"}}
{"info": {"version": "0.1.0"}, "paths": {"/api/v1/audit-logs": {"get": {"tags": ["audit-logs"], "summary": "Get Audit Logs"}}}}
