------------------------
#### Reading from JSON: natural-language queries with `JSONQueryEngine`
-------------------------

In [1]:
import os
import openai
from IPython.display import Markdown, display

In [2]:
# Sample blog data to query: two posts and three comments.
# Each comment points at its post through "blogPostId".
json_value = {
    "blogPosts": [
        {"id": 1, "title": "First blog post", "content": "This is my first blog post"},
        {"id": 2, "title": "Second blog post", "content": "This is my second blog post"},
    ],
    "comments": [
        {"id": 1, "content": "Nice post!", "username": "jerry", "blogPostId": 1},
        {"id": 2, "content": "Interesting thoughts", "username": "simon", "blogPostId": 2},
        {"id": 3, "content": "Loved reading this!", "username": "simon", "blogPostId": 2},
    ],
}

In [3]:
# JSON Schema (draft-07) describing the structure of `json_value` above:
# a top-level object holding a "blogPosts" array and a "comments" array.
json_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "description": "Schema for a very simple blog post app",
    "type": "object",
    "properties": {
        "blogPosts": {
            "description": "List of blog posts",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"description": "Unique identifier for the blog post", "type": "integer"},
                    "title": {"description": "Title of the blog post", "type": "string"},
                    "content": {"description": "Content of the blog post", "type": "string"},
                },
                "required": ["id", "title", "content"],
            },
        },
        "comments": {
            "description": "List of comments on blog posts",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"description": "Unique identifier for the comment", "type": "integer"},
                    "content": {"description": "Content of the comment", "type": "string"},
                    # The "(lowercased)" hint is part of the schema text itself.
                    "username": {"description": "Username of the commenter (lowercased)", "type": "string"},
                    "blogPostId": {
                        "description": "Identifier for the blog post to which the comment belongs",
                        "type": "integer",
                    },
                },
                "required": ["id", "content", "username", "blogPostId"],
            },
        },
    },
    "required": ["blogPosts", "comments"],
}

In [4]:
# Prerequisite (run once in the kernel's environment): %pip install jsonpath-ng

In [5]:
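# NOTE(review): `jsonpath_ng.ext` looks like a module shipped inside the jsonpath-ng package rather than a separate distribution, so `pip install jsonpath-ng.ext` should not be needed (confirm).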

In [6]:
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store import JSONQueryEngine

In [7]:
# LLM shared by every query engine below.
# NOTE(review): presumably needs OpenAI credentials in the environment (confirm).
llm = OpenAI(
    model="gpt-4o-mini",
)

In [8]:
# Both engines query the same data with the same schema and LLM; they differ
# only in whether the result is synthesized into natural language.
shared_engine_args = {
    "json_value": json_value,    # the actual JSON data being queried
    "json_schema": json_schema,  # describes the organization and types in json_value
    "llm": llm,
}

# synthesize_response=True: format the response into a more natural,
# human-readable form.
nl_query_engine = JSONQueryEngine(**shared_engine_args, synthesize_response=True)

# synthesize_response=False: skip the natural-language formatting step.
raw_query_engine = JSONQueryEngine(**shared_engine_args, synthesize_response=False)

In [9]:
# Ask both engines the same question so the two outputs can be compared.
jerry_question = "What comments has Jerry been writing?"

nl_response = nl_query_engine.query(jerry_question)
raw_response = raw_query_engine.query(jerry_question)

In [10]:
# Put the label in its own paragraph: without a separator it runs straight
# into the response text ("...ResponseJerry has written...").
display(Markdown(f"Natural language Response:\n\n{nl_response}"))

# Fence the raw output so Markdown shows it verbatim instead of treating
# characters such as `*`, `_` or `[` inside the JSON as formatting.
display(Markdown(f"Raw JSON Response:\n\n```\n{raw_response}\n```"))

Natural language ResponseJerry has written the following comment: "Nice post!" on blog post with ID 1.

Raw JSON Response{"username == 'jerry')]": "{'id': 1, 'content': 'Nice post!', 'username': 'jerry', 'blogPostId': 1}"}

In [11]:
# Follow-up question against the natural-language engine; rebinds `nl_response`.
nl_response = nl_query_engine.query("List Comments on blog Post # 1")

# Put the label in its own paragraph: the response is itself Markdown (a
# bullet list), and without a separator the label runs into its first line
# ("...ResponseHere are the comments...").
display(Markdown(f"Natural language Response:\n\n{nl_response}"))

Natural language ResponseHere are the comments on blog post #1:

- **Username:** jerry
  - **Content:** Nice post!