https://www.zyte.com/blog/json-parsing-with-python/

In [1]:
# pip install jmespath
# pip install js2py

In [2]:
import json
import jmespath
from pprint import pprint

# catit

In [11]:
import subprocess

def cat_using_subprocess(file_path):
    result = subprocess.run(['cat', file_path], capture_output=True, text=True)
    print(result.stdout)

# def catit(mystring):
#     temp_file_path = 'mystring.json'
#     with open(temp_file_path, 'w') as f:
#         f.write(mystring)

#     result = subprocess.run(['cat', temp_file_path], capture_output=True, text=True)
#     print(result.stdout)


def catit2(mystring):
    print(f"17 {type(mystring)=}")
    if not isinstance(mystring, str):
        mystring = json.dumps(mystring, indent=4)
        print(f"21 {type(mystring)=}")

    print(f"23{type(mystring)=}")
    temp_file_path = 'mystring.json'
    with open(temp_file_path, 'w') as f:
        f.write(mystring)

    result = subprocess.run(['cat', temp_file_path], capture_output=True, text=True)
    print(result.stdout)

In [12]:
json_string = '{"numbers": [1, 2, 3], "car": {"model": "Model X", "year": 2022}}'
json_data = json.loads(json_string)
catit2(json_data)
print(type(json_data), json_data.keys())

17 type(mystring)=<class 'dict'>
21 type(mystring)=<class 'str'>
23type(mystring)=<class 'str'>
{
    "numbers": [
        1,
        2,
        3
    ],
    "car": {
        "model": "Model X",
        "year": 2022
    }
}
<class 'dict'> dict_keys(['numbers', 'car'])


In [13]:
# Accessing nested JSON
name = jmespath.search('car.model', json_data) 
name

'Model X'

In [14]:
# Taking the first number from numbers
first_number = jmespath.search('numbers[0]', json_data)
first_number

1

# jmespath

https://jmespath.org/tutorial.html

https://jmespath.org/examples.html

In [16]:
json_data2 = json.loads('{"a": {"b": {"c": {"d": "value"}}}}')
catit2(json_data2)

17 type(mystring)=<class 'dict'>
21 type(mystring)=<class 'str'>
23type(mystring)=<class 'str'>
{
    "a": {
        "b": {
            "c": {
                "d": "value"
            }
        }
    }
}


In [18]:
catit2(jmespath.search('a', json_data2))

17 type(mystring)=<class 'dict'>
21 type(mystring)=<class 'str'>
23type(mystring)=<class 'str'>
{
    "b": {
        "c": {
            "d": "value"
        }
    }
}


In [19]:
json_data3 = json.loads('{"a": {"b": {"c": {"d": "value"}}}}')
catit2(json_data3)

17 type(mystring)=<class 'dict'>
21 type(mystring)=<class 'str'>
23type(mystring)=<class 'str'>
{
    "a": {
        "b": {
            "c": {
                "d": "value"
            }
        }
    }
}


In [20]:
print(jmespath.search('a.b.c.d', json_data3))

value


In [21]:
mystring = '["a", "b", "c", "d", "e", "f"]'
json_data4 = json.loads(mystring)
print(json_data4, type(json_data4))
print(jmespath.search('[1]', json_data4))

['a', 'b', 'c', 'd', 'e', 'f'] <class 'list'>
b


In [24]:
mystring = """
{"a": {
  "b": {
    "c": [
      {"d": [0, [1, 2]]},
      {"d": [3, 4]}
    ]
  }
}}
"""
json_data5 = json.loads(mystring)
print(type(json_data5))
print(json_data5)
print(jmespath.search('a.b.c[0].d[1][0]', json_data5))

<class 'dict'>
{'a': {'b': {'c': [{'d': [0, [1, 2]]}, {'d': [3, 4]}]}}}
1


In [44]:
# slices, slicing
mystring = '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]'
json_data5 = json.loads(mystring)
print(type(json_data5))
print(json_data5, type(json_data5))
print(jmespath.search('[0:5]', json_data5))
print(jmespath.search('[::2]', json_data5))
print(jmespath.search('[::-1]', json_data5))

<class 'list'>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] <class 'list'>
[0, 1, 2, 3, 4]
[0, 2, 4, 6, 8]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


In [25]:
# List and slice projections
mystring="""
{
  "people": [
    {"first": "James", "last": "d"},
    {"first": "Jacob", "last": "e"},
    {"first": "Jayden", "last": "f"},
    {"missing": "different"}
  ],
  "foo": {"bar": "baz"}
}
"""
json_data5 = json.loads(mystring)
print(json_data5, type(json_data5))
print(jmespath.search('people[*].first', json_data5))
print(jmespath.search('people[:2].first', json_data5))
#print(jmespath.search('people[*].first', json_data5))
#print(jmespath.search('people[*].first', json_data5))

{'people': [{'first': 'James', 'last': 'd'}, {'first': 'Jacob', 'last': 'e'}, {'first': 'Jayden', 'last': 'f'}, {'missing': 'different'}], 'foo': {'bar': 'baz'}} <class 'dict'>
['James', 'Jacob', 'Jayden']
['James', 'Jacob']


In [34]:
# object projections
mystring = """
{
  "ops": {
    "functionA": {"numArgs": 2},
    "functionB": {"numArgs": 3},
    "functionC": {"variadic": true}
  }
}
"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
print(jmespath.search('ops.*.numArgs', json_data))
print(jmespath.search('keys(ops.*)', json_data))


{'ops': {'functionA': {'numArgs': 2}, 'functionB': {'numArgs': 3}, 'functionC': {'variadic': True}}} <class 'dict'>
[2, 3]


JMESPathTypeError: In function keys(), invalid type for value: [{'numArgs': 2}, {'numArgs': 3}, {'variadic': True}], expected one of: ['object'], received: "array"

In [30]:
# flatten projections
mystring = """{
  "reservations": [
    {
      "instances": [
        {"state": "running"},
        {"state": "stopped"}
      ]
    },
    {
      "instances": [
        {"state": "terminated"},
        {"state": "running"}
      ]
    }
  ]
}"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
print(jmespath.search('reservations[*].instances[*].state', json_data))
print(jmespath.search('reservations[].instances[].state', json_data))

print(set(jmespath.search('reservations[].instances[].state', json_data)))


{'reservations': [{'instances': [{'state': 'running'}, {'state': 'stopped'}]}, {'instances': [{'state': 'terminated'}, {'state': 'running'}]}]} <class 'dict'>
[['running', 'stopped'], ['terminated', 'running']]
['running', 'stopped', 'terminated', 'running']
{'stopped', 'terminated', 'running'}


In [59]:
mystring = """[
  [0, 1],
  2,
  [3],
  4,
  [5, [6, 7]]
]"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
print(jmespath.search('[]', json_data))
# print(jmespath.search('reservations[].instances[].state', json_data))


[[0, 1], 2, [3], 4, [5, [6, 7]]] <class 'list'>
[0, 1, 2, 3, 4, 5, [6, 7]]


In [63]:
# pipe expressions
mystring = """{
  "people": [
    {"first": "James", "last": "d"},
    {"first": "Jacob", "last": "e"},
    {"first": "Jayden", "last": "f"},
    {"missing": "different"}
  ],
  "foo": {"bar": "baz"}
}"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
print(jmespath.search('people[0].first', json_data))
print(jmespath.search('people[*].first', json_data))
print(jmespath.search('people[*].first | [0]', json_data))


{'people': [{'first': 'James', 'last': 'd'}, {'first': 'Jacob', 'last': 'e'}, {'first': 'Jayden', 'last': 'f'}, {'missing': 'different'}], 'foo': {'bar': 'baz'}} <class 'dict'>
James
['James', 'Jacob', 'Jayden']
James


In [66]:
# functions
mystring = """
{
  "people": [
    {
      "name": "b",
      "age": 30,
      "state": {"name": "up"}
    },
    {
      "name": "a",
      "age": 50,
      "state": {"name": "down"}
    },
    {
      "name": "c",
      "age": 40,
      "state": {"name": "up"}
    }
  ]
}
"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
print(jmespath.search('length(people)', json_data))
print(jmespath.search('length()', json_data))

{'people': [{'name': 'b', 'age': 30, 'state': {'name': 'up'}}, {'name': 'a', 'age': 50, 'state': {'name': 'down'}}, {'name': 'c', 'age': 40, 'state': {'name': 'up'}}]} <class 'dict'>
3


ArityError: Expected 1 argument for function length(), received 0

In [71]:
# functions 2 
mystring = """
[
    {
      "name": "b",
      "age": 30,
      "state": {"name": "up"}
    },
    {
      "name": "a",
      "age": 50,
      "state": {"name": "down"}
    },
    {
      "name": "c",
      "age": 40,
      "state": {"name": "up"}
    }
  ]
"""
json_data = json.loads(mystring)
print(json_data, '\n', type(json_data))
print(len(json_data))
#$zprint(jmespath.search('length(people)', json_data))
#print(jmespath.search('length()', json_data))
#print(

[{'name': 'b', 'age': 30, 'state': {'name': 'up'}}, {'name': 'a', 'age': 50, 'state': {'name': 'down'}}, {'name': 'c', 'age': 40, 'state': {'name': 'up'}}] 
 <class 'list'>
3


In [72]:
# functions continued
mystring = """
{
  "people": [
    {
      "name": "b",
      "age": 30
    },
    {
      "name": "a",
      "age": 50
    },
    {
      "name": "c",
      "age": 40
    }
  ]
}
"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
query = 'max_by(people, &age).name'
print(jmespath.search(query, json_data))

{'people': [{'name': 'b', 'age': 30}, {'name': 'a', 'age': 50}, {'name': 'c', 'age': 40}]} <class 'dict'>
a


### function with filter expression
https://jmespath.org/tutorial.html

The @ character in the example above refers to the current element being evaluated in myarray. The expression contains(@, 'foo') will return true if the current element in the myarray array contains the string foo.

In [74]:
# function with filter expression
# https://jmespath.org/tutorial.html
mystring = """
{
  "myarray": [
    "foo",
    "foobar",
    "barfoo",
    "bar",
    "baz",
    "barbaz",
    "barfoobaz"
  ]
}
"""
json_data = json.loads(mystring)
print(json_data, type(json_data))
query = "myarray[?contains(@, 'foo') == `true`]"
print(jmespath.search(query, json_data))

{'myarray': ['foo', 'foobar', 'barfoo', 'bar', 'baz', 'barbaz', 'barfoobaz']} <class 'dict'>
['foo', 'foobar', 'barfoo', 'barfoobaz']


In [12]:

# cat_using_subprocess('output1.json')

[
    [
        "Fred",
        25
    ],
    [
        "George",
        30
    ]
]


## catit

In [144]:
## Reading JSON from a File
# Read from file into a dictionary
with open('es-movies-response.json', 'r') as file:
    data2 = json.load(file)
    print(type(data2))

<class 'dict'>


In [146]:
# catit2(json.dumps(data2["hits"]["hits"], indent=4))

In [150]:
# keys = '_source.metascore'
# setkeys=set(['metascore'])
# print(setkeys)
# # sourcekeys = data2['hits']['hits']['._source'].keys()
query = "hits.hits[*].[_id, _source]"

result = jmespath.search(query, data2)  # list

In [153]:
# print(type(result), result[:500])
catit2(result)

# keys_to_remove = setkeys.intersection(set(data2['hits']['hits']['._source']).keys())
# for key in keys_to_remove:
#     del mydict[key]

17 type(mystring)=<class 'list'>
17 type(mystring)=<class 'str'>
type(mystring)=<class 'str'>
[
    [
        "KYgR95kBa2Q2SW-AJS9D",
        {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
            "yearofrelease": 201,
            "metascore": 45,
            "votes": 275122,
            "collection_date": "2025-10-01",
            "rating": 6.6
        }
    ],
    [
        "KogR95kBa2Q2SW-AWS8D",
        {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
            "yearofrelease": 201,
            "metascore": 45,
            "collecti

In [154]:
catit2(json.dumps(data2["hits"]["hits"][0], indent=4))

17 type(mystring)=<class 'str'>
type(mystring)=<class 'str'>
{
    "_index": "movies",
    "_id": "KYgR95kBa2Q2SW-AJS9D",
    "_score": 1,
    "_source": {
        "name": "Justice League",
        "genre": "Action",
        "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
        "yearofrelease": 201,
        "metascore": 45,
        "votes": 275122,
        "collection_date": "2025-10-01",
        "rating": 6.6
    }
}


In [149]:
# catit2(data2)

# jmespath examples

https://github.com/jmespath/jmespath.site/blob/master/docs/examples.rst

https://jmespath.org/examples.html


## Filters and Multiselect Lists¶


In [28]:
# 
mystring = """
 {
      "people": [
        {
          "age": 20,
          "other": "foo",
          "name": "Bob"
        },
        {
          "age": 25,
          "other": "bar",
          "name": "Fred"
        },
        {
          "age": 30,
          "other": "baz",
          "name": "George"
        }
      ]
    }"""
json_data = json.loads(mystring)
# print(json_data, type(json_data))
query = "people[?age > `20`].[name, age]"
result = jmespath.search(query, json_data)  # list
json_string = json.dumps(result, indent=4)
catit(json_string)

[
    [
        "Fred",
        25
    ],
    [
        "George",
        30
    ]
]


In [28]:
# 
mystring = """
 {
      "people": [
        {
          "age": 20,
          "other": "foo",
          "name": "Bob"
        },
        {
          "age": 25,
          "other": "bar",
          "name": "Fred"
        },
        {
          "age": 30,
          "other": "baz",
          "name": "George"
        }
      ]
    }"""
json_data = json.loads(mystring)
# print(json_data, type(json_data))
query = "people[?age > `20`].[name, age]"
result = jmespath.search(query, json_data)  # list
json_string = json.dumps(result, indent=4)
catit(json_string)

[
    [
        "Fred",
        25
    ],
    [
        "George",
        30
    ]
]


In [10]:
cat output1.json

[
    [
        "Fred",
        25
    ],
    [
        "George",
        30
    ]
]

In [29]:
ls

[34m02-Getting_Started[m[m/            movies-bulk.json
[34m03-Managing_Documents[m[m/         movies.md
[34m04-Mapping_&_Analysis[m[m/         mystring,txt
[34m05-Searching_for_Data[m[m/         mystring.json
[34m06-Joining_Queries[m[m/            orders-bulk.json
[34m07-Controlling_Query_Results[m[m/  output1.json
[34m08-Aggregations[m[m/               pagination.xlsx
[34m09-Improving Search Results[m[m/   products-bulk.json
LICENSE.md                     read-write-file.ipynb
README.md                      recipes-bulk.json
cheatsheet.md                  scratch.json
data.json                      sense.json
es-examples-hands-on.md        serialize_json.py
es-movies-response.json        value_counts.md
file-write-read.ipynb          value_counts.py
file-write-read.py             vvalue_counts.py
host.sh                        vvvalue_counts.py
jmespath_serialize_json.ipynb  work-notes.md


In [40]:
import json
with open('es-movies-response.json') as f:
    data = json.load(f)
    catit(json.dumps(data, indent=4))
    

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 7,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "movies",
                "_id": "KYgR95kBa2Q2SW-AJS9D",
                "_score": 1,
                "_source": {
                    "name": "Justice League",
                    "genre": "Action",
                    "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
                    "yearofrelease": 201,
                    "metascore": 45,
                    "votes": 275122,
                    "collection_date": "2025-10-01",
                    "rating": 6.6
                }
            },
            {
        

In [39]:
query = "length(hits.hits)"
result = jmespath.search(query, data)  # list
json_string = json.dumps(result, indent=4)
catit(json_string)

7


In [53]:
query = "hits.hits[*].[_id, _source]"
result = jmespath.search(query, data)  # list
json_string = json.dumps(result, indent=4)
catit(json_string)

[
    [
        "KYgR95kBa2Q2SW-AJS9D",
        {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
            "yearofrelease": 201,
            "metascore": 45,
            "votes": 275122,
            "collection_date": "2025-10-01",
            "rating": 6.6
        }
    ],
    [
        "KogR95kBa2Q2SW-AWS8D",
        {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
            "yearofrelease": 201,
            "metascore": 45,
            "collection_date": "2025-10-02",
            "votes": 275122,
            "rating": 6.6
        }
    ]

In [113]:
# pop/delete key from dict
mystring = """
{"foo": [{"state": "WA", "value": 1},
         {"state": "WA", "value": 2},
         {"state": "CA", "value": 3},
         {"state": "CA", "value": 4}]}
"""
json_data = json.loads(mystring)
print("json_data = ", json_data, "\ntype(json_data) = ", type(json_data))
# foo = json_data["foo"]
for dict in json_data["foo"]:
    dict.pop("value", None)

catit(json.dumps(json_data, indent=4))
query = "foo[*].keys(@)"

result = jmespath.search(query, json_data)  # list
json_string = json.dumps(result, indent=4)
# catit(json_string)

json_data =  {'foo': [{'state': 'WA', 'value': 1}, {'state': 'WA', 'value': 2}, {'state': 'CA', 'value': 3}, {'state': 'CA', 'value': 4}]} 
type(json_data) =  <class 'dict'>
{
    "foo": [
        {
            "state": "WA"
        },
        {
            "state": "WA"
        },
        {
            "state": "CA"
        },
        {
            "state": "CA"
        }
    ]
}


In [140]:
## Reading JSON from a File

import json

# Read from file into a dictionary
with open('es-movies-response.json', 'r') as file:
    data2 = json.load(file)
    print(type(data2))

catit2(json.dumps(data2["hits"]["hits"], indent=4))
# keys = '_source.metascore'
# setkeys=set(['metascore'])
# print(setkeys)
# # sourcekeys = data2['hits']['hits']['._source'].keys()
query = "hits.hits[*].[_id, _source]"

result = jmespath.search(query, data2)  # list
print(type(result), result[:500])
catit(result)

# keys_to_remove = setkeys.intersection(set(data2['hits']['hits']['._source']).keys())
# for key in keys_to_remove:
#     del mydict[key]

<class 'dict'>
17 type(mystring)=<class 'str'>
type(mystring)=<class 'str'>
[
    {
        "_index": "movies",
        "_id": "KYgR95kBa2Q2SW-AJS9D",
        "_score": 1,
        "_source": {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, to face an even greater enemy",
            "yearofrelease": 201,
            "metascore": 45,
            "votes": 275122,
            "collection_date": "2025-10-01",
            "rating": 6.6
        }
    },
    {
        "_index": "movies",
        "_id": "KogR95kBa2Q2SW-AWS8D",
        "_score": 1,
        "_source": {
            "name": "Justice League",
            "genre": "Action",
            "summary": "Fueled by his restored faith in humanity and inspired by Superman's selfless act, Bruce Wayne enlists the help of his newfound ally, Diana Prince, 

TypeError: write() argument must be str, not list