<a href="https://colab.research.google.com/github/ashkanvg/jsonpath_validator_python/blob/main/query_validator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
# !pip -q install jmespath jsonpath-ng
# !pip -q install jsonpath-ng
# !pip install "jsonpath-ng[ext]"
!pip install jsonpath2



In [37]:
# pip install jsonpath2
import json, random, sys
from jsonpath2.path import Path

In [38]:
# --- Helpers: load the dataset and run JSONPath queries ---
# The loader always hands back a Python list of records.
def load_json_or_jsonl(path):
    """Load records from a JSON document or a JSON Lines (NDJSON) file.

    The file is first parsed as a single JSON document. If that fails, it is
    parsed as JSON Lines: one JSON value per non-blank line.

    Args:
        path: Path of the file to read (decoded as UTF-8; a leading BOM is
            tolerated).

    Returns:
        list: The top-level array if the document is an array; a one-element
        list wrapping the value if the document is any other single JSON
        value (for example a pretty-printed object); one entry per non-blank
        line for JSON Lines input; an empty list for an empty file.

    Raises:
        json.JSONDecodeError: If the content is neither a valid JSON document
            nor valid JSON Lines.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged and drops a BOM if present.
    with open(path, "r", encoding="utf-8-sig") as f:
        text = f.read()

    if not text.strip():
        return []

    try:
        # Parsing the whole document (instead of sniffing the first byte)
        # handles leading whitespace and multi-line objects.
        document = json.loads(text)
    except json.JSONDecodeError:
        # Several documents in one file -> treat as JSON Lines. Text mode has
        # already normalised newlines to "\n", so split on exactly that.
        return [json.loads(line) for line in text.split("\n") if line.strip()]

    # Callers expect a list of records; wrap a lone object/scalar.
    return document if isinstance(document, list) else [document]

def run(expr, dataset, preview=5):
    """Evaluate a JSONPath expression against ``dataset`` and print a summary.

    Prints a blank line, the expression, the number of matches, and then up
    to ``preview`` matched values (one per line).

    Args:
        expr: JSONPath expression string, parsed with ``Path.parse_str``.
        dataset: Root value the parsed path is matched against.
        preview: Maximum number of matched values to print. Defaults to 5
            (the previously hard-coded limit); ``None`` prints every match.

    Returns:
        None. All output goes to stdout.
    """
    path = Path.parse_str(expr)
    matches = [m.current_value for m in path.match(dataset)]
    print(f"\n{expr}\n→ {len(matches)} match(es)")
    # Only a short preview is shown so large result sets are not dumped.
    for v in matches[:preview]:
        print("  ", v)

In [39]:
# Load the sample events; the loader accepts a JSON array file or a
# JSON Lines (.jsonl) file.
json_path = "./github_archive_sample_25.json"

data = load_json_or_jsonl(json_path)  # a JSON Lines file such as "events.jsonl" works too
# Every query below starts with a `$[...]` subscript on the root, so the
# root is expected to be a list; fail early if it is not.
assert isinstance(data, list), "Expected a top-level array"
print(f"Loaded {len(data)} records")

Loaded 25 records


In [40]:

# --- JSONPath expressions to evaluate against `data` ---
# Each entry is passed unchanged to `run`, in list order.
queries = [
    # Single field projected from every record.
    '$[*].id',
    '$[*].type',
    '$[*].public',
    '$[*].created_at',
    '$[*].actor.login',
    '$[*].actor.url',
    '$[*].repo.name',
    '$[*].repo.url',
    '$[*].payload.action',
    '$[*].payload.comment.id',
    '$[*].payload.comment.path',
    '$[*].payload.comment.user.login',
    '$[*].payload.comment._links.self.href',
    '$[*].payload.pull_request.id',
    '$[*].payload.pull_request.title',
    '$[*].payload.pull_request.state',
    '$[*].payload.pull_request.user.login',
    '$[*].payload.pull_request.assignee.login',
    '$[*].payload.pull_request.head.ref',
    '$[*].payload.pull_request.head.repo.full_name',
    '$[*].payload.pull_request.head.repo.language',
    '$[*].payload.pull_request.base.ref',
    '$[*].payload.pull_request._links.statuses.href',
    '$[*].org.login',
    # Fixed record indices.
    # NOTE(review): the sample loaded in this notebook has 25 records, so
    # indices 42 and above are out of range for it; presumably these yield
    # zero matches there -- confirm whether a larger dataset was intended.
    '$[0].id',
    '$[17].actor.login',
    '$[42].payload.pull_request.title',
    '$[42].payload.pull_request._links.statuses.href',
    '$[99].payload.comment.user.login',
    '$[123].payload.pull_request.head.repo.full_name',
    '$[512].repo.url',
    '$[777].org.login',
    '$[123].payload.pull_request["html_url","diff_url","patch_url"]',
    # Recursive descent (`..`), multi-key subscripts and `*` wildcards.
    '$[256]..created_at',
    '$[*]..created_at',
    '$[*]..id',
    '$[*]..url',
    '$[*].payload.pull_request["html_url","diff_url","patch_url"]',
    '$[*].payload.pull_request._links.*.href',
    '$[*].payload.*.id',

    # Filters (`?(...)` with `=`, `!=`, `>`, `and`, `or`), chained multi-key
    # subscripts, slices and function-call subscripts.
    '$[*][?(@.payload.pull_request.state = "open")].payload.pull_request["id","number","title"]',
    '$[*].payload.pull_request["head","base"]["ref","sha"]',
    '$[*].payload.pull_request["head","base"][?(@.repo.language = "Go")].repo["full_name","watchers_count"]',
    # NOTE(review): the next expression also appears earlier in this list.
    '$[*].payload.pull_request._links.*.href',
    '$[*].payload.pull_request._links["self","review_comments","commits"].href',
    '$[*].payload.pull_request._links["comments","review_comments"].href',
    '$[*][?(@.actor.login = "azylman" or @.payload.comment.user.id = 790102)].id',
    '$[*]..login',
    '$[*].payload.pull_request..["created_at","updated_at","closed_at","merged_at"]',
    '$[*][?(@.payload.comment.path = "Makefile" and @.payload.comment.position = 9)].payload.comment["path","position","diff_hunk"]',
    '$[*].payload.pull_request["head","base"].repo["git_url","ssh_url","clone_url"]',
    '$[*]..["stargazers_count","forks_count","open_issues_count"]',
    '$[*].payload.pull_request["head","base"].user.login',
    '$[*]..["issue_url","comments_url","review_comments_url","commits_url","statuses_url"]',
    '$[*].payload.pull_request.head.repo["full_name","default_branch"]',
    '$[*].payload.pull_request.base.repo["full_name","default_branch"]',
    # Slices and a negative index on the root array.
    '$[0:5].id',
    '$[::5].id',
    '$[-1].id',
    '$[*][?(@.org and @.org.login = "Clever")].org["login","id"]',
    # NOTE(review): the next two expressions also appear earlier in this list.
    '$[*]..id',
    '$[*]..url',
    '$[*].payload.pull_request["head","base"][?(@.label = "Clever:add-support-for-more-delimiters" or @.label = "Clever:master")]["label","ref"]',
    '$[*][?(@.payload.pull_request.user.login and @.payload.pull_request.assignee.login and @.payload.pull_request.user.login != @.payload.pull_request.assignee.login)].payload.pull_request["user","assignee"].login',
    '$[*].payload.pull_request["head","base"][0:1].ref',
    '$[*].payload.pull_request["head","base"][-1:].ref',
    '$[*].payload.pull_request["head","base"][?(@.repo.open_issues_count > 0)].repo.open_issues_count',
    '$[*][?(@.payload.comment)].payload.comment.user["login","html_url"]',
    # Function-call subscripts.
    '$[*].payload.pull_request._links.*["href"][length()]',
    '$[*].payload.comment.diff_hunk[substring(0, 40)]',
]

In [41]:
# Evaluate the queries in list order; `run` prints each expression, its
# match count, and a short preview of the matched values.
for q in queries:
    run(q, data)


$[*].id
→ 25 match(es)
   2491008872
   2491008873
   2491008875
   2491008880
   2491008889

$[*].type
→ 25 match(es)
   CreateEvent
   PullRequestEvent
   IssuesEvent
   WatchEvent
   ForkEvent

$[*].public
→ 25 match(es)
   True
   True
   True
   True
   True

$[*].created_at
→ 25 match(es)
   2015-01-02T23:00:00Z
   2015-01-02T23:00:00Z
   2015-01-02T23:00:00Z
   2015-01-02T23:00:00Z
   2015-01-02T23:00:01Z

$[*].actor.login
→ 25 match(es)
   unwarysheep
   erwinvanhunen
   alpha-beta-soup
   finbarr
   agung-wete

$[*].actor.url
→ 25 match(es)
   https://api.github.com/users/unwarysheep
   https://api.github.com/users/erwinvanhunen
   https://api.github.com/users/alpha-beta-soup
   https://api.github.com/users/finbarr
   https://api.github.com/users/agung-wete

$[*].repo.name
→ 25 match(es)
   unwarysheep/blog
   OfficeDev/PnP
   alpha-beta-soup/national-crash-statistics
   motdotla/dotenv
   gwarnants/FunctionList-PHP-Patch

$[*].repo.url
→ 25 match(es)
   https://api.github.co