In [31]:
import json

def find_value_paths(data, target_value):
    """
    Recursively search a nested JSON-like structure (dicts + lists)
    and return every path whose value equals `target_value`.

    Each matching location is reported exactly once, in traversal order
    (dict iteration order, then list index order). A match may be a
    primitive or a whole dict/list; the search still descends into a
    matching container. The top-level `data` object itself is never
    reported, only the values nested inside it.

    Args:
        data: Nested structure of dicts and lists to search.
        target_value: Value compared with `==` against every nested value.

    Returns:
        List of tuples: [(path_string, value), ...]
        Dict keys are joined with "." and list positions are written as
        "[i]", e.g. "a.c[1].e".
    """
    results = []

    def _search(obj, path):
        # Build the (child_path, child) pairs of a container.
        if isinstance(obj, dict):
            children = [
                (f"{path}.{key}" if path else key, value)
                for key, value in obj.items()
            ]
        elif isinstance(obj, list):
            children = [(f"{path}[{i}]", item) for i, item in enumerate(obj)]
        else:
            # Primitive: its parent already compared it, and there is
            # nothing to descend into.
            return

        for child_path, child in children:
            # Compare exactly once, here at the parent. Comparing again
            # after recursing into a primitive is what used to record
            # every primitive match twice.
            if child == target_value:
                results.append((child_path, child))
            _search(child, child_path)

    _search(data, "")
    return results


def find_value_paths_deduplicated(data, target_value):
    """
    Recursively search a nested JSON-like structure (dicts + lists)
    and return unique paths where the value equals `target_value`.

    Results are de-duplicated on the path and returned in traversal order
    (dict iteration order, then list index order). Because only the path
    is hashed, the target may be an unhashable container such as a dict
    or a list. The top-level `data` object itself is never reported.

    Args:
        data: Nested structure of dicts and lists to search.
        target_value: Value compared with `==` against every nested value.

    Returns:
        List of (path_string, value) tuples, one per distinct path.
        Dict keys are joined with "." and list positions are written as
        "[i]", e.g. "a.c[1].e".
    """
    # path -> matched value. A dict keeps insertion order (deterministic
    # output) and only requires the path to be hashable, unlike a set of
    # (path, value) tuples, which fails for dict/list values.
    unique_matches = {}

    def _search(obj, path):
        if isinstance(obj, dict):
            children = [
                (f"{path}.{key}" if path else key, value)
                for key, value in obj.items()
            ]
        elif isinstance(obj, list):
            children = [(f"{path}[{i}]", item) for i, item in enumerate(obj)]
        else:
            # Primitive: already compared by its parent.
            return

        for child_path, child in children:
            if child == target_value:
                # setdefault keeps the first value seen for a repeated path
                # (two different nestings can spell the same path string,
                # e.g. key "a.b" versus key "b" inside key "a").
                unique_matches.setdefault(child_path, child)
            _search(child, child_path)

    _search(data, "")

    # Convert back to a list of tuples for usability
    return list(unique_matches.items())

In [28]:
def find_value_paths2(data, target_value):
    """
    Recursively search a nested JSON-like structure (dicts, lists)
    for occurrences of `target_value`, returning all unique paths.

    Every visited object, container or primitive, is compared with
    `target_value` using `==`, so the target may itself be a dict or a
    list. If `data` as a whole equals the target it is reported with the
    empty path "". The search keeps descending into containers whether or
    not they matched.

    Args:
        data: Nested structure of dicts and lists to search.
        target_value: Value to look for.

    Returns:
        List of (path_string, value) tuples in traversal order, one per
        distinct path,
            e.g. [("items[2].name", "Alice"), ...]
    """

    results = []  # list of (path_string, value), in discovery order
    # Paths already reported. Only the path is hashed, so unhashable
    # matched values (dicts/lists) are fine, and the membership test is
    # constant-time instead of rescanning `results` on every match.
    seen_paths = set()

    def _search(obj, path):
        # Match the entire object before looking inside it
        if obj == target_value and path not in seen_paths:
            seen_paths.add(path)
            results.append((path, obj))

        if isinstance(obj, dict):
            for key, value in obj.items():
                # Top-level keys get no leading "."
                _search(value, f"{path}.{key}" if path else key)

        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                _search(item, f"{path}[{i}]")

        # Primitives: nothing more to recurse into

    _search(data, "")
    return results


In [None]:
# Fixture: the value 10 appears directly under a key ("a.b"), as a list
# element ("a.c[0]"), and inside dicts that are nested in lists.
nested = {
    "a": {"b": 10, "c": [10, {"d": 20, "e": 10}]},
    "f": [{"g": 10}, {"h": 20}],
}

# Print one "path = value" line for every place the value 10 occurs.
matches = find_value_paths_deduplicated(nested, 10)
for path, val in matches:
    print(path, "=", val)

In [34]:
# Repeat the search for 10 using find_value_paths2. `nested` must already
# have been defined by another cell in this kernel session.
matches = find_value_paths2(nested, 10)
for path, val in matches:
    # One "path = value" line per match
    print(path, "=", val)

a.b = 10
a.c[0] = 10
a.c[1].e = 10
f[0].g = 10


In [30]:
# Load the user records. JSON text is UTF-8, so name the encoding
# explicitly instead of relying on the platform's default.
with open("users.json", encoding="utf-8") as file:
    data = json.load(file)

# Only inspect the first seven records (indices 0-6).
for index, user in enumerate(data):
    if index > 6:
        break
    result = find_value_paths_deduplicated(user, "Mastercard")
    print(result)
    if result:
        # Show the whole record for context.
        # NOTE(review): this prints every field of the record, including
        # 'password' -- clear the cell output before sharing the notebook.
        print(user)

[]
[]
[]
[('bank.cardType', 'Mastercard')]
{'id': 4, 'firstName': 'James', 'lastName': 'Davis', 'maidenName': '', 'age': 46, 'gender': 'male', 'email': 'james.davis@x.dummyjson.com', 'phone': '+49 614-958-9364', 'username': 'jamesd', 'password': 'jamesdpass', 'birthDate': '1979-5-4', 'image': 'https://dummyjson.com/icon/jamesd/128', 'bloodGroup': 'AB+', 'height': 193.31, 'weight': 62.1, 'eyeColor': 'Amber', 'hair': {'color': 'Blonde', 'type': 'Straight'}, 'ip': '101.118.131.66', 'address': {'address': '238 Jefferson Street', 'city': 'Seattle', 'state': 'Pennsylvania', 'stateCode': 'PA', 'postalCode': '68354', 'coordinates': {'lat': 16.782513, 'lng': -139.34723}, 'country': 'United States'}, 'macAddress': '10:7d:df:1f:97:58', 'university': 'University of Southern California', 'bank': {'cardExpire': '07/30', 'cardNumber': '5303440212268149', 'cardType': 'Mastercard', 'currency': 'CAD', 'iban': 'DE01300746880579852937'}, 'company': {'department': 'Support', 'name': 'Pagac and Sons', 'titl

In [35]:
# Load the user records. JSON text is UTF-8, so name the encoding
# explicitly instead of relying on the platform's default.
with open("users.json", encoding="utf-8") as file:
    data = json.load(file)

# Same check as the de-duplicated variant, but with find_value_paths2;
# only the first seven records (indices 0-6) are inspected.
for index, user in enumerate(data):
    if index > 6:
        break
    result = find_value_paths2(user, "Mastercard")
    print(result)
    if result:
        # Show the whole record for context.
        # NOTE(review): this prints every field of the record, including
        # 'password' -- clear the cell output before sharing the notebook.
        print(user)

[]
[]
[]
[('bank.cardType', 'Mastercard')]
{'id': 4, 'firstName': 'James', 'lastName': 'Davis', 'maidenName': '', 'age': 46, 'gender': 'male', 'email': 'james.davis@x.dummyjson.com', 'phone': '+49 614-958-9364', 'username': 'jamesd', 'password': 'jamesdpass', 'birthDate': '1979-5-4', 'image': 'https://dummyjson.com/icon/jamesd/128', 'bloodGroup': 'AB+', 'height': 193.31, 'weight': 62.1, 'eyeColor': 'Amber', 'hair': {'color': 'Blonde', 'type': 'Straight'}, 'ip': '101.118.131.66', 'address': {'address': '238 Jefferson Street', 'city': 'Seattle', 'state': 'Pennsylvania', 'stateCode': 'PA', 'postalCode': '68354', 'coordinates': {'lat': 16.782513, 'lng': -139.34723}, 'country': 'United States'}, 'macAddress': '10:7d:df:1f:97:58', 'university': 'University of Southern California', 'bank': {'cardExpire': '07/30', 'cardNumber': '5303440212268149', 'cardType': 'Mastercard', 'currency': 'CAD', 'iban': 'DE01300746880579852937'}, 'company': {'department': 'Support', 'name': 'Pagac and Sons', 'titl

In [36]:
# Deeply nested fixture: "bo" is a list of lists whose inner list holds
# dicts, which in turn contain further dicts and lists.
data1 = {
    "user": {
        "profile": {
            "name": "Alice",
            "details": {
                "bo": [
                    [
                        {"age": 30, "location": {"city": "Boston"}},
                        {
                            "projects": [
                                {"title": "X"},
                                {"title": "Y", "city": "Miami"},
                            ]
                        },
                    ]
                ]
            },
        }
    }
}

# Look up one leaf value with each search variant.
print(find_value_paths2(data1, "Miami"))
print(find_value_paths_deduplicated(data1, "X"))

[('user.profile.details.bo[0][1].projects[1].city', 'Miami')]
[('user.profile.details.bo[0][1].projects[0].title', 'X')]


In [37]:
# Fixture: the value 10 appears directly under a key ("a.b"), as a list
# element ("a.c[0]"), and inside dicts that are nested in lists.
nested = {
    "a": {"b": 10, "c": [10, {"d": 20, "e": 10}]},
    "f": [{"g": 10}, {"h": 20}],
}

# De-duplicated search: one "path = value" line per distinct location.
matches = find_value_paths_deduplicated(nested, 10)
for path, val in matches:
    print(path, "=", val)

a.b = 10
a.c[0] = 10
a.c[1].e = 10
f[0].g = 10


In [39]:
# Load the user records. JSON text is UTF-8, so name the encoding
# explicitly instead of relying on the platform's default.
with open("users.json", encoding="utf-8") as file:
    data = json.load(file)

# Print the "Mastercard" matches for the first six records (indices 0-5);
# records without a match print an empty list.
for index, user in enumerate(data):
    if index > 5:
        break
    print(find_value_paths2(user, "Mastercard"))

[]
[]
[]
[('bank.cardType', 'Mastercard')]
[('bank.cardType', 'Mastercard')]
[]


In [40]:
# Deeply nested fixture: "bo" is a list of lists whose inner list holds
# dicts, which in turn contain further dicts and lists.
data1 = {
    "user": {
        "profile": {
            "name": "Alice",
            "details": {
                "bo": [
                    [
                        {"age": 30, "location": {"city": "Boston"}},
                        {
                            "projects": [
                                {"title": "X"},
                                {"title": "Y", "city": "Miami"},
                            ]
                        },
                    ]
                ]
            },
        }
    }
}

# Two leaf-value lookups, then a search for "details": that string only
# occurs as a dict key in the fixture, never as a value.
print(find_value_paths2(data1, "Miami"))
print(find_value_paths_deduplicated(data1, "X"))
print(find_value_paths_deduplicated(data1, "details"))

[('user.profile.details.bo[0][1].projects[1].city', 'Miami')]
[('user.profile.details.bo[0][1].projects[0].title', 'X')]
[]


In [41]:
# The target may itself be a container: search for a whole dict rather than
# a primitive. `data1` must already have been defined by another cell in
# this kernel session.
target_value = {"city": "Boston"}
print(find_value_paths2(data1, target_value))

[('user.profile.details.bo[0][0].location', {'city': 'Boston'})]


In [42]:
# Search for a larger sub-structure: a dict whose value is a list of dicts.
# `data1` must already have been defined by another cell in this kernel
# session.
target_value = {
    "projects": [
        {"title": "X"},
        {"title": "Y", "city": "Miami"},
    ]
}
print(find_value_paths2(data1, target_value))

[('user.profile.details.bo[0][1]', {'projects': [{'title': 'X'}, {'title': 'Y', 'city': 'Miami'}]})]
