I have a text file containing a list of deeply nested JSON objects.  The objects have keys associated with objects as well as lists, included nested lists.   Using Python, I want a function that can search each object recursively for a specific key.  If the key is found, I want the function to return the full path to the nested key as well as the value.  The function must be able to correctly parse nested lists.  
 There should be no duplicates in the results. 
I also want to be able to provide a substring and have the function find keys that contain the substring.

In [53]:
def xfind_key_paths(data, key_substring, *, exact=False):
    """
    Recursively search nested JSON-like data (dicts + lists)
    for keys matching either:
        - exact == True  → key == key_substring
        - exact == False → key_substring in key (substring match)

    Returns:
        List of unique tuples: (full_path, value)
    """
    results = set()

    def _search(obj, path):
        # dict case
        if isinstance(obj, dict):
            for key, value in obj.items():
                new_path = f"{path}.{key}" if path else key

                # Match key either exactly or via substring
                if (key == key_substring) if exact else (key_substring in key):
                    results.add((new_path, value))

                _search(value, new_path)

        # list case
        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                new_path = f"{path}[{i}]"
                _search(item, new_path)

        # primitive → do nothing
        else:
            return

    _search(data, "")

    return sorted(results)  # optional but nice: deterministic order


In [60]:
def find_xxkey_paths(data, key_substring, *, exact=False):
    """
    Recursively search nested JSON-like data (dicts + lists)
    for keys:
        - exact == True  → key == key_substring
        - exact == False → key_substring in key (substring match)

    Works with values of any type (dict, list, str, int, etc.).

    Returns:
        A sorted list of unique tuples: (full_path, value)
    """

    results = []  # set()  # ensure no duplicates

    def _search(obj, path):
        # Case: dictionary
        if isinstance(obj, dict):
            for key, value in obj.items():
                # Build full path: dot notation

                new_path = f"{path}.{key}" if path else key
                # Key match (substring or exact)
                if (key == key_substring) if exact else (key_substring in key):
                    try:
                        results.add((new_path, value))
                    except TypeError as e:
                        print(f"{type(new_path)=}, {type(value)=}")

                # Recurse into value
                _search(value, new_path)

        # Case: list
        elif isinstance(obj, list):
            for idx, item in enumerate(obj):
                # Build full path: list index notation
                new_path = f"{path}[{idx}]"
                _search(item, new_path)

        # Case: primitive (str, number, bool, None)
        else:
            # Nothing to recurse into
            return

    _search(data, "")

    # Sorted for readability & deterministic output
    return sorted(results)


In [64]:
def find_key_paths(data, target_key):
    """
    Recursively search a nested JSON-like structure (dicts + lists)
    and return all paths where `target_key` is found, along with values.

    Returns:
        List of tuples: [(path_string, value), ...]
        Example path: "root.addresses[2].street"
    """
    results = []

    def _search(obj, path):
        if isinstance(obj, dict):
            for key, value in obj.items():
                new_path = f"{path}.{key}" if path else key
                if key == target_key:
                    results.append((new_path, value))
                _search(value, new_path)

        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                new_path = f"{path}[{i}]"
                _search(item, new_path)

        # Base case: primitives → ignore
        else:
            return

    _search(data, "")
    return results


In [69]:
def find_key_paths(data, key=None, substring=None):
    """
    Recursively search a nested JSON-like structure (dicts + lists)
    for keys that match exactly (key=...) or contain a substring (substring=...).

    Returns:
        List of dicts:
            [{"path": "...", "value": <object>}, ...]

    Ensures no duplicates and does NOT use a set, so unhashable values are allowed.
    """

    if key is None and substring is None:
        raise ValueError("You must supply either key= or substring=")

    results = []   # list of {"path": str, "value": object}

    def already_recorded(path, value):
        """Prevent duplicates without using sets."""
        for entry in results:
            if entry["path"] == path:
                # path uniquely identifies the match
                return True
        return False

    def key_matches(k):
        """Check exact match or substring match."""
        if key is not None and k == key:
            return True
        if substring is not None and substring in k:
            return True
        return False

    def _search(obj, path):
        if isinstance(obj, dict):
            for k, v in obj.items():
                new_path = f"{path}.{k}" if path else k

                if key_matches(k) and not already_recorded(new_path, v):
                    results.append({"path": new_path, "value": v})

                _search(v, new_path)

        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                new_path = f"{path}[{i}]"
                _search(item, new_path)

        # primitives → no recursion needed
        else:
            return

    _search(data, "")
    return results


In [70]:
data1 = {
    "user": {
        "profile": {
            "name": "Alice",
            "details": {
                "bo": [[
                    {"age": 30, "location": {"city": "Boston"}},
                    {"projects": [{"title": "X"}, {"title": "Y", "city": "Miami"}]}
                ]]}
        }
    }
}
results = find_key_paths(data1, "city")
print(results)
print(f"{data1['user']['profile']['details']['bo'][0][0]['location']['city'] == 'Boston'=}")
print(f"{data1['user']['profile']['details']['bo'][0][1]['projects'][1]['city'] == 'Miami'=}")


[{'path': 'user.profile.details.bo[0][0].location.city', 'value': 'Boston'}, {'path': 'user.profile.details.bo[0][1].projects[1].city', 'value': 'Miami'}]
data1['user']['profile']['details']['bo'][0][0]['location']['city'] == 'Boston'=True
data1['user']['profile']['details']['bo'][0][1]['projects'][1]['city'] == 'Miami'=True


In [71]:
results = find_key_paths(data1, "age")
print(results)
# print(f"{data1['user']['profile']['details']['bo'][0][0]['location']['city'] == 'Boston'=}")
# print(f"{data1['user']['profile']['details']['bo'][0][1]['projects'][1]['city'] == 'Miami'=}")


[{'path': 'user.profile.details.bo[0][0].age', 'value': 30}]


In [72]:
results = find_key_paths(data1, "bo")
print(results)

[{'path': 'user.profile.details.bo', 'value': [[{'age': 30, 'location': {'city': 'Boston'}}, {'projects': [{'title': 'X'}, {'title': 'Y', 'city': 'Miami'}]}]]}]


In [75]:
data2 = {
    "user": {
        "profile": {
            "name": "Alice",
            "details": {
                "bo": [[
                    {"age": 30, "address_location": {"city": "Boston"}},
                    {"projects_location": [{"title": "X"}, {"title": "Y", "city": "Miami"}]}
                ]]}
        }
    }
}
results = find_key_paths(data2, substring="location")
print(results)

[{'path': 'user.profile.details.bo[0][0].address_location', 'value': {'city': 'Boston'}}, {'path': 'user.profile.details.bo[0][1].projects_location', 'value': [{'title': 'X'}, {'title': 'Y', 'city': 'Miami'}]}]


In [76]:
nested = {
    "a": {
        "b": 10,
        "c": [10, {"d": 20, "e": 10}]
    },
    "f": [{"g": 10}, {"h": 20}]
}
print(nested)
matches = find_key_paths(nested, 'c')
for path, val in matches:
    print(path, "=", val)


{'a': {'b': 10, 'c': [10, {'d': 20, 'e': 10}]}, 'f': [{'g': 10}, {'h': 20}]}
path = value


In [77]:
data1 = {
    "user": {
        "profile": {
            "name": "Alice",
            "details": {
                "bo": [[
                    {"age": 30, "location": {"city": "Boston"}},
                    {"projects": [{"title": "X"}, {"title": "Y", "city": "Miami"}]}
                ]]}
        }
    }
}
print(find_key_paths(data1, "Miami"))
print(find_key_paths(data1, "X"))

[]
[]


In [52]:
def main():
    func_no = 3
    match func_no:
        case 1:
            nested = {
                "a": {
                    "b": 10,
                    "c": [10, {"d": 20, "e": 10}]
                },
                "f": [{"g": 10}, {"h": 20}]
            }

            matches = find_value_paths_deduplicated(nested, 10)
            for path, val in matches:
                print(path, "=", val)

        case 2:
            with open("users.json") as file:
                data = json.load(file)

            for index, user in enumerate(data):
                if index > 3:
                    break
                print(find_value_paths_deduplicated(user, "Mastercard"))
        case 3:
            data1 = {
                "user": {
                    "profile": {
                        "name": "Alice",
                        "details": {
                            "bo": [[
                                {"age": 30, "location": {"city": "Boston"}},
                                {"projects": [{"title": "X"}, {"title": "Y", "city": "Miami"}]}
                            ]]}
                    }
                }
            }
            print(find_key_paths(data1, "Miami"))
            print(find_key_paths(data1, "X"))

        case 11:
            nested = {
                "a": {
                    "b": 10,
                    "c": [10, {"d": 20, "e": 10}]
                },
                "f": [{"g": 10}, {"h": 20}]
            }

            matches = find_value_paths(nested, 10)
            for path, val in matches:
                print(path, "=", val)

        case 12:
            with open("users.json") as file:
                data = json.load(file)

            for index, user in enumerate(data):
                if index > 3:
                    break
                print(find_value_paths(data, "Mastercard"))
        case 13:
            data1 = {
                "user": {
                    "profile": {
                        "name": "Alice",
                        "details": {
                            "bo": [[
                                {"age": 30, "location": {"city": "Boston"}},
                                {"projects": [{"title": "X"}, {"title": "Y", "city": "Miami"}]}
                            ]]}
                    }
                }
            }
            print(find_value_paths(data1, "city"))
            print(find_value_paths(data1, "title"))
        case _:  # Wildcard pattern for a default case
            print("no match")

if __name__ == "__main__":
    main()



[]
[]
