In [14]:
import json
import networkx as nx
import matplotlib.pyplot as plt
import csv

In [21]:
def json_to_graph(data, graph=None, parent=None):
    """
    Recursively traverse a JSON structure and create a graph with nodes and edges,
    ensuring correct handling of nested primitives like message.externalConsignmentId.
    
    Parameters:
    - data: dict or list, the JSON data to be processed
    - graph: nx.Graph or nx.DiGraph, the graph being built
    - parent: str, the parent node (if any) for creating edges
    
    Returns:
    - graph: a NetworkX graph representing the JSON structure
    """
    if graph is None:
        graph = nx.DiGraph()  # Using a directed graph

    if isinstance(data, dict):
        for key, value in data.items():
            # Skip None or empty values
            if value is None or value == "":
                continue

            node_id = f"{parent}.{key}" if parent else key  # Unique node ID
            graph.add_node(node_id, label=key, value=str(value))  # Add node
            
            if parent:
                graph.add_edge(parent, node_id, relation="contains")  # Link to parent
            
            # Recursively process nested structures
            json_to_graph(value, graph, node_id)
    
    elif isinstance(data, list):
        for index, item in enumerate(data):
            # Skip None or empty items
            if item is None or item == "":
                continue

            node_id = f"{parent}[{index}]"  # Unique node ID for list items
            graph.add_node(node_id, label=f"{parent}[{index}]", value=str(item))  # Add node
            
            if parent:
                graph.add_edge(parent, node_id, relation="contains")  # Link to parent
            
            # Recursively process nested structures
            json_to_graph(item, graph, node_id)
    
    else:
        # Base case: primitive data (e.g., string)
        if data is None or data == "":
            return graph

        # Use parent as the node ID for primitives
        node_id = str(parent)
        graph.add_node(node_id, label=parent.split('.')[-1], value=str(data))  # Label is last key
        if parent:
            graph.add_edge(parent, node_id, relation="contains")  # Link to parent
    
    return graph

In [33]:
def json_to_graph_keep_deepest(data, graph=None, parent=None):
    """
    Recursively traverse a JSON structure and create a graph with only the deepest literal nodes.

    Parameters:
    - data: dict or list, the JSON data to be processed
    - graph: nx.Graph or nx.DiGraph, the graph being built
    - parent: str, the parent node (if any) for creating edges

    Returns:
    - graph: a NetworkX graph with only the deepest literal values.
    """
    if graph is None:
        graph = nx.DiGraph()  # Using a directed graph

    if isinstance(data, dict):
        is_leaf = True  # Assume this level is a leaf unless proven otherwise
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                # Not a leaf, recurse deeper
                is_leaf = False
                json_to_graph_keep_deepest(value, graph, f"{parent}.{key}" if parent else key)
            elif value is not None and value != "":
                # Found a literal value, keep it
                node_id = f"{parent}.{key}" if parent else key
                graph.add_node(node_id, label=key, value=str(value))
                if parent:
                    graph.add_edge(parent, node_id, relation="contains")
        
        # If all items were leaves, remove the current (nested) node
        if is_leaf and parent:
            graph.remove_node(parent)

    elif isinstance(data, list):
        is_leaf = True  # Assume this level is a leaf unless proven otherwise
        for index, item in enumerate(data):
            if isinstance(item, (dict, list)):
                # Not a leaf, recurse deeper
                is_leaf = False
                json_to_graph_keep_deepest(item, graph, f"{parent}[{index}]" if parent else f"[{index}]")
            elif item is not None and item != "":
                # Found a literal value, keep it
                node_id = f"{parent}[{index}]" if parent else f"[{index}]"
                graph.add_node(node_id, label=f"[{index}]", value=str(item))
                if parent:
                    graph.add_edge(parent, node_id, relation="contains")
        
        # If all items were leaves, remove the current (nested) node
        if is_leaf and parent:
            graph.remove_node(parent)

    else:
        # Base case: primitive data (e.g., string, int)
        if data is not None and data != "":
            node_id = parent
            graph.add_node(node_id, label=parent.split('.')[-1], value=str(data))
            if parent:
                graph.add_edge(parent.rsplit('.', 1)[0], node_id, relation="contains")

    return graph

In [34]:
json_data = {
    "messageType": "eventDetailed",
    "message": {
        "externalShipmentId": None,
        "externalConsignmentId": "222",
        "externalEventId": "53",
        "place": {
            "placeLocalization": {
                "addressName": None,
                "addressNumber": None,
                "location": None,
                "roadType": None,
                "terminalNode": {
                    "nodeCode": "62105",
                    "nodeName": None
                }
            },
            "dateTimeType": {
                "dateTime": "2024-06-17T17:15:00.000+02:00",
                "type": "Actual"
            }
        },
        "externalOrderId": None,
        "eventType": "ARRIVAL_EVENT",
        "equipments": None,
        "transportMean": None,
        "observations": None,
        "externalReferences": [
            {
                "type": "event_reference",
                "id": "E-00001",
                "endPoint": None
            }
        ]
    },
    "operationType": "CREATE"
}

# Generate the graph
G = json_to_graph(json_data)
G2 = json_to_graph_keep_deepest(json_data)



In [5]:
# List all nodes with their attributes
print("Nodes:")
for node, attributes in G.nodes(data=True):
    print(f"  {node}: {attributes}")

# List all edges with their relationships
print("\nEdges:")
for source, target, attributes in G.edges(data=True):
    print(f"  {source} -> {target} [relation: {attributes['relation']}]")

Nodes:
  messageType: {'label': 'messageType', 'value': 'eventDetailed'}
  message: {'label': 'message', 'value': "{'externalShipmentId': None, 'externalConsignmentId': '222', 'externalEventId': '53', 'place': {'placeLocalization': {'addressName': None, 'addressNumber': None, 'location': None, 'roadType': None, 'terminalNode': {'nodeCode': '62105', 'nodeName': None}}, 'dateTimeType': {'dateTime': '2024-06-17T17:15:00.000+02:00', 'type': 'Actual'}}, 'externalOrderId': None, 'eventType': 'ARRIVAL_EVENT', 'equipments': None, 'transportMean': None, 'observations': None, 'externalReferences': [{'type': 'event_reference', 'id': 'E-00001', 'endPoint': None}]}"}
  message.externalConsignmentId: {'label': 'message.externalConsignmentId', 'value': '222'}
  message.externalEventId: {'label': 'message.externalEventId', 'value': '53'}
  message.place: {'label': 'place', 'value': "{'placeLocalization': {'addressName': None, 'addressNumber': None, 'location': None, 'roadType': None, 'terminalNode': {

In [6]:
def print_graph_as_tree(graph, root=None, level=0, visited=None):
    if visited is None:
        visited = set()

    if root is None:
        # Start with nodes that have no incoming edges (potential roots)
        roots = [n for n in graph.nodes if graph.in_degree(n) == 0]
        for r in roots:
            print_graph_as_tree(graph, r, level, visited)
        return

    # Print the current node with indentation
    indent = "  " * level
    print(f"{indent}- {root}: {graph.nodes[root]}")

    # Mark the node as visited to prevent cycles
    visited.add(root)

    # Recurse on neighbors (children)
    for neighbor in graph.successors(root):
        if neighbor not in visited:
            print_graph_as_tree(graph, neighbor, level + 1, visited)

# Display the graph as a tree
print("\nGraph as Tree:")
print_graph_as_tree(G)


Graph as Tree:
- message: {'label': 'message', 'value': "{'externalShipmentId': None, 'externalConsignmentId': '222', 'externalEventId': '53', 'place': {'placeLocalization': {'addressName': None, 'addressNumber': None, 'location': None, 'roadType': None, 'terminalNode': {'nodeCode': '62105', 'nodeName': None}}, 'dateTimeType': {'dateTime': '2024-06-17T17:15:00.000+02:00', 'type': 'Actual'}}, 'externalOrderId': None, 'eventType': 'ARRIVAL_EVENT', 'equipments': None, 'transportMean': None, 'observations': None, 'externalReferences': [{'type': 'event_reference', 'id': 'E-00001', 'endPoint': None}]}"}
  - message.externalConsignmentId: {'label': 'message.externalConsignmentId', 'value': '222'}
  - message.externalEventId: {'label': 'message.externalEventId', 'value': '53'}
  - message.place: {'label': 'place', 'value': "{'placeLocalization': {'addressName': None, 'addressNumber': None, 'location': None, 'roadType': None, 'terminalNode': {'nodeCode': '62105', 'nodeName': None}}, 'dateTimeT

In [20]:
csv_file_path = "graph_nodes.csv"

# Open the file and write the nodes
with open(csv_file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(["Node", "Attribute", "Value"])
    
    # Write each node with its attributes
    for node, attributes in G.nodes(data=True):
        for attr, value in attributes.items():
            writer.writerow([node, attr, value])

In [35]:
csv_file_path = "graph_deep_nodes.csv"

# Open the file and write the nodes
with open(csv_file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(["Node", "Attribute", "Value"])
    
    # Write each node with its attributes
    for node, attributes in G2.nodes(data=True):
        for attr, value in attributes.items():
            writer.writerow([node, attr, value])