In [8]:
import json
import networkx as nx
import matplotlib.pyplot as plt
import csv
import re

In [2]:
def json_to_graph(data, graph=None, parent=None):
    """
    Recursively traverse a JSON structure and create a graph with nodes and edges.

    Parameters:
    - data: dict or list, the JSON data to be processed
    - graph: nx.Graph or nx.DiGraph, the graph being built
    - parent: str, the parent node (if any) for creating edges

    Returns:
    - graph: a NetworkX graph representing the JSON structure.
    """
    if graph is None:
        graph = nx.DiGraph()

    if isinstance(data, dict):
        for key, value in data.items():
            if value is None or value == "":
                continue

            node_id = f"{parent}.{key}" if parent else key
            # Include the key as the label (preserve index notation if present)
            label = key.split('[')[0] + key[key.find('['):] if '[' in key else key
            graph.add_node(node_id, label=label, value=str(value))
            
            if parent:
                graph.add_edge(parent, node_id, relation="contains")
            
            json_to_graph(value, graph, node_id)
    
    elif isinstance(data, list):
        for index, item in enumerate(data):
            if item is None or item == "":
                continue

            node_id = f"{parent}[{index}]" if parent else f"[{index}]"
            label = parent.split('.')[-1] + f"[{index}]"
            graph.add_node(node_id, label=label, value=str(item))
            
            if parent:
                graph.add_edge(parent, node_id, relation="contains")
            
            json_to_graph(item, graph, node_id)
    
    else:
        if data is None or data == "":
            return graph

        node_id = str(parent)
        label = parent.split('.')[-1] if parent else "root"
        graph.add_node(node_id, label=label, value=str(data))
        if parent:
            graph.add_edge(parent.rsplit('.', 1)[0], node_id, relation="contains")
    
    return graph

In [3]:
def json_to_graph_keep_deepest(data, graph=None, parent=None):
    """
    Recursively traverse a JSON structure and create a graph with only the deepest literal nodes.

    Parameters:
    - data: dict or list, the JSON data to be processed
    - graph: nx.Graph or nx.DiGraph, the graph being built
    - parent: str, the parent node (if any) for creating edges

    Returns:
    - graph: a NetworkX graph with only the deepest literal values.
    """
    if graph is None:
        graph = nx.DiGraph()

    if isinstance(data, dict):
        is_leaf = True
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                is_leaf = False
                json_to_graph_keep_deepest(value, graph, f"{parent}.{key}" if parent else key)
            elif value is not None and value != "":
                is_leaf = False
                node_id = f"{parent}.{key}" if parent else key
                # Include the key as the label (preserve index notation if present)
                label = key.split('[')[0] + key[key.find('['):] if '[' in key else key
                graph.add_node(node_id, label=label, value=str(value))
                if parent:
                    graph.add_edge(parent, node_id, relation="contains")

        if is_leaf and parent and parent in graph:
            graph.remove_node(parent)

    elif isinstance(data, list):
        is_leaf = True
        for index, item in enumerate(data):
            if isinstance(item, (dict, list)):
                is_leaf = False
                json_to_graph_keep_deepest(item, graph, f"{parent}[{index}]" if parent else f"[{index}]")
            elif item is not None and item != "":
                is_leaf = False
                node_id = f"{parent}[{index}]" if parent else f"[{index}]"
                label = parent.split('.')[-1] + f"[{index}]"
                graph.add_node(node_id, label=label, value=str(item))
                if parent:
                    graph.add_edge(parent, node_id, relation="contains")

        if is_leaf and parent and parent in graph:
            graph.remove_node(parent)

    else:
        if data is not None and data != "":
            node_id = parent
            label = parent.split('.')[-1] if parent else "root"
            graph.add_node(node_id, label=label, value=str(data))
            if parent:
                graph.add_edge(parent.rsplit('.', 1)[0], node_id, relation="contains")

    return graph

In [13]:
json_data = {
    "messageType": "eventDetailed",
    "message": {
        "externalShipmentId": None,
        "externalConsignmentId": "222",
        "externalEventId": "53",
        "place": {
            "placeLocalization": {
                "addressName": None,
                "addressNumber": None,
                "location": None,
                "roadType": None,
                "terminalNode": {
                    "nodeCode": "62105",
                    "nodeName": None
                }
            },
            "dateTimeType": {
                "dateTime": "2024-06-17T17:15:00.000+02:00",
                "type": "Actual"
            }
        },
        "externalOrderId": None,
        "eventType": "ARRIVAL_EVENT",
        "equipments": None,
        "transportMean": None,
        "observations": None,
        "externalReferences": [
            {
                "type": "event_reference",
                "id": "E-00001",
                "endPoint": None
            }
        ]
    },
    "operationType": "CREATE"
}

# Generate the graph
G = json_to_graph(json_data)
G2 = json_to_graph_keep_deepest(json_data)



In [14]:
json_data = {
	"messageType": "shipmentDetailed",
	"message": {
		"externalShipmentId": "ES24D44444444FXHCUHCT",
		"contractCarriageCondition": "NA",
		"involvedActors": [
			{
				"actorAddress": None,
				"actorId": "D44444444",
				"actorName": "Company D",
				"actorRoles": [
					"LogisticRoles_TransportOperator"
				],
				"actorWebSite": None,
				"actorEmail": "Simple4shipment@gmail.com",
				"actorPhoneNumber": None
			},
			{
				"actorAddress": None,
				"actorId": "A11111111",
				"actorName": "Company B",
				"actorRoles": [
					"LogisticRoles_Consignor"
				],
				"actorWebSite": None,
				"actorEmail": None,
				"actorPhoneNumber": None
			},
			{
				"actorAddress": None,
				"actorId": "B22222222",
				"actorName": "Company A",
				"actorRoles": [
					"LogisticRoles_Consignee"
				],
				"actorWebSite": None,
				"actorEmail": "Juavie10@gmail.com",
				"actorPhoneNumber": None
			},
			{
				"actorAddress": None,
				"actorId": "C33333333",
				"actorName": "Company C",
				"actorRoles": [
					"LogisticRoles_DispatchParty"
				],
				"actorWebSite": None,
				"actorEmail": None,
				"actorPhoneNumber": None
			}
		],
		"origin": {
			"placeLocalization": {
				"addressName": None,
				"addressNumber": None,
				"location": {
					"zipCode": None,
					"city": {
						"name": None,
						"cityLoCode": "ES MAD"
					},
					"country": {
						"name": None,
						"unLoCode": "ES"
					}
				},
				"roadType": None,
				"terminalNode": None
			},
			"dateTimeType": {
				"dateTime": "2024-09-16T11:46:45.000+02:00",
				"type": "Estimated"
			}
		},
		"destination": {
			"placeLocalization": {
				"addressName": None,
				"addressNumber": None,
				"location": {
					"zipCode": None,
					"city": {
						"name": None,
						"cityLoCode": "ES SVQ"
					},
					"country": {
						"name": None,
						"unLoCode": "ES"
					}
				},
				"roadType": None,
				"terminalNode": None
			},
			"dateTimeType": {
				"dateTime": "2024-09-23T13:15:48.000+02:00",
				"type": "Estimated"
			}
		},
		"equipments": [
			{
				"equipmentId": "JSVU12453627",
				"equipmentType": {
					"code": "CN",
					"description": None
				},
				"wagonId": None,
				"codeITU": "40",
				"grossWeight": 42000,
				"isEmpty": True,
				"sealIndicator": "Precxx3234",
				"goods": [
					{
						"package": {
							"numberOfPackages": 26,
							"shippingMark": "bridgestones",
							"packagingType": None
						},
						"goodTypeCode": "tires",
						"goodDescription": "finished rubber tires",
						"cargoType": "Pallets",
						"idNumberList": [
							"21321"
						],
						"goodWeight": 0.0,
						"goodGrossMass": 23000.0,
						"goodSize": 12,
						"temperature": None,
						"dangerousGoods": False,
						"dangerousGoodsDescription": None,
						"damageRemarks": None
					}
				],
				"damageRemarks": "string",
				"city": {
					"name": None,
					"cityLoCode": "ES MAD"
				},
				"country": None,
				"dangerousEquipment": False,
				"dangerousEquipmentDescription": None
			}
		],
		"documents": [],
		"externalReferences": [
			{
				"type": "shipment_reference",
				"id": "BL112233",
				"endPoint": None
			}
		]
	},
	"operationType": "CREATE"
}
# Generate the graph
G = json_to_graph(json_data)
G2 = json_to_graph_keep_deepest(json_data)

In [4]:
flowertrucks_json_data = {
    "messageType": "Transport Opdracht",
    "message": {
        "shipment": {
            "shipper": "Asian Export Food",
            "loadingLocation": {
                "plaats": "Kampen",
                "land": "Nederland"
            },
            "loadingDate": "17-01-2025"
        },
        "goods": [
            {
                "kenmerk": "00803055558",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 1,
                "noOfColli": 23,
                "unloadLocation": {
                    "straat": "Masvagen 2",
                    "naam": "AX Utveckling AB",
                    "postcode": "22100",
                    "plaats": "MARIEHAMN",
                    "land": "FIN"
                }
            },
            {
                "kenmerk": "00803055544",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 1,
                "noOfColli": 16,
                "unloadLocation": {
                    "straat": "Hämeenkatu 7",
                    "naam": "Nurmi Dao Oy",
                    "postcode": "20500",
                    "plaats": "TURKU",
                    "land": "FIN"
                }
            },
            {
                "kenmerk": "0080305561",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 1,
                "noOfColli": 1,
                "unloadLocation": {
                    "straat": "Kuunsade 10b 43",
                    "naam": "Golden Food Oy",
                    "postcode": "02210",
                    "plaats": "HELSINKI",
                    "land": "FIN"
                }
            },
            {
                "kenmerk": "00803055599",
                "actie": "Lossen",
                "euroDV": 2,
                "blokDV": 3,
                "noOfColli": 50,
                "unloadLocation": {
                    "straat": "Gatan 5",
                    "naam": "ABC Logistics",
                    "postcode": "11450",
                    "plaats": "STOCKHOLM",
                    "land": "SWE"
                }
            },
            {
                "kenmerk": "0080305560",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 2,
                "noOfColli": 12,
                "unloadLocation": {
                    "straat": "Storgata 15",
                    "naam": "Nordic Foods",
                    "postcode": "0161",
                    "plaats": "OSLO",
                    "land": "NOR"
                }
            },
            {
                "kenmerk": "0080305562",
                "actie": "Lossen",
                "euroDV": 2,
                "blokDV": 3,
                "noOfColli": 25,
                "unloadLocation": {
                    "straat": "Skolgatan 12",
                    "naam": "Food Service AB",
                    "postcode": "11850",
                    "plaats": "GOTHENBURG",
                    "land": "SWE"
                }
            },
            {
                "kenmerk": "0080305563",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 2,
                "noOfColli": 10,
                "unloadLocation": {
                    "straat": "Bryggen 3",
                    "naam": "North Sea Logistics",
                    "postcode": "5003",
                    "plaats": "BERGEN",
                    "land": "NOR"
                }
            },
            {
                "kenmerk": "0080305564",
                "actie": "Lossen",
                "euroDV": 2,
                "blokDV": 1,
                "noOfColli": 8,
                "unloadLocation": {
                    "straat": "Tallinn Road 8",
                    "naam": "Baltic Foods",
                    "postcode": "10111",
                    "plaats": "TALLINN",
                    "land": "EST"
                }
            },
            {
                "kenmerk": "0080305565",
                "actie": "Lossen",
                "euroDV": 2,
                "blokDV": 3,
                "noOfColli": 18,
                "unloadLocation": {
                    "straat": "Main Street 22",
                    "naam": "Global Goods Inc",
                    "postcode": "8000",
                    "plaats": "AARHUS",
                    "land": "DNK"
                }
            },
            {
                "kenmerk": "0080305566",
                "actie": "Lossen",
                "euroDV": 1,
                "blokDV": 2,
                "noOfColli": 15,
                "unloadLocation": {
                    "straat": "Food Street 45",
                    "naam": "ScandiFoods AS",
                    "postcode": "3010",
                    "plaats": "DRAMMEN",
                    "land": "NOR"
                }
            }
        ]
    }
}
# Generate the graph
Flowertrucks_G = json_to_graph(flowertrucks_json_data)
Flowertrucks_G2 = json_to_graph_keep_deepest(flowertrucks_json_data)

In [5]:
# List all nodes with their attributes
print("Nodes:")
for node, attributes in Flowertrucks_G.nodes(data=True):
    print(f"  {node}: {attributes}")

# List all edges with their relationships
print("\nEdges:")
for source, target, attributes in Flowertrucks_G.edges(data=True):
    print(f"  {source} -> {target} [relation: {attributes['relation']}]")

Nodes:
  messageType: {'label': 'messageType', 'value': 'Transport Opdracht'}
  message: {'label': 'message', 'value': "{'shipment': {'shipper': 'Asian Export Food', 'loadingLocation': {'plaats': 'Kampen', 'land': 'Nederland'}, 'loadingDate': '17-01-2025'}, 'goods': [{'kenmerk': '00803055558', 'actie': 'Lossen', 'euroDV': 1, 'blokDV': 1, 'noOfColli': 23, 'unloadLocation': {'straat': 'Masvagen 2', 'naam': 'AX Utveckling AB', 'postcode': '22100', 'plaats': 'MARIEHAMN', 'land': 'FIN'}}, {'kenmerk': '00803055544', 'actie': 'Lossen', 'euroDV': 1, 'blokDV': 1, 'noOfColli': 16, 'unloadLocation': {'straat': 'Hämeenkatu 7', 'naam': 'Nurmi Dao Oy', 'postcode': '20500', 'plaats': 'TURKU', 'land': 'FIN'}}, {'kenmerk': '0080305561', 'actie': 'Lossen', 'euroDV': 1, 'blokDV': 1, 'noOfColli': 1, 'unloadLocation': {'straat': 'Kuunsade 10b 43', 'naam': 'Golden Food Oy', 'postcode': '02210', 'plaats': 'HELSINKI', 'land': 'FIN'}}, {'kenmerk': '00803055599', 'actie': 'Lossen', 'euroDV': 2, 'blokDV': 3, 'no

In [17]:
def print_graph_as_tree(graph, root=None, level=0, visited=None):
    if visited is None:
        visited = set()

    if root is None:
        # Start with nodes that have no incoming edges (potential roots)
        roots = [n for n in graph.nodes if graph.in_degree(n) == 0]
        for r in roots:
            print_graph_as_tree(graph, r, level, visited)
        return

    # Print the current node with indentation
    indent = "  " * level
    print(f"{indent}- {root}: {graph.nodes[root]}")

    # Mark the node as visited to prevent cycles
    visited.add(root)

    # Recurse on neighbors (children)
    for neighbor in graph.successors(root):
        if neighbor not in visited:
            print_graph_as_tree(graph, neighbor, level + 1, visited)

# Display the graph as a tree
print("\nGraph as Tree:")
print_graph_as_tree(G)


Graph as Tree:
- message: {'label': 'message', 'value': "{'externalShipmentId': 'ES24D44444444FXHCUHCT', 'contractCarriageCondition': 'NA', 'involvedActors': [{'actorAddress': None, 'actorId': 'D44444444', 'actorName': 'Company D', 'actorRoles': ['LogisticRoles_TransportOperator'], 'actorWebSite': None, 'actorEmail': 'Simple4shipment@gmail.com', 'actorPhoneNumber': None}, {'actorAddress': None, 'actorId': 'A11111111', 'actorName': 'Company B', 'actorRoles': ['LogisticRoles_Consignor'], 'actorWebSite': None, 'actorEmail': None, 'actorPhoneNumber': None}, {'actorAddress': None, 'actorId': 'B22222222', 'actorName': 'Company A', 'actorRoles': ['LogisticRoles_Consignee'], 'actorWebSite': None, 'actorEmail': 'Juavie10@gmail.com', 'actorPhoneNumber': None}, {'actorAddress': None, 'actorId': 'C33333333', 'actorName': 'Company C', 'actorRoles': ['LogisticRoles_DispatchParty'], 'actorWebSite': None, 'actorEmail': None, 'actorPhoneNumber': None}], 'origin': {'placeLocalization': {'addressName': 

In [18]:
csv_file_path = "graph_nodes.csv"

# Open the file and write the nodes
with open(csv_file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(["Node", "Attribute", "Value"])
    
    # Write each node with its attributes
    for node, attributes in G.nodes(data=True):
        for attr, value in attributes.items():
            writer.writerow([node, attr, value])

In [19]:
csv_file_path = "graph_deep_nodes.csv"

# Open the file and write the nodes
with open(csv_file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(["Node", "Attribute", "Value"])
    
    # Write each node with its attributes
    for node, attributes in G2.nodes(data=True):
        for attr, value in attributes.items():
            writer.writerow([node, attr, value])

In [None]:
unique_labels = set()

for node, attributes in Flowertrucks_G.nodes(data=True):
    if "label" in attributes and "value" in attributes:
        if not isinstance(attributes["value"], (dict, list)):
            unique_labels.add(attributes["label"])  # Add to set to ensure uniqueness

# Convert to a sorted list (optional)
unique_labels_list = sorted(unique_labels)

normalized_labels = set()
for label in unique_labels:
    # Use regex to remove the `[n]` part dynamically
    normalized_label = re.sub(r"\[\d+\]$", "", label)
    normalized_labels.add(normalized_label)  # Ensure uniqueness

# Convert to sorted list (optional, for readability)
normalized_labels_list = sorted(normalized_labels)

# Print the cleaned-up labels
print(normalized_labels_list)

['actie', 'blokDV', 'euroDV', 'goods', 'kenmerk', 'land', 'loadingDate', 'loadingLocation', 'message', 'messageType', 'naam', 'noOfColli', 'plaats', 'postcode', 'shipment', 'shipper', 'straat', 'unloadLocation']
