In [None]:
from lxml import etree

def parse_graphml_in_chunks(file_path):
    # Initialize nodes and edges
    nodes = {}
    edges = []

    # Initialize the context for parsing the GraphML file
    try:
        context = etree.iterparse(file_path, events=("start", "end"), recover=True)
        
        # Iterate over the parsed elements
        for event, elem in context:
            if event == "end":
                try:
                    if elem.tag == "{http://graphml.graphdrawing.org/xmlns}node":
                        # Process node
                        node_id = elem.attrib['id']
                        # Extract other attributes if needed
                        attributes = {data.attrib['key']: data.text for data in elem.findall("{http://graphml.graphdrawing.org/xmlns}data")}
                        nodes[node_id] = attributes

                    elif elem.tag == "{http://graphml.graphdrawing.org/xmlns}edge":
                        # Process edge
                        source = elem.attrib['source']
                        target = elem.attrib['target']
                        # Extract edge attributes
                        attributes = {data.attrib['key']: data.text for data in elem.findall("{http://graphml.graphdrawing.org/xmlns}data")}
                        edges.append((source, target, attributes))
                
                except (KeyError, ValueError) as inner_error:
                    pass
                
                # Clear the element to free memory after processing
                elem.clear()

        # Clear the context to free memory
        del context
    except etree.XMLSyntaxError as e:
        print(f"XML Syntax Error: {e}")
        # Optionally: log the error, return partial results, or raise the error

    return nodes, edges

# Usage
nodes, edges = parse_graphml_in_chunks('../data/graph_no_metric_part.graphml')

In [5]:
type(nodes)

dict

In [12]:
print(len(nodes))

0


In [11]:
import igraph as ig

g = ig.Graph()

g.add_vertices(len(nodes))

# set vertex attributes
for i, (node_id, attributes) in enumerate(nodes.items()):
    g.vs[i]["id"] = node_id
    for key, value in attributes.items():
        g.vs[i][key] = value

# add edges
edge_list = [(nodes[source]['id'], nodes[target]['id']) for source, target, _ in edges]
g.add_edges(edge_list)



[]


ValueError: max() arg is an empty sequence

In [None]:
# calculate degree centraility
degree_centrality = g.degree()
print(degree_centrality)
max_degree = max(degree_centrality)
normalized_degree_centrality = [dc / (max_degree if max_degree > 0 else 1) for dc in degree_centrality]

# Print results
for i in range(g.vcount()):
    print(f"Node ID: {g.vs[i]['id']}, Degree Centrality: {degree_centrality[i]}, Normalized Degree Centrality: {normalized_degree_centrality[i]:.4f}")

# Optional: Output some graph information
print("Number of vertices:", g.vcount())
print("Number of edges:", g.ecount())
print("Graph summary:")
print(g.summary())

## print out the attributes of first 10 nodes

In [None]:
for node, attrs in list(G.nodes(data=True))[:10]:
    print(f"Node {node}: {attrs}")


## print our the attributes of first 10 edges

In [None]:
for u, v, attrs in list(G.edges(data=True))[:10]:
    print(f"Edge {u}-{v}: {attrs}")

## measure the basic centrality