In [8]:
import pydgraph
import numpy as np
import json
from scipy.sparse import coo_matrix
import networkx as nx

def create_client():
    """Create and return a Dgraph client."""
    client_stub = pydgraph.DgraphClientStub('localhost:9080')  # Update with your Dgraph server address
    return pydgraph.DgraphClient(client_stub)

def fetch_graph_edges(client, offset=0, limit=10000):
    """Fetch graph edges from Dgraph in batches."""
    query = f"""
    {{
      edges(func: has(edge), first: {limit}, offset: {offset}) {{
        uid
        edge {{ uid }}
      }}
    }}
    """
    txn = client.txn(read_only=True)
    try:
        response = txn.query(query)
        return json.loads(response.json)
    finally:
        txn.discard()

def compute_pagerank(num_nodes, edges, alpha=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank using a sparse adjacency matrix."""
    rows, cols = zip(*edges)
    values = np.ones(len(edges))
    # use COO (Coordinate List) to build sparce matrix 
    # COO is often used for constructing sparse matrices, but it is less memory efficient than CSR (Compressed Sparse Row) 
    # 
    adj_matrix = coo_matrix((values, (rows, cols)), shape=(num_nodes, num_nodes))
    out_degree = np.array(adj_matrix.sum(axis=1)).flatten()
    out_degree[out_degree == 0] = 1  # Avoid division by zero
    stochastic_matrix = adj_matrix.multiply(1 / out_degree[:, None])

    # Initialize PageRank values
    pagerank = np.ones(num_nodes) / num_nodes
    for _ in range(max_iter):
        new_pagerank = alpha * stochastic_matrix.dot(pagerank) + (1 - alpha) / num_nodes
        if np.linalg.norm(new_pagerank - pagerank, ord=1) < tol:
            break
        pagerank = new_pagerank
    return pagerank

def main():
    client = create_client()
    
    # Fetch data in batches and process
    offset = 0
    limit = 10000
    edges = []
    while True:
        data = fetch_graph_edges(client, offset=offset, limit=limit)
        if not data['edges']:
            break
        for edge in data['edges']:
            from_uid = edge['uid']
            to_uids = [to['uid'] for to in edge['edge']]
            edges.extend((from_uid, to_uid) for to_uid in to_uids)
        offset += limit
    
    # Extract unique nodes and map them to indices
    unique_nodes = list(set([e[0] for e in edges] + [e[1] for e in edges]))
    node_to_index = {node: i for i, node in enumerate(unique_nodes)}
    num_nodes = len(unique_nodes)

    # Map edges to indices
    indexed_edges = [(node_to_index[from_uid], node_to_index[to_uid]) for from_uid, to_uid in edges]
    print(indexed_edges)
    G = nx.from_edgelist(edgelist)
    pagerank_scores = nx.pagerank(G, alpha=0.85, max_iter=100, tol=1e-6)
    print(pagerank_scores)
    # Compute PageRank
    pagerank_scores = compute_pagerank(num_nodes, indexed_edges)
    
    # Print or store results
    for node, score in zip(unique_nodes, pagerank_scores):
        print(f"Node {node}: PageRank Score {score}")

main()

[(6, 3), (6, 8), (6, 2), (5, 7), (5, 8), (5, 2), (9, 3), (9, 8), (1, 8), (1, 2), (0, 3), (0, 6), (0, 5), (0, 9), (0, 1), (0, 7), (0, 4), (0, 8), (0, 2), (4, 3), (4, 7), (8, 7), (8, 2), (2, 3), (2, 7)]


NameError: name 'edgelist' is not defined