In [1]:
import numpy as np
from collections import defaultdict

In [2]:
def convert_to_links_dict(edge_list):
    """Build an adjacency mapping from (source, destination) pairs.

    Args:
        edge_list: Iterable of two-item edges ``(src, dest)``.

    Returns:
        A ``defaultdict(list)`` mapping every node seen in ``edge_list`` to the
        list of nodes it links to, in the order the edges were given. Nodes
        that only ever appear as a destination map to an empty list.
    """
    adjacency = defaultdict(list)
    for edge in edge_list:
        origin, target = edge
        adjacency[origin].append(target)
        # Register the target as a node even if it never links anywhere, so
        # dangling nodes still show up as keys (with no outgoing links).
        if target not in adjacency:
            adjacency[target] = []
    return adjacency

In [3]:
def pagerank(links, max_iter=100, alpha=0.85):
    """Rank the nodes of a directed graph with PageRank power iteration.

    Args:
        links: Mapping from each node to an iterable of the nodes it links
            to. A node with no outgoing links is treated as dangling: its rank
            is spread evenly over all nodes. A node that only appears as a
            link target is added to the graph as a dangling node. A repeated
            target counts once per occurrence.
        max_iter: Number of power-iteration steps to run.
        alpha: Damping factor, i.e. the weight given to following links; the
            remaining ``1 - alpha`` is spread uniformly over all nodes.

    Returns:
        A list of ``(node, score)`` tuples sorted by descending score. Ties
        keep the node order of ``links``. An empty graph yields ``[]``.
    """
    # Snapshot the outgoing links without touching the caller's mapping, then
    # register nodes that only occur as destinations.
    out_links = {src: list(links[src]) for src in links}
    for targets in list(out_links.values()):
        for dest in targets:
            out_links.setdefault(dest, [])

    n = len(out_links)
    if n == 0:
        return []

    # One dictionary lookup per edge instead of a linear scan over the keys.
    index = {node: i for i, node in enumerate(out_links)}

    # Row-stochastic transition matrix: S[i, j] is the probability of moving
    # from node i to node j when following a link.
    S = np.zeros((n, n))
    for src, targets in out_links.items():
        i = index[src]
        if not targets:
            S[i] = 1.0 / n
        else:
            weight = 1.0 / len(targets)
            for dest in targets:
                # Accumulate so that repeated targets keep the row summing to 1.
                S[i, index[dest]] += weight

    G = alpha * S.T + (1 - alpha) / n

    # A uniform start makes repeated calls return identical scores.
    pr = np.full(n, 1.0 / n)
    for _ in range(max_iter):
        pr = G @ pr

    pagerank_scores = {node: float(pr[i]) for node, i in index.items()}
    return sorted(pagerank_scores.items(), key=lambda item: item[1], reverse=True)

In [4]:
def hits(links, max_iter=100):
    """Compute HITS hub and authority scores for a directed graph.

    Args:
        links: Mapping from each node to an iterable of the nodes it links
            to. A node that only appears as a link target is added to the
            graph with no outgoing links. Repeated targets count as a single
            link.
        max_iter: Number of authority/hub update rounds to run.

    Returns:
        A tuple ``(hub_ranking, authority_ranking)``. Each ranking is a list
        of ``(node, score)`` tuples sorted by descending score, with ties
        keeping the node order of ``links``. The scores of each ranking sum
        to 1, except when ``max_iter > 0`` and the graph has no links at all,
        in which case every score is 0. An empty graph yields ``([], [])``.
    """
    # Snapshot the outgoing links without touching the caller's mapping, then
    # register nodes that only occur as destinations.
    out_links = {src: list(links[src]) for src in links}
    for targets in list(out_links.values()):
        for dest in targets:
            out_links.setdefault(dest, [])

    n = len(out_links)
    if n == 0:
        return [], []

    # One dictionary lookup per edge instead of a linear scan over the keys.
    index = {node: i for i, node in enumerate(out_links)}

    # Adjacency matrix: S[i, j] == 1 when node i links to node j.
    S = np.zeros((n, n))
    for src, targets in out_links.items():
        for dest in targets:
            S[index[src], index[dest]] = 1

    def normalized(vec):
        # Scale to sum 1; an all-zero vector (graph without links) is left
        # as zeros instead of being divided by zero.
        total = vec.sum()
        return vec / total if total > 0 else vec

    hubs = np.full(n, 1.0 / n)
    auth = np.full(n, 1.0 / n)

    for _ in range(max_iter):
        # Rescale after every update: the raw scores grow geometrically and
        # would overflow to inf/NaN on dense graphs if only normalized at the end.
        auth = normalized(S.T @ hubs)
        hubs = normalized(S @ auth)

    auth = normalized(auth)
    hubs = normalized(hubs)

    hub_scores = {node: float(hubs[i]) for node, i in index.items()}
    auth_scores = {node: float(auth[i]) for node, i in index.items()}
    hub_ranking = sorted(hub_scores.items(), key=lambda item: item[1], reverse=True)
    auth_ranking = sorted(auth_scores.items(), key=lambda item: item[1], reverse=True)
    return hub_ranking, auth_ranking

In [5]:
# Demo graph, given as (source, destination) pairs.
edge_list = [
    ('A', 'B'),
    ('B', 'C'),
    ('C', 'A'),
    ('A', 'D'),
]

# Turn the edges into an adjacency mapping. For this graph it holds:
#     'A' -> ['B', 'D']
#     'B' -> ['C']
#     'C' -> ['A']
#     'D' -> []        (no outgoing links)
links = convert_to_links_dict(edge_list)

# Score the same graph with both algorithms.
pr_scores = pagerank(links)
hub_scores, auth_scores = hits(links)

# Print the three rankings in the order the functions returned them,
# separated by blank lines.
for (node, rank) in pr_scores:
    print(f"Node {node} has PageRank: {rank}")
print()

for (node, hub) in hub_scores:
    print(f"Node {node} has HITS hub: {hub}")
print()

for (node, authority) in auth_scores:
    print(f"Node {node} has HITS authority: {authority}")


Node A has PageRank: 0.3078534031413612
Node C has PageRank: 0.2646222887060584
Node B has PageRank: 0.21376215407629023
Node D has PageRank: 0.21376215407629023

Node A has HITS hub: 1.0
Node B has HITS hub: 7.888609052210118e-31
Node C has HITS hub: 7.888609052210118e-31
Node D has HITS hub: 0.0

Node B has HITS authority: 0.5
Node D has HITS authority: 0.5
Node A has HITS authority: 7.888609052210118e-31
Node C has HITS authority: 7.888609052210118e-31
