In [1]:
import networkx as nx
import operator

# **Hubs and Authorities**

Given a query to a search engine:
* **Root**: set of highly relevant web pages - potential *authorities*.
* Find all pages that link to a page in the root - potential *hubs*.
* **Base**: root nodes and any node that links to a node in the root set.
* Consider all edges connecting nodes in the base set.

### **HITS Algorithm**
Compute $k$ iterations of the HITS algorithm to assign an *authority* score and a *hub* score to each node.

1. Assign each node an authority and hub score of 1.
2. Apply the **Authority Update Rule**: each node's **authority** score is the sum of **hub** scores of each node that **points to it**.
3. Apply the **Hub Update Rule**: each node's **hub** score is the sum of **authority** scores of each node that **it points to**.
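4. Normalize authority and hub scores: divide each authority score by the sum of all authority scores, and each hub score by the sum of all hub scores.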
5. Repeat steps 2–4 $k$ times.

For the graph:

<img src='../assets/directed_graph.png' width=300px>

In [8]:
# Read the adjacency-list file into a directed graph, keeping node labels as strings.
D = nx.read_adjlist('../assets/directed_graph.txt', nodetype=str, create_using=nx.DiGraph())

# Run HITS on the graph; the result unpacks into a hub-score dict and an
# authority-score dict, each keyed by node.
hubScore, authorityScore = nx.hits(D)

# Hub ranking: (node, score) pairs ordered from highest to lowest score.
print('hub scores')
display(sorted(hubScore.items(), key=operator.itemgetter(1), reverse=True))

# Authority ranking, ordered the same way.
print('\nauthority scores')
display(sorted(authorityScore.items(), key=operator.itemgetter(1), reverse=True))

hub scores


[('I', 0.26210285382793175),
 ('O', 0.1749665618222365),
 ('G', 0.11661143582632146),
 ('J', 0.08560552875802654),
 ('H', 0.08237928525220428),
 ('N', 0.08163932406273886),
 ('F', 0.06868460039960149),
 ('K', 0.06724410846654054),
 ('C', 0.031174510874723352),
 ('L', 0.013407472246848108),
 ('E', 0.00856732795473908),
 ('B', 0.003226911703833137),
 ('A', 0.0023940305291031243),
 ('D', 0.001996048275151718),
 ('M', -0.0)]


authority scores


[('J', 0.21137098647248379),
 ('G', 0.1577288813015056),
 ('F', 0.13273976088563819),
 ('L', 0.12363167805541275),
 ('H', 0.1000593367517164),
 ('K', 0.06679453456525274),
 ('O', 0.06384672583294418),
 ('A', 0.05641817169163232),
 ('I', 0.0314487863219809),
 ('M', 0.03078922475013199),
 ('D', 0.015171685376536124),
 ('C', 0.004502521740958466),
 ('E', 0.002907831819274072),
 ('B', 0.0016759389187341385),
 ('N', 0.0009139355157982575)]