In [1]:
import numpy as np
import math
import networkx
from tqdm import tqdm

In [2]:
# Empty module-level directed graph. Nothing in the visible cells reads it:
# HITS() receives its graph through its own `G` parameter (which shadows this
# name inside the function) and the analysis cell builds a separate Graph_2b.
# NOTE(review): looks like leftover scaffolding — confirm no later cell uses it.
G = networkx.DiGraph()
def _l2_normalize(scores):
    """Scale the values of ``scores`` in place so their Euclidean norm is 1."""
    # Explicit accumulation (rather than sum()) keeps the floating-point
    # rounding identical to a plain left-to-right addition.
    squared_total = 0
    for node in scores:
        squared_total = squared_total + (scores[node] * scores[node])

    norm = math.sqrt(squared_total)
    for node in scores:
        scores[node] = scores[node] / norm


def HITS(G, max_iters, tol):
    """Compute hub and authority scores for the nodes of a directed graph.

    Parameters
    ----------
    G : graph-like
        Object exposing ``nodes()`` and ``edges()``; each edge is indexed as
        ``edge[0]`` (source) and ``edge[1]`` (target).
    max_iters : number
        Upper bound on the number of update rounds.
    tol : float
        Iteration stops early once the L1 distance between the hub scores of
        two consecutive rounds is ``<= tol``.

    Returns
    -------
    (dict, dict)
        ``(hub_scores, authority_scores)``, each mapping every node of ``G``
        to its L2-normalised score. Both dicts are empty when ``G`` has no
        nodes.

    Notes
    -----
    NOTE(review): every round *adds* the neighbour sum to the node's previous
    score instead of replacing it, which differs from the textbook HITS
    update. That behaviour is kept unchanged so existing results stay the same.
    """
    list_nodes = list(G.nodes())
    if not list_nodes:
        # No nodes: nothing to score, and 1 / len(nodes) would divide by zero.
        return {}, {}

    initial_val = 1 / len(list_nodes)
    dict_hub_val = {node: initial_val for node in list_nodes}
    dict_auth_val = {node: initial_val for node in list_nodes}

    # out_links[s] -> targets that s points to (s acts as a hub)
    # in_links[d]  -> sources that point to d (d acts as an authority)
    out_links = {}
    in_links = {}
    for edge in G.edges():
        source = edge[0]
        target = edge[1]
        out_links.setdefault(source, []).append(target)
        in_links.setdefault(target, []).append(source)

    cnt_iter = 1
    while cnt_iter <= max_iters:
        cnt_iter += 1

        # Authority update first: accumulate the hub scores of all in-neighbours.
        for node, sources in in_links.items():
            new_auth_score = 0
            for source in sources:
                new_auth_score += dict_hub_val[source]
            dict_auth_val[node] = dict_auth_val[node] + new_auth_score

        # Snapshot of the hub scores from the previous round, used for the
        # convergence check below.
        dict_prev_hub_val = dict(dict_hub_val)

        # Hub update: accumulate the (already updated, not yet normalised)
        # authority scores of all out-neighbours.
        for node, targets in out_links.items():
            new_hub_score = 0
            for target in targets:
                new_hub_score += dict_auth_val[target]
            dict_hub_val[node] = dict_hub_val[node] + new_hub_score

        _l2_normalize(dict_auth_val)
        _l2_normalize(dict_hub_val)

        # Converged when the hub vector moved by at most `tol` in L1 norm.
        l1_norm_val = 0
        for node in dict_hub_val:
            l1_norm_val = l1_norm_val + abs(dict_hub_val[node] - dict_prev_hub_val[node])
        if l1_norm_val <= tol:
            break

    return dict_hub_val, dict_auth_val

In [3]:
# --- Configuration ---------------------------------------------------------
# NOTE(review): absolute, machine-specific path — point this at a local copy
# of so.txt before re-running.
directory = r'D:\M.TECH SEM 2\MLN\Assignments\A1\mln_a1\mln_a1\so.txt'
HITS_MAX_ITERS = 100
HITS_TOL = 0.00004539992  # numerically about e**-10
TOP_K = 5

# --- Load the edge list ----------------------------------------------------
# `with` guarantees the handle is closed even if read() raises.
with open(directory, "r") as fp:
    text = fp.read()
list_lines = text.split("\n")

# Each usable line holds at least two tab-separated fields: source and target.
list_edges = []
for line in list_lines:
    fields = line.split("\t")
    if len(fields) < 2:
        # Blank line (e.g. from a trailing newline) or malformed row: skip it
        # instead of failing on fields[1].
        continue
    if fields[0] != fields[1]:  # self-loops are dropped
        list_edges.append((fields[0], fields[1]))

# --- Build the graph and run HITS ------------------------------------------
Graph_2b = networkx.DiGraph()
Graph_2b.add_edges_from(list_edges)
cnt_nodes = len(Graph_2b.nodes())

hub_val_dict, auth_val_dict = HITS(Graph_2b, HITS_MAX_ITERS, HITS_TOL)

# --- Rank nodes by score (highest first) -----------------------------------
list_hub_pairs = [[node, score] for node, score in hub_val_dict.items()]
list_auth_pairs = [[node, score] for node, score in auth_val_dict.items()]

list_hub_pairs.sort(key=lambda pair: pair[1], reverse=True)
list_auth_pairs.sort(key=lambda pair: pair[1], reverse=True)

# Slicing tolerates graphs with fewer than TOP_K nodes.
top_5_hub = [pair[0] for pair in list_hub_pairs[:TOP_K]]
top_5_auth = [pair[0] for pair in list_auth_pairs[:TOP_K]]
top_5_hub_scores = [pair[1] for pair in list_hub_pairs[:TOP_K]]
top_5_auth_scores = [pair[1] for pair in list_auth_pairs[:TOP_K]]

# --- Report ----------------------------------------------------------------
print("The top 5 hubs are : ")
for i, node in enumerate(top_5_hub, start=1):
    print(f"Hub {i}  is  {node}")

print("\nThe top 5 authorities are : ")
for i, node in enumerate(top_5_auth, start=1):
    print(f"Authority {i}  is  {node}")

print(f"\n Top 5 hub scores are {top_5_hub_scores}")
print(f"\n Top 5 authority scores are {top_5_auth_scores}")

The top 5 hubs are : 
Hub 1  is  892029
Hub 2  is  1194415
Hub 3  is  359862
Hub 4  is  648138
Hub 5  is  470184

The top 5 authorities are : 
Authority 1  is  22656
Authority 2  is  157882
Authority 3  is  571407
Authority 4  is  57695
Authority 5  is  139985

 Top 5 hub scores are [0.07327072388045137, 0.05946412931822916, 0.05685438227295593, 0.05556602208415009, 0.053125086359864584]

 Top 5 authority scores are [0.6060317506322206, 0.29865646325698303, 0.28413922813538406, 0.26948377095408615, 0.24941476293751216]
