In [1]:
import numpy as np

class Graph:
    def __init__(self):
        self.Nodes = []

    def search(self, name):
        
        exist = False
        
        for node in self.Nodes:
            if(node.name == name):
                exist = True
                break

        if exist: 
            return next(node for node in self.Nodes if node.name == name)
        
        else:                       
            new_node = Node(name)
            self.Nodes.append(new_node)
            return new_node

    def addEdge(self, parent, child):
        parent_node = self.search(parent)
        child_node = self.search(child)
        
        if(child_node.name not in parent_node.children):
            parent_node.children.append(child_node)
        
        if(parent_node.name not in child_node.parents):
            child_node.parents.append(parent_node)

    def display(self):
        for node in self.Nodes:
            print(f'{node.name} links to {[child.name for child in node.children]}')

class Node:
    def __init__(self, name):
        self.name = name
        self.children = []
        self.parents = []
        self.auth = 1.0
        self.preAuth = 1.0
        self.hub = 1.0
        self.preHub = 1.0

In [2]:
def init_graph(fname):
    with open(fname,encoding="utf-8") as f:
        lines = f.readlines()

    graph = Graph()

    for line in lines:
        [parent, child] = line.strip().split(',')
        graph.addEdge(parent, child)
        
    graph.Nodes.sort(key=lambda node: int(node.name))

    return graph


In [3]:
def HITS(g, num):
    for i in range(num):
        nodeList = g.Nodes
        for node in nodeList:
            node.auth = sum(parent_node.preHub for parent_node in node.parents)
        
        for node in nodeList:
            node.hub = sum(child_node.preAuth for child_node in node.children)
         
        auth_sum = sum(node.auth for node in nodeList)
        hub_sum = sum(node.hub for node in nodeList)

        for node in nodeList:
            node.auth /= auth_sum
            node.hub /= hub_sum
            node.f = node.auth
            node.preHub = node.hub
            
def get_auth_hub_list(g):
    nodeList = g.Nodes
    auth_list = np.asarray([node.auth for node in nodeList], dtype='float32')
    hub_list = np.asarray([node.hub for node in nodeList], dtype='float32')

    return np.round(auth_list, 3), np.round(hub_list, 3)


        


In [4]:
import os
import time
if __name__ == '__main__':
    authority_fname = '_HITS_authority.txt'
    hub_fname = '_HITS_hub.txt'
    result_dir = 'result'
    data_path = './hw3dataset/'
    for file_name in os.listdir(data_path):
        
        file_path = data_path + file_name
        fname = file_path.split('/')[-1].split('.')[0]
        it = 30
        graph = init_graph(file_path)
        start = time.time()
#         graph.display()
        HITS(graph, it)
        end = time.time()
        

        auth_list, hub_list = get_auth_hub_list(graph)
        print()
        print(fname)
        print('Authority:')
        print(auth_list)
        path = os.path.join(result_dir, fname)
        os.makedirs(path, exist_ok=True)
        np.savetxt(os.path.join(path, fname + authority_fname), auth_list, fmt='%.3f', newline=" ")
        print('Hub:')
        print(hub_list)
        print("time: ",end-start)
        print()
        np.savetxt(os.path.join(path, fname + hub_fname), hub_list, fmt='%.3f', newline=" ")


IBM
Authority:
[0.    0.    0.074 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.023 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.003
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.001 0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.073 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.002 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 


graph_6
Authority:
[0.    0.    0.    ... 0.    0.005 0.   ]
Hub:
[0.005 0.    0.    ... 0.    0.005 0.   ]
time:  0.041409969329833984

