In [None]:
import re

import networkx as nx
import numpy as np
import pandas as pd

def _parse_link_ids(raw_links):
    """Return every whole number found in one ``link_idx`` cell as a list of ints.

    The cell is converted to text first, so a string such as ``"[12, 3]"`` and a
    list such as ``[12, 3]`` both yield ``[12, 3]``. A cell that contains no
    digits (for example a missing value) yields ``[]``.
    """
    return [int(token) for token in re.findall(r"\d+", str(raw_links))]


def hits_algorithm(df):
    """Build a directed link graph from ``df`` and score its nodes with HITS.

    Args:
        df: Table-like object whose ``'idx'`` and ``'link_idx'`` columns can be
            iterated in parallel. Each ``'link_idx'`` entry lists the ids that
            the matching ``'idx'`` node links to.

    Returns:
        The ``(hubs, authorities)`` pair produced by ``nx.hits``.
    """
    G = nx.DiGraph()

    # Add one directed edge per (node, linked node) pair.
    for idx, links in zip(df['idx'], df['link_idx']):
        # Parse whole numbers: walking the raw text character by character
        # would split a multi-digit id such as 12 into the ids 1 and 2.
        for link in _parse_link_ids(links):
            # Links to id 0 are ignored (presumably a "no link" placeholder;
            # confirm against the data).
            if link != 0:
                G.add_edge(idx, link)

    # Run the HITS algorithm on the assembled graph.
    hubs, authorities = nx.hits(G)

    return hubs, authorities

def main():
    """Load the link table, run HITS on it, and print the resulting scores."""
    link_table = pd.read_csv('df_links.csv')
    hub_scores, authority_scores = hits_algorithm(link_table)

    # Sizes of the two score dictionaries, space-separated on one line.
    print(len(hub_scores), len(authority_scores))

    # Banner, then hub scores, two blank lines, and authority scores.
    print("=====================HITS=====================")
    print(hub_scores, authority_scores, sep="\n\n\n")


if __name__ == "__main__":
    main()

In [None]:
    # Same steps as main(), but run at cell level so that df, hubs and
    # authorities remain available to the cells that follow.
    df = pd.read_csv('df_links.csv')
    hubs, authorities = hits_algorithm(df)
    print(len(hubs), len(authorities))

    # Banner, then hub scores, two blank lines, and authority scores.
    print("=====================HITS=====================")
    print(hubs, authorities, sep="\n\n\n")

In [None]:
# Tabulate the HITS scores, one row per node.
hubs_df = pd.DataFrame(
    [(node, score) for node, score in hubs.items()],
    columns=['idx', 'hub_score'],
)
authorities_df = pd.DataFrame(
    [(node, score) for node, score in authorities.items()],
    columns=['idx', 'authority_score'],
)

# Print both rankings, highest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score', ascending=False),
    authorities_df.sort_values('authority_score', ascending=False),
    sep="\n\n\n",
)

In [None]:
# Print both rankings again, lowest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score'),
    authorities_df.sort_values('authority_score'),
    sep="\n\n\n",
)

In [None]:
# Display the link table loaded from 'df_links.csv' as this cell's output.
df

In [None]:
def HITS(G, max_iter=100):
    """Compute hub and authority scores for a directed graph by iteration.

    Every score starts at 1.0. Each round sets a node's authority to the sum of
    the hub scores of its predecessors, then sets its hub score to the sum of
    the freshly updated authorities of its successors, and finally rescales
    both dictionaries so that their largest value is 1.

    Args:
        G: Graph-like object that is iterable over its nodes and provides
            ``predecessors(node)`` and ``successors(node)``.
        max_iter: Number of update rounds to run (default 100).

    Returns:
        A ``(hub_dict, auth_dict)`` tuple mapping each node to its score.
    """
    # Initialize every hub and authority score to 1.
    hub_dict = {u: 1.0 for u in G}
    auth_dict = {u: 1.0 for u in G}

    for _ in range(max_iter):
        # Update authority: sum of the hub scores of the in-edge nodes.
        auth_dict = {u: sum(hub_dict[v] for v in G.predecessors(u)) for u in G}
        # Update hub: sum of the (not yet rescaled) authorities of the out-edge nodes.
        hub_dict = {u: sum(auth_dict[v] for v in G.successors(u)) for u in G}

        # Rescale by the maximum. A maximum of 0 (no edges, or no nodes at all)
        # leaves the scores untouched instead of dividing by zero.
        hub_norm = max(hub_dict.values(), default=0.0)
        if hub_norm:
            hub_dict = {k: v / hub_norm for k, v in hub_dict.items()}
        auth_norm = max(auth_dict.values(), default=0.0)
        if auth_norm:
            auth_dict = {k: v / auth_norm for k, v in auth_dict.items()}

    return hub_dict, auth_dict

In [None]:
df = pd.read_csv('df_links.csv')

DG = nx.DiGraph()

# Add one directed edge per (node, linked node) pair.
for idx, links in zip(df['idx'], df['link_idx']):
    # Parse whole numbers: walking the raw text character by character would
    # split a multi-digit id such as 12 into the ids 1 and 2.
    for link in map(int, re.findall(r'\d+', str(links))):
        # Links to id 0 are ignored (presumably a "no link" placeholder; confirm).
        if link != 0:
            DG.add_edge(idx, link)

# Stop here if the graph is not a single weakly connected component.
assert nx.is_weakly_connected(DG), "link graph is not weakly connected"

def custom_HITS(G, max_iter=10**2):
    """Hand-written HITS: iterate hub/authority updates with max-normalization.

    Every score starts at 1.0. Each round sets a node's authority to the sum of
    the hub scores of its predecessors, then sets its hub score to the sum of
    the freshly updated authorities of its successors, and finally divides both
    dictionaries by their largest value.

    Args:
        G: Graph-like object that is iterable over its nodes and provides
            ``predecessors(node)`` and ``successors(node)``.
        max_iter: Number of update rounds to run (default 100).

    Returns:
        A ``(hub_dict, auth_dict)`` tuple mapping each node to its score.
    """
    # Initialize every hub and authority score to 1.
    hub_dict = {u: 1.0 for u in G}
    auth_dict = {u: 1.0 for u in G}

    for _ in range(max_iter):
        # Update authority: sum of the hub scores of the in-edge nodes.
        auth_dict = {u: sum(hub_dict[v] for v in G.predecessors(u)) for u in G}
        # Update hub: sum of the (not yet rescaled) authorities of the out-edge nodes.
        hub_dict = {u: sum(auth_dict[v] for v in G.successors(u)) for u in G}

        # Divide by the maximum. A maximum of 0 (no edges, or no nodes at all)
        # leaves the scores untouched instead of raising.
        hub_norm = max(hub_dict.values(), default=0.0)
        if hub_norm:
            hub_dict = {k: v / hub_norm for k, v in hub_dict.items()}
        auth_norm = max(auth_dict.values(), default=0.0)
        if auth_norm:
            auth_dict = {k: v / auth_norm for k, v in auth_dict.items()}

    return (hub_dict, auth_dict)

def np_normalize_dict(input_dict):
    """Return a copy of ``input_dict`` with its values divided by their norm.

    The norm is ``np.linalg.norm`` of the list of values. When that norm is 0
    (all values are zero, or the dictionary is empty) an unscaled copy is
    returned, because dividing by it would turn every score into ``nan``.
    """
    norm = np.linalg.norm(list(input_dict.values()))
    if norm == 0:
        return dict(input_dict)
    return {k: v / norm for k, v in input_dict.items()}

#############################################
# Score the graph with networkx and with the hand-written implementation,
# passing every resulting dictionary through np_normalize_dict so that the
# two sets of scores share one scale.
nx_hub_dict, nx_auth_dict = (np_normalize_dict(scores) for scores in nx.hits(DG))
cus_hub_dict, cus_auth_dict = (np_normalize_dict(scores) for scores in custom_HITS(DG))

# Header line, then the networkx authority scores, then the hub scores.
print("== nx authority, hub dictionary ", nx_auth_dict, nx_hub_dict, sep="\n")

In [None]:
# Header line, then the hand-written authority scores, then the hub scores.
print("== custom authority, hub dictionary ", cus_auth_dict, cus_hub_dict, sep="\n")

In [None]:
# Tabulate the hand-written HITS scores, one row per node.
hubs_df = pd.DataFrame(
    [(node, score) for node, score in cus_hub_dict.items()],
    columns=['idx', 'hub_score'],
)
authorities_df = pd.DataFrame(
    [(node, score) for node, score in cus_auth_dict.items()],
    columns=['idx', 'authority_score'],
)

# Print both rankings, highest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score', ascending=False),
    authorities_df.sort_values('authority_score', ascending=False),
    sep="\n\n\n",
)

In [None]:
# Print both rankings again, lowest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score'),
    authorities_df.sort_values('authority_score'),
    sep="\n\n\n",
)

In [None]:
import matplotlib.pyplot as plt

# Draw the link graph. spring_layout starts from random positions, so a fixed
# seed is passed to get the same picture on every run.
plt.figure(figsize=(10, 8))
pos = nx.spring_layout(DG, seed=42)
nx.draw(DG, pos, with_labels=True, node_size=3000, node_color="skyblue", font_size=10, font_color="black", arrowsize=20)
plt.title("Directed Graph")
plt.show()

In [None]:
DG = nx.DiGraph()

# Optional experiment: use only the first 22 rows.
#df = df[:22]
#df = pd.read_csv('df_links.csv')

# Add one directed edge per (node, linked node) pair.
for idx, links in zip(df['idx'], df['link_idx']):
    # Parse whole numbers: walking the raw text character by character would
    # split a multi-digit id such as 12 into the ids 1 and 2.
    for link in map(int, re.findall(r'\d+', str(links))):
        # Links to id 0 are ignored (presumably a "no link" placeholder; confirm).
        if link != 0:
            DG.add_edge(idx, link)

# Stop here if the graph is not a single weakly connected component.
assert nx.is_weakly_connected(DG), "link graph is not weakly connected"

def custom_HITS(G, max_iter=10**2):
    """Hand-written HITS: iterate hub/authority updates with max-normalization.

    Every score starts at 1.0. Each round sets a node's authority to the sum of
    the hub scores of its predecessors, then sets its hub score to the sum of
    the freshly updated authorities of its successors, and finally divides both
    dictionaries by their largest value.

    Args:
        G: Graph-like object that is iterable over its nodes and provides
            ``predecessors(node)`` and ``successors(node)``.
        max_iter: Number of update rounds to run (default 100).

    Returns:
        A ``(hub_dict, auth_dict)`` tuple mapping each node to its score.
    """
    # Initialize every hub and authority score to 1.
    hub_dict = {u: 1.0 for u in G}
    auth_dict = {u: 1.0 for u in G}

    for _ in range(max_iter):
        # Update authority: sum of the hub scores of the in-edge nodes.
        auth_dict = {u: sum(hub_dict[v] for v in G.predecessors(u)) for u in G}
        # Update hub: sum of the (not yet rescaled) authorities of the out-edge nodes.
        hub_dict = {u: sum(auth_dict[v] for v in G.successors(u)) for u in G}

        # Divide by the maximum. A maximum of 0 (no edges, or no nodes at all)
        # leaves the scores untouched instead of raising.
        hub_norm = max(hub_dict.values(), default=0.0)
        if hub_norm:
            hub_dict = {k: v / hub_norm for k, v in hub_dict.items()}
        auth_norm = max(auth_dict.values(), default=0.0)
        if auth_norm:
            auth_dict = {k: v / auth_norm for k, v in auth_dict.items()}

    return (hub_dict, auth_dict)

def np_normalize_dict(input_dict):
    """Return a copy of ``input_dict`` with its values divided by their norm.

    The norm is ``np.linalg.norm`` of the list of values. When that norm is 0
    (all values are zero, or the dictionary is empty) an unscaled copy is
    returned, because dividing by it would turn every score into ``nan``.
    """
    norm = np.linalg.norm(list(input_dict.values()))
    if norm == 0:
        return dict(input_dict)
    return {k: v / norm for k, v in input_dict.items()}

#############################################
# Score the graph with networkx and with the hand-written implementation,
# passing every resulting dictionary through np_normalize_dict so that the
# two sets of scores share one scale.
nx_hub_dict, nx_auth_dict = (np_normalize_dict(scores) for scores in nx.hits(DG))
cus_hub_dict, cus_auth_dict = (np_normalize_dict(scores) for scores in custom_HITS(DG))

# Header line, networkx authority scores, hub scores, then a closing rule.
print("== nx authority, hub dictionary ", nx_auth_dict, nx_hub_dict, "=="*30, sep="\n")

# Draw the graph
# plt.figure(figsize=(10, 8))
# pos = nx.spring_layout(DG)
# nx.draw(DG, pos, with_labels=True, node_size=3000, node_color="skyblue", font_size=10, font_color="black", arrowsize=20)
# plt.title("Directed Graph")
# plt.show()

In [None]:
# Header line, then the hand-written authority scores, then the hub scores.
print("== custom authority, hub dictionary ", cus_auth_dict, cus_hub_dict, sep="\n")

In [None]:
# Tabulate the hand-written HITS scores, one row per node.
hubs_df = pd.DataFrame(
    [(node, score) for node, score in cus_hub_dict.items()],
    columns=['idx', 'hub_score'],
)
authorities_df = pd.DataFrame(
    [(node, score) for node, score in cus_auth_dict.items()],
    columns=['idx', 'authority_score'],
)

# Print both rankings, highest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score', ascending=False),
    authorities_df.sort_values('authority_score', ascending=False),
    sep="\n\n\n",
)

In [None]:
# Print both rankings again, lowest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score'),
    authorities_df.sort_values('authority_score'),
    sep="\n\n\n",
)

In [None]:
# Tabulate the networkx HITS scores, one row per node.
hubs_df = pd.DataFrame(
    [(node, score) for node, score in nx_hub_dict.items()],
    columns=['idx', 'hub_score'],
)
authorities_df = pd.DataFrame(
    [(node, score) for node, score in nx_auth_dict.items()],
    columns=['idx', 'authority_score'],
)

# Print both rankings, highest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score', ascending=False),
    authorities_df.sort_values('authority_score', ascending=False),
    sep="\n\n\n",
)

In [None]:
# Print both rankings again, lowest score first, separated by two blank lines.
print(
    hubs_df.sort_values('hub_score'),
    authorities_df.sort_values('authority_score'),
    sep="\n\n\n",
)

In [None]:
# Do the two implementations agree on the hub scores? Exact == on float
# dictionaries fails on the slightest rounding difference, so require the same
# set of nodes and compare the scores within a tolerance (adjust atol as needed).
cus_hub_dict.keys() == nx_hub_dict.keys() and np.allclose(
    [cus_hub_dict[node] for node in nx_hub_dict],
    list(nx_hub_dict.values()),
    atol=1e-6,
)

In [None]:
# Do the two implementations agree on the authority scores? Exact == on float
# dictionaries fails on the slightest rounding difference, so require the same
# set of nodes and compare the scores within a tolerance (adjust atol as needed).
cus_auth_dict.keys() == nx_auth_dict.keys() and np.allclose(
    [cus_auth_dict[node] for node in nx_auth_dict],
    list(nx_auth_dict.values()),
    atol=1e-6,
)