# hits_algorithm.py - HITS (hub / authority) score demo on a small directed graph.
import networkx as nx
from pprint import pprint
import pandas as pd
import numpy as np
def main():
    """Build a small example digraph, then print its adjacency matrix and HITS scores.

    The graph is defined by a hard-coded edge list. Nodes are ordered
    alphabetically, and that same order is used for the rows/columns of the
    adjacency matrix and for the index of the printed result tables.
    """
    edge_list = [('A', 'D'), ('B', 'C'), ('B', 'E'), ('C', 'A'),
                 ('D', 'C'), ('E', 'D'), ('E', 'B'), ('E', 'F'),
                 ('E', 'C'), ('C', 'H'), ('G', 'A'),
                 ('G', 'C'), ('H', 'A'), ('F', 'B')]

    # Every distinct endpoint of any edge, in a stable (sorted) order.
    node_list = sorted({node for edge in edge_list for node in edge})

    G = nx.DiGraph()
    G.add_nodes_from(node_list)
    G.add_edges_from(edge_list)

    # ``nx.to_numpy_matrix`` was removed in NetworkX 3.0; ``to_numpy_array``
    # returns a plain ndarray. ``nodelist`` pins the row/column order to
    # ``node_list`` instead of relying on node insertion order.
    adj_matrix = nx.to_numpy_array(G, nodelist=node_list)

    df_adj = pd.DataFrame(
        adj_matrix,
        columns=node_list,
        index=node_list,
        dtype="int64")
    # The printed label below is Japanese for "adjacency matrix".
    pprint("隣接行列")
    pprint(df_adj)

    auth, hub = calc_hits_algorithm(adj_matrix=adj_matrix)

    df_auth = pd.DataFrame(
        auth,
        columns=["authority"],
        index=node_list
    )
    df_hub = pd.DataFrame(
        hub,
        columns=["hubs"],
        index=node_list
    )
    # Side-by-side table: one row per node, authority and hub columns.
    df_result = pd.concat([df_auth, df_hub], axis=1)
    pprint("Result")
    pprint(df_result)
def calc_hits_algorithm(adj_matrix, iter=60, normalize=True, delta=1e-8):
    """Compute HITS authority and hub scores by repeated matrix updates.

    Starting from all-ones vectors, each step applies
    ``auth <- (L^T L) auth`` and ``hub <- (L L^T) hub`` where ``L`` is
    ``adj_matrix``, optionally rescales both vectors to unit L1 norm, and
    stops early once both vectors change by less than ``delta`` (L1 norm)
    between consecutive steps.

    Args:
        adj_matrix: Square 2-D NumPy array (or ``np.matrix``). The caller in
            this file passes a directed graph's adjacency matrix.
        iter: Maximum number of update steps. The name shadows the builtin
            but is kept so existing keyword callers keep working.
        normalize: When true, divide each vector by its L1 norm after every
            step. A vector whose norm is zero is left as-is rather than
            divided (which would produce NaN).
        delta: Convergence threshold applied to both vectors.

    Returns:
        Tuple ``(auth, hub)`` of column vectors with shape ``(n, 1)``. If
        ``iter`` is 0 the initial all-ones vectors are returned.

    Raises:
        ValueError: If ``adj_matrix`` is not square.

    Side effects:
        Prints ``"iter count : "`` followed by the zero-based index of the
        last step executed (0 when no step ran).
    """
    n_rows, n_cols = adj_matrix.shape
    if n_rows != n_cols:
        raise ValueError(
            f"adj_matrix must be square, got shape {adj_matrix.shape}"
        )

    auth = np.ones((n_rows, 1))
    hub = np.ones((n_rows, 1))
    L = adj_matrix

    # Loop-invariant update operators: computed once instead of every step.
    auth_update = L.T @ L
    hub_update = L @ L.T

    # Pre-bind k so the final print is valid even when the loop body never
    # runs (iter == 0).
    k = 0
    for k in range(iter):
        auth_k_1 = auth
        hub_k_1 = hub
        auth = auth_update @ auth
        hub = hub_update @ hub
        if normalize:
            auth_l1_norm = np.linalg.norm(auth, ord=1)
            hub_l1_norm = np.linalg.norm(hub, ord=1)
            # Guard against 0/0 -> NaN when the graph has no edges.
            if auth_l1_norm > 0:
                auth = auth / auth_l1_norm
            if hub_l1_norm > 0:
                hub = hub / hub_l1_norm
        delta_auth = np.linalg.norm(auth - auth_k_1, ord=1)
        delta_hub = np.linalg.norm(hub - hub_k_1, ord=1)
        if delta_auth < delta and delta_hub < delta:
            print("iter count : ", k)
            return auth, hub
    print("iter count : ", k)
    return auth, hub
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()