In [1]:
from math import sqrt

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from pygraph.classes.digraph import digraph
from pygraph.classes.exceptions import AdditionError

In [2]:
# Relationship table: the CSV is read without a header row and the three
# columns are named explicitly.
data = pd.read_csv(
    "relation_message.csv",
    header=None,
    names=['begin', 'end', 'relationship'],
)

In [3]:
class HITSIterator:
    """Iteratively compute HITS hub and authority scores for a directed graph.

    The graph object must provide ``nodes()``, ``incidents(node)`` (the nodes
    with an edge pointing at ``node``) and ``neighbors(node)`` (the nodes that
    ``node`` points at). Every node starts with hub = authority = 1. Each
    round recomputes the authority scores from the current hub scores, then
    the hub scores from the fresh authority scores, scaling each score vector
    to unit Euclidean length.

    Attributes:
        max_iterations: upper bound on the number of rounds.
        min_delta: a round whose summed absolute score change (authority plus
            hub) is below this value ends the iteration.
        graph: the graph passed to the constructor.
        hub: dict mapping node -> hub score.
        authority: dict mapping node -> authority score.
    """

    def __init__(self, dg, max_iterations=100, min_delta=0.0001):
        """Store the graph and initialise every node's scores to 1.

        Args:
            dg: directed graph exposing ``nodes()``, ``incidents()`` and
                ``neighbors()``.
            max_iterations: maximum number of rounds (default 100).
            min_delta: convergence threshold (default 0.0001).
        """
        # Maximum number of iterations.
        self.max_iterations = max_iterations
        # Convergence threshold that ends the iteration.
        self.min_delta = min_delta
        self.graph = dg

        self.hub = {}
        self.authority = {}
        for node in self.graph.nodes():
            self.hub[node] = 1
            self.authority[node] = 1

    def _rescore(self, scores, sources, linked_nodes, change):
        """Recompute ``scores`` in place and return the updated running change.

        Each node's new score is the sum of ``sources`` over the nodes yielded
        by ``linked_nodes(node)``; the vector is then divided by its Euclidean
        norm. ``change`` is returned increased by the absolute difference
        between each node's previous and new score.
        """
        previous = scores.copy()
        # Normalisation coefficient: sum of squares, square-rooted below.
        norm = 0
        for node in self.graph.nodes():
            scores[node] = 0
            for linked in linked_nodes(node):
                scores[node] += sources[linked]
            norm += pow(scores[node], 2)
        norm = sqrt(norm)
        for node in self.graph.nodes():
            # An all-zero vector (e.g. a graph without edges) has norm 0;
            # leave it untouched instead of dividing by zero.
            if norm > 0:
                scores[node] /= norm
            change += abs(previous[node] - scores[node])
        return change

    def hits(self):
        """Run the iteration and return ``(authority.items(), hub.items())``.

        Stops as soon as one round's total change is below ``min_delta`` or
        after ``max_iterations`` rounds, and prints which of the two happened.
        A falsy graph (for example one reporting length 0) is returned
        unprocessed, still as a 2-tuple so callers can always unpack it.
        """
        if not self.graph:
            return self.authority.items(), self.hub.items()

        converged = False
        for i in range(self.max_iterations):
            # Total change accumulated over this round.
            change = 0.0
            # Authority of a node: sum of the hub scores of the nodes linking to it.
            change = self._rescore(self.authority, self.hub,
                                   self.graph.incidents, change)
            # Hub of a node: sum of the authority scores of the nodes it links to.
            change = self._rescore(self.hub, self.authority,
                                   self.graph.neighbors, change)

            if change < self.min_delta:
                converged = True
                break
        if converged:
            print("finished in %s iterations!" % (i + 1))
        else:
            # Report the configured limit rather than a hard-coded 100.
            print("finished out of %s iterations!" % self.max_iterations)

        return self.authority.items(), self.hub.items()

In [4]:
data

Unnamed: 0,begin,end,relationship
0,tonys,rhody,friend
1,tonys,hawke,friend
2,tonys,stever,friend
3,tonys,falcon,friend
4,tonys,nickf,friend
...,...,...,...
1139,stanlee,eriks,friend
1140,stanlee,tonys,friend
1141,stanlee,rhody,friend
1142,stanlee,thor,friend


In [5]:
# Node table: the CSV is read without a header row and the four columns are
# named explicitly.
nodes = pd.read_csv(
    "message.csv",
    header=None,
    names=['name', 'fullname', 'isalive', 'intro'],
)

In [6]:
nodes

Unnamed: 0,name,fullname,isalive,intro
0,tonys,Tony Stark,alive,human
1,stever,Steve Rogers,alive,human
2,bruceb,Bruce Banner,alive,human
3,thor,Thor,alive,asgardian
4,blackw,Natasha Romanoff,alive,human
...,...,...,...,...
177,ebonym,The Ebony Maw,alive,unknown
178,obsidianc,Obsidian Cull,alive,unknown
179,outrider,Outriders,alive,unknown
180,bettyb,Betty Brant,alive,human


In [7]:
# Start from an empty pygraph directed graph; nodes and edges are added below.
dg = digraph()

In [8]:
# Register every value of the 'name' column as a graph node.
nodes_list = list(nodes['name'].values)
dg.add_nodes(nodes_list)

# Add one directed edge per relationship row (first column -> second column).
# Loading stays best-effort, but only pygraph's own AdditionError is tolerated
# (presumably raised for duplicate edges or endpoints missing from the node
# table - confirm against the pygraph docs). Anything else, including
# KeyboardInterrupt, now propagates instead of being silently swallowed.
skipped_edges = []
for row in data.values:
    edge = (row[0], row[1])
    try:
        dg.add_edge(edge)
    except AdditionError:
        skipped_edges.append(edge)

# Make dropped rows visible; inspect `skipped_edges` for the details.
if skipped_edges:
    print("skipped %d edges rejected by the graph" % len(skipped_edges))

In [9]:
# Run HITS on the graph; hits() returns the authority items first, then the
# hub items.
hits = HITSIterator(dg)
auth_val,hub_val = hits.hits()

finished in 22 iterations!


In [10]:
auth_val

dict_items([('tonys', 0.024694475587717042), ('stever', 0.19393878730470748), ('bruceb', 0.14792659237199662), ('thor', 0.13765902683286393), ('blackw', 0.19272166768830978), ('rhody', 0.09805639238120958), ('hawke', 0.1930822997431318), ('nickf', 0.21817504605873023), ('falcon', 0.21404707592285538), ('vision', 0.24130448834716214), ('wandam', 0.23554259237557076), ('pietrom', 0.1886161288250593), ('ultronu', 0.19713916931101982), ('helenc', 0.03267444879075297), ('ultrons', 0.19537510871697691), ('mariah', 0.16978791033119273), ('wolfgangv', 0.17261388462325944), ('list', 4.00637999271691e-49), ('peggyc', 0.052715911858342165), ('heimdall', 0.07740667753552014), ('laurab', 0.019726246400506808), ('madameb', 0.02652120032584049), ('ulyssesk', 0.023181483195289898), ('pepperp', 0.03010524125660259), ('happyh', 0.03675203446932099), ('howardso', 0.032415716392897004), ('marias', 0.03120550402351984), ('obadiahs', 0.03720382740797158), ('yinsen', 0.03010524125660259), ('raza', 0.03028884

In [11]:
# Print how many (node, authority) pairs came back, then tabulate them:
# column 0 holds the node name, column 1 its authority score.
print(len(auth_val))
authdf = pd.DataFrame(auth_val)

182


In [12]:
authdf

Unnamed: 0,0,1
0,tonys,2.469448e-02
1,stever,1.939388e-01
2,bruceb,1.479266e-01
3,thor,1.376590e-01
4,blackw,1.927217e-01
...,...,...
177,ebonym,1.119506e-25
178,obsidianc,1.347455e-25
179,outrider,1.466241e-25
180,bettyb,2.748816e-03


In [20]:
# Ten rows with the highest authority score (column 1), best first.
authdf.sort_values(by=1, ascending=False).head(10)

Unnamed: 0,0,1
9,vision,0.241304
10,wandam,0.235543
7,nickf,0.218175
8,falcon,0.214047
12,ultronu,0.197139
14,ultrons,0.195375
1,stever,0.193939
6,hawke,0.193082
4,blackw,0.192722
11,pietrom,0.188616


In [19]:
# First ten rows in their original (unsorted) order.
authdf.head(10)

Unnamed: 0,0,1
0,tonys,0.024694
1,stever,0.193939
2,bruceb,0.147927
3,thor,0.137659
4,blackw,0.192722
5,rhody,0.098056
6,hawke,0.193082
7,nickf,0.218175
8,falcon,0.214047
9,vision,0.241304


In [23]:
# Tabulate the (node, hub score) pairs: column 0 is the node name, column 1
# its hub score. The bare expression displays the frame.
hubdf = pd.DataFrame(hub_val)
hubdf

Unnamed: 0,0,1
0,tonys,3.866753e-01
1,stever,3.602632e-01
2,bruceb,2.816675e-01
3,thor,3.618400e-01
4,blackw,3.406414e-01
...,...,...
177,ebonym,2.190653e-26
178,obsidianc,1.141568e-26
179,outrider,0.000000e+00
180,bettyb,2.153191e-04


In [26]:
# Ten rows with the highest hub score (column 1), best first.
hubdf.sort_values(by=1, ascending=False).head(10)

Unnamed: 0,0,1
0,tonys,0.386675
3,thor,0.36184
1,stever,0.360263
4,blackw,0.340641
2,bruceb,0.281668
5,rhody,0.256534
6,hawke,0.253366
79,bucky,0.214986
52,loki,0.197131
8,falcon,0.170805
