In [None]:
import numpy as np

# Load the edge list. Each non-empty line of Data.txt holds two integers;
# later cells treat them as "from_node to_node".
G = []
with open('Data.txt', 'r') as file:  # context manager guarantees the handle is closed
    for line in file:
        data = line.split()
        if not data:
            # Blank line (e.g. a trailing newline at end of file): nothing to parse.
            continue
        edge = (int(data[0]), int(data[1]))
        G.append(edge)


# Collect every distinct node id that appears on either end of an edge.
node_set = set()
for edge in G:
    node_set.add(edge[0])
    node_set.add(edge[1])

node_num = len(node_set)  # number of distinct nodes

# Check that the node ids are exactly 1..node_num; later cells use
# (id - 1) directly as an array index.
for i in range(1, node_num + 1):
    if i not in node_set:
        print('点的序号不连续')
        break

beta = 0.8  # probability of following a link (1 - beta is the teleport probability)


# 基础版本

In [None]:
# Dense-matrix PageRank by power iteration.

# Build the link matrix: matrix[t, s] is set for every edge s+1 -> t+1.
# Repeated edges collapse into a single link because the entry is assigned, not summed.
matrix = np.zeros((node_num, node_num))

for edge in G:
    matrix[edge[1]-1][edge[0]-1] = 1

# Normalise every column so it sums to 1 (vectorised instead of a Python double loop).
col_sums = matrix.sum(axis=0)
dead_ends = col_sums == 0
live = ~dead_ends

# A dead-end (column with no out-links) is turned into a uniform random jump.
# The guard keeps an empty graph from evaluating 1/0.
if dead_ends.any():
    matrix[:, dead_ends] = 1 / node_num

matrix[:, live] = matrix[:, live] / col_sums[live]

# Start from the uniform distribution 1/node_num.
scores = np.ones((node_num)) / node_num

beta = 0.8  # probability of following a link

new_scores = np.zeros((node_num))
k = 0         # number of iterations performed
e = node_num  # L1 distance between two consecutive iterates

while e > 1e-6:
    # Follow a link with probability beta, teleport uniformly otherwise.
    new_scores = beta * np.dot(matrix, scores) + (1 - beta) / node_num
    e = np.abs(new_scores - scores).sum()
    scores = np.copy(new_scores)
    k += 1

print(scores)

In [None]:
# Total PageRank mass.
print(sum(scores))

# One argsort drives both arrays so node ids and scores stay aligned.
# Reversing the ascending order yields highest score first.
order = np.argsort(scores)[::-1]
sorted_indices = order + 1      # back to 1-based node ids
sorted_scores = scores[order]

# Labels say descending (从大到小) to match the reversed sort order.
print('PageRank值从大到小排序：', sorted_indices)
print('PageRank值从大到小排序：', sorted_scores)

In [None]:
# Write one "node_id：score" line per node, in the order of sorted_indices.
# The separator is a full-width colon, so the encoding is pinned to UTF-8
# instead of relying on the platform default (which may be unable to encode it).
with open('result.txt', 'w', encoding='utf-8') as file:
    for node_id, score in zip(sorted_indices, sorted_scores):
        file.write(f"{node_id}：{score}\n")

# 稀疏矩阵优化

In [None]:
# Adjacency-list PageRank: sparse_matrix[s] lists the 0-based targets of node s.
# NOTE(review): repeated edges are kept here (each one adds a target entry),
# whereas the dense cell collapses them; confirm Data.txt has no duplicates.
sparse_matrix = [[] for _ in range(node_num)]

for src, dst in G:
    sparse_matrix[src - 1].append(dst - 1)

# Start from the uniform distribution 1/node_num.
scores = np.ones(node_num) / node_num

e = node_num  # L1 distance between two consecutive iterates

beta = 0.8    # probability of following a link

k = 0         # number of iterations performed

while e > 1e-5:
    # Every node first receives the teleport share.
    new_scores = np.full(node_num, (1 - beta) / node_num)

    for src, targets in enumerate(sparse_matrix):
        if not targets:
            # Dead-end: its rank is spread evenly over all nodes.
            new_scores += beta * scores[src] / node_num
            continue

        # Same amount goes to every out-link of this source.
        share = beta * scores[src] / len(targets)
        for dst in targets:
            new_scores[dst] += share

    e = sum(np.abs(new_scores - scores))
    scores = new_scores.copy()
    k += 1

print(scores)

In [None]:
# Total PageRank mass.
print(sum(scores))

# One argsort drives both arrays so node ids and scores stay aligned.
# Reversing the ascending order yields highest score first.
order = np.argsort(scores)[::-1]
sorted_indices = order + 1      # back to 1-based node ids
sorted_scores = scores[order]

# Labels say descending (从大到小) to match the reversed sort order.
print('PageRank值从大到小排序：', sorted_indices)
print('PageRank值从大到小排序：', sorted_scores)

# 分块优化

In [None]:
# Block-wise PageRank: new_scores is rebuilt one block of `block_size`
# destination nodes at a time; every block pass scans the whole adjacency list.

# sparse_matrix[s] lists the 0-based targets of node s.
sparse_matrix = [[] for _ in range(node_num)]

for edge in G:
    sparse_matrix[edge[0]-1].append(edge[1]-1)

# Start from the uniform distribution 1/node_num.
scores = np.ones((node_num)) / node_num

e = 1  # L1 distance between two consecutive iterates

block_size = 2000  # number of destination nodes updated per pass

block_num = node_num // block_size  # number of full blocks

remainder = node_num % block_size   # size of the trailing partial block (0 if none)

new_scores = np.zeros((node_num))

while e > 1e-5:
    e = 0
    # A stepped range visits the full blocks and then the trailing partial
    # block with the same code. It yields nothing for an empty graph, and no
    # extra scan is made when node_num is a multiple of block_size.
    for start in range(0, node_num, block_size):
        stop = min(start + block_size, node_num)

        # Teleport share for every node of this block.
        new_scores[start:stop] = (1 - beta) / node_num

        for j in range(node_num):
            targets = sparse_matrix[j]

            if len(targets) == 0:
                # Dead-end: its rank is spread evenly over all nodes,
                # so this block receives its per-node part.
                new_scores[start:stop] += beta * scores[j] / node_num
                continue

            share = beta * scores[j] / len(targets)
            for m in targets:
                # Only destinations inside the current block are updated.
                if start <= m < stop:
                    new_scores[m] += share

        e += np.abs(new_scores[start:stop] - scores[start:stop]).sum()

    scores = np.copy(new_scores)


print(scores)

In [None]:
# Total PageRank mass and node count (the mass was previously printed twice).
print(sum(scores))
print(node_num)

# One argsort drives both arrays so node ids and scores stay aligned.
# Reversing the ascending order yields highest score first.
order = np.argsort(scores)[::-1]
sorted_indices = order + 1      # back to 1-based node ids
sorted_scores = scores[order]

# Labels say descending (从大到小) to match the reversed sort order.
print('PageRank值从大到小排序：', sorted_indices)
print('PageRank值从大到小排序：', sorted_scores)

In [None]:
# Scratch check: adding to a slice changes only the selected entries
# (indices 2 and 3 here) and leaves the rest of the array untouched.
ls = np.zeros(10)
ls[2:4] = ls[2:4] + 2
print(ls)

# Block-Stripe优化

In [None]:
# Block-stripe PageRank: the link structure is split into one "stripe" per
# block of destination nodes. stripes[b] maps a source node to the targets it
# has inside block b, so updating block b only touches stripe b.

block_size = 2000  # number of destination nodes per block

block_num = node_num // block_size

remainder = node_num % block_size

if remainder != 0:
    # The trailing partial block counts as a block of its own.
    block_num += 1

stripes = [{} for _ in range(block_num)]

# length[s] is the out-degree of node s (used to split its rank).
length = [0 for _ in range(node_num)]

# Distribute every edge into the stripe that owns its destination.
for edge in G:
    to_node = edge[1] - 1
    from_node = edge[0] - 1
    index = to_node // block_size
    # The destination must actually be recorded; creating an empty list only
    # would make the node look linked while contributing nothing.
    stripes[index].setdefault(from_node, []).append(to_node)
    length[from_node] += 1

# Dead-ends (no out-links) are treated as linking to every node.
for i in range(node_num):
    if length[i] == 0:
        length[i] = node_num
        for j in range(block_num):
            # Clamp to node_num so the last (partial) block never yields an
            # out-of-range index. A range keeps this O(1) in memory per stripe.
            stripes[j][i] = range(j * block_size, min((j + 1) * block_size, node_num))

scores = np.ones((node_num)) / node_num

new_scores = np.zeros((node_num))
e = 1       # L1 distance between two consecutive iterates
beta = 0.8  # probability of following a link
while e > 1e-5:
    e = 0
    # block_num already includes the partial block and slicing clamps at
    # node_num, so no separate remainder pass is needed (a second pass over
    # the last stripe would count its contributions twice).
    for i in range(block_num):
        start = i * block_size
        stop = start + block_size

        # Teleport share for every node of this block.
        new_scores[start:stop] = (1 - beta) / node_num

        for from_node, targets in stripes[i].items():
            share = beta * scores[from_node] / length[from_node]
            for to_node in targets:
                new_scores[to_node] += share

        e += np.abs(new_scores[start:stop] - scores[start:stop]).sum()

    scores = np.copy(new_scores)

print(scores)

In [None]:
# Show the number of distinct nodes found in the edge list.
print(node_num)