In [1]:
import random

from collections import defaultdict

# 读入data文件，构建网络(作者-会议、会议-作者、作者-作者)

In [2]:
def BuildNet(filename):
    """Read the data file and build the three co-occurrence networks.

    Every usable line has the form ``conference$author;author;...``.  The
    conference is the text before the first ``$`` and the authors are the
    ``;``-separated pieces of the field that follows it.  The piece after the
    last ``;`` is discarded (presumably every line ends with a trailing ``;``
    -- TODO confirm against the data file).  Lines that contain no ``$`` at
    all (e.g. blank lines) are skipped instead of raising ``IndexError``.

    Args:
        filename: Path of the UTF-8 encoded text file to read.

    Returns:
        A tuple ``(C_A, A_C, A_A)`` of plain dicts whose values are
        ``defaultdict(int)`` counters:

        * ``C_A[conference][author]`` -- number of lines on which the author
          appears for that conference.
        * ``A_C[author][conference]`` -- the same count, keyed the other way.
        * ``A_A[author][other]`` -- number of times the two (different)
          authors appear together on a line.
    """
    # Parse every line exactly once; the two passes below reuse the result.
    records = []
    with open(filename, 'r', encoding='utf-8') as f:
        for raw_line in f:
            fields = raw_line.strip().split('$')
            if len(fields) < 2:
                # No '$' separator: nothing to extract from this line.
                continue
            conference = fields[0]
            authors = fields[1].split(';')[:-1]
            records.append((conference, authors))

    # First pass: create an (empty) counter for every conference and author so
    # that each of them is present in the result, in first-appearance order.
    C_A, A_C, A_A = {}, {}, {}
    for conference, authors in records:
        if conference not in C_A:
            C_A[conference] = defaultdict(int)    # conference -> author counts
        for aut in authors:
            if aut not in A_C:
                A_C[aut] = defaultdict(int)       # author -> conference counts
                A_A[aut] = defaultdict(int)       # author -> co-author counts

    # Second pass: accumulate the counts.
    for conference, authors in records:
        for pos, aut in enumerate(authors):
            C_A[conference][aut] += 1
            A_C[aut][conference] += 1
            # Visit each unordered pair once and update both directions.
            # Looping over all ordered pairs while also updating both
            # directions would count every co-authorship twice.
            for aut2 in authors[pos + 1:]:
                if aut != aut2:
                    A_A[aut][aut2] += 1
                    A_A[aut2][aut] += 1

    return C_A, A_C, A_A

        
# Build the three networks returned by BuildNet:
# conference->author, author->conference and author->author.
CAnet, ACnet, AAnet = BuildNet('data/data.txt')

# Step 0: Initialization

In [22]:
K = 15    # number of clusters
alpha = 0.95    # weight passed to AuthorityRanking (conference term vs. co-author term)
delta = 0.000001    # threshold for deciding convergence
iter_num = 20    # number of iterations

In [4]:
def init_cluster(conferences=None, k=None):
    """Randomly partition the conferences into ``k`` initial clusters.

    Every cluster receives at least one conference whenever there are at
    least ``k`` conferences.  An empty cluster would make the ranking step
    divide by zero, so the first ``k`` conferences (after shuffling) seed one
    cluster each and only the remaining ones are assigned uniformly at random.

    Args:
        conferences: Iterable of conference identifiers.  Defaults to the keys
            of the notebook-level ``CAnet``.
        k: Number of clusters.  Defaults to the notebook-level ``K``.

    Returns:
        A list of ``k`` lists; each conference appears in exactly one of them.
    """
    if conferences is None:
        conferences = CAnet.keys()
    if k is None:
        k = K

    members = list(conferences)
    random.shuffle(members)    # randomise which conferences act as seeds

    cluster = [[] for _ in range(k)]    # one list per cluster
    for idx, confer in enumerate(members):
        if idx < k:
            cluster[idx].append(confer)
        else:
            cluster[random.randint(0, k - 1)].append(confer)
    return cluster

# Initial random partition of the conferences; Cluster[i] is the list of
# conferences currently assigned to cluster i.
Cluster = init_cluster()

# Step 1: Ranking for each cluster

In [92]:
def SimpleRanking(confer_author, author_confer, cluster):
    """Simple Ranking of conferences and authors conditioned on one cluster.

    Conferences in ``cluster`` are scored by their total author count and
    authors by the count they contribute to those conferences; both score
    sets are divided by the same grand total.  Every conference, inside the
    cluster or not, is then scored by the rank-weighted count of its authors
    and those scores are normalised to sum to 1.

    Args:
        confer_author: Mapping conference -> {author: count}.
        author_confer: Mapping author -> {conference: count}.  Not used by
            this function; kept so the signature parallels AuthorityRanking.
        cluster: Collection of the conferences that belong to the cluster.

    Returns:
        ``(rank_confer, rank_author, rank_allconfer)``, each a
        ``defaultdict(int)``.  When the cluster contributes no counts (for
        example because it is empty) the scores are left at 0 instead of
        raising ``ZeroDivisionError``.
    """
    print("Ranking...")
    rank_confer, rank_author, rank_allconfer = defaultdict(int), defaultdict(int), defaultdict(int)
    members = set(cluster)    # constant-time membership checks

    # Raw scores.  Conference and author totals are identical by construction,
    # so a single running total is enough for both normalisations.
    total = 0
    for con, authors in confer_author.items():
        if con not in members:    # only conferences of this cluster count
            continue
        rank_confer[con] = 0
        for author, count in authors.items():
            rank_confer[con] += count     # every author pointing at this conference
            rank_author[author] += count  # what the author published in this conference
            total += count

    if total:    # nothing to normalise when the cluster carries no counts
        for con in rank_confer:
            rank_confer[con] /= total
        for author in rank_author:
            rank_author[author] /= total

    # Rank of every conference under this cluster: author counts weighted by
    # the author rank.  (Original author's note: unsure whether normalisation
    # is required here; in the first round the totals of different clusters
    # did not seem to differ much.)
    global_sum = 0
    for confer, authors in confer_author.items():
        for author, count in authors.items():
            weighted = count * rank_author[author]
            rank_allconfer[confer] += weighted
            global_sum += weighted
    if global_sum:
        for confer in rank_allconfer:
            rank_allconfer[confer] /= global_sum

    return rank_confer, rank_author, rank_allconfer

def AuthorityRanking(confer_author, author_confer, author_author, cluster, alpha, max_iter=5):
    """Authority Ranking of conferences and authors conditioned on one cluster.

    Conference and author ranks reinforce each other iteratively: a
    conference in the cluster is scored by the rank-weighted counts of its
    authors, and an author is scored by
    ``alpha * (conference term) + (1 - alpha) * (co-author term)``.  Both
    rank vectors are re-normalised after every update.

    Args:
        confer_author: Mapping conference -> {author: count}.
        author_confer: Mapping author -> {conference: count}.
        author_author: Mapping author -> {co-author: count}.
        cluster: Sequence of the conferences that belong to the cluster.
        alpha: Weight of the conference term in the author update.
        max_iter: Number of update rounds to run (default 5).

    Returns:
        ``(rank_confer, rank_author, rank_allconfer)``, each a
        ``defaultdict(int)``.  ``rank_allconfer`` scores every conference
        with the final author ranks and sums to 1 whenever any score is
        non-zero.  Normalisations whose total is 0 (e.g. an empty cluster)
        are skipped instead of raising ``ZeroDivisionError``.
    """
    print("Ranking...")
    rank_confer, rank_author, rank_allconfer = defaultdict(int), defaultdict(int), defaultdict(int)

    # Uniform initial ranks over the cluster's conferences and over all authors.
    tot_confer = len(cluster)
    tot_author = len(author_confer)
    for confer in cluster:
        rank_confer[confer] = 1.0 / tot_confer
    for author in author_confer:
        rank_author[author] = 1.0 / tot_author

    for iternum in range(1, max_iter + 1):
        print("iter num: {}".format(iternum))

        # -------- conference ranks from the current author ranks --------
        confer_sum = 0
        for confer in cluster:
            rankc = 0
            for author, count in confer_author[confer].items():
                rankc += count * rank_author[author]
            rank_confer[confer] = rankc
            confer_sum += rankc
        if confer_sum:
            for confer in rank_confer:
                rank_confer[confer] /= confer_sum

        # -------- author ranks --------
        # The co-author term must read the ranks of the previous round, so
        # snapshot them before rank_author is overwritten in place.
        prev_rank_author = rank_author.copy()
        author_sum = 0
        for author in rank_author:
            acsum = 0    # influence of the author's conferences
            for confer, count in author_confer[author].items():
                acsum += count * rank_confer[confer]
            aasum = 0    # influence of the author's co-authors
            for a2, count in author_author[author].items():
                aasum += count * prev_rank_author[a2]
            rank_author[author] = alpha * acsum + (1 - alpha) * aasum
            author_sum += rank_author[author]
        if author_sum:
            for author in rank_author:
                rank_author[author] /= author_sum

    # Finally, rank every conference under this cluster using the converged
    # author ranks.  Doing this inside the loop would add each round's raw
    # scores onto the previous round's normalised ones, so it runs once here.
    global_sum = 0
    for confer, authors in confer_author.items():
        for author, count in authors.items():
            weighted = count * rank_author[author]
            rank_allconfer[confer] += weighted
            global_sum += weighted
    if global_sum:
        for confer in rank_allconfer:
            rank_allconfer[confer] /= global_sum

    return rank_confer, rank_author, rank_allconfer

In [95]:
# Try Authority Ranking on the first cluster only: rc = conference ranks,
# ra = author ranks, rcg = ranks of all conferences under this cluster.
rc, ra, rcg = AuthorityRanking(CAnet, ACnet, AAnet, Cluster[0], alpha)

Ranking...
iter num: 1
iter num: 2
iter num: 3
iter num: 4
iter num: 5


# Step 2: Get new attributes for objects and cluster

In [73]:
# Scratch check of dict semantics relied on by the ranking code:
# iterating a dict yields its keys, and dict.copy() returns a separate dict,
# so assigning into the copy leaves the original unchanged.
dicta = {'aa':1, 'bb':2, 'cc':3}
for ele in dicta:
    print(ele)
dictb = dicta.copy()
dictb['aa'] = 1231313
print(dicta, dictb)

aa
bb
cc
{'aa': 1, 'bb': 2, 'cc': 3} {'aa': 1231313, 'bb': 2, 'cc': 3}


# Step 3: Adjust each object