In [102]:
import random as random
import time as time
import heapq as heapq

class HyperNode:
    def __init__(self,id,degree,edges):
        self.id = id
        self.degree = degree
        self.edges = edges

    def __lt__(self,other):
        return self.id<other.id
    
    def print(self):
        print("id:" + str(self.id))
        print("degree:" + str(self.degree))
        print("edges: "+ str(self.edges))

class HyperEdge:
    def __init__(self,id,degree,nodes):
        self.id = id
        self.degree = degree 
        self.nodes = nodes
        
    def print(self):
        print("id:" + str(self.id))
        print("degree:" + str(self.degree))
        print("edges: "+ str(self.nodes))

# # return: mp_node,mp_edge
# def load_data(path):
#     # input:
#     # path: data set path
    
#     # return: 
#     # mp_node: dict of partition node n_id:HyperNode
#     # mp_edge: dict of partition edge e_id:HyperEdge
    
#     mp_node = {}
#     mp_edge = {}
#     with open(path,'r') as f:
#         for line in f:
#             if(line[0]=='#' or line=='\n') : continue
#             data = [int(i) for i in line[0:-1].split(" ")]
#             n_id = data[0]
#             n_degree = data[1]
#             n_edges = set(data[2:])
#             if mp_node.get(n_id) == None : 
#                 mp_node[n_id] = HyperNode(n_id,n_degree,n_edges)
#                 tmp[n_id] = HyperNode(n_id,n_degree,n_edges)
#             for edge in n_edges:
#                 if mp_edge.get(edge) == None:
#                     mp_edge[edge] = HyperEdge(edge,0,set())
#                 mp_edge[edge].degree += 1
#                 mp_edge[edge].nodes.add(n_id)
#     return mp_node,mp_edge

# return: mp_node,mp_edge
def load_data(path):
    # input:
    # path: data set path
    
    # return: 
    # mp_node: dict of partition node n_id:HyperNode
    # mp_edge: dict of partition edge e_id:HyperEdge
    
    mp_node = {}
    mp_edge = {}
    with open(path,'r') as f:
        for line in f:
            n_id,e_id = line[0:-1].split(" ")
            if mp_node.get(n_id) == None : 
                mp_node[n_id] = HyperNode(n_id,0,set())
            if mp_edge.get(e_id) == None :
                mp_edge[e_id] = HyperEdge(e_id,0,set())
                
            mp_node[n_id].degree += 1
            mp_edge[e_id].degree += 1
            mp_node[n_id].edges.add(e_id)
            mp_edge[e_id].nodes.add(n_id)
    return mp_node,mp_edge

# return: None
def recoder(part_node,mp_node,mp_edge,path):
    # input:
    # part_node: result of partition node
    # mp_node: dictionary of hyper node 
    # mp_edge: dictionary of hyper edge
    # path: path to save record result
    
    dic = {}
    tot = 0
    for par in part_node:
        for node in par:
            dic[node.id] = len(dic)

    print(path+"/record.txt")
    with open(path+"/record.txt",'w') as f:
        for par in part_node:
            for node in par:
                for edge in node.edges:
                    f.write(str(dic[node.id])+" "+str(edge)+"\n")
# return: part_node,part_edge

def solve(p,mp_node,mp_edge,prop = 1.0,debug = False):
    # input : 
    # p: partition number 
    # mp_node: dict of partition nodes  
    # mp_edge: dict of partition edges
    # prob: accelerate factory to add node
    # debug: print debug infomation
    
    # return :
    # part_node: list of partition node set 
    # part_edge: list of partition edge set
    
    random.seed(19990320)
    node_number = len(mp_node)
    cur_p = 0
    maxi_cap = node_number/p + 1
    part_node = [set() for i in range(p)]
    part_edge = [set() for i in range(p)]
    mp_eval = {i:0 for i in mp_node.keys()}
    
    ave_hyperedge_degree = 0
    for edge in mp_edge.values():
        ave_hyperedge_degree += edge.degree/len(mp_edge)

    cnt = 0
    search_set = {}
    prop = 1
    k = 3
    while len(mp_node)!=0:
        cnt += 1
        if cnt % 100 == 0 and debug : print (cnt)
            
        add_node = []
        select_node = []
        
        for node in mp_node.values():   
            if mp_eval[node.id]/node.degree >= prop:
                add_node.append(node)
                continue
            if  len(select_node) < k:
                heapq.heappush(select_node,(mp_eval[node.id],node))
            else :
                if mp_eval[node.id] > select_node[0][0] :
                    heapq.heappop(select_node)
                    heapq.heappush(select_node,(mp_eval[node.id],node))
        
        
        for rank,node in select_node : 
            add_node.append(node)
#         print(add_node)
        for par_node in add_node:
            if len(part_node[cur_p]) >= maxi_cap :
                break
            part_node[cur_p].add(par_node)
            for edge in par_node.edges:
                if edge not in part_edge[cur_p]:
                    part_edge[cur_p].add(edge)
                    if mp_edge[edge].degree > 2*ave_hyperedge_degree : continue
                    for node in mp_edge[edge].nodes:
                        if mp_node.get(node) == None : continue
                        search_set[node] = mp_node[node]
                        mp_eval[node] += 1
            del mp_node[par_node.id] 
            search_set[par_node.id] = 0
            del search_set[par_node.id]

        if len(part_node[cur_p]) >= maxi_cap : # next partition pre-process
            for key in mp_eval.keys():
                mp_eval[key] = 0
            cur_p += 1   
            
    return part_node,part_edge





In [103]:
path = "./data/wiki/wiki.txt"         
p = 8
mp_node,mp_edge = load_data(path)
time_beg = time.time()
part_node,part_edge = solve(p,mp_node,mp_edge)  
time_end = time.time()
mp_node,mp_edge = load_data(path)
recoder(part_node,mp_node,mp_edge,"./data/wiki")

print("runtime:",int((time_end-time_beg)*1000),"ms")
k_1 = sum([len(i) for i in part_edge]) - len(mp_edge)
print("p:"+str(p)+" k-1:"+str(k_1))

    

./data/wiki/record.txt
runtime: 1969 ms
p:8 k-1:12929


In [6]:
path = "./data/wiki/wiki.txt"
p = 1
while p<=32:
    mp_node,mp_edge = load_data(path)
    p *= 2 
    time_beg = time.time()
    part_node,part_edge = solve(p,mp_node,mp_edge)  
    time_end = time.time()
    
    print("runtime:",int((time_end-time_beg)*1000),"ms")
    for key,value in tmp.items():
        mp_node[key] = value
    k_1 = sum([len(i) for i in part_edge]) - edge_number
    print("p:"+str(p)+" k-1:"+str(k_1))

    
        
    

        

AttributeError: 'NoneType' object has no attribute 'edges'

In [23]:
tmp_node = [set() for i in range(p)]

for par in range(len(part_edge)):
    for i in part_node[par]: tmp_node[par].add(i.id)
    cnt = 0
    print("edges:"+str(len(part_edge[par])))
    print("nodes:"+str(len(part_node[par])))
    for edge in part_edge[par]:
        for node in mp_edge[edge].nodes:
#             print(node)
#             print(mp_node[node])
            if node in tmp_node[par]:
                cnt += 1
    cur_p += 1
    print(cnt)
    

edges:56433
nodes:5653
277912
edges:25730
nodes:5653
59056
edges:15618
nodes:5653
28610
edges:7641
nodes:5653
14208
edges:7349
nodes:5653
12455
edges:7531
nodes:5653
11444
edges:7756
nodes:5653
10089
edges:8023
nodes:5653
9310
edges:7767
nodes:5653
8748
edges:7984
nodes:5642
8405


In [4]:
k_1 = sum([len(i) for i in part_edge]) - edge_number
print(k_1)

30965
