In [1]:
import numpy as np

class Graph:
    """Directed graph stored as a flat list of nodes that are looked up by name.

    Attributes:
        Nodes: every node registered so far, in insertion order unless a
            caller re-sorts the list.
    """

    def __init__(self):
        self.Nodes = []

    def search(self, name):
        """Return the node called ``name``, creating and registering it if absent.

        Args:
            name: identifier compared with ``==`` against each node's ``name``.

        Returns:
            The first node in ``self.Nodes`` whose name matches, or a newly
            appended ``Node(name)`` when none matches.
        """
        # One pass is enough: return as soon as the node is found.
        for node in self.Nodes:
            if node.name == name:
                return node

        new_node = Node(name)
        self.Nodes.append(new_node)
        return new_node

    def addEdge(self, parent, child):
        """Record the directed edge ``parent -> child``; repeated edges are ignored.

        Both endpoints are resolved through ``search``, so nodes that do not
        exist yet are created on the fly.

        Args:
            parent: name of the source node.
            child: name of the destination node.
        """
        parent_node = self.search(parent)
        child_node = self.search(child)

        # The adjacency lists hold node objects, so membership has to be
        # tested with the objects themselves. Testing with the *name* never
        # matches and lets the same edge be stored more than once.
        if child_node not in parent_node.children:
            parent_node.children.append(child_node)

        if parent_node not in child_node.parents:
            child_node.parents.append(parent_node)

    def display(self):
        """Print one line per node listing the names of its children."""
        for node in self.Nodes:
            print(f'{node.name} links to {[child.name for child in node.children]}')


class Node:
    """A single vertex of the graph, identified by ``name``.

    Attributes:
        name: identifier supplied by the caller.
        rank: numeric score, initialised to 1.0.
        parents: list that starts empty; Graph.addEdge appends the nodes that
            link to this one.
        children: list that starts empty; Graph.addEdge appends the nodes this
            one links to.
    """

    def __init__(self, name):
        self.name, self.rank = name, 1.0
        # Separate list objects per instance, so no two nodes share adjacency.
        self.parents = list()
        self.children = list()


In [2]:
def init_graph(fname):
    """Build a Graph from a text file holding one ``parent,child`` edge per line.

    Blank lines are skipped, so a trailing empty line at the end of the file
    does not abort the load.

    Args:
        fname: path of the edge-list file; it is read as UTF-8.

    Returns:
        A Graph containing every edge in the file, with ``Nodes`` sorted by
        the integer value of each node name.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            comma-separated fields, or if a node name cannot be parsed by
            ``int`` when sorting.
    """
    graph = Graph()

    with open(fname, encoding="utf-8") as f:
        # Iterate over the file directly rather than loading all lines first.
        for line in f:
            record = line.strip()
            if not record:
                # Nothing to parse; unpacking an empty line into two names
                # would raise ValueError.
                continue
            parent, child = record.split(',')
            graph.addEdge(parent, child)

    # Names are numeric strings in the input; order nodes numerically so the
    # rank vector lines up with node ids rather than with first appearance.
    graph.Nodes.sort(key=lambda node: int(node.name))

    return graph

In [3]:
def PageRank(g, dampingFactor, num):
    """Run ``num`` rounds of PageRank over ``g``, writing results to ``node.rank``.

    In every round each node receives
    ``dampingFactor / N + (1 - dampingFactor) * sum(p.rank / len(p.children))``
    taken over its parents ``p`` (``N`` is the number of nodes); afterwards
    all ranks are rescaled so that they sum to 1.

    Note that ``dampingFactor`` weights the uniform ``1 / N`` term here, and
    ``1 - dampingFactor`` weights the contribution flowing in over links.

    Args:
        g: object exposing ``Nodes``, a list of nodes that each have ``rank``,
            ``parents`` and ``children``.
        dampingFactor: weight of the uniform term, as described above.
        num: number of rounds to run.

    Returns:
        None. Ranks are updated in place on the nodes of ``g``.
    """
    nodes = g.Nodes
    if not nodes:
        # Nothing to rank, and N == 0 would make the uniform term undefined.
        return

    uniform_share = dampingFactor / len(nodes)
    link_weight = 1 - dampingFactor

    for _ in range(num):
        # Compute the whole round from the previous round's ranks before
        # storing anything. Overwriting node.rank while iterating would let
        # later nodes read a mix of old and new (un-normalised) values and
        # make the result depend on the order of g.Nodes.
        updated = [
            uniform_share
            + link_weight * sum(parent.rank / len(parent.children) for parent in node.parents)
            for node in nodes
        ]

        total = sum(updated)
        for node, value in zip(nodes, updated):
            # A zero total means every rank is zero; leave them unscaled
            # instead of dividing by zero.
            node.rank = value / total if total else value

def get_pagerank_list(g):
    """Collect the rank of every node of ``g`` into a rounded float32 vector.

    Args:
        g: object exposing ``Nodes``, each element having a ``rank``.

    Returns:
        A 1-D ``float32`` NumPy array, in the order of ``g.Nodes``, with each
        value rounded to three decimals.
    """
    scores = []
    for vertex in g.Nodes:
        scores.append(vertex.rank)
    as_float32 = np.array(scores, dtype=np.float32)
    return as_float32.round(decimals=3)

import os
import time
if __name__ == '__main__':
    # Driver: rank every edge-list file in data_path, print the result and
    # save it under result/<dataset>/<dataset>_PageRank.txt.

    iteration = 30
    dampingFactor = 0.15
    data_path = './hw3dataset/'
    result_dir = 'result'
    pagerank_fname = '_PageRank.txt'

    # os.listdir gives no ordering guarantee; sort so that runs are repeatable.
    for file_name in sorted(os.listdir(data_path)):
        file_path = os.path.join(data_path, file_name)

        # Only regular, non-hidden files are datasets. Sub-directories and
        # dot-entries (for example the .ipynb_checkpoints folder Jupyter
        # creates) would otherwise make init_graph fail.
        if file_name.startswith('.') or not os.path.isfile(file_path):
            continue

        # Dataset label: everything before the first dot of the file name.
        fname = file_name.split('.')[0]

        graph = init_graph(file_path)

        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short durations; only the ranking itself is timed.
        start = time.perf_counter()
        PageRank(graph, dampingFactor, iteration)
        elapsed = time.perf_counter() - start

        print(fname)
        pagerank_list = get_pagerank_list(graph)
        print('PageRank:')
        print(pagerank_list)
        print("time: ", elapsed)
        print()

        path = os.path.join(result_dir, fname)
        os.makedirs(path, exist_ok=True)
        # newline=" " writes all ranks on a single space-separated line.
        np.savetxt(os.path.join(path, fname + pagerank_fname), pagerank_list, fmt='%.3f', newline=" ")


IBM
PageRank:
[0.    0.    0.038 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.01  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.011
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.038 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.004 0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.