In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.preprocessing import normalize

## Question1

In [118]:
def pagerank(G, d = 0.85, max_iter = 100, personalized = None):
    # Parameters:
    #     G is the input graph 
    #     d is the dampling factor
    #     max_iter is the maximum iterations 
    #     personalized is the personalized dictionary
    # Return:
    #     nodes' pageranks.
    
    # if not directed, transform it to directed graph
    G = G.to_directed()
    N = G.number_of_nodes()
    
    # the order of nodes
    node_list = np.arange(1, N+1, 1, dtype=int)
    
    # normalize graph matrix by columns
    A = nx.to_numpy_matrix(G, nodelist=node_list)
    A = np.array(A)
    A_normalized = normalize(A, axis=0, norm='l1')
    
    # calculate transition matrix and initial vector
    v = np.ones(N) / N
    v_0 = v
    P = ((1 - d)/ N + d * A_normalized)

    # iterate 
    for i in range(max_iter):
        v_curr = v
        v = ((1 - d) * v_0 + d * P @ v_curr)
        # check convergence
        err = sum(abs(v[n] - v_curr[n]) for n in range(v.shape[0]))
        if err < N * 1.0e-6:
            return v
        
    raise Exception("PageRank didn't converge")

# Question2

In [3]:
f_hollins = open("hollins/hollins.dat", "r")
data_hollins = f_hollins.readlines()
edgelist_hollins = pd.DataFrame(columns=['source', 'target'])

for edge in data_hollins[6013:]:
    edge = edge.split(' ')
    edge[0] = int(edge[0])
    edge[1] = int(edge[1])
    edgelist_hollins.loc[len(edgelist_hollins.index)] = [edge[0], edge[1]]

In [4]:
G_hollin = nx.from_pandas_edgelist(edgelist_hollins,
                                   source='source',
                                   target='target', 
                                   edge_attr=None, 
                                   create_using=nx.DiGraph())

In [120]:
pagerank_hollins = pagerank(G_hollin)
pr = nx.pagerank(G_hollin, alpha=0.85)
print(max(pagerank_hollins))
print(max(pr.values()))

0.011258540172189512
0.020209640931177636


0.020209640931177636
