# Fast Pagerank Implementation (Moler PageRank)
I needed a fast PageRank for the Wikisim project; it has to be fast enough to run in real time on relatively small graphs. I started by optimizing the networkx implementation; however, I found a very nice algorithm by **Cleve Moler** which takes full advantage of sparse matrix operations. 
Two implementations are provided, both inspired by the sparse fast solutions given in **Cleve Moler**'s book, [*Experiments with MATLAB*](http://www.mathworks.com/moler/index_ncm.html). The power method is much faster, with enough precision for our task. Our benchmarks show that this implementation is orders of magnitude **faster than the networkx** implementation.

## Personalized Pagerank
I also modified the algorithm a little bit, to be able to calculate **personalized Pagerank** as well. 

## Input Format
The input is a 2d array; each row of the array is an edge of the graph, e.g. $[[a,b], [c,d]]$, where $a$ and $b$ are node numbers. The **personalization vector** is a **teleporting vector** of preferences (or customization).
## Comparison with Networkx
Both implementations (Exact Solution and Power Method) are much faster than their corresponding methods in NetworkX.


In [9]:
"""Two "fast" implementations of PageRank.

Python implementations of the Matlab originals in Cleve Moler, Experiments with MATLAB.
"""
# uncomment
from __future__ import division

import numpy as np
import scipy as sp
import scipy.sparse as sprs
import scipy.sparse.linalg
import scipy.spatial


__author__ = "Armin Sajadi"
__copyright__ = "Copyright 215, The Wikisim Project"
__credits__ = ["Armin Sajadi"]
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Armin Sajadi"
__email__ = "sajadi@cs.dal.ca"
__status__ = "Development"


def create_csr(Z):
    """Build a sparse adjacency matrix from an edge list.

    Args:
        Z: edge list as a 2d array-like of non-negative integer node ids,
            one edge per row, e.g. ``[[2, 0], [1, 2], [2, 1]]``.  A row
            ``[a, b]`` contributes to entry ``(a, b)`` of the result.  Only
            the first two columns are read.

    Returns:
        An ``n x n`` ``scipy.sparse.csr_matrix`` where ``n`` is the largest
        node id plus one (``0 x 0`` for an empty edge list).  Every listed
        edge contributes 1.0, so an edge that is listed k times ends up
        with weight k (duplicates are summed by the constructor).
    """
    edges = np.asarray(Z)
    if edges.size == 0:
        # No edges means no nodes can be inferred: return an empty graph
        # instead of failing on max() of an empty sequence.
        return sprs.csr_matrix((0, 0))

    rows = edges[:, 0]
    cols = edges[:, 1]
    # Vectorised max; the builtin max() would iterate element by element.
    n = int(max(rows.max(), cols.max())) + 1
    return sprs.csr_matrix((np.ones(rows.shape), (rows, cols)), shape=(n, n))

def moler_pagerank_sparse(G, p=0.85, pv=None):
    """Compute (personalized) PageRank by solving one sparse linear system.

    Args:
        G: square sparse adjacency matrix; entry ``(i, j)`` is non-zero when
            there is an edge from node ``i`` to node ``j``.
        p: damping factor, i.e. the probability of following an out-link.
            The teleport probability is ``1 - p``.
        pv: personalization (teleport) weights, one per node.  It does not
            need to be normalized.  ``None`` means uniform teleporting.

    Returns:
        1-d array of PageRank scores that sums to 1.

    Raises:
        ValueError: if ``pv`` does not have one entry per node, or if its
            entries sum to zero.
    """
    n = G.shape[0]
    if n == 0:
        return np.zeros(0)

    # In Moler's algorithm, G[i, j] represents the existence of an edge
    # from node j to i, while the input uses the opposite convention.
    G = G.T

    # After the transpose, column sums are the out-degrees of the nodes.
    c = np.asarray(G.sum(axis=0), dtype=float).reshape(-1)
    k = c.nonzero()[0]
    # D holds 1/out-degree on the diagonal; dangling nodes keep a zero.
    # The float literal keeps this a true division for integer matrices.
    D = sprs.csr_matrix((1.0 / c[k], (k, k)), shape=(n, n))

    if pv is None:
        pv = np.ones(n)
    pv = np.asarray(pv, dtype=float).reshape(n)
    total = pv.sum()
    if total == 0:
        raise ValueError("personalization vector must not sum to zero")
    e = n * (pv / total)

    # Solve (I - p*G*D) x = e.  The scale of e is irrelevant because the
    # solution is normalized afterwards.
    A = (sprs.eye(n, format="csc") - p * G.dot(D)).tocsc()
    x = sprs.linalg.spsolve(A, e)

    return x / x.sum()
def moler_pagerank_sparse_power(G, p=0.85, tol=1e-6, pv=None, max_iter=None):
    """Compute (personalized) PageRank with the sparse power method.

    Args:
        G: square sparse adjacency matrix; entry ``(i, j)`` is non-zero when
            there is an edge from node ``i`` to node ``j``.
        p: damping factor, i.e. the probability of following an out-link.
            The teleport probability is ``1 - p``.
        tol: convergence threshold on the Euclidean norm of the change
            between two successive iterates.
        pv: personalization (teleport) weights, one per node.  It does not
            need to be normalized.  ``None`` means uniform teleporting.
        max_iter: optional cap on the number of iterations.  ``None`` (the
            default) iterates until ``tol`` is met, however long it takes.
            When the cap is hit the current estimate is returned.

    Returns:
        1-d array of PageRank scores that sums to 1.

    Raises:
        ValueError: if ``pv`` does not have one entry per node, or if its
            entries sum to zero.
    """
    n = G.shape[0]
    if n == 0:
        return np.zeros(0)

    # In Moler's algorithm, G[i, j] represents the existence of an edge
    # from node j to i, while the input uses the opposite convention.
    G = G.T

    # After the transpose, column sums are the out-degrees of the nodes.
    c = np.asarray(G.sum(axis=0), dtype=float).reshape(-1)
    k = c.nonzero()[0]
    # D holds 1/out-degree on the diagonal; dangling nodes keep a zero.
    # The float literal keeps this a true division for integer matrices.
    D = sprs.csr_matrix((1.0 / c[k], (k, k)), shape=(n, n))

    if pv is None:
        pv = np.ones(n)
    pv = np.asarray(pv, dtype=float).reshape(n)
    total = pv.sum()
    if total == 0:
        raise ValueError("personalization vector must not sum to zero")
    pv = pv / total
    e = n * pv

    # z[j] is the teleport weight of column j: (1 - p)/n for nodes with
    # out-links, 1/n for dangling nodes (which always teleport).
    z = ((1 - p) * (c != 0) + (c == 0)) / n
    link_matrix = p * G.dot(D)

    # Start from the teleport distribution and apply
    # x <- p*G*D*x + e*(z.x) until successive iterates agree within tol.
    x = pv.copy()
    oldx = np.zeros(n)
    iterations = 0
    while np.linalg.norm(x - oldx) > tol:
        if max_iter is not None and iterations >= max_iter:
            break
        oldx = x
        x = link_matrix.dot(x) + e * z.dot(x)
        iterations += 1

    return x / x.sum()

# Testing the algorithm

In [10]:
#temp
import networkx as nx
import random
import timeit
import numpy as np
# Randomized cross-check of both Moler implementations against NetworkX.
# NOTE(review): nx.to_scipy_sparse_matrix, nx.pagerank_numpy and
# nx.pagerank_scipy come from older NetworkX releases -- confirm they exist
# in the installed version before running this cell.
min_size = 100
max_size = 300
m = 10  # number of random graphs to try

passed = True
for i in range(m):
    G_size = random.randint(min_size, max_size)
    p = random.uniform(0.1, 0.5)  # edge probability of the random graph
    G = nx.fast_gnp_random_graph(G_size, p, seed=None, directed=True)
    A = nx.to_scipy_sparse_matrix(G)

    # Random, unnormalized personalization weights; ex is the same data
    # keyed by node id, which is the form NetworkX expects.
    e = np.random.random(G_size)
    ex = dict(enumerate(e.reshape(-1)))

    # Read the scores in G.nodes() order (the row order of A) instead of
    # relying on the iteration order of the returned dicts.
    Xnx = nx.pagerank_numpy(G, alpha=0.85, personalization=ex)
    Xnx = np.array([Xnx[node] for node in G.nodes()])
    Ynx = nx.pagerank_scipy(G, alpha=0.85, tol=1e-06, personalization=ex)
    Ynx = np.array([Ynx[node] for node in G.nodes()])

    Xml = moler_pagerank_sparse(A, p=0.85, pv=e)
    Yml = moler_pagerank_sparse_power(A, p=0.85, tol=1e-06, pv=e)

    # Both implementations are compared against the exact NetworkX result.
    good_direct = all(np.isclose(Xnx, Xml, rtol=1e-5, atol=0, equal_nan=False))
    good_power = all(np.isclose(Xnx, Yml, rtol=1e-5, atol=0, equal_nan=False))
    if not good_direct or not good_power:
        passed = False
        # Parenthesized single-argument print works on Python 2 and 3.
        print("Failed")
        break
if passed:
    print("Passed")


Passed
