## Problem 1

In [128]:
import numpy as np
import scipy as sp
from scipy import sparse
from scipy import linalg

def adjacency(filename, N):
    vals = []
    with open(filename, 'r') as myfile:
        for line in myfile:
            vals.append(line.strip().split())
    dok_matrix = sp.sparse.dok_matrix((N,N))
    for item in vals:
        try:
            dok_matrix[int(item[0]),int(item[1])] = 1
        except:
            pass
    return sp.sparse.csr_matrix.toarray(dok_matrix)
    
A = np.asarray(adjacency('matrix.txt',8))
print(A)
%timeit adjacency('matrix.txt',8)

[[ 0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  1.  0.  0.  0.  1.  0.]
 [ 1.  0.  0.  0.  0.  1.  1.  0.]
 [ 1.  0.  0.  0.  0.  0.  1.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.]]
1000 loops, best of 3: 411 µs per loop


## Problem 2

In [85]:
def get_D(array):
    """Returns a vector whose elements would make up the diagonal entries of D"""
    D = np.sum(array, axis=1)
    D = np.diag(D)
    return D

def get_Am(array):
    """Modifies A to remove any sinks"""
    Am = np.copy(array)
    n = np.shape(array)[0]
    for i in range(n):
        if ((Am[i,:]) == (np.zeros((1,n)))).all():
            Am[i,:] += 1
    return Am

def get_K(array):
    """Returns the matrix K, callst the functions get_D and get_Am"""
    Am = get_Am(array)
    D_inv = np.linalg.inv(get_D(Am))
    K1 = np.dot(D_inv, Am)
    K = K1.T
    return K

print(get_K(A)) 

[[ 0.          1.          0.125       0.33333333  0.33333333  0.5         1.
   1.        ]
 [ 0.          0.          0.125       0.          0.          0.          0.
   0.        ]
 [ 0.          0.          0.125       0.33333333  0.          0.          0.
   0.        ]
 [ 0.          0.          0.125       0.          0.          0.          0.
   0.        ]
 [ 0.          0.          0.125       0.          0.          0.          0.
   0.        ]
 [ 0.          0.          0.125       0.          0.33333333  0.          0.
   0.        ]
 [ 0.          0.          0.125       0.33333333  0.33333333  0.5         0.
   0.        ]
 [ 1.          0.          0.125       0.          0.          0.          0.
   0.        ]]


## Problem 3

In [163]:
def iterate_rank(array, N=None, d=0.85, tol=1E-5):
    """Uses an iterative method to obtain the page ranks"""
    if N == None:
        m, n = np.shape(array)
        size = m
        K = get_K(array)
    else:
        K = get_K(array[:N,:N])
        size = N
    O = np.ones(size)
    p_old = O/size
    diff = 1
    iterations = 0
    while diff > tol:
        p_new = np.dot(d*K, p_old.T) + ((1-d)/size)*O
        diff = np.linalg.norm(p_new-p_old)
        p_old = p_new
        iterations += 1
    print("Converged after " + str(iterations) + " iterations")
    return p_new

In [164]:
#Test Cases

print(iterate_rank(A))
print(iterate_rank(A,3))

Converged after 64 iterations
[ 0.43868966  0.02171029  0.02786154  0.02171029  0.02171029  0.02786154
  0.04585394  0.39460246]
Converged after 9 iterations
[ 0.48052121  0.25973939  0.25973939]


## Problem 4

In [161]:
def eigenvalue_rank(array, N=None, d=0.85, tol=1E-5):
    """Uses an iterative method to obtain the page ranks"""
    if N == None:
        m, n = np.shape(array)
        size = m
        K = get_K(array)
    else:
        K = get_K(array[:N,:N])
        size = N
    E = np.ones((size,size))
    B = d*K+((1-d)/size)*E
    w, v = sp.linalg.eig(B)
    return (v[:,0]/sum(v[:,0]))

In [162]:
eigenvalue_rank(A)

array([ 0.43869288,  0.02171029,  0.02786154,  0.02171029,  0.02171029,
        0.02786154,  0.04585394,  0.39459924])

## Problem 5

In [175]:
def team_adjacency(filename):
    winners = []
    losers = []
    with open(filename, 'r') as file:
        file.readline()
        games = file.readlines()
    for game in games:
        opponents = game.split(',')
        winners.append(opponents[0])
        losers.append(opponents[1].strip())
    teams = np.column_stack((winners,losers))
    m = np.shape(teams)[0]
    n = np.shape(teams)[1] 
    school, pos = np.unique(teams, return_inverse=True)
    pos = pos.reshape((m,n))
        
    #Set up the network 
    k = len(school)
    network = np.zeros([k,k], dtype=float)
    i = 0
    while i < m:
        network[pos[i,1],pos[i,0]] = 1
        i += 1
    return network, school

In [192]:
from scipy.stats import rankdata

ncaa2013 = (team_adjacency('ncaa2013.csv')[0])
pagerank =iterate_rank(ncaa2013, d=0.7)
ranked = rankdata(pagerank)
maximum = np.argmax(ranked)
school = team_adjacency('ncaa2013.csv')[1]
for i in range(len(ranked)):
    maximum = np.argmax(ranked)
    print(str(school[maximum]) + ": PageRank " + str(pagerank[maximum]))
    ranked = np.delete(ranked, maximum)
    school = np.delete(school, maximum)
    

Converged after 12 iterations
Duke: PageRank 0.009653830227
Butler: PageRank 0.00853460116879
Louisville: PageRank 0.00128053704598
Illinois: PageRank 0.00171226615658
Indiana: PageRank 0.00124628485562
Miami FL: PageRank 0.00508505805005
Syracuse: PageRank 0.00675028050823
Ohio St: PageRank 0.00396105599936
Michigan St: PageRank 0.00187677986107
Kansas: PageRank 0.00130490715983
Minnesota: PageRank 0.00187677986107
Michigan: PageRank 0.00508505805005
Georgetown: PageRank 0.00214122547556
Wisconsin: PageRank 0.00208283799786
St Louis: PageRank 0.00202182927555
New Mexico: PageRank 0.00124258770517
Virginia: PageRank 0.00171619708327
Marquette: PageRank 0.00215492434238
Notre Dame: PageRank 0.00117609873044
NC State: PageRank 0.00201352435388
VA Commonwealth: PageRank 0.00195501359061
Oklahoma St: PageRank 0.00151167359824
Florida: PageRank 0.00308209525487
Villanova: PageRank 0.00126567346679
Temple: PageRank 0.0032859331971
Arizona: PageRank 0.00610642508359
UNLV: PageRank 0.002644483