# Imports and defs for lecture

In [None]:
# These are the standard imports for CS 111. 
# This list may change as the quarter goes on.

import os
import math
import time
import struct
import json
import pandas as pd
import networkx as nx
import numpy as np
import numpy.linalg as npla
import scipy
import scipy.sparse.linalg as spla
from scipy import sparse
from scipy import linalg
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
%matplotlib tk


In [None]:
def make_M_from_E(E, m=0.15):
    """Make the PageRank matrix from the adjacency matrix of a graph.

    Not for sparse matrices.

    Parameters
    ----------
    E : array_like, shape (n, n)
        Dense link matrix. Column sums are treated as out-degrees.
        The argument is not modified; a float copy is used internally.
    m : float, optional
        Weight given to the uniform matrix S in the final blend
        (default 0.15).

    Returns
    -------
    M : ndarray, shape (n, n)
        ``(1 - m) * A + m * S``, where A is E with every column scaled to
        sum to 1 and S is the matrix whose entries are all ``1 / n``.
    """
    # Work on a float copy so the caller's array is left untouched.
    E = np.array(E, dtype=float)
    n = E.shape[0]
    outdegree = np.sum(E, 0)

    # A column with no links is replaced by links to every other node
    # (ones everywhere except the diagonal entry).
    dangling = np.flatnonzero(outdegree == 0)
    E[:, dangling] = 1.0
    E[dangling, dangling] = 0.0

    # Scale each column so that it sums to 1.
    A = E / np.sum(E, 0)
    S = np.ones((n, n)) / n
    M = (1 - m) * A + m * S
    return M

In [None]:
np.set_printoptions(precision = 4)

# Lecture starts here

In [None]:
E2 = np.load('PageRankEG2.npy')
E2

In [None]:
#spy plot example using E2 (simple)
%matplotlib inline
plt.spy(E2)

In [None]:
# Calculating the Link Matrix A:
A = E2 / np.sum(E2,0)
A

In [None]:
# Calculate the eigenvalues/vectors for the Link Matrix A:
# d receives the eigenvalues and the columns of V the matching eigenvectors.
n = A.shape[0]
d,V = linalg.eig(A)
# Display the eigenvalues
d

In [None]:
# Create the Markov Matrix M from A by blending A (weight 1-m) with the
# uniform matrix whose entries are all 1/n (weight m).
# Alternatively, we can use the make_M_from_E() function
m = .15
M = (1-m) * A + m * np.ones((n,n))/n
M

In [None]:
# Eigen-decomposition of the Markov matrix M
d,V = linalg.eig(M)
# NOTE(review): this displays column 1 of V; presumably that is where the
# eigenvalue 1 landed for this matrix. eig does not promise any ordering,
# so check d before relying on the index.
V[:,1]

In [None]:
make_M_from_E(E2)

## *Example with Harvard webpages (500 nodes)*

In [None]:
E3 = np.load('PageRankEG3.npy')

In [None]:
E3.shape

In [None]:
%matplotlib inline
plt.spy(E3)

In [None]:
with open('PageRankEG3.nodelabels') as f:
    labels = f.read().splitlines()

In [None]:
for i in range(10):
    print(i, labels[i])

In [None]:
M = make_M_from_E(E3)

In [None]:
M.shape

In [None]:
d, V = linalg.eig(M)

In [None]:
d[0]

In [None]:
# linalg.eig returns the eigenvalues in no guaranteed order and complex
# eigenvectors with arbitrary sign, so select the eigenvector of the
# largest eigenvalue explicitly, keep its real part, and rescale it so the
# entries sum to 1. This makes the entries positive scores, so a
# descending sort ranks the most important pages first.
v = V[:, np.argmax(d.real)].real
v = v / np.sum(v)

In [None]:
# argsort orders indices by increasing entry of v; [::-1] reverses that,
# so perm lists the indices from the largest entry of v to the smallest.
perm = np.argsort(v)[::-1]

In [None]:
perm[:10]

In [None]:
for i in range(10):
    print(i, labels[perm[i]])

## Google Web small subset example (>900k pages)

In [None]:
E = sparse.load_npz('webGoogle.npz')

In [None]:
E.shape

In [None]:
# Too big; won't plot in a comprehensible way
%matplotlib inline
plt.spy(E)

In [None]:
# Sparse eigensolver: with default arguments it computes only a few
# eigenpairs (k=6, largest magnitude) instead of the full decomposition.
# NOTE(review): this is applied to the raw link matrix E, not to a
# column-scaled PageRank matrix; confirm that is the intent.
d, V = spla.eigs(E)

In [None]:
d

## Iterative Method

In [None]:
E = np.load('PageRankEG1.npy')
E

In [None]:
M = make_M_from_E(E)
M

In [None]:
# Iterative method begins here:
# Set x with an initial value
x = np.ones(4)/4
x

In [None]:
# Power Method
# Repeatedly multiply the current vector by M and rescale it to unit
# 2-norm (the default of npla.norm), printing the iterate at every step.
for i in range(25):
    x = M @ x
    x = x / npla.norm(x)
    print(i, x)

In [None]:
# Compare with the scipy eigenvalue function (linalg here is scipy.linalg):
d, V = linalg.eig(M)
# NOTE(review): assumes column 0 belongs to the dominant eigenvalue; eig
# does not guarantee an ordering, and the vector may differ from the
# power-method x by a sign.
V[:,0].real