## Q2.8 ##

### a) ###

In [75]:
import numpy as np
import os
import scipy.io as sio

# Helper functions.
def norm(x):
    """Return the Euclidean (2-) norm of ``x``.

    Parameters
    ----------
    x : array_like
        Real or complex values; every entry contributes, whatever the shape.

    Returns
    -------
    numpy floating scalar
        ``sqrt(sum(|x_i| ** 2))``, which is real and non-negative.
    """
    # Square the magnitudes rather than the raw entries: for complex input
    # ``x ** 2`` differs from ``|x| ** 2``, so a plain sum of squares can come
    # out complex (or even zero for a non-zero vector). Real input gives the
    # same value as before.
    return np.sqrt(np.sum(np.abs(x) ** 2))

# Read the matrix stored under key 'J' of the MATLAB file and cast it to float32.
data_folder = 'PS02_dataSet'
adjacency_path = os.path.join(data_folder, 'pagerank_adj.mat')
J = np.float32(sio.loadmat(adjacency_path)['J'])

# Divide every column of J by its own total to obtain A, then confirm that
# each column of A adds up to 1 within a small tolerance.
max_allowable_err = 1E-5
A = J / np.sum(J, axis=0)
A_col_sums = np.sum(A, axis=0)
assert all(
    np.abs(col_sum - 1.0) < max_allowable_err for col_sum in A_col_sums
), 'No way!'
print('Each column in A sums to 1.')

Each column in A sums to 1.


### b) ###

In [134]:
import plotly.offline as py
import plotly.graph_objs as go
# Initialise Plotly's notebook mode once, before the py.iplot calls further down.
py.offline.init_notebook_mode(connected=True)

def init_x(length):
    """Build the uniform start vector: ``length`` entries, each 1 / length.

    The divisor is cast to ``np.float32`` before the division.
    """
    uniform = np.ones(length)
    return uniform / np.float32(length)

# Plain power iteration: apply A, rescale to unit 2-norm, and record the log
# of ||A x - x|| after every step. The leading 0 in the error list is a
# placeholder entry for iteration 0.
power_iters = 14
identity = np.identity(len(A))  # not used in this loop; read by later cells
x_1 = init_x(len(A))
x_1 /= norm(x_1)
power_iter_errors = [0]
for _ in range(power_iters):
    y = A @ x_1
    x_1 = y / norm(y)
    A_x = A @ x_1
    eigenval = x_1.T @ A_x
    err = np.log(norm(A_x - x_1))
    power_iter_errors.append(err)

# Draw the recorded power-iteration log-errors against their list index.
power_iter_log_errors = np.array(power_iter_errors).astype(np.float32)
trace1 = go.Scatter(
    name='Vanilla Power Iteration',
    mode='lines',
    x=np.arange(len(power_iter_errors)),
    y=power_iter_log_errors,
)

data = [trace1]
py.iplot(data, filename='power-iteration-errors')


Casting complex values to real discards the imaginary part



### c) ###

In [135]:
# Shifted inverse power iteration: the shift is fixed, so the inverse of
# (A - sigma * I) is formed once up front and reused on every pass.
sigma = 0.99
shifted_inverse = np.linalg.inv(A - sigma * identity)
x_2 = init_x(len(A))
x_2 /= norm(x_2)
shifted_power_iter_errors = [0]
for _ in range(power_iters):
    y = shifted_inverse @ x_2
    x_2 = y / norm(y)
    A_x = A @ x_2
    err = np.log(norm(A_x - x_2))
    shifted_power_iter_errors.append(err)

# Rayleigh quotient iteration algorithm.
# This one gives the lowest error so we will use its result
# for our PageRank analysis.
# NOTE(review): the sums printed by this loop settle at about +/-2.6458 and the
# ranking computed from x_3 gives seven pages an identical score of ~1/7 -
# confirm that this is the PageRank vector that was intended.
# The leading 0 in the error list is a placeholder entry for iteration 0.
rayleigh_iter_errors = [0]
# Starting shift; it is only replaced by the Rayleigh quotient from i == 2 on.
sigma = 0.99
x_3 = init_x(len(A))
x_3 /= norm(x_3)
for i in range(power_iters):
    if i > 1:
        # A_x still holds A @ x_3 from the end of the previous pass, so this is
        # the quotient (x^T A x) / (x^T x) for the current x_3.
        numerator = np.matmul(x_3.T, A_x)
        sigma = numerator / np.matmul(x_3.T, x_3)
    # The shift may change on every pass, so the inverse is rebuilt each time.
    shifted_inverse = np.linalg.inv(A - sigma * identity)
    y = np.matmul(shifted_inverse, x_3)
    x_3 = y / norm(y)
    # Prints the sum of the entries of x_3 on every pass.
    print(np.sum(x_3))
    A_x = np.matmul(A, x_3)
    # Log of norm(A x - x), appended once per pass.
    err = np.log(norm(A_x - x_3))
    rayleigh_iter_errors.append(err)

# Plot the shifted-inverse and Rayleigh log-errors alongside the trace that
# was built for the power iteration.
def _log_error_trace(errors, label):
    """Build a line trace of ``errors`` (cast to float32) against their index."""
    return go.Scatter(
        x=np.arange(len(errors)),
        y=np.array(errors).astype(np.float32),
        mode='lines',
        name=label,
    )


trace2 = _log_error_trace(shifted_power_iter_errors, 'Shifted Inverse')
trace3 = _log_error_trace(rayleigh_iter_errors, 'Rayleigh')
data = [trace1, trace2, trace3]
py.iplot(data, filename='all-algos-iteration-errors')

(9.29175448824336+0j)
(9.743079840789322+0j)
(-11.771669880134658+0j)
(8.68909061774447+0j)
(-7.94889825984543+0j)
(7.8916627256569+0j)
(-2.671696759426164+0j)
(3.0251496294347975+0j)
(2.5697279549972256+0j)
(-2.6400031490969655+0j)
(2.645751310551427+0j)
(-2.6457513110645907+0j)
(2.645751311064591+0j)
(-2.6457513110645903+0j)



Casting complex values to real discards the imaginary part


Casting complex values to real discards the imaginary part



### d) ###

In [136]:
k = 10
print(np.sum(x_3))

# Scores: magnitudes of x_3 rescaled so that they add up to 1.
x = np.abs(x_3)
x = x / np.sum(x)

# argsort is ascending, so the tail holds the largest scores.
sorted_indices = x.argsort()
max_indices = sorted_indices[-k:][::-1]
min_indices = sorted_indices[:k][::-1]


def _print_ranking(title, indices):
    """Print ``title`` and then the first ``k`` entries of ``indices``.

    Each line shows the 1-based page index and its score taken from ``x``.
    """
    print(title)
    for position in range(k):
        idx = indices[position]
        print('Page Index: ' + str(1 + idx) + ', Score: ' + str(x[idx]))


# Top scores.
_print_ranking('The top 10 page indices along with PageRank scores:', max_indices)

print(' ')
print(' ')

# Bottommost scores.
_print_ranking('The bottom 10 page indices along with PageRank scores:', min_indices)

(-2.6457513110645903+0j)
The top 10 page indices along with PageRank scores:
Page Index: 2555, Score: 0.14285714285714288
Page Index: 2552, Score: 0.14285714285714285
Page Index: 2553, Score: 0.14285714285714285
Page Index: 2554, Score: 0.14285714285714285
Page Index: 2556, Score: 0.14285714285714285
Page Index: 2512, Score: 0.14285714285714285
Page Index: 2557, Score: 0.14285714285714285
Page Index: 1612, Score: 1.351317062357362e-34
Page Index: 443, Score: 6.227563127878136e-35
Page Index: 437, Score: 6.168253007762668e-35
 
 
The bottom 10 page indices along with PageRank scores:
Page Index: 316, Score: 2.411455379057349e-120
Page Index: 10, Score: 2.3360406784145444e-120
Page Index: 93, Score: 2.206822379873865e-120
Page Index: 4, Score: 2.0841810929953754e-120
Page Index: 1, Score: 1.5245926609905587e-120
Page Index: 400, Score: 1.3724006377271358e-120
Page Index: 5, Score: 1.2974347307150301e-120
Page Index: 20, Score: 1.0433562301601864e-120
Page Index: 48, Score: 6.744023353664