# Search Engine Based on Counting In-Links

In [2]:
import numpy as np

In [3]:
# Adjacency matrix of the 6-node example graph: a 1 at (row i, column j)
# records a link between nodes i and j, a 0 records no link.  The matrix is
# not symmetric, so the links are directed.
A = np.array(
    [
        [0, 1, 0, 0, 1, 0],
        [1, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 1],
        [0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0],
    ]
)
A

array([[0, 1, 0, 0, 1, 0],
       [1, 0, 1, 0, 1, 0],
       [0, 0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0, 1],
       [0, 1, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [7]:
# Count out-links: the column sums of A
def count_out_links(A):
    """Return the total of every column of ``A``.

    The matrix is summed along axis 0, so entry ``j`` of the result is the
    sum of column ``j``.  For a 0/1 adjacency matrix that is the number of
    links recorded in that column.

    Args:
        A: Array exposing ``.sum(axis=0)``, e.g. a 2-D NumPy array.

    Returns:
        The column sums of ``A``.
    """
    return A.sum(axis=0)
# Show the column sums of the example matrix A.
count_out_links(A)

array([1, 2, 2, 1, 3, 1])

In [14]:
# Calculate Probability Matrix
def compute_P(A):
    """Build the column-normalised transition matrix for adjacency matrix ``A``.

    Column ``j`` of ``A`` is divided by its out-link count (the value
    ``count_out_links`` returns for that column), so every column with a
    non-zero count sums to 1.  A column whose count is zero (a dangling
    node) cannot be normalised; it is filled with the uniform value
    ``1 / A.shape[0]`` so the matrix stays column-stochastic and a random
    walk never produces NaN.

    Args:
        A: 2-D NumPy adjacency matrix.

    Returns:
        A new float array with the same shape as ``A``; ``A`` itself is
        not modified.
    """
    out_links = count_out_links(A)
    dangling = out_links == 0
    # Swap zero counts for 1 before inverting so nothing is divided by zero;
    # those columns are overwritten with the uniform value below.
    scale = 1 / np.where(dangling, 1, out_links)
    # Broadcasting multiplies column j by scale[j] -- the same result as
    # A @ np.diag(scale) without materialising the diagonal matrix.
    P = np.multiply(A, scale)
    n_rows = A.shape[0]
    if n_rows and dangling.any():
        P[:, dangling] = 1 / n_rows
    return P
# Transition matrix of the example graph; used by the single-step demo below.
P = compute_P(A)

In [15]:
def random_walk_one_step(P, x):
    """Advance the distribution ``x`` by one step of the walk.

    Args:
        P: Transition matrix.
        x: Current distribution vector.

    Returns:
        The matrix-vector product ``P @ x``.
    """
    return P @ x
# Start from the uniform distribution over the 6 nodes and take a single step.
x =  np.ones(6)/6
random_walk_one_step(P,x)

array([0.13888889, 0.30555556, 0.05555556, 0.25      , 0.25      ,
       0.        ])

In [17]:
def all_close(x, y, tol=0.01):
    """Tell whether ``x`` and ``y`` agree element-wise within ``tol``.

    Thin wrapper around ``np.allclose`` that passes ``tol`` as the absolute
    tolerance; the relative tolerance is left at NumPy's default.

    Args:
        x: First array (or scalar).
        y: Second array (or scalar).
        tol: Absolute tolerance forwarded as ``atol``.

    Returns:
        ``True`` when every pair of elements is within tolerance.
    """
    return np.allclose(x, y, atol=tol)


True

In [18]:
def random_walk(P, x, tol=0.01, max_steps=100):
    """Iterate the random walk until it stops changing or steps run out.

    Each iteration takes one step from the current distribution.  When the
    stepped distribution is within ``tol`` of the current one (as judged by
    ``all_close``) the loop stops and the current distribution is returned;
    otherwise the stepped distribution becomes the current one.

    Args:
        P: Transition matrix.
        x: Starting distribution vector.
        tol: Tolerance passed to ``all_close`` for the convergence check.
        max_steps: Upper bound on the number of iterations.

    Returns:
        The distribution held when the loop ended: the last iterate whose
        next step changed by no more than the tolerance, or the result
        after ``max_steps`` steps if convergence was not reached.
    """
    for _ in range(max_steps):
        # Take the step once and reuse it for both the convergence check
        # and the update, instead of multiplying by P twice per iteration.
        stepped = random_walk_one_step(P, x)
        if all_close(stepped, x, tol):
            break
        x = stepped
    return x

In [20]:
def search_engine_v1(A):
    """Rank the nodes of adjacency matrix ``A`` by their random-walk score.

    A walk is started from the uniform distribution over the
    ``A.shape[0]`` nodes, run with ``random_walk`` on the transition matrix
    from ``compute_P``, and the node indices are returned ordered from the
    highest final score to the lowest.

    Args:
        A: Square 2-D NumPy adjacency matrix.

    Returns:
        Array of node indices, best-scoring node first.
    """
    node_count = A.shape[0]
    uniform_start = np.ones(node_count) / node_count
    scores = random_walk(compute_P(A), uniform_start)
    # argsort is ascending; reverse it to list the best-scoring node first.
    ascending = np.argsort(scores)
    return ascending[::-1]
# Rank the nodes of the example graph, highest-scoring node first.
search_engine_v1(A)

array([1, 0, 4, 2, 3, 5], dtype=int64)