In [1]:
import numpy as np
from numba import njit
import json

## Loading json

In [2]:
with open('web_structure.json', 'r') as json_file:
    webs_dict = json.load(json_file)

## Creating matrix based on webpages' references

In [3]:
def dict_to_adjacency_matrix(input_dict):
    elements_set = set()

    def process_dict(d):
        for key, value in d.items():
            elements_set.add(key)
            if isinstance(value, dict):
                process_dict(value)

    process_dict(input_dict)
    elements_list = list(reversed(list(elements_set)))
    index_map = {element: idx for idx, element in enumerate(elements_list)}
    size = len(elements_list)
    adjacency_matrix = np.zeros((size, size), dtype=int)

    def fill_adjacency_matrix(d, source_idx):
        for key, value in d.items():
            target_idx = index_map[key]
            adjacency_matrix[source_idx, target_idx] = 1
            if isinstance(value, dict):
                fill_adjacency_matrix(value, target_idx)

    for key in input_dict:
        source_idx = index_map[key]
        fill_adjacency_matrix(input_dict[key], source_idx)

    return adjacency_matrix, elements_list

In [4]:
adj_matrix, elements = dict_to_adjacency_matrix(webs_dict)

In [5]:
print(len(elements))

99


## Normalization of the matrix

In [6]:
def normalize_adjacency_matrix(adj_matrix):
    row_sums = adj_matrix.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1
    normalized_matrix = adj_matrix / row_sums
    return normalized_matrix

In [7]:
normalized_matrix = normalize_adjacency_matrix(adj_matrix)

## Calculation of final ranks using njit(accelerator)

In [10]:
# @njit
def vector_calculator(h_matrix: np.ndarray, epsilon: float = 1e-7, max_iterations: int = 1000):
    h_matrix = h_matrix.astype(np.float64)
    n = h_matrix.shape[0]
    v_0 = np.full(n, 1 / n, dtype=np.float64)
    iteration = 0
    while iteration < max_iterations:
        v_1 = np.dot(h_matrix, v_0)
        if np.linalg.norm(v_1 - v_0) < epsilon:
            break
        v_0 = v_1
        iteration += 1
    
    return v_1

In [11]:
print(sum(vector_calculator(normalized_matrix)))

0.8124744354668184


#### As we can see the sum is not equal to 1.

## Converting zero rows

In [12]:
def convert_zero_rows(h_matrix: np.ndarray = None) -> np.ndarray:
    n = h_matrix.shape[0]
    for i in range(n):
        if np.all(h_matrix[i] == 0):
            h_matrix[i] = np.full(n, 1/n)
            
    return h_matrix

In [13]:
converted_matrix = convert_zero_rows(normalized_matrix)

In [14]:
print(sum(vector_calculator(converted_matrix)))

0.999999999999998


## Creation of G matrix (included dampling factor)

In [15]:
def g_matrix_calculation(no_zero_row_matrix: np.ndarray, d = 0.85):
    n = no_zero_row_matrix.shape[0]
    return d * no_zero_row_matrix + (1-d) * np.full((n, n), 1 / n)

In [17]:
g_matrix = g_matrix_calculation(converted_matrix)

## Vector calculation with matrix G

In [19]:
print(sum(vector_calculator(g_matrix)))

0.9999999999999984
