In [1]:
import torch

# Boolean-mask assignment demo: the positions where `index` is True
# receive the replacement values in order; the others are left untouched.
t = torch.arange(start=10, end=14)
index = torch.tensor([1, 1, 0, 0], dtype=torch.bool)
t[index] = torch.ones(2, dtype=t.dtype)
t

tensor([ 1,  1, 12, 13])

## PPR HOMEWORK

In [3]:
# Toy graph in compressed form: node v's neighbours are
# indices[indptr[v]:indptr[v + 1]], i.e. the directed cycle 0 -> 1 -> 2 -> 0,
# and every node has degree 1.
indptr, indices, degree = (
    torch.tensor([0, 1, 2, 3]),
    torch.tensor([1, 2, 0]),
    torch.tensor([1, 1, 1]),
)
g = (indptr, indices, degree)


def approx_ppr(g_, a_, t_, e_):
    """Approximate personalized PageRank with batched local pushes.

    Args:
        g_: tuple ``(indptr, indices, degree)`` of 1-D tensors. The neighbours
            of node ``v`` are ``indices[indptr[v]:indptr[v + 1]]`` and
            ``degree[v]`` is the divisor applied to the mass pushed from ``v``.
        a_: teleport probability; the residual of the source starts at ``a_``
            and a fraction ``1 - a_`` of every pushed residual is forwarded.
        t_: index of the source node.
        e_: tolerance; node ``v`` is pushed while ``r[v] > a_ * e_ * degree[v]``.

    Returns:
        1-D float tensor ``p`` with the accumulated score of every node.

    Side effects:
        Prints ``p`` and ``r`` at the start of every round (homework trace).
    """
    indptr_, indices_, degree_ = g_
    num_nodes = degree_.size(-1)

    # initialize: all mass starts as residual on the source node
    p = torch.zeros(num_nodes)
    r = torch.zeros(num_nodes)
    r[t_] = a_

    threshold = a_ * e_ * degree_
    while True:
        print('p: ', p, ' r: ', r)
        mask = r > threshold
        if not mask.any():
            break

        # Move the residual of every active node into its score and compute
        # the per-neighbour share from the same snapshot of r, so all nodes
        # of one round are pushed "simultaneously".
        p[mask] += r[mask]
        m = (1 - a_) * r[mask] / degree_[mask]
        r[mask] = 0

        v_idx = mask.nonzero(as_tuple=False).view(-1)
        for i, v in enumerate(v_idx):
            start, end = int(indptr_[v]), int(indptr_[v + 1])
            u_idx = indices_[start:end]
            # index_add_ accumulates over repeated entries of u_idx (parallel
            # edges); `r[u_idx] += ...` would keep only one of the updates.
            r.index_add_(0, u_idx, m[i].repeat(u_idx.numel()).to(r.dtype))

    return p


# Scores of the toy graph with node 0 as source, alpha = 0.5 and
# tolerance 1e-5; the bare name on the last line displays the result.
ppr_score = approx_ppr(g, a_=0.5, t_=0, e_=1e-5)
ppr_score

p:  tensor([0., 0., 0.])  r:  tensor([0.5000, 0.0000, 0.0000])
p:  tensor([0.5000, 0.0000, 0.0000])  r:  tensor([0.0000, 0.2500, 0.0000])
p:  tensor([0.5000, 0.2500, 0.0000])  r:  tensor([0.0000, 0.0000, 0.1250])
p:  tensor([0.5000, 0.2500, 0.1250])  r:  tensor([0.0625, 0.0000, 0.0000])
p:  tensor([0.5625, 0.2500, 0.1250])  r:  tensor([0.0000, 0.0312, 0.0000])
p:  tensor([0.5625, 0.2812, 0.1250])  r:  tensor([0.0000, 0.0000, 0.0156])
p:  tensor([0.5625, 0.2812, 0.1406])  r:  tensor([0.0078, 0.0000, 0.0000])
p:  tensor([0.5703, 0.2812, 0.1406])  r:  tensor([0.0000, 0.0039, 0.0000])
p:  tensor([0.5703, 0.2852, 0.1406])  r:  tensor([0.0000, 0.0000, 0.0020])
p:  tensor([0.5703, 0.2852, 0.1426])  r:  tensor([0.0010, 0.0000, 0.0000])
p:  tensor([0.5713, 0.2852, 0.1426])  r:  tensor([0.0000, 0.0005, 0.0000])
p:  tensor([0.5713, 0.2856, 0.1426])  r:  tensor([0.0000, 0.0000, 0.0002])
p:  tensor([0.5713, 0.2856, 0.1428])  r:  tensor([0.0001, 0.0000, 0.0000])
p:  tensor([0.5714, 0.2856, 0.1428]) 

tensor([0.5714, 0.2857, 0.1429])

## PPR experiment (power iteration vs. local push)

In [172]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import numpy as np

# Load the 'Cora' Planetoid dataset. The ToSparseTensor pre-transform is
# presumably what provides the `data.adj_t` attribute used below.
# NOTE(review): `root` is a hard-coded absolute path; adjust it per machine.
dataset = Planetoid(root='/data/gangda/pyg', name='Cora', pre_transform=T.ToSparseTensor())
data = dataset[0]

# Replace the stored edge values with uniform random weights, one per edge.
# NOTE(review): no RNG seed is set, so the weights (and every downstream
# number) change between runs.
data.adj_t.set_value_(torch.rand(data.num_edges), layout='csr')
# Compressed-column view: for column v, indices[indptr[v]:indptr[v + 1]] are
# the row ids and `value` holds the matching edge weights.
indptr, indices, value = data.adj_t.csc()
# Sum of the edge weights in every column, used as the weighted node degree.
degree = data.adj_t.sum(dim=0).to(torch.float)

# Divide every column by its weighted degree (1 x N broadcast), so a column
# of the result sums to 1. A column with zero sum would yield inf here.
norm_adj_t = data.adj_t * degree.pow(-1).view(1, -1)

# Graph description consumed by the dict-based approx_ppr defined further down.
g = {
    'indptr': indptr,
    'indices': indices,
    'edge_weights': value,
    'weighted_degree': degree,
}
# Display both representations to check that they share the same structure.
g, norm_adj_t.csc()

({'indptr': tensor([    0,     3,     6,  ..., 10548, 10552, 10556]),
  'indices': tensor([ 633, 1862, 2582,  ...,  598, 1473, 2706]),
  'edge_weights': tensor([0.9676, 0.6667, 0.6464,  ..., 0.5209, 0.2987, 0.3243]),
  'weighted_degree': tensor([2.2807, 1.3552, 3.3382,  ..., 0.6819, 1.7165, 1.8738])},
 (tensor([    0,     3,     6,  ..., 10548, 10552, 10556]),
  tensor([ 633, 1862, 2582,  ...,  598, 1473, 2706]),
  tensor([0.4243, 0.2923, 0.2834,  ..., 0.2780, 0.1594, 0.1731])))

In [442]:
def power_iter_ppr(P_w, target_id_, alpha_, epsilon_, max_iter):
    """Personalized PageRank by power iteration.

    Iterates ``x <- alpha_ * s + (1 - alpha_) * (P_w @ x)`` where ``s`` is a
    column vector that is ``alpha_`` at ``target_id_`` and zero elsewhere.
    Because ``s`` already carries a factor ``alpha_``, the result is the usual
    PPR vector scaled by ``alpha_``.

    Args:
        P_w: square transition matrix supporting ``size(0)`` and ``@`` with an
            ``(N, 1)`` dense tensor.
        target_id_: index of the source node.
        alpha_: teleport probability.
        epsilon_: per-node tolerance; iteration stops once the L1 change
            between two iterates is below ``num_nodes * epsilon_``.
        max_iter: maximum number of iterations.

    Returns:
        1-D tensor of length ``N`` with the last iterate.

    Side effects:
        Prints the (0-based) index of the converging iteration, or a failure
        message when ``max_iter`` is exhausted.
    """
    num_nodes = P_w.size(0)
    s = torch.zeros(num_nodes)
    s[target_id_] = alpha_
    s = s.view(-1, 1)

    x = s.clone()
    for i in range(max_iter):
        x_last = x
        x = alpha_ * s + (1 - alpha_) * (P_w @ x)

        # Tensor reduction instead of the builtin sum(), which would loop
        # over the N rows in Python on every iteration.
        if (x - x_last).abs().sum() < num_nodes * epsilon_:
            print('Base Iterations: ', i)
            return x.view(-1)

    print(f'Failed to converge with tolerance({epsilon_}) and iter({max_iter})')
    return x.view(-1)


def approx_ppr(g_, target_id_, alpha_, epsilon_):
    """Approximate weighted personalized PageRank with batched local pushes.

    Args:
        g_: dict of 1-D tensors with keys ``'indptr'``, ``'indices'``,
            ``'edge_weights'`` and ``'weighted_degree'``. The neighbours of
            node ``v`` are ``indices[indptr[v]:indptr[v + 1]]`` with the
            matching slice of ``edge_weights``.
        target_id_: index of the source node.
        alpha_: teleport probability. The residual of the source starts at
            ``alpha_`` and scores accumulate ``alpha_ * r``.
        epsilon_: tolerance; node ``v`` is pushed while
            ``r[v] > alpha_ * epsilon_ * weighted_degree[v]``.

    Returns:
        1-D float tensor with the approximate score of every node.

    Side effects:
        Prints the number of rounds and the total number of edge pushes.
    """
    indptr_ = g_['indptr']
    indices_ = g_['indices']
    edge_weights = g_['edge_weights']
    weighted_degree = g_['weighted_degree']

    num_nodes = weighted_degree.size(-1)
    p = torch.zeros(num_nodes)
    r = torch.zeros(num_nodes)
    r[target_id_] = alpha_

    # num_push stays a plain int so the report below also works when no
    # node ever exceeds its threshold.
    iterations, num_push = 0, 0
    threshold = alpha_ * epsilon_ * weighted_degree
    while True:
        v_mask = r > threshold
        if not v_mask.any():
            break

        # update -- uses the alpha_ argument, not a notebook-level global
        p[v_mask] += alpha_ * r[v_mask]
        m = (1 - alpha_) * r[v_mask] / weighted_degree[v_mask]
        r[v_mask] = 0

        v_idx = v_mask.nonzero(as_tuple=False).view(-1)
        for i, v in enumerate(v_idx):
            start, end = int(indptr_[v]), int(indptr_[v + 1])
            u_idx = indices_[start:end]
            u_weights = edge_weights[start:end]
            # index_add_ accumulates over repeated neighbours (parallel
            # edges); `r[u_idx] += ...` would keep only one of the updates.
            r.index_add_(0, u_idx, (m[i] * u_weights).to(r.dtype))

            num_push += end - start

        iterations += 1

    print('Approx Iterations: ', iterations, '\nTotal Push Operations: ', num_push)

    return p

In [455]:
# Experiment settings: teleport probability, push tolerance, size of the
# compared top-k lists and number of randomly drawn source nodes.
alpha = 0.15
epsilon = 1e-5
top_k = 100
num_source = 50

# Running count of nodes that appear in both top-k lists.
total_concur = 0.
for target_id in torch.randperm(data.num_nodes)[:num_source]:
    # Reference scores (tight tolerance) vs. local-push approximation.
    base_p = power_iter_ppr(norm_adj_t, target_id, alpha, 1e-10, 100)
    approx_p = approx_ppr(g, target_id, alpha, epsilon)

    print(f'base_p nnz = {(base_p > 0).sum()}')
    print(f'approx_p nnz = {(approx_p > 0).sum()}')
    print()

    # Node ids ordered by decreasing score; only the first top_k are compared.
    base_top_k = torch.sort(base_p, descending=True).indices
    approx_top_k = torch.sort(approx_p, descending=True).indices
    shared = np.intersect1d(base_top_k[:top_k], approx_top_k[:top_k])
    total_concur += shared.shape[0]

print(f'precision@{top_k}: {total_concur/(top_k*num_source): .3f}')

Base Iterations:  54
Approx Iterations:  43 
Total Push Operations:  66538
base_p nnz = 2485
approx_p nnz = 1576

Base Iterations:  52
Approx Iterations:  42 
Total Push Operations:  99874
base_p nnz = 2485
approx_p nnz = 1916

Base Iterations:  59
Approx Iterations:  47 
Total Push Operations:  64405
base_p nnz = 2485
approx_p nnz = 1553

Base Iterations:  54
Approx Iterations:  42 
Total Push Operations:  117502
base_p nnz = 2485
approx_p nnz = 2224

Base Iterations:  51
Approx Iterations:  40 
Total Push Operations:  110186
base_p nnz = 2485
approx_p nnz = 2096

Base Iterations:  52
Approx Iterations:  41 
Total Push Operations:  103139
base_p nnz = 2485
approx_p nnz = 2116

Base Iterations:  85
Approx Iterations:  75 
Total Push Operations:  75
base_p nnz = 2
approx_p nnz = 2

Base Iterations:  58
Approx Iterations:  63 
Total Push Operations:  21269
base_p nnz = 2485
approx_p nnz = 460

Base Iterations:  52
Approx Iterations:  40 
Total Push Operations:  80859
base_p nnz = 2485
ap

## PPRGO (close to C implementation)

In [343]:
def _calc_ppr_node(target_id_, indptr_, indices_, degree_, weights_, alpha_, epsilon_):
    alpha_eps = alpha_ * epsilon_
    f32_0 = 0.
    p = {target_id_: f32_0}
    r = {target_id_: alpha_}
    q = [target_id_]
    while len(q) > 0:
        unode = q.pop()

        res = r[unode] if unode in r else f32_0
        if unode in p:
            p[unode] += res
        else:
            p[unode] = res
        r[unode] = f32_0
        for vnode in indices_[indptr_[unode]:indptr_[unode + 1]]:
            _val = (1 - alpha_) * res * weights_[unode] / degree_[unode]
            if vnode in r:
                r[vnode] += _val
            else:
                r[vnode] = _val

            res_vnode = r[vnode] if vnode in r else f32_0
            if res_vnode >= alpha_eps * degree_[vnode]:
                if vnode not in q:
                    q.append(vnode)

    return list(p.keys()), list(p.values())

# PPRGo-style implementation; it needs numba compilation to be fast, and
# parallelism is meant to happen at the level of root (source) nodes.
_calc_ppr_node(
    target_id_=0,
    indptr_=indptr.tolist(),
    indices_=indices.tolist(),
    degree_=degree.tolist(),
    weights_=value.tolist(),
    alpha_=0.15,
    epsilon_=1e-3,
)

2