In [2]:
import numpy as np
from numpy.linalg import matrix_power

def is_drazin(A, k, AD):
    """Check whether AD satisfies the three Drazin-inverse conditions for A.

    Parameters:
        A: square matrix (anything supporting ``@`` and ``matrix_power``).
        k (int): exponent used in the second condition.
        AD: candidate matrix of the same shape as A.

    Returns:
        bool: True only if, up to ``np.allclose`` tolerance,
        ``A AD = AD A``, ``A^(k+1) AD = A^k`` and ``AD A AD = AD``.
    """
    # Conditions are tested in order and bail out at the first failure,
    # so later products are never formed once a condition is violated.
    commutes = np.allclose(A @ AD, AD @ A)
    if not commutes:
        return False

    lowers_power = np.allclose(matrix_power(A, k + 1) @ AD, matrix_power(A, k))
    if not lowers_power:
        return False

    return np.allclose(AD @ A @ AD, AD)

# Worked example: a 4x4 matrix together with a candidate Drazin inverse.
A = np.array([
    [1, 3, 0, 0],
    [0, 1, 3, 0],
    [0, 0, 1, 3],
    [0, 0, 0, 0],
])

AD = np.array([
    [1, -3,  9,  81],
    [0,  1, -3, -18],
    [0,  0,  1,   3],
    [0,  0,  0,   0],
])

# Exponent passed to the A^(k+1) AD = A^k check.
k = 1

# Last expression of the cell, so its value is displayed as the cell output.
is_drazin(A, k, AD)

True

In [3]:
import scipy.linalg as la

def drazin(A, tol=1e-5):
    """Compute the Drazin inverse of a square matrix from two sorted Schur forms.

    One Schur decomposition moves the eigenvalues with modulus above ``tol``
    to the top-left, the other moves the remaining (numerically zero)
    eigenvalues there.  The leading Schur vectors of each are combined into a
    change of basis ``U`` in which the nonzero-eigenvalue part of ``A`` sits in
    the leading ``k1 x k1`` block; that block is inverted and the rest is
    replaced by zeros.

    Parameters:
        A ((n, n) array_like): matrix to invert.
        tol (float): eigenvalues whose modulus is <= ``tol`` are treated as zero.

    Returns:
        (n, n) ndarray: the Drazin inverse of ``A`` (float64 for real input,
        complex for complex input).
    """
    n = np.shape(A)[0]

    # The sort callback receives (real, imag) for a real Schur form and a
    # single complex number for a complex one.  Accepting an optional second
    # argument covers both; a one-argument callback on a real matrix would only
    # see the real part and misclassify eigenvalues such as +/- 1j as zero.
    def is_nonzero(re, im=0.0):
        return abs(re + 1j * im) > tol

    def is_zero(re, im=0.0):
        return abs(re + 1j * im) <= tol

    # scipy.linalg.schur returns (T, Z, sdim): only the Schur vectors Z and the
    # count of selected eigenvalues are needed here.
    _, vecs_nonzero, k1 = la.schur(A, sort=is_nonzero)
    _, vecs_zero, _ = la.schur(A, sort=is_zero)

    # First k1 columns span the nonzero-eigenvalue invariant subspace, the
    # remaining n - k1 columns span the zero-eigenvalue one.
    U = np.concatenate((vecs_nonzero[:, :k1], vecs_zero[:, :n - k1]), axis=1)
    U_inv = la.inv(U)
    V = U_inv @ A @ U

    # Match the dtype of the data so complex blocks are not truncated to real.
    core = np.zeros((n, n), dtype=np.result_type(V.dtype, np.float64))
    if k1 != 0:
        core[:k1, :k1] = la.inv(V[:k1, :k1])
    return U @ core @ U_inv

# Eigenvalues with modulus at or below this threshold are treated as zero.
tol = 1e-5

# Last expression of the cell; the displayed matrix equals AD from the earlier example.
drazin(A, tol)

array([[  1.,  -3.,   9.,  81.],
       [  0.,   1.,  -3., -18.],
       [  0.,   0.,   1.,   3.],
       [  0.,   0.,   0.,   0.]])

In [12]:
def R(A, tol=1e-5):
    """Build a matrix of Drazin-inverse diagonals from a graph adjacency matrix.

    For every column index ``j`` the Laplacian ``L = D - A`` (``D`` being the
    diagonal matrix of row sums of ``A``) has its row ``j`` replaced by the
    ``j``-th row of the identity; the diagonal of the Drazin inverse of that
    modified matrix becomes column ``j`` of the result, with the ``(j, j)``
    entry forced to zero.

    NOTE(review): this is presumably the effective-resistance matrix of the
    graph -- confirm against the notebook's source material.

    Parameters:
        A ((n, n) array_like): adjacency matrix.
        tol (float): zero-eigenvalue threshold forwarded to ``drazin``.

    Returns:
        (n, n) ndarray of floats with a zero diagonal.
    """
    # Accept nested lists as well as arrays, and work in floating point.
    A = np.asarray(A, dtype=float)
    n = A.shape[0]
    laplacian = np.diag(A.sum(axis=1)) - A

    resistance = np.zeros((n, n))
    for j in range(n):
        # Overwrite row j with the j-th standard basis row.
        modified = laplacian.copy()
        modified[j, :] = 0.0
        modified[j, j] = 1.0
        resistance[:, j] = np.diag(drazin(modified, tol))
        resistance[j, j] = 0
    return resistance

# Sanity checks for R on three small graphs.  The results are floating-point,
# so every comparison is tolerance-based and two-sided.
A = np.array([[0, 4], [4, 0]])
B = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]])
C = np.array([[0, 1, 0, 0], [1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0]])
print(np.isclose(R(A)[0, 1], 0.25))
print(np.isclose(R(B)[0, 1], 2 / 3))
print(abs(R(C)[0, 3] - 3.0) < 1e-10)

True
True
True


In [100]:
import pandas as pd

class LinkPredictor(object):
    """Suggest new links in an undirected network using the matrix returned by ``R``.

    Among pairs of distinct nodes that are not yet linked, the pair with the
    smallest entry of ``self.R`` is the predicted next link.
    """

    def __init__(self, filename):
        """Load a CSV edge list and build the adjacency and ``R`` tables.

        Parameters:
            filename (str): path handed to ``pandas.read_csv``.  Columns 0 and
                1 of every parsed row are taken as the two endpoints of a link.

        NOTE(review): ``read_csv`` is called with its defaults, so the first
        line of the file is consumed as a header -- confirm the file has one.
        """
        data = pd.read_csv(filename)
        # Every distinct value found anywhere in the table is a node label.
        self.names = pd.unique(data.values.ravel())
        # Number of nodes.
        self.n = len(self.names)
        # Link-count table, labelled by node name on both axes.
        self.A = pd.DataFrame(np.zeros((self.n, self.n)),
                              index=self.names, columns=self.names)
        for row in data.values:
            # Each listed pair is counted in both directions.
            self.A.loc[row[0], row[1]] += 1
            self.A.loc[row[1], row[0]] += 1
        self._refresh_resistance()

    def _refresh_resistance(self):
        """Recompute ``self.R`` (labelled like ``self.A``) from the current links."""
        self.R = pd.DataFrame(R(self.A.values), index=self.names, columns=self.names)

    def _require_node(self, node):
        """Raise ValueError unless ``node`` is one of the network's labels."""
        if node not in self.A.index:
            raise ValueError('Node is not in the network.')

    def predict_link(self, node=None):
        """Predict the next link for the whole network or for a single node.

        Parameters:
            node: label of a node, or None to search over all pairs.

        Returns:
            With ``node=None``, a ``(label, label)`` tuple for the unlinked
            pair with the smallest ``self.R`` entry; otherwise the label of
            the unlinked node with the smallest ``self.R`` entry in ``node``'s
            column.

        Raises:
            ValueError: if ``node`` is not in the network, or if there is no
                unlinked candidate left.
        """
        # Keep only off-diagonal entries with no existing link; the rest become NaN.
        unlinked = (self.A == 0) & (np.identity(self.n) == 0)
        candidates = self.R.where(unlinked)

        if node is None:
            # nanargmin works on the flattened table; map back to (row, col).
            flat = np.nanargmin(candidates.values)
            i, j = np.unravel_index(flat, (self.n, self.n))
            return self.A.index[i], self.A.index[j]

        self._require_node(node)
        column = candidates[node]
        if column.isna().all():
            raise ValueError('Node is already linked to every other node.')
        return column.idxmin()

    def add_link(self, node1, node2):
        """Record a link between two existing nodes and refresh ``self.R``.

        Parameters:
            node1, node2: labels of the nodes to connect.

        Raises:
            ValueError: if either label is not in the network.  Nothing is
                modified in that case.
        """
        # Validate before touching the table: assigning through .loc with an
        # unknown label would enlarge the frame with NaN rows/columns.
        self._require_node(node1)
        self._require_node(node2)
        self.A.loc[node1, node2] = 1
        self.A.loc[node2, node1] = 1
        self._refresh_resistance()

# Demo: ask for Alan's predicted link, then accept two suggestions in turn and
# ask again after each one.
lp = LinkPredictor('../social_network.csv')
print(lp.predict_link('Alan'))
for new_friend in ('Sonia', 'Piers'):
    lp.add_link('Alan', new_friend)
    print(lp.predict_link('Alan'))

Sonia
Piers
Abigail
