# Effective Transition

In [3]:
from scipy.sparse import linalg as la
import numpy as np
import networkx as nx
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import time
from math import floor, ceil

In [4]:
def complement(first, second):
    """
    returns the compliment of the first list in the second
    """
    second = set(second)
    return np.array([item for item in first if item not in second])

def get_loc(pos, n):
    """
    takes position in flattened array and gives location in non-flattened
    """
    row = int(pos/n)
    col = pos % n
    return (row, col)

def partition(M, r, c):
    """
    returns the r rows and c columns of sparse matrix M
    """
    part = M[:,c]
    part = part[r]
    return part
    """part = []
    for x in r:
        for y in c:
            part.append(M[x,y])
    return np.array(part).reshape((len(r),len(c)))"""

def scipy_eigsh(M, dim=1, tol=1e-8):
    """
    returns the eigenvalue of largest magnitude corresponding to matrix M
    """
    M = M.astype(np.float64)
    sigma = la.eigsh(M, k=dim, which='LM', tol=tol, return_eigenvectors=False)
    return sigma[0]

def brute_effective_transition(M,k):
    """
    predicts k links for network with adjacency matrix M
    """
    n = M.shape[0]
    r = list(range(n))
    eigval = scipy_eigsh(M)

    R = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            if i != j:
                first = partition(M,np.array([i,j]),np.array([i,j]))
                comp = complement(r,[i,j])
                second = partition(M,np.array([i,j]),comp) 
                temp = partition(M, comp, comp)
                third = np.linalg.inv(partition(M, comp, comp)-eigval*np.identity(n-2))
                fourth = partition(M, comp,np.array([i,j]))
                #print(first.shape, second.shape, third.shape, fourth.shape)
                okay = second @ third @ fourth
                this = first-okay
                R[i,j] = this[0,1]

    pred = np.asarray(np.argsort(-1*(R - 10*M).reshape(n*n)))[0]

    prediction = []
    for p in pred:
        prediction.append(get_loc(p,n))
        
    prediction = [(p[0],p[1]) for p in prediction]
    return prediction[:k]

In [5]:
# example
G = nx.Graph()
G.add_nodes_from(range(1,11))
edges = [(1,2),(1,3),(1,6),(1,10),(2,3),(2,4),(2,6),(3,8),(3,9),(4,6),
         (4,8),(4,10),(5,7),(5,9),(5,10),(6,8),(6,9),(7,8),(8,9),(9,10)]
G.add_edges_from(edges)
A = nx.adjacency_matrix(G)

start = time.time()
print(brute_effective_transition(A, 5))
print(time.time()-start)

[(5, 2), (2, 5), (0, 3), (3, 0), (3, 2)]
1.293748140335083


In [6]:
# example
G = nx.Graph()
G.add_nodes_from(range(1,11))
edges = [(1,2),(1,4),(1,7),(1,8),(1,10),(2,5),(2,6),(2,7),(2,8),(2,9),
         (2,10),(3,10),(4,5),(4,9),(4,8),(5,6),(5,8),(5,10),(6,9),(7,8)]
G.add_edges_from(edges)
A = nx.adjacency_matrix(G)

start = time.time()
print(brute_effective_transition(A, 3))
print(time.time()-start)

start = time.time()
print(old_brute_et(A, 3))
print(time.time()-start)

[(0, 4), (4, 0), (3, 6)]
0.8602361679077148
[[0, 4], [3, 6], [6, 9]]
1.0931041240692139


In [7]:
def load_data(filename):
    myfile = open(filename, 'r')
    lines = myfile.readlines()
    lines = [line.split() for line in lines]
    time = 3 if len(lines[0])==4 else 2
    lines = [[int(line[0]),int(line[1]),int(line[time])] for line in lines]
    lines.sort(key=lambda x: x[2])
    n = len(lines)
    chop = floor(3*n/4)
    train = lines[:chop]
    to_train = [(edge[0],edge[1]) for edge in train]
    test = lines[chop:]
    k = len(test)
    to_predict = [(edge[0],edge[1]) for edge in test]
    G = nx.DiGraph()
    G.add_edges_from(to_train)
    G = max(nx.strongly_connected_component_subgraphs(G), key=len)
    nodes = G.nodes()
    test = lines[chop:]
    to_predict = [(edge[0],edge[1]) for edge in test if edge[0] in nodes and edge[1] in nodes]

    return G, to_predict

In [8]:
def get_score(fname):
    G, to_predict = load_data(fname)
    size = len(to_predict)
    A = nx.adjacency_matrix(G)
    d = np.sum(A,axis=1)
    M = A/d
    start = time.time()
    pred = brute_effective_transition(M, size)
    print('time',time.time()-start)
    inter = list(set(pred)&set(to_predict))
    print('score',len(inter)/len(pred))

# Results

In [9]:
filename = '/Users/bbb/link_prediction/data/manufacturing.txt'
get_score(filename)

time 29.45850396156311
score 0.1690602166792643


In [10]:
filename = '/Users/bbb/link_prediction/data/Dept4Emails.txt'
get_score(filename)

time 8.109071969985962
score 0.07769516728624536


In [11]:
filename = '/Users/bbb/link_prediction/data/Dept3Emails.txt'
get_score(filename)

time 0.909125804901123
score 0.004162330905306972


In [12]:
filename = '/Users/bbb/link_prediction/data/Dept2Emails.txt'
get_score(filename)

time 0.4884638786315918
score 0.014444444444444444
