# Effective Transition

In [23]:
from scipy.sparse import linalg as la
import numpy as np
import networkx as nx
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import time
from math import floor, ceil

In [85]:
def delete_duplicates(pairs):
    """
    deletes duplicate pairs from list pairs
    """
    n = len(pairs)
    copy = []
    for pair in pairs:
        copy.append(sorted(pair))

    for i in range(1,n):
        if copy.count(copy[n-i]) > 1:
            del copy[n-i]
    return copy

def complement(first, second):
    """
    returns the compliment of the first list in the second
    """
    second = set(second)
    return np.array([item for item in first if item not in second])

def old_complement(first, second):
    """
    returns the compliment of the first list in the second
    """
    second = set(second)
    return np.array([item for item in first if item not in second])

def get_loc(pos, n):
    """
    takes position in flattened array and gives location in non-flattened
    """
    row = int(pos/n)
    col = pos % n
    return (row, col)

"""X = csr_matrix([[1,3,4],[3,2,2],[4,8,1]])
X = X[:,np.array([0,2])]
X = X[np.array([0,2])]"""
def partition(M, r, c):
    """
    returns the r rows and c columns of sparse matrix M
    """
    part = M[:,c]
    part = part[r]
    return part
    """part = []
    for x in r:
        for y in c:
            part.append(M[x,y])
    return np.array(part).reshape((len(r),len(c)))"""
    
def old_partition(M, r, c):
    """
    returns the r rows and c columns of sparse matrix M
    """
    part = []
    for x in r:
        for y in c:
            part.append(M[x,y])
    return np.array(part).reshape((len(r),len(c)))

def scipy_eigsh(M, dim=1, tol=1e-8):
    """
    returns the eigenvalue of largest magnitude corresponding to matrix M
    """
    M = M.astype(np.float64)
    sigma = la.eigsh(M, k=dim, which='LM', tol=tol, return_eigenvectors=False)
    return sigma[0]

def old_brute_et(M,k):
    """
    predicts k links for network with adjacency matrix M
    """
    n = M.shape[0]
    r = list(range(n))
    eigval = scipy_eigsh(M)

    R = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            if i != j:
                first = old_partition(M,[i,j],[i,j])
                comp = complement(r,[i,j])
                second = old_partition(M,np.array([i,j]),comp) 
                temp = old_partition(M, comp, comp)
                third = np.linalg.inv(partition(M, comp, comp)-eigval*np.identity(n-2))
                fourth = old_partition(M, comp,[i,j])
                #print(first.shape, second.shape, third.shape, fourth.shape)
                okay = second @ third @ fourth
                this = first-okay
                R[i,j] = this[0,1]

    pred = np.asarray(np.argsort(-1*(R - 10*M).reshape(n*n)))[0]

    prediction = []
    for p in pred:
        prediction.append(get_loc(p,n))
        
    return delete_duplicates(prediction)[:k]

def brute_effective_transition(M,k):
    """
    predicts k links for network with adjacency matrix M
    """
    n = M.shape[0]
    r = list(range(n))
    eigval = scipy_eigsh(M)

    R = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            if i != j:
                first = partition(M,np.array([i,j]),np.array([i,j]))
                comp = complement(r,[i,j])
                second = partition(M,np.array([i,j]),comp) 
                temp = partition(M, comp, comp)
                third = np.linalg.inv(partition(M, comp, comp)-eigval*np.identity(n-2))
                fourth = partition(M, comp,np.array([i,j]))
                #print(first.shape, second.shape, third.shape, fourth.shape)
                okay = second @ third @ fourth
                this = first-okay
                R[i,j] = this[0,1]

    pred = np.asarray(np.argsort(-1*(R - 10*M).reshape(n*n)))[0]

    prediction = []
    for p in pred:
        prediction.append(get_loc(p,n))
        
    prediction = [(p[0],p[1]) for p in prediction]
    return prediction[:k]

In [3]:
# example
G = nx.Graph()
G.add_nodes_from(range(1,11))
edges = [(1,2),(1,3),(1,6),(1,10),(2,3),(2,4),(2,6),(3,8),(3,9),(4,6),
         (4,8),(4,10),(5,7),(5,9),(5,10),(6,8),(6,9),(7,8),(8,9),(9,10)]
G.add_edges_from(edges)
A = nx.adjacency_matrix(G)

start = time.time()
print(brute_effective_transition(A, 5))
print(time.time()-start)

[[2, 5], [0, 3], [2, 3], [3, 8], [1, 7]]
1.5166690349578857


In [4]:
# example
G = nx.Graph()
G.add_nodes_from(range(1,11))
edges = [(1,2),(1,3),(1,6),(1,10),(2,3),(2,4),(2,6),(3,8),(3,9),(4,6),
         (4,8),(4,10),(5,7),(5,9),(5,10),(6,8),(6,9),(7,8),(8,9),(9,10)]
G.add_edges_from(edges)
A = nx.adjacency_matrix(G)

start = time.time()
print(old_brute_et(A, 5))
print(time.time()-start)

[[2, 5], [0, 3], [2, 3], [3, 8], [1, 7]]
1.2645361423492432


In [5]:
# example
G = nx.Graph()
G.add_nodes_from(range(1,11))
edges = [(1,2),(1,4),(1,7),(1,8),(1,10),(2,5),(2,6),(2,7),(2,8),(2,9),
         (2,10),(3,10),(4,5),(4,9),(4,8),(5,6),(5,8),(5,10),(6,9),(7,8)]
G.add_edges_from(edges)
A = nx.adjacency_matrix(G)

start = time.time()
print(brute_effective_transition(A, 3))
print(time.time()-start)

start = time.time()
print(old_brute_et(A, 3))
print(time.time()-start)

[[0, 4], [3, 6], [6, 9]]
0.9199318885803223
[[0, 4], [3, 6], [6, 9]]
1.0843408107757568


In [6]:
X = csr_matrix([[1,3,4],[3,2,2],[4,8,1]])

In [7]:
X.toarray()

array([[1, 3, 4],
       [3, 2, 2],
       [4, 8, 1]], dtype=int64)

In [8]:
X = csr_matrix([[1,3,4],[3,2,2],[4,8,1]])
X = X[:,np.array([0,2])]
X = X[np.array([0,2])]

In [9]:
X.toarray()

array([[1, 4],
       [4, 1]], dtype=int64)

In [10]:
ra = np.array(list(range(1,10)))

In [11]:
ra

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
ij = np.array([3,4])

In [13]:
ra[~ij]

array([6, 5])

In [14]:
~ij

array([-4, -5])

In [78]:
def load_data(filename):
    myfile = open(filename, 'r')
    lines = myfile.readlines()
    lines = [line.split() for line in lines]
    time = 3 if len(lines[0])==4 else 2
    lines = [[int(line[0]),int(line[1]),int(line[time])] for line in lines]
    lines.sort(key=lambda x: x[2])
    n = len(lines)
    chop = floor(3*n/4)
    train = lines[:chop]
    to_train = [(edge[0],edge[1]) for edge in train]
    test = lines[chop:]
    k = len(test)
    to_predict = [(edge[0],edge[1]) for edge in test]
    G = nx.DiGraph()
    G.add_edges_from(to_train)
    G = max(nx.strongly_connected_component_subgraphs(G), key=len)
    nodes = G.nodes()
    test = lines[chop:]
    to_predict = [(edge[0],edge[1]) for edge in test if edge[0] in nodes and edge[1] in nodes]

    return G, to_predict

In [88]:
filename = '/Users/bbb/link_prediction/data/manufacturing.txt'
G, to_predict = load_data(filename)
print(len(to_predict))
nodes = G.nodes()
A = nx.adjacency_matrix(G)
d = np.sum(A,axis=1)
M = A/d

20441


In [89]:
start = time.time()
pred = brute_effective_transition(M, 500)
print(time.time()-start)

inter = list(set(pred)&set(to_predict))
print(inter)
print(len(inter)/len(pred))

28.9510760307312
[(94, 123), (53, 122), (1, 123), (61, 96), (115, 96), (56, 96), (54, 96), (76, 123), (117, 77), (12, 96), (86, 123), (15, 96), (95, 123), (33, 96), (79, 123), (59, 123), (39, 122), (115, 123), (121, 96), (42, 96), (104, 123), (88, 96)]
0.044


In [84]:
inter

[(12, 96), (6, 106), (41, 50), (8, 27), (15, 26), (15, 17)]

In [86]:
filename = '/Users/bbb/link_prediction/data/Dept4Emails.txt'
G, to_predict = load_data(filename)
print(len(to_predict))
nodes = G.nodes()
A = nx.adjacency_matrix(G)
d = np.sum(A,axis=1)
M = A/d

2690


In [87]:
start = time.time()
pred = brute_effective_transition(M, 2690)
print(time.time()-start)

inter = list(set(pred)&set(to_predict))
print(inter)
print(len(inter)/len(pred))

7.498342990875244
[(43, 48), (48, 43), (60, 11), (23, 60), (14, 4), (48, 67), (54, 58), (19, 4), (25, 12), (25, 1), (35, 58), (23, 7), (70, 19), (54, 67), (35, 66), (4, 19), (30, 66), (22, 58), (54, 48), (64, 13), (0, 70), (70, 56), (25, 15), (49, 58), (23, 44), (4, 52), (52, 17), (24, 52), (48, 17), (66, 58), (35, 57), (25, 49), (18, 46), (33, 14), (47, 63), (54, 14), (12, 11), (52, 37), (35, 16), (16, 19), (60, 13), (49, 48), (35, 46), (43, 70), (14, 49), (60, 62), (44, 14), (23, 66), (16, 60), (58, 66), (25, 44), (54, 4), (17, 52), (35, 9), (60, 7), (7, 8), (26, 24), (25, 16), (71, 58), (52, 24), (39, 13), (49, 66), (16, 54), (14, 33), (11, 52), (44, 30), (1, 60), (25, 47), (30, 4), (64, 1), (25, 30), (48, 63), (25, 19), (60, 53), (48, 24), (71, 8), (35, 14), (13, 44), (60, 30), (52, 54), (56, 49), (54, 49), (2, 43), (58, 60), (49, 52), (48, 60), (25, 14), (48, 15), (71, 60), (54, 43), (60, 58), (49, 14), (16, 48), (60, 16), (35, 13), (33, 13), (4, 17), (52, 43), (35, 30), (18, 58),

In [90]:
filename = '/Users/bbb/link_prediction/data/Dept3Emails.txt'
G, to_predict = load_data(filename)
print(len(to_predict))
nodes = G.nodes()
A = nx.adjacency_matrix(G)
d = np.sum(A,axis=1)
M = A/d

4175


In [91]:
start = time.time()
pred = brute_effective_transition(M, 4175)
print(time.time()-start)

inter = list(set(pred)&set(to_predict))
print(len(inter))
print(len(inter)/len(pred))

0.5450179576873779
4
0.004162330905306972


In [92]:
len(pred)

961

In [93]:
filename = '/Users/bbb/link_prediction/data/Dept2Emails.txt'
G, to_predict = load_data(filename)
print(len(to_predict))
nodes = G.nodes()
A = nx.adjacency_matrix(G)
d = np.sum(A,axis=1)
M = A/d

6055


In [94]:
start = time.time()
pred = brute_effective_transition(M, 6055)
print(time.time()-start)

inter = list(set(pred)&set(to_predict))
print(len(inter))
print(len(inter)/len(pred))

0.5578720569610596
13
0.014444444444444444


In [95]:
len(pred)

900