In [11]:
import networkx as nx
import operator

# **Link Prediction**

What new edges are most likely to form in a network? Given a pair of nodes, how can we assess whether they are likely to connect?

**Triadic closure**: the tendency for people who share connections in a social network to become connected.

### **Measure 1 - Common Neighbors**

The number of common neighbors of nodes $X$ and $Y$ is:

$comm\_neigh(X, Y) = |N(X) \bigcap N(Y)|$

where $N(X)$ is the set of neighbors of node $X$


For the graph

<img src="../assets/undirected_graph.png" width=300px>


In [12]:
# Build the undirected example graph from its adjacency-list file,
# reading node labels as strings.
G = nx.read_adjlist(
    '../assets/undirected_graph.txt',
    nodetype=str,
    create_using=nx.Graph()
)

# nx.non_edges yields only the pairs of nodes that are not connected;
# each such pair is scored by the number of neighbors the two nodes share.
common_neigh = [
    (u, v, sum(1 for _ in nx.common_neighbors(G, u, v)))
    for u, v in nx.non_edges(G)
]

common_neigh

[('D', 'O', 0),
 ('D', 'A', 3),
 ('D', 'F', 0),
 ('D', 'N', 0),
 ('D', 'H', 0),
 ('D', 'J', 0),
 ('D', 'K', 0),
 ('D', 'I', 0),
 ('D', 'L', 0),
 ('D', 'G', 0),
 ('D', 'M', 0),
 ('O', 'A', 1),
 ('O', 'E', 0),
 ('O', 'B', 0),
 ('O', 'F', 1),
 ('O', 'C', 0),
 ('O', 'H', 0),
 ('O', 'I', 1),
 ('O', 'G', 1),
 ('O', 'M', 2),
 ('A', 'F', 1),
 ('A', 'K', 0),
 ('A', 'H', 1),
 ('A', 'J', 1),
 ('A', 'I', 0),
 ('A', 'L', 1),
 ('A', 'M', 0),
 ('B', 'F', 0),
 ('B', 'N', 1),
 ('B', 'K', 0),
 ('B', 'H', 0),
 ('B', 'J', 0),
 ('B', 'I', 0),
 ('B', 'L', 0),
 ('B', 'G', 1),
 ('B', 'M', 0),
 ('E', 'F', 0),
 ('E', 'N', 1),
 ('E', 'K', 0),
 ('E', 'H', 0),
 ('E', 'J', 0),
 ('E', 'I', 0),
 ('E', 'L', 0),
 ('E', 'G', 1),
 ('E', 'M', 0),
 ('F', 'C', 0),
 ('F', 'N', 0),
 ('F', 'K', 0),
 ('F', 'H', 2),
 ('F', 'L', 0),
 ('F', 'M', 0),
 ('C', 'N', 1),
 ('C', 'K', 0),
 ('C', 'H', 0),
 ('C', 'J', 0),
 ('C', 'I', 0),
 ('C', 'L', 0),
 ('C', 'G', 1),
 ('C', 'M', 0),
 ('N', 'K', 2),
 ('N', 'H', 0),
 ('N', 'J', 1),
 ('N', '

### **Measure 2 - Jaccard Coefficient**

Number of common neighbors normalized by the total number of neighbors

The Jaccard coefficient of nodes $X$ and $Y$ is

$jaccard\_coeff(X, Y) = {{|N(X) \bigcap N(Y)|}\over{|N(X) \bigcup N(Y)|}}$

In [13]:
# Materialize the (u, v, score) triples produced by jaccard_coefficient so the
# cell displays them; no explicit pairs are passed, so networkx picks the pairs.
[*nx.jaccard_coefficient(G)]

[('D', 'O', 0.0),
 ('D', 'A', 0.6),
 ('D', 'F', 0.0),
 ('D', 'N', 0.0),
 ('D', 'H', 0.0),
 ('D', 'J', 0.0),
 ('D', 'K', 0.0),
 ('D', 'I', 0.0),
 ('D', 'L', 0.0),
 ('D', 'G', 0.0),
 ('D', 'M', 0.0),
 ('O', 'A', 0.125),
 ('O', 'E', 0.0),
 ('O', 'B', 0.0),
 ('O', 'F', 0.16666666666666666),
 ('O', 'C', 0.0),
 ('O', 'H', 0.0),
 ('O', 'I', 0.16666666666666666),
 ('O', 'G', 0.14285714285714285),
 ('O', 'M', 0.5),
 ('A', 'F', 0.14285714285714285),
 ('A', 'K', 0.0),
 ('A', 'H', 0.16666666666666666),
 ('A', 'J', 0.125),
 ('A', 'I', 0.0),
 ('A', 'L', 0.125),
 ('A', 'M', 0.0),
 ('B', 'F', 0.0),
 ('B', 'N', 0.16666666666666666),
 ('B', 'K', 0.0),
 ('B', 'H', 0.0),
 ('B', 'J', 0.0),
 ('B', 'I', 0.0),
 ('B', 'L', 0.0),
 ('B', 'G', 0.14285714285714285),
 ('B', 'M', 0.0),
 ('E', 'F', 0.0),
 ('E', 'N', 0.16666666666666666),
 ('E', 'K', 0.0),
 ('E', 'H', 0.0),
 ('E', 'J', 0.0),
 ('E', 'I', 0.0),
 ('E', 'L', 0.0),
 ('E', 'G', 0.14285714285714285),
 ('E', 'M', 0.0),
 ('F', 'C', 0.0),
 ('F', 'N', 0.0),
 ('F

### **Measure 3 - Resource Allocation**

Fraction of a "resource" that a node can send to another through their common neighbors.

The resource allocation index of nodes $X$ and $Y$ is:

$resc\_alloc(X, Y) = \sum_{u \in N(X)\cap N(Y)}{1 \over |N(u)|}$

In [14]:
# Rank the (u, v, score) triples by score, highest first. sorted() is stable,
# so triples with equal scores keep the order in which networkx produced them.
L = sorted(
    nx.resource_allocation_index(G),
    key=lambda triple: triple[2],
    reverse=True,
)
L

[('I', 'G', 1.0833333333333333),
 ('D', 'A', 0.75),
 ('O', 'M', 0.5833333333333333),
 ('F', 'H', 0.5833333333333333),
 ('H', 'J', 0.5833333333333333),
 ('N', 'K', 0.5),
 ('O', 'A', 0.3333333333333333),
 ('A', 'L', 0.3333333333333333),
 ('O', 'F', 0.25),
 ('O', 'I', 0.25),
 ('O', 'G', 0.25),
 ('A', 'F', 0.25),
 ('A', 'H', 0.25),
 ('A', 'J', 0.25),
 ('N', 'J', 0.25),
 ('N', 'M', 0.25),
 ('J', 'L', 0.25),
 ('J', 'K', 0.25),
 ('B', 'N', 0.2),
 ('B', 'G', 0.2),
 ('E', 'N', 0.2),
 ('E', 'G', 0.2),
 ('C', 'N', 0.2),
 ('C', 'G', 0.2),
 ('N', 'G', 0.2),
 ('D', 'O', 0),
 ('D', 'F', 0),
 ('D', 'N', 0),
 ('D', 'H', 0),
 ('D', 'J', 0),
 ('D', 'K', 0),
 ('D', 'I', 0),
 ('D', 'L', 0),
 ('D', 'G', 0),
 ('D', 'M', 0),
 ('O', 'E', 0),
 ('O', 'B', 0),
 ('O', 'C', 0),
 ('O', 'H', 0),
 ('A', 'K', 0),
 ('A', 'I', 0),
 ('A', 'M', 0),
 ('B', 'F', 0),
 ('B', 'K', 0),
 ('B', 'H', 0),
 ('B', 'J', 0),
 ('B', 'I', 0),
 ('B', 'L', 0),
 ('B', 'M', 0),
 ('E', 'F', 0),
 ('E', 'K', 0),
 ('E', 'H', 0),
 ('E', 'J', 0),
 