# Part 2B - New Connections Prediction

In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import pickle

In [2]:
future_connections = pd.read_csv('Future_Connections.csv', index_col=0, converters={0: eval})
future_connections.head(10)

Unnamed: 0,Future Connection
"(6, 840)",0.0
"(4, 197)",0.0
"(620, 979)",0.0
"(519, 872)",0.0
"(382, 423)",0.0
"(97, 226)",1.0
"(349, 905)",0.0
"(429, 860)",0.0
"(309, 989)",0.0
"(468, 880)",0.0


## Generate graph form the given dataframe

In [3]:
indice = list(future_connections.index)

src = [index[0] for index in indice]
dst = [index[1] for index in indice]

future_connections['src'] = pd.Series(src, index=future_connections.index)
future_connections['dst'] = pd.Series(dst, index=future_connections.index)

future_connections.head()
# G = nx.from_pandas_dataframe(future_connections)

Unnamed: 0,Future Connection,src,dst
"(6, 840)",0.0,6,840
"(4, 197)",0.0,4,197
"(620, 979)",0.0,620,979
"(519, 872)",0.0,519,872
"(382, 423)",0.0,382,423


In [4]:
G = nx.from_pandas_dataframe(future_connections, 'src', 'dst', create_using=nx.DiGraph())

print(nx.info(G))

Name: 
Type: DiGraph
Number of nodes: 1005
Number of edges: 488446
Average in degree: 486.0159
Average out degree: 486.0159


In [5]:
# graph checking
print("Weakly Connected: {}".format(nx.is_weakly_connected(G)))
print("Strongly Connected: {}".format(nx.is_weakly_connected(G)))
print("Directed: {}".format(nx.is_directed(G)))

Weakly Connected: True
Strongly Connected: True
Directed: True


In [6]:
future_connections.info()

<class 'pandas.core.frame.DataFrame'>
Index: 488446 entries, (6, 840) to (75, 101)
Data columns (total 3 columns):
Future Connection    366334 non-null float64
src                  488446 non-null int64
dst                  488446 non-null int64
dtypes: float64(1), int64(2)
memory usage: 14.9+ MB


## Th graph can use the one from Part 2A

See the post [Construction of Graph for part 2B](https://www.coursera.org/learn/python-social-network-analysis/discussions/weeks/4/threads/38KCVKcJEee6bw62IA80dA) for reference

In [7]:
import networkx as nx
import pandas as pd
import numpy as np
import pickle

In [8]:
G = nx.read_gpickle('email_prediction.txt')

In [9]:
future_edges = pd.DataFrame()

future_edges.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Empty DataFrame

In [32]:
# Link prediction methods for {6, 480}
print("Common Neighbors: {}".format(list(nx.common_neighbors(G, 6, 480))))
print("Jaccard coefficient: {}".format(list(nx.jaccard_coefficient(G, [future_connections.index[0]]))))
print("Adamic Adar Index: {}".format(list(nx.adamic_adar_index(G, [future_connections.index[0]]))))
print("Resource Allocation: {}".format(list(nx.resource_allocation_index(G, [future_connections.index[0]]))))
print("Preferential Attachement: {}".format(list(nx.preferential_attachment(G, [future_connections.index[0]]))))

Common Neighbors: [667, 375, 184, 362, 183, 549, 418, 21, 252, 211]
Jaccard coefficient: [(6, 840, 0.07377049180327869)]
Adamic Adar Index: [(6, 840, 2.110314079181727)]
Resource Allocation: [(6, 840, 0.13672123667645245)]
Preferential Attachement: [(6, 840, 2070)]


In [46]:
future_connections['comm_neigh'] = [len(list(nx.common_neighbors(G, edge[0], edge[1]))) for edge in future_connections.index]
future_connections['jaccard'] = [list(nx.jaccard_coefficient(G, [edge]))[0][2] for edge in future_connections.index]
future_connections['adamic_adar'] = [list(nx.adamic_adar_index(G, [edge]))[0][2] for edge in future_connections.index]
future_connections['res_alloc'] = [list(nx.resource_allocation_index(G, [edge]))[0][2] for edge in future_connections.index]
future_connections['pref_attach'] = [list(nx.preferential_attachment(G, [edge]))[0][2] for edge in future_connections.index]

future_connections.head()

Unnamed: 0,Future Connection,src,dst,comm_neigh,jaccard,adamic,adamic_adar,res_alloc,pref_attach
"(6, 840)",0.0,6,840,9,0.07377,2.110314,2.110314,0.136721,2070
"(4, 197)",0.0,4,197,2,0.015504,0.363528,0.363528,0.008437,3552
"(620, 979)",0.0,620,979,0,0.0,0.0,0.0,0.0,28
"(519, 872)",0.0,519,872,2,0.060606,0.507553,0.507553,0.039726,299
"(382, 423)",0.0,382,423,0,0.0,0.0,0.0,0.0,205
