# Collaboration Network Analysis

In [3]:
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

## 1. Load and preprocess the collaboration data

In [4]:
# Parse the co-author file into a list of (author1, author2, count) tuples.
# Each record is read as one leading character followed by three tab-separated
# fields, the last of which is converted to an integer.
# NOTE(review): the leading character is presumably a '#' marker - confirm
# against the data file.
coauthers = []
# The context manager closes the file handle even if parsing raises.
with open('data/AMiner-Coauthor.txt') as coautherfile:
    for line in coautherfile:
        # Strip only the line terminator, so a final line that lacks a
        # trailing newline keeps the last digit of its count.
        record = line.rstrip('\r\n')
        if not record:
            # Tolerate blank lines (e.g. at the end of the file).
            continue
        # Drop the single leading character, then split the three fields.
        auther1, auther2, count = record[1:].split('\t')
        coauthers.append((auther1, auther2, int(count)))

In [5]:
# Spot-check the first ten parsed (author1, author2, count) tuples.
coauthers[:10]

[('522324', '1034146', 1),
 ('1355779', '1229932', 2),
 ('688814', '947067', 2),
 ('1329221', '1140429', 1),
 ('742331', '314944', 1),
 ('898041', '1061829', 1),
 ('1075448', '1040028', 1),
 ('1218654', '1244844', 2),
 ('117148', '364153', 2),
 ('1335705', '738530', 1)]

In [6]:
# Rank the pairs by their count (third tuple element), largest first.
# The sort is stable, so pairs with equal counts keep their input order.
sortedcoauthers = sorted(coauthers, key=lambda pair: pair[2], reverse=True)
# Display the ten highest-count pairs.
sortedcoauthers[:10]

[('111806', '977442', 320),
 ('966551', '111806', 320),
 ('966551', '977442', 320),
 ('980079', '68033', 310),
 ('549347', '80953', 306),
 ('324627', '33938', 234),
 ('860814', '1693619', 216),
 ('946534', '1536687', 194),
 ('833156', '815734', 143),
 ('218997', '173556', 132)]

## 2. Visualize the top collaboration pairs (top 10 and top 1000)

In [14]:
# Credit: https://gist.github.com/quadrismegistus/92a7fba479fc1e7d2661909d19d4ae7e

def visualize(networkx_graph, name):
    """Copy a networkx graph into a pyvis ``Network`` and show it.

    Every node and edge is added to the pyvis network together with its
    attributes. An edge that has a ``weight`` but neither ``value`` nor
    ``width`` also gets ``value`` set to its ``weight``.

    Args:
        networkx_graph: Graph exposing ``nodes(data=True)`` and
            ``edges(data=True)``. It is not modified.
        name: File name of the page; it is appended to ``'docs/'``.

    Returns:
        Whatever ``pyvis_graph.show`` returns for ``'docs/' + name``.
    """
    pyvis_graph = Network(height=800, width=800, notebook=True)
    for node, node_attrs in networkx_graph.nodes(data=True):
        pyvis_graph.add_node(node, **node_attrs)

    for source, target, edge_attrs in networkx_graph.edges(data=True):
        # Work on a copy: the dict yielded here is the graph's own edge data,
        # so writing 'value' into it would silently alter the caller's graph.
        attrs = dict(edge_attrs)
        # If value/width are not given directly, fall back to the weight.
        if 'value' not in attrs and 'width' not in attrs and 'weight' in attrs:
            attrs['value'] = attrs['weight']
        pyvis_graph.add_edge(source, target, **attrs)

    return pyvis_graph.show('docs/' + name)

In [11]:
def make_graph(coauth_tuples):
    """Build an undirected graph from co-author records.

    Args:
        coauth_tuples: Iterable of indexable records. Items 0 and 1 are the
            edge endpoints and item 2 is stored as the edge's ``weight``.

    Returns:
        A new ``nx.Graph`` with one edge added per record, in input order.
    """
    graph = nx.Graph()
    for record in coauth_tuples:
        first, second, collab_count = record[0], record[1], record[2]
        graph.add_edge(first, second, weight=collab_count)
    return graph

In [12]:
# Graph the ten highest-count pairs and render them to docs/top10.html.
visualize(make_graph(sortedcoauthers[:10]), 'top10.html')

In [17]:
# Same rendering for the 1000 highest-count pairs, written to docs/top1000.html.
visualize(make_graph(sortedcoauthers[:1000]), 'top1000.html')