In [None]:
import numpy as np
import pandas
import matplotlib.pyplot as plt
import networkx as nx
import arxiv

In [None]:
# Read the publications table from the CSV file in the working directory.
# The 'authors' column of this frame is parsed in the next cell.
data = pandas.read_csv(filepath_or_buffer='publications.csv')

In [None]:
# Collect the distinct author names that occur across all publications.
import ast
# Every entry of the 'authors' column is a string holding a Python literal;
# literal_eval parses it without executing arbitrary code. Each parsed entry
# is iterated below as a collection of author names.
authors = [ast.literal_eval(raw) for raw in data['authors'].values]
# Flatten the per-publication collections into one set of names.
unique = {name for names in authors for name in names}
print("Found", len(unique), 'unique authors')

In [None]:
%%script false --no-raise-error
# Option 1 (disabled by the cell magic above): look up an institution for
# every author through the INSPIRE HEP authors API.
#
# Results are collected in `author_orgs` rather than `authors`: the
# co-authorship cell iterates `authors` as the per-publication name lists, so
# rebinding that name here would break it if this cell were ever enabled.
#
# Names are matched by comparing their whitespace/comma separated tokens
# regardless of order. The previous comparison put a comma-joined string
# against a space-joined one, so it could never succeed for a name made of
# more than one word.
import requests
import json
import time
from urllib.parse import quote

author_orgs = []
baseUrl = "https://inspirehep.net/api/authors?q="
for count, author in enumerate(unique, start=1):
    # The query is the name with its words reversed and comma-joined.
    author_names = author.split(' ')
    author_names.reverse()
    author_name = ','.join(author_names)
    # Percent-encode the query so spaces, commas and non-ASCII characters in
    # a name cannot corrupt the URL.
    url = baseUrl + quote(author_name)
    try:
        response = requests.get(url, timeout=30).json()
    except (requests.RequestException, ValueError) as error:
        # Network failure or a body that is not JSON: skip this author only.
        print('Lookup failed for', author, ':', error)
        response = {}
    hits = response.get("hits", {}).get("hits", []) if isinstance(response, dict) else []
    wanted_tokens = sorted(author.split())
    for hit in hits:
        metadata = hit.get('metadata', {})
        result_name = metadata.get('name', {}).get('value', '')
        if sorted(result_name.replace(',', ' ').split()) != wanted_tokens:
            continue
        # A hit without positions (missing key or empty list) is skipped on
        # its own instead of aborting the remaining hits for this author.
        positions = metadata.get('positions') or []
        org = positions[0].get('institution') if positions else None
        if org:
            author_orgs.append({'author': author, 'org': org})
    # Pause after every 15 requests, independent of whether the lookup above
    # produced a usable result.
    if count % 15 == 0:
        time.sleep(5.1)
        print(author_orgs)
print(author_orgs)

In [None]:
# Option 2: generate network based on cooperation
# coop[a][b] is incremented once for every pairing of two different names
# a and b that appear together in one author list.

coop = {name: {} for name in unique}

for author_list in authors:
    for cooperator in author_list:
        partners = coop[cooperator]
        for coop2 in author_list:
            # An author is not counted as their own co-author.
            if cooperator == coop2:
                continue
            partners[coop2] = partners.get(coop2, 0) + 1

In [None]:
# normalize values
# Divide every co-authorship count by the largest count found anywhere in
# `coop`, so the strongest link gets weight 1.
max_val = max(
    (number for coops in coop.values() for number in coops.values()),
    default=0,
)

# Only existing keys are reassigned, so updating each inner dict while
# walking its keys is safe.
for coops in coop.values():
    for cooperator in coops:
        coops[cooperator] = coops[cooperator] / max_val


In [None]:
# Generate (author, co-author, weight) edge triples from the nested dict.
# Built with a comprehension so that no loop variable is left behind in the
# notebook namespace; the previous loop bound each triple to the name
# `tuple`, which hid the builtin `tuple` for every later cell.
tuples = [
    (node, node2, weight)
    for node, val in coop.items()
    for node2, weight in val.items()
]
# Create graph
# Nodes are added explicitly first so authors without any co-author still
# appear in the graph. `coop` holds each pair in both directions; the
# undirected Graph stores them as a single edge.
graph = nx.Graph()
graph.add_nodes_from(coop.keys())
graph.add_weighted_edges_from(tuples)

In [None]:
# Print key characteristics
# Each per-node metric is averaged over all nodes of the graph.
__length = graph.number_of_nodes()
__connected = list(nx.connected_components(graph))
print(graph)
for template, metric in (
    ('Degree centrality {:.2f}', nx.degree_centrality),
    ('Closeness {:.2f}', nx.closeness_centrality),
    ('Betweenness {:.2f}', nx.betweenness_centrality),
    ('Clustering coefficient {:.2f}', nx.clustering),
):
    # Every metric function returns a node -> value mapping.
    print(template.format(sum(metric(graph).values()) / __length))
print('Connected components', len(__connected))
# Metrics of interest: diameter, path length, clustering coefficient,
# connected components (diameter and path length are not computed here).

In [None]:
# Draw graph
# Node positions come from the Kamada-Kawai layout, scaled by 10; the
# drawing itself omits node labels.
layout_scale = 10
pos = nx.kamada_kawai_layout(graph, scale=layout_scale)
nx.draw(graph, pos, with_labels=False)

In [None]:
# Generate communities
import itertools
# girvan_newman returns a lazy iterator over successive partitions of the
# graph; only the first partition is taken here. The earlier `takewhile`
# wrapper around `comp` was never consumed, so it had no effect on the result
# and has been dropped.
comp = nx.community.girvan_newman(graph)
communities = [list(members) for members in next(comp)]
# Split into one-node communities (stored as the bare node) and the rest
# (stored as member lists).
singles = [members[0] for members in communities if len(members) == 1]
multis = [members for members in communities if len(members) != 1]
print('Singles', len(singles), ', Multis', len(multis))
d = [len(x) for x in multis]
# max()/min() raise ValueError on an empty list, e.g. when every community
# consists of a single node.
if d:
    print(max(d), min(d))
else:
    print('No communities with more than one member')

In [None]:
import random

# Colour every node by its community: one-node communities are gray, larger
# ones get an RGB triple whose red channel grows with community size and
# whose green/blue channels grow with the community's index in `multis`.
#
# NOTE(review): 178 and 30 presumably mirror the number of multi-member
# communities and the largest community size printed by the community cell -
# confirm against the current data. Each channel is clamped to 1.0 because
# matplotlib rejects RGB components outside the 0-1 range, which would
# otherwise happen as soon as the data outgrows these constants.
COLOR_STEPS = 178
MAX_COMMUNITY_SIZE = 30

# Build a node -> colour lookup once instead of scanning `singles` and every
# community for each node.
node_colors = {}
for i, m in enumerate(multis):
    n = min(i / COLOR_STEPS, 1.0)
    color = [min(len(m) / MAX_COMMUNITY_SIZE, 1.0), n, n]
    for member in m:
        node_colors.setdefault(member, color)
for node in singles:
    node_colors[node] = 'gray'

# Exactly one colour per node, in graph iteration order. A node that belongs
# to no listed community falls back to gray, so the colour list always
# matches the node list in length.
color_map = [node_colors.get(node, 'gray') for node in graph]
nx.draw(graph, node_color=color_map, pos=nx.kamada_kawai_layout(graph), with_labels=False)
plt.title('Author communities by color')
plt.show()