In [64]:
import itertools

In [65]:
from pybtex.database import parse_file as pybtex_parse_file

In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

In [67]:
%matplotlib inline

In [68]:
# BibTeX file that supplies the papers analysed in this notebook.
BIB_PATH = 'groups-in-graphs.corinna-vehlow.com.bib'
# Parse it with pybtex; the co-authorship loop below walks `vispapers.entries`.
vispapers = pybtex_parse_file(BIB_PATH)

In [134]:
# Empty undirected graph; the following cell adds one node per author and
# weighted co-authorship edges.
G = nx.Graph()

In [135]:
# Build the co-authorship graph: one node per author, one edge per pair of
# authors that share a paper, with `weight` counting their joint papers.
#
# `.items()` is used instead of the Python-2-only `.iteritems()` so the cell
# runs on both Python 2 and Python 3.
for name, entry in vispapers.entries.items():
    authors_this_paper = []
    # Entries without an 'author' role contribute nothing instead of raising
    # KeyError and aborting the whole build.
    for author in entry.persons.get('author', []):
        author_name = "{}, {}".format(author.get_part_as_text('last'), author.get_part_as_text('first'))
        # Double quotes are swapped for a placeholder token (presumably so the
        # names survive the quoted Pajek/.tree files used later -- confirm).
        author_name = author_name.replace('"', 'QUOT_REPLACE')
        if not G.has_node(author_name):
            # Keyword form is accepted by networkx 1.x and 2.x+ alike; the old
            # `attr_dict=` argument would be stored as a literal attribute
            # named 'attr_dict' on networkx >= 2.0.
            G.add_node(author_name, author_name=author_name)
        authors_this_paper.append(author_name)
    # combinations() yields nothing for fewer than two authors, so no length
    # guard is needed.
    for a1, a2 in itertools.combinations(authors_this_paper, 2):
        if G.has_edge(a1, a2):
            G[a1][a2]['weight'] += 1
        else:
            G.add_edge(a1, a2, weight=1)
        

In [71]:
# Materialise the connected components (one collection of node keys each) so
# they can be iterated more than once.
components = []
components.extend(nx.connected_components(G))

In [72]:
# List every co-author pair, strongest collaboration (highest weight) first.
edges_by_weight = sorted(
    G.edges(data=True),
    key=lambda edge: edge[2]['weight'],
    reverse=True,
)
for a, b, data in edges_by_weight:
    print('{a} | {b} | {w}'.format(a=a, b=b, w=data['weight']))

Weiskopf, Daniel | Vehlow, Corinna | 8
Burch, Michael | Weiskopf, Daniel | 7
Beck, Fabian | Diehl, Stephan | 6
Munzner, Tamara | Archambault, Daniel | 5
Munzner, Tamara | Auber, David | 5
Auber, David | Archambault, Daniel | 5
Burch, Michael | Diehl, Stephan | 5
Didimo, Walter | Liotta, Giuseppe | 5
Auber, David | Bourqui, Romain | 4
Fekete, Jean-Daniel | Henry, Nathalie | 4
Feng, Qing-Wen | Eades, Peter | 4
Montecchiani, Fabrizio | Didimo, Walter | 4
Hu, Yifan | Kobourov, Stephen | 4
Byelas, Heorhiy | Telea, Alexandru | 4
Marriott, Kim | Dwyer, Tim | 4
Muelder, Chris | Ma, Kwan-Liu | 4
Auber, David | Jourdan, Fabien | 3
Nguyen, Quang | Huang, Mao | 3
Beck, Fabian | Burch, Michael | 3
Beck, Fabian | Vehlow, Corinna | 3
Beck, Fabian | Weiskopf, Daniel | 3
Burch, Michael | Vehlow, Corinna | 3
Tal, Ayellet | Frishman, Yaniv | 3
Wijk, Jarke | Pretorius, A. | 3
Schmalstieg, Dieter | Streit, Marc | 3
Schmalstieg, Dieter | Lex, Alexander | 3
Lex, Alexander | Streit, Marc | 3
Bourqui, Romain |

In [73]:
import igraph

In [74]:
# Convert the networkx graph to igraph via its dense adjacency matrix.
# NOTE(review): Graph.Adjacency appears to default to a *directed* graph and to
# read matrix entries as edge counts rather than weights -- confirm this is the
# intended input for community detection on an undirected, weighted graph.
adjacency_rows = nx.to_numpy_matrix(G).tolist()
g2 = igraph.Graph.Adjacency(adjacency_rows)

In [75]:
# Run igraph's Infomap community detection on g2. The returned clustering is
# only displayed here, not assigned, so no later visible cell uses it.
g2.community_infomap()

<igraph.clustering.VertexClustering at 0x113f34c90>

In [76]:
# Export the graph in Pajek .net format.
# NOTE(review): 'coauthorship.tree', read by the following cells, is not
# produced anywhere in the visible notebook -- presumably an external
# community-detection tool generates it from this .net file; confirm and
# document that step so the notebook can be re-run from scratch.
nx.write_pajek(G, 'coauthorship.net')

In [77]:
# Parse 'coauthorship.tree' into (node, author, cl, flow) tuples.
# Each data line has the layout:  <cl> <flow> "<author>" <node>
# The quoted author contains spaces ("Last, First"), so a plain whitespace
# split would cut the name in two and shift the node id out of position.
# Instead the two leading fields and the single trailing field are peeled off,
# leaving the author text intact in the middle.
with open('coauthorship.tree', 'r') as f:
    rows = []
    for line in f:
        line = line.strip()
        # Skip blank lines (which previously raised IndexError) and '#'
        # header/comment lines.
        if not line or line.startswith('#'):
            continue

        cl, flow, rest = line.split(None, 2)
        author, node = rest.rsplit(None, 1)
        author = author.strip('"')
        row = ( node, author, cl, flow )
        rows.append(row)

In [78]:
# Tabulate the parsed tuples; column order matches the (node, author, cl, flow)
# tuple layout built in the parsing cell.
df = pd.DataFrame(rows, columns=['node', 'author', 'cl', 'flow'])

In [137]:
# Load the .tree file directly with pandas: space-separated fields, author in
# double quotes, first two lines skipped. This rebinds `df`, replacing the
# frame built from the manually parsed rows.
df = pd.read_csv(
    'coauthorship.tree',
    sep=' ',
    quotechar='"',
    skiprows=2,
    names=['cl', 'flow', 'author', 'node'],
)

In [138]:
# Derive two cluster labels from the colon-separated path in `cl`:
#   cl_bottom -- the path with its last component removed ('1:1:1:2' -> '1:1:1')
#   cl_top    -- the first component only                 ('1:1:1:2' -> '1')
df['cl_bottom'] = df.cl.map(lambda path: path.rpartition(':')[0])
df['cl_top'] = df.cl.map(lambda path: path.partition(':')[0])

In [139]:
# Index the table by author name so later cells can build author -> value
# mappings with `.to_dict()`.
# The guard makes the cell safe to re-run: once 'author' has moved into the
# index, a second unconditional set_index('author') would raise KeyError.
if df.index.name != 'author':
    df = df.set_index('author')

In [140]:
# Display the author-indexed table with the derived cl_bottom / cl_top columns.
df

Unnamed: 0_level_0,cl,flow,node,cl_bottom,cl_top
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Weiskopf, Daniel",1:1:1:1,0.018467,142,1:1:1,1
"Vehlow, Corinna",1:1:1:2,0.014774,276,1:1:1,1
"Burch, Michael",1:1:1:3,0.011542,103,1:1:1,1
"Diehl, Stephan",1:1:1:4,0.010157,136,1:1:1,1
"Beck, Fabian",1:1:1:5,0.009234,77,1:1:1,1
"Fritz, Peter",1:1:1:6,0.001385,39,1:1:1,1
"Schmauder, Hansj{\QUOT_REPLACE{o}}rg",1:1:1:7,0.001385,403,1:1:1,1
"Auw{\QUOT_REPLACEa}rter, Patrick",1:1:1:8,0.001385,432,1:1:1,1
"Schmidt, Benjamin",1:1:1:9,0.000923,52,1:1:1,1
"Hlawatsch, Marcel",1:1:1:10,0.000923,53,1:1:1,1


In [141]:
from collections import Counter
# Histogram of connected-component sizes: size -> number of components.
c = Counter(len(comp) for comp in components)

In [142]:
# Show the component-size histogram (key: component size, value: how many
# components have that size).
c

Counter({1: 5,
         2: 24,
         3: 13,
         4: 3,
         5: 5,
         6: 2,
         7: 6,
         9: 4,
         10: 2,
         11: 1,
         12: 1,
         14: 1,
         23: 1,
         31: 1,
         127: 1})

In [143]:
# Attach the cluster labels to the graph nodes, keyed by author name (the
# DataFrame index at this point).
# Keyword arguments are used because the positional order of `name` and
# `values` was swapped between networkx 1.x and 2.0; naming them keeps the
# call correct on either version.
nx.set_node_attributes(G, name='cl_bottom', values=df.cl_bottom.to_dict())
nx.set_node_attributes(G, name='cl_top', values=df.cl_top.to_dict())

In [144]:
# Show every node with its attribute dict to check that cl_bottom / cl_top
# were attached alongside author_name.
G.nodes(data=True)

[('Wu, Jie', {'author_name': 'Wu, Jie', 'cl_bottom': '5:1', 'cl_top': '5'}),
 ('Smith, Adam',
  {'author_name': 'Smith, Adam', 'cl_bottom': '20', 'cl_top': '20'}),
 ('Nakazawa, Rina',
  {'author_name': 'Nakazawa, Rina', 'cl_bottom': '2:1:2:2', 'cl_top': '2'}),
 ('Mader, Martin',
  {'author_name': 'Mader, Martin', 'cl_bottom': '12', 'cl_top': '12'}),
 ('Sangal, Neeraj',
  {'author_name': 'Sangal, Neeraj', 'cl_bottom': '27', 'cl_top': '27'}),
 ('Elmqvist, Niklas',
  {'author_name': 'Elmqvist, Niklas', 'cl_bottom': '4:1', 'cl_top': '4'}),
 ('Philippi, Stephan',
  {'author_name': 'Philippi, Stephan', 'cl_bottom': '8', 'cl_top': '8'}),
 ('Vernik, Rudi',
  {'author_name': 'Vernik, Rudi', 'cl_bottom': '30', 'cl_top': '30'}),
 ('Goesmann, Alexander',
  {'author_name': 'Goesmann, Alexander', 'cl_bottom': '21', 'cl_top': '21'}),
 ('Tanaka, Jiro',
  {'author_name': 'Tanaka, Jiro', 'cl_bottom': '22:1', 'cl_top': '22'}),
 ('Mary, Patrick',
  {'author_name': 'Mary, Patrick', 'cl_bottom': '2:1:1:1', 

In [145]:
# Swap the author-name node keys for the ids in the `node` column; the name
# remains available through each node's 'author_name' attribute.
# Note: this rebinds G, so the cell is not meant to be run twice in a row.
author_to_node_id = df['node'].to_dict()
G = nx.relabel_nodes(G, author_to_node_id)

In [146]:
# Check the relabelling: node keys should now be integer ids, with the author
# name kept in the attribute dict.
G.nodes(data=True)

[(1, {'author_name': 'Wu, Jie', 'cl_bottom': '5:1', 'cl_top': '5'}),
 (2, {'author_name': 'Smith, Adam', 'cl_bottom': '20', 'cl_top': '20'}),
 (3, {'author_name': 'Nakazawa, Rina', 'cl_bottom': '2:1:2:2', 'cl_top': '2'}),
 (4, {'author_name': 'Mader, Martin', 'cl_bottom': '12', 'cl_top': '12'}),
 (5, {'author_name': 'Sangal, Neeraj', 'cl_bottom': '27', 'cl_top': '27'}),
 (6, {'author_name': 'Elmqvist, Niklas', 'cl_bottom': '4:1', 'cl_top': '4'}),
 (7, {'author_name': 'Philippi, Stephan', 'cl_bottom': '8', 'cl_top': '8'}),
 (8, {'author_name': 'Vernik, Rudi', 'cl_bottom': '30', 'cl_top': '30'}),
 (9,
  {'author_name': 'Goesmann, Alexander', 'cl_bottom': '21', 'cl_top': '21'}),
 (10, {'author_name': 'Tanaka, Jiro', 'cl_bottom': '22:1', 'cl_top': '22'}),
 (11, {'author_name': 'Mary, Patrick', 'cl_bottom': '2:1:1:1', 'cl_top': '2'}),
 (12, {'author_name': 'Nguyen, QuanHoang', 'cl_bottom': '12', 'cl_top': '12'}),
 (13, {'author_name': 'Kramer, Andrei', 'cl_bottom': '1:1:2', 'cl_top': '1'}),

In [147]:
from networkx.readwrite import json_graph
import json

In [148]:
# Serialise the graph in node-link form and write it to 'coauthorship.json'.
json_data = json_graph.node_link_data(G)
with open('coauthorship.json', 'w') as json_file:
    json.dump(json_data, json_file)

In [153]:
# Read the exported file back in to inspect exactly what was written.
with open('coauthorship.json') as json_file:
    j = json.loads(json_file.read())

In [154]:
# Sanity check on the export: count how many keys each node record carries.
# (The name `ctypes` shadows the stdlib module of the same name; it is kept
# because the next cell displays it.)
ctypes = Counter(len(node) for node in j['nodes'])

In [155]:
# A single entry here means every exported node record has the same number of
# keys, i.e. no node is missing an attribute.
ctypes

Counter({4: 457})

In [156]:
# Display the in-memory nodes again (presumably for side-by-side comparison
# with the reloaded JSON in `j`).
G.nodes(data=True)

[(1, {'author_name': 'Wu, Jie', 'cl_bottom': '5:1', 'cl_top': '5'}),
 (2, {'author_name': 'Smith, Adam', 'cl_bottom': '20', 'cl_top': '20'}),
 (3, {'author_name': 'Nakazawa, Rina', 'cl_bottom': '2:1:2:2', 'cl_top': '2'}),
 (4, {'author_name': 'Mader, Martin', 'cl_bottom': '12', 'cl_top': '12'}),
 (5, {'author_name': 'Sangal, Neeraj', 'cl_bottom': '27', 'cl_top': '27'}),
 (6, {'author_name': 'Elmqvist, Niklas', 'cl_bottom': '4:1', 'cl_top': '4'}),
 (7, {'author_name': 'Philippi, Stephan', 'cl_bottom': '8', 'cl_top': '8'}),
 (8, {'author_name': 'Vernik, Rudi', 'cl_bottom': '30', 'cl_top': '30'}),
 (9,
  {'author_name': 'Goesmann, Alexander', 'cl_bottom': '21', 'cl_top': '21'}),
 (10, {'author_name': 'Tanaka, Jiro', 'cl_bottom': '22:1', 'cl_top': '22'}),
 (11, {'author_name': 'Mary, Patrick', 'cl_bottom': '2:1:1:1', 'cl_top': '2'}),
 (12, {'author_name': 'Nguyen, QuanHoang', 'cl_bottom': '12', 'cl_top': '12'}),
 (13, {'author_name': 'Kramer, Andrei', 'cl_bottom': '1:1:2', 'cl_top': '1'}),

In [157]:
# Inspect the exported edge list; each link has integer source/target and the
# co-authorship weight.
# NOTE(review): source values start at 0 while the node ids shown above start
# at 1, so these look like positions in j['nodes'] rather than node ids --
# confirm before consuming the file elsewhere.
j['links']

[{u'source': 0, u'target': 247, u'weight': 1},
 {u'source': 0, u'target': 328, u'weight': 1},
 {u'source': 0, u'target': 201, u'weight': 1},
 {u'source': 0, u'target': 281, u'weight': 1},
 {u'source': 0, u'target': 14, u'weight': 1},
 {u'source': 1, u'target': 96, u'weight': 1},
 {u'source': 1, u'target': 74, u'weight': 1},
 {u'source': 1, u'target': 364, u'weight': 1},
 {u'source': 1, u'target': 192, u'weight': 1},
 {u'source': 2, u'target': 223, u'weight': 1},
 {u'source': 2, u'target': 67, u'weight': 1},
 {u'source': 2, u'target': 252, u'weight': 1},
 {u'source': 2, u'target': 117, u'weight': 2},
 {u'source': 3, u'target': 97, u'weight': 1},
 {u'source': 4, u'target': 144, u'weight': 1},
 {u'source': 4, u'target': 68, u'weight': 1},
 {u'source': 4, u'target': 13, u'weight': 1},
 {u'source': 5, u'target': 255, u'weight': 1},
 {u'source': 5, u'target': 113, u'weight': 1},
 {u'source': 5, u'target': 114, u'weight': 1},
 {u'source': 5, u'target': 115, u'weight': 1},
 {u'source': 5, u'ta