In [None]:
import numpy as np
import pandas
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
# Load data
# Both tables are read from CSV files located in the current working directory.
data = pandas.read_csv(filepath_or_buffer='authors_complete.csv')
pubs = pandas.read_csv(filepath_or_buffer='publications.csv')

In [None]:
# Generate organization information
# Keep only the two columns needed and build a name -> organization lookup.
# When a name occurs more than once, the last row wins.
df = data[['name', 'org']]
authors = dict(zip(df['name'], df['org']))
print(authors)

In [None]:
# Generate edges
# `edges` maps an (org_a, org_b) tuple to the number of publications that have
# at least one author from each of the two organizations. A pair is stored under
# a single orientation only; consumers must check both (a, b) and (b, a).
import ast
import itertools

edges = {}
missing_persons = set()
for pub in pubs.itertuples():
    # `pub.authors` is text holding a Python literal; it is parsed (not eval'ed)
    # and then iterated as a collection of author names.
    pub_authors = ast.literal_eval(pub.authors)
    orgs = set()
    for author in pub_authors:
        # Resolve each author individually: one unknown name must neither hide the
        # other unknown names nor drop the organizations of the known co-authors.
        if author in authors:
            orgs.add(authors[author])
        else:
            missing_persons.add(author)
    # combinations() yields every unordered pair of distinct organizations exactly
    # once, so each publication adds 1 (not 2) to the weight of a pair.
    for a, b in itertools.combinations(orgs, 2):
        # Reuse the orientation already stored for this pair, if any.
        pair = (b, a) if (b, a) in edges else (a, b)
        edges[pair] = edges.get(pair, 0) + 1
# NOTE: the number printed here is the count of organization pairs (edges).
print("Found edges for", len(edges.keys()), "organizations")
print("Missing persons", len(missing_persons))
print(edges)


In [None]:
# Generate adjacency matrix
# Every organization that appears as an endpoint of at least one edge.
x = list({org for pair in edges for org in pair})
side = len(x)
matrix = np.zeros((side, side))
print(x)
for i in range(side):
    for j in range(side):
        forward = (x[i], x[j])
        backward = (x[j], x[i])
        # Look the pair up under both orientations; when both keys exist the
        # reversed one takes precedence. Cells without an edge stay at 0.
        if backward in edges:
            matrix[i, j] = edges[backward]
        elif forward in edges:
            matrix[i, j] = edges[forward]
# Label rows and columns with the organization names.
adj_matrix = pandas.DataFrame(data=matrix, index=x, columns=x)

print(adj_matrix)
adj_matrix.to_excel('org_matrix.xlsx')

In [None]:
# Generate tuples
# One (source, target, weight) triple per edge, plus the set of all endpoints.
tuples = [(a, b, weight) for (a, b), weight in edges.items()]
nodes = {org for pair in edges for org in pair}
# Create graph
# Undirected graph: nodes are added first, then the weighted edges.
graph = nx.Graph()
graph.add_nodes_from(list(nodes))
graph.add_weighted_edges_from(tuples)

In [None]:
# Print key characteristics
__length = len(graph.nodes)
__connected = list(nx.connected_components(graph))
# Betweenness centrality is computed once and reused for the mean and the top node.
__cent = nx.betweenness_centrality(graph)
_max_cent = max(__cent.values())
_inf_node = [node for node, value in __cent.items() if value == _max_cent]
# Hop distances (no edge weights are passed) from every node to every node it can reach.
__dist = dict(nx.shortest_path_length(graph))
# Largest distance from each node to any node reachable from it.
__short = [max(lengths.values()) for lengths in __dist.values()]
# Distances between distinct, mutually reachable nodes. The average shortest path
# length must be taken over these, not over the per-node maxima in `__short`.
# Unreachable pairs do not appear in `__dist`, so a disconnected graph is handled
# by averaging within components.
__pair_dist = [
    distance
    for lengths in __dist.values()
    for distance in lengths.values()
    if distance > 0
]
__avg_path = sum(__pair_dist) / len(__pair_dist) if __pair_dist else 0.0
print(graph)
print('Degree centrality {:.2f}'.format(sum(nx.degree_centrality(graph).values()) / __length))
print('Closeness {:.2f}'.format(sum(nx.closeness_centrality(graph).values()) / __length))
print('Betweenness {:.2f}'.format(sum(__cent.values()) / __length))
print('Clustering coefficient {:.2f}'.format( sum(nx.clustering(graph).values()) / __length))
print('Connected components', len(__connected))
print('Diameter', max(__short))
print('Average shortest path length {:.2f}'.format(__avg_path))
print('Most influential node', _inf_node[0])
# diameter, pathlength, clustering coefficient, connected components

In [None]:
# Draw graph
# Node positions come from the Kamada-Kawai layout, scaled by 6; nodes are drawn
# without labels. The title is set after drawing, on the axes nx.draw used.
pos = nx.kamada_kawai_layout(graph, scale=6)
nx.draw(graph, pos, with_labels=False)
plt.title(label="Organizations network")

In [None]:
# Generate communities
import itertools
comp = nx.community.girvan_newman(graph)
# Lazy view over partitions with at most 100 communities. Nothing in this cell
# consumes it; iterating it later would advance `comp` as well.
limited = itertools.takewhile(lambda partition: len(partition) <= 100, comp)
# Take the first partition yielded by Girvan-Newman, each community as a list.
communities = list(map(list, next(comp)))
# Split into one-node communities (stored as the bare node) and larger ones.
singles = [members[0] for members in communities if len(members) == 1]
multis = [members for members in communities if len(members) != 1]
print('Singles', len(singles), ', Multis', len(multis))
# Largest and smallest size among the multi-node communities.
d = list(map(len, multis))
print(max(d), min(d))

In [None]:
import random
# Build one color per node, in graph iteration order: gray for single-node
# communities, a palette color for every multi-node community.
color_map = []
# Seven-step palette given as 0-255 RGB triples.
colors = [[254,235,226],
[252,197,192],
[250,159,181],
[247,104,161],
[221,52,151],
[174,1,126],
[122,1,119]]
# matplotlib expects RGB channels in the 0-1 range.
colors = [[channel / 255 for channel in rgb] for rgb in colors]
single_nodes = set(singles)
# node -> index of the multi-node community that contains it
community_index = {
    node: index
    for index, members in enumerate(multis)
    for node in members
}
for node in graph:
    index = community_index.get(node)
    if node in single_nodes or index is None:
        # Also used as a fallback for a node found in no community, so that
        # color_map always has exactly one entry per node.
        color_map.append('gray')
    else:
        # Wrap around the palette instead of raising IndexError when there are
        # more multi-node communities than colors.
        color_map.append(colors[index % len(colors)])
# Fixed seed so the force-directed layout is the same on every run.
layout = nx.spring_layout(graph, seed=42)
label = nx.draw_networkx_labels(graph, pos=layout, font_size=5)
plt.title('Organization communities by color, spring layout')
nx.draw(graph, node_color=color_map, pos=layout)
plt.show()

In [None]:
# Compute the Kamada-Kawai layout once and share it between the labels and the
# nodes, instead of running the layout algorithm twice on the same graph.
kk_layout = nx.kamada_kawai_layout(graph)
label = nx.draw_networkx_labels(graph, pos=kk_layout, font_size=5)
nx.draw(graph, node_color=color_map, pos=kk_layout)
plt.title('Organization communities by color, Kamada Kawai layout')

In [None]:
# Partition quality
# Modularity of the community partition, followed by the two scores returned by
# partition_quality, which are printed as coverage and performance in that order.
modularity = nx.community.modularity(graph, communities)
print("Graph modularity {:.2f}".format(modularity))
coverage, performance = nx.community.partition_quality(graph, communities)
print('Graph partition quality: Coverage {0:.2f}, Performance {1:.2f}'.format(coverage, performance))