In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from bokeh.io import export_png
from bokeh.io import output_file, show
from bokeh.plotting import figure, from_networkx, output_file
from bokeh.models import ColumnDataSource, LabelSet, Circle, MultiLine
from matplotlib.colors import is_color_like

Reading .xlsx file

In [None]:
# Module-level palette defaults; reader() rebinds both names via `global`.
node_colour = "#161547"  # fallback node colour
edge_colour = "#e0265e"  # fallback edge colour

# REQUIRES : prompt - text shown to the user when first asking for a colour
# EFFECTS : Keeps prompting until matplotlib's is_color_like accepts the answer,
#           then returns that answer unchanged
def _prompt_colour(prompt):
    colour = input(prompt)
    while not is_color_like(colour):
        colour = input("Invalid colour. Please try again")
    return colour


# EFFECTS : Prompts for an .xlsx file path until pandas can open it, reads it into a
#           DataFrame and takes its "team-members" column as the list of author strings.
#           Prompts for a node colour and an edge colour, then hands the author
#           strings to clean_data.
# MODIFIES : Sets the module-level node_colour and edge_colour
def reader():
    global node_colour
    global edge_colour

    filepath = input("Enter file path:")
    while True:
        try:
            df = pd.read_excel(filepath)
            break
        except OSError:
            # OSError covers FileNotFoundError as well as directories and
            # unreadable files, all of which mean the path must be re-entered.
            filepath = input("Invalid path. Please enter a valid file path")
    pub_authors_list = df["team-members"]

    node_colour = _prompt_colour("Enter a colour for the nodes as a hexidemical (ex: #A1B2C3)")
    # This prompt previously asked for the *node* colour a second time.
    edge_colour = _prompt_colour("Enter a colour for the edges as a hexidemical (ex: #D4E5F6)")

    clean_data(pub_authors_list)

Cleaning data and generating edge list

In [None]:
# REQUIRES : Iterable of publication author strings, with author names separated by "/"
# EFFECTS : Turns pub_authors_list into a 2d list of author names (one inner list per
#           publication), builds the set of all author names, generates every unordered
#           pair of distinct authors with an initial count of 0, and passes all three
#           to create_graph.
#           Entries that are not strings (e.g. NaN from an empty spreadsheet cell) are
#           skipped; names are stripped of surrounding whitespace, blank names are
#           dropped, and a name repeated within one publication is kept once.
def clean_data(pub_authors_list):
    authors_set = set()
    pub_list = []
    for pub_authors in pub_authors_list:
        # pandas represents an empty cell as NaN (a float), which has no .split().
        if not isinstance(pub_authors, str):
            continue
        stripped = (name.strip() for name in pub_authors.split("/"))
        # dict.fromkeys de-duplicates while keeping the original author order.
        names = list(dict.fromkeys(name for name in stripped if name))
        if not names:
            continue
        authors_set.update(names)
        pub_list.append(names)

    authors_list = list(authors_set)
    # Every unordered pair appears exactly once, keyed as (earlier, later) in authors_list.
    edge_list = {
        (first, second): 0
        for i, first in enumerate(authors_list)
        for second in authors_list[i + 1:]
    }
    create_graph(edge_list, authors_set, pub_list)

Creating and populating network graph

In [None]:
# REQUIRES : edge_data - mapping (or iterable) whose keys are (author, author) pairs
#            node_set - set of all author names
#            pubs_list - 2d list of author names, one inner list per publication
# EFFECTS : Creates a network graph of authors in which the weight of an edge is the
#           number of publications the two authors share, drops pairs that share no
#           publication, lays the graph out on a circle and passes it to plot_graph.
#           Every edge is tagged with the module-level edge_colour.
def create_graph(edge_data, node_set, pubs_list):
    G = nx.Graph()
    G.add_nodes_from(node_set)
    for key in edge_data:
        G.add_edge(*key, weight=0, color=edge_colour)

    for authors in pubs_list:
        for i in range(len(authors)):
            for j in range(i + 1, len(authors)):
                first, second = authors[i], authors[j]
                if first == second:
                    # Same name listed twice in one publication: no self-loops.
                    continue
                if G.has_edge(first, second):
                    G[first][second]['weight'] += 1
                else:
                    # Pair was not pre-registered in edge_data; add it instead of failing.
                    G.add_edge(first, second, weight=1, color=edge_colour)

    # Collect first, remove afterwards: deleting edges while iterating G.edges
    # raises "RuntimeError: dictionary changed size during iteration".
    unused_edges = [(u, v) for u, v, weight in G.edges(data='weight') if weight == 0]
    G.remove_edges_from(unused_edges)

    pos = nx.circular_layout(G)
    plot_graph(G, pos)

Converting network graph to bokeh plot

In [None]:
# REQUIRES : G - NetworkX graph whose edges carry 'color' and 'weight' attributes
#            layout - mapping of each node to its (x, y) position
# EFFECTS : Converts graph into a bokeh visual, colours nodes with the module-level
#           node_colour, draws each edge in its 'color' with a line width equal to its
#           'weight', adds a text label next to each node and shows the figure
def plot_graph(G, layout):
    p = figure()
    graph = from_networkx(G, layout, scale=1, center=(0,0))

    edge_attrs = [G.get_edge_data(a, b) for a, b in G.edges()]
    graph.edge_renderer.data_source.data["line_color"] = [attrs['color'] for attrs in edge_attrs]
    graph.edge_renderer.glyph.line_color = {'field': 'line_color'}
    graph.edge_renderer.data_source.data["line_width"] = [attrs['weight'] for attrs in edge_attrs]
    graph.edge_renderer.glyph.line_width = {'field': 'line_width'}
    # Fill was previously hard-coded to the default colour, so the user's choice
    # only changed the thin outline of each node.
    graph.node_renderer.glyph = Circle(size = 10, line_color = node_colour, fill_color = node_colour)

    # Names and coordinates are both read from layout, so a label is always paired
    # with its own node's position.
    names = list(layout)
    if names:
        x, y = zip(*(layout[name] for name in names))
    else:
        # zip(*[]) yields nothing to unpack; an empty graph simply has no labels.
        x, y = (), ()

    # returns text_align value based on location around circle
    def alignment(x_pos):
        if round(10*x_pos) == 0:
            return 'center'
        return 'left' if x_pos > 0 else 'right'

    text_align = [alignment(x_pos) for x_pos in x]
    # Push each label away from the circle: sideways for left/right labels,
    # and a little further vertically for the centred ones at the top and bottom.
    x_offset = [0 if align == 'center' else 10*np.sign(x_pos)
                for x_pos, align in zip(x, text_align)]
    y_offset = [0 if round(10*y_pos) == 0 else (15 if align == 'center' else 10)*np.sign(y_pos)
                for y_pos, align in zip(y, text_align)]

    source = ColumnDataSource({'x': x, 'y': y, 'name': names,
                               'text_align': text_align,
                               'x_offset': x_offset,
                               'y_offset': y_offset})
    labels = LabelSet(x ='x', y = 'y', text = 'name', text_align = 'text_align',
                      x_offset = 'x_offset', y_offset = 'y_offset',  source = source, text_baseline = 'middle')

    p.renderers.append(labels)
    p.renderers.append(graph)
    p.xaxis.visible = False
    p.xgrid.visible = False
    p.yaxis.visible = False
    p.ygrid.visible = False
    show(p)

# Run the whole pipeline: reader() prompts for input, then chains into
# clean_data -> create_graph -> plot_graph.
reader()