In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objs as go
import plotly as py
from fa2 import ForceAtlas2
import matplotlib.colors as pltcolors

In [2]:
G = nx.read_gpickle('Data/postive_books_graph.pcl')

d = dict(G.degree)
d_between = dict(nx.betweenness_centrality(G))
d_eigen = dict(nx.eigenvector_centrality(G))

total = pd.DataFrame(data = {'Name' : list(d.keys()), 
                             'Degree' : list(d.values()),
                             'Betweenness_centrality' : list(d_between.values()),
                             'Eigenvector_centrality' : list(d_eigen.values())})

In [None]:
print(f"The network consists of {nx.number_of_nodes(G)} nodes and {nx.number_of_edges(G)} edges.")

# Plotting network

In [None]:
colors, pos = set_generic_node_attributes(G, 'genre', scaling=0.1, grav=10, log=False,  n_f2_iter=2000)

In [None]:
edge_trace = edges_to_plot(G, pos)

In [None]:
degree_node_trace = nodes_to_plot(G, 'Degree', colors, pos)
G_degree = make_fig(edge_trace, degree_node_trace, 'Network with nodesize <br>proportional to degree')

In [None]:
py.offline.plot(G_degree, filename = 'Plots/G_degree.html', auto_open=False)

In [None]:
between_node_trace = nodes_to_plot(G, 'Betweenness_centrality', colors, pos)
G_between = make_fig(edge_trace, between_node_trace, 'Network with nodesize <br>proportional to betweenness-centrality')

In [None]:
py.offline.plot(G_between, filename = 'Plots/G_between.html', auto_open=False)

In [None]:
eigen_node_trace = nodes_to_plot(G, 'Eigenvector_centrality', colors, pos)
G_eigen = make_fig(edge_trace, eigen_node_trace, 'Network with nodesize <br>proportional to eigenvector-centrality')

In [None]:
py.offline.plot(G_eigen, filename = 'Plots/G_eigen.html', auto_open=False)

In [11]:
n_removed = int(G.number_of_nodes()/100)
G_new = remove_nodes_based_on_degree(G, n_top=n_removed)
print(f"Excluding the {n_removed} nodes with highest indegree and removing nodes left without edges, results en a \
subnetwork with {nx.number_of_nodes(G_new)} nodes and {nx.number_of_edges(G_new)} edges.")

Excluding the 19 nodes with highest indegree and removing nodes left without edges, results en a subnetwork with 1687 nodes and 6692 edges.


In [12]:
colors_new, pos_new = set_generic_node_attributes(G_new, 'genre', scaling=0.1, grav=10, log=False,  n_f2_iter=2000)

In [15]:
edge_trace_new = edges_to_plot(G_new, pos_new)

In [28]:
node_trace_new = nodes_to_plot(G_new, 'Degree', colors_new, pos_new, for_community=True)
G_degree_new = make_fig(edge_trace_new, node_trace_new, 'Sub-Network with nodesize <br>proportional to degree')

Getting node coordinates
Setting hover text for nodes
Creating figure


In [29]:
py.offline.plot(G_degree_new, filename = 'Plots/G_degree_new.html', auto_open=True)

'Plots/G_degree_new.html'

# Functions

In [3]:
def set_generic_node_attributes(G, color_by, scaling=1, grav=1, log=False,  n_f2_iter=2000):
    '''
    Function to get colors given a given group, and initialise the Force Atalas algorithm. 
    
    Parameters:
    G: networkx graph with attribute color_by
    color_by: attribute in graph G that graph should be colored by
    scaling: hyperparameter for Force Atlas algorithm. By default set to 1
    grav: gravity hyperparameter for Force Atlas algorithm. By default set to 1
    log: if True progress bar of tuning of Force Atlas will be displayed
    n_f2_iter: number of iterations for Force Atlas algorithm. By default set to 2000
    
    Return:
    colors: map for color of node in graph
    pos: positions for nodes in graph
    '''
    
    # Getting mapping of colors for nodes to seperate on genre:
    groups = set(nx.get_node_attributes(G,color_by).values())
    colors_tmp = list(pltcolors._colors_full_map.values())[0:len(groups)]
    cmap = dict(zip(groups, colors_tmp))
    nodes = G.nodes()
    colors = [cmap.get(G.node[n][color_by]) for n in nodes]

    #Defining positions using Force Atlas algorithm
    forceatlas2 = ForceAtlas2(
                        # Behavior alternatives
                        outboundAttractionDistribution=True,  # Dissuade hubs
                        linLogMode=False,  # NOT IMPLEMENTED
                        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
                        edgeWeightInfluence=1.0,

                        # Performance
                        jitterTolerance=1.0,  # Tolerance
                        barnesHutOptimize=True,
                        barnesHutTheta=1.2,
                        multiThreaded=False,  # NOT IMPLEMENTED

                        # Tuning
                        scalingRatio=scaling,
                        strongGravityMode=False,
                        gravity=grav,

                        # Log
                        verbose=log)
    
    pos = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=n_f2_iter)
    
    return colors, pos

In [4]:
def make_plot(G, d_measure, title, colors = 0):
    '''
    Function generating an interactive plot of network.
    
    INPUT:
    - G: Graph as networkx graph
    - d_measure: Measure defining size of nodes as string (possible values: 'Degree', 'Betweenness_centrality', 'Eigenvector_centrality')
    - title: Title on plot as string
    - Colors: Mapping of colors (Should not be set )
    '''
    
    
    
    if d_measure == 'Degree':
        add = 50
        scale = 80
    elif d_measure == 'Betweenness_centrality':
        add = 0.001
        scale = 100 
    else:
        add = 0
        scale = 25
    
    # Get coordinates of edges
    print("Getting edge coordinates")
    edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')
    for edge in G.edges():
        x0, y0 = pos.get(edge[0])
        x1, y1 = pos.get(edge[1])
        edge_trace['x'] += tuple([x0, x1, None])
        edge_trace['y'] += tuple([y0, y1, None])
    
    #Get coordinates of nodes
    print("Getting node coordinates")
    node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=False,
        colorscale='YlGnBu',
        reversescale=True,
        color=colors,
        size=((total[d_measure]+add)/total[d_measure].max())*scale,
        line=dict(width=2)))
    for node in G.nodes():
        x, y = pos.get(node)
        node_trace['x'] += tuple([x])
        node_trace['y'] += tuple([y])
    
    # Get hover text for nodes
    print("Setting hover text for nodes")
    for node, adjacencies in enumerate(G.adjacency()):
        node_info = 'Name: ' + str(adjacencies[0]) + '<br># of ingoing connections: '+str(len(adjacencies[1]))
        node_trace['text']+=tuple([node_info])
    
    # Create figures
    print("Creating figure")
    fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title=title,
                titlefont=dict(size=20),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                plot_bgcolor = 'white'))
    return fig

In [5]:
def edges_to_plot(G, pos):
    '''Function that gets the position for all the edges'''
    edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')
    for edge in G.edges():
        x0, y0 = pos.get(edge[0])
        x1, y1 = pos.get(edge[1])
        edge_trace['x'] += tuple([x0, x1, None])
        edge_trace['y'] += tuple([y0, y1, None])
    return edge_trace

In [27]:
def nodes_to_plot(G, d_measure, colors, pos, for_community=False):
    '''Function that gets the position for all the nodes and adds hover text'''
    if d_measure == 'Degree':
        add = 50
        scale = 80
    elif d_measure == 'Betweenness_centrality':
        add = 0.001
        scale = 100 
    else:
        add = 0
        scale = 25
        
    #Get coordinates of nodes
    print("Getting node coordinates")
    node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=False,
        colorscale='YlGnBu',
        reversescale=True,
        color=colors,
        size=((total[d_measure]+add)/total[d_measure].max())*scale,
        line=dict(width=2)))
    for node in G.nodes():
        x, y = pos.get(node)
        node_trace['x'] += tuple([x])
        node_trace['y'] += tuple([y])
    
    # Get hover text for nodes
    print("Setting hover text for nodes")
    genres = nx.get_node_attributes(G, 'genre')
    for node, adjacencies in enumerate(G.adjacency()):
        if for_community:
            node_info = 'Name: ' + str(adjacencies[0]) + '<br>Genre: ' + genres[str(adjacencies[0])]
        else:
            node_info = 'Name: ' + str(adjacencies[0]) + '<br># of ingoing connections: '+str(len(adjacencies[1]))
        node_trace['text']+=tuple([node_info])
    
    return node_trace

In [7]:
def make_fig(edge_trace, node_trace, title):
    '''Function that creates a figure based on edge- and node-trace'''
    # Create figures
    print("Creating figure")
    fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title=title,
                titlefont=dict(size=20),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                plot_bgcolor = 'white'))
    return fig

In [8]:
def make_fig2(node_trace, title):
    '''Function that creates a figure based on node-trace (note there is no edges in this figure)'''
    # Create figures
    print("Creating figure")
    fig = go.Figure(data=[node_trace],
             layout=go.Layout(
                title=title,
                titlefont=dict(size=20),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                plot_bgcolor = 'white'))
    return fig

In [9]:
def remove_nodes_based_on_degree(G, n_top):
    ''' Function that removes the top n nodes based on degree and further removes the nodes
    that are left with no edges 
    
    Parameters:
    G: networkx graph 
    n_top: Number of nodes that should be removed. By default set to 7
    
    Return:
    G: networkx graph
    '''
    
    Degree_top1pct = pd.DataFrame(list(sorted(G.degree, key=lambda x: x[1], reverse=True)),
                                  columns = ("Name", "Degree")).iloc[n_top]["Degree"]
    remove_top = [node for node,degree in G.degree if degree >= Degree_top1pct]
    G.remove_nodes_from(remove_top)
    remove_0 = [node for node,degree in G.degree if degree == 0]
    G.remove_nodes_from(remove_0 )
    
    return G