# Graph exploration

In [1]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import matplotlib
import networkx as nx

# Directory holding the input files. The trailing slash matters: file names
# are appended to this prefix by plain string concatenation.
Data_path = 'Data/'

Load the node attributes DataFrame

In [24]:
# Read the node attributes table from the data folder.
attributes_csv = Data_path + 'nodes_attributes.csv'
nodes_attributes_df = pd.read_csv(attributes_csv)

Load the Adjacency matrix

In [None]:
# NOTE: pickle can execute arbitrary code while loading; only open adjacency
# files that come from a trusted source.
with open(Data_path+'Adjacency_matrix.pickle', 'rb') as src:
    A = pickle.load(src)

# Report the shape once the file handle has been released.
print(f'>>> Loading Adjacency matrix with shape {A.shape}')

In [None]:
# Sanity check: the adjacency matrix should have as many rows as the
# attributes table (both are expected to equal the number of nodes).
print(f'There are {A.shape[0]} nodes and {nodes_attributes_df.shape[0]} attributes rows.')

Build a networkX graph from the adjacency matrix

In [None]:
def graph_summary(G):
    """
    Print the main properties of the given nx.Graph: number of edges, number
    of nodes, average degree, number of connected components and average
    clustering coefficient. A graph without nodes is reported with an average
    degree and an average clustering coefficient of 0.
    ------
    Input
        |---- G : nx.Graph
    Output
        |---- None
    """
    n_edges = G.number_of_edges()
    n_nodes = G.number_of_nodes()
    n_connect_comp = nx.number_connected_components(G)

    # Both averages are undefined for a graph without nodes
    # (2*n_edges/n_nodes would divide by zero), so report 0 in that case.
    if n_nodes:
        # each edge contributes to the degree of its two end points
        avg_degree = 2*n_edges/n_nodes
        avg_cluster_coef = nx.average_clustering(G)
    else:
        avg_degree = 0.0
        avg_cluster_coef = 0.0

    # print() separates its arguments with a space, which is what indents
    # every '-->' line by one character in the output.
    print('>>> Network summary : \n',
          f'--> {n_edges} edges\n',
          f'--> {n_nodes} nodes\n',
          f'--> average degree : {avg_degree:.0f}\n',
          f'--> connected component : {n_connect_comp}\n',
          f'--> average clustering coefficient : {avg_cluster_coef:.3f}')

In [None]:
# Drop weak links: every entry below 0.25 is replaced by 0, all other entries
# are kept unchanged.
below_threshold = A < 0.25
A = np.where(below_threshold, 0, A)

In [None]:
# Build the graph from the adjacency matrix.
# nx.from_numpy_matrix was deprecated and then removed in NetworkX 3.0;
# nx.from_numpy_array is the supported equivalent and takes a 2D ndarray.
G = nx.from_numpy_array(np.asarray(A))
graph_summary(G)

Add the node attributes

In [None]:
# Transposing puts one column per DataFrame row, so the resulting mapping goes
# from row label to that row's attribute values.
attributes_by_node = dict(nodes_attributes_df.T)
nx.set_node_attributes(G, attributes_by_node)

Extract the largest connected component and print its summary.

In [None]:
# nx.connected_component_subgraphs was removed in NetworkX 2.4. Pick the
# largest node set among the connected components and copy the induced
# subgraph so that G_large is an independent graph, not a view on G.
largest_component_nodes = max(nx.connected_components(G), key=len)
G_large = G.subgraph(largest_component_nodes).copy()
graph_summary(G_large)

Plot the visual summary for the graph

In [None]:
def get_graph_properties(G):
    """
    Return the graph parameters as strings to be plotted in a table. A graph
    without nodes is reported with a giant component size, an average degree
    and an average clustering coefficient of 0.
    ------
    Input
        |---- G : nx.Graph
    Output
        |---- data : np.array of str (one formatted value per property)
        |---- labels : np.array of str (property names, same order as data)
    """
    n_edges = G.number_of_edges()
    n_nodes = G.number_of_nodes()
    n_cc = nx.number_connected_components(G)
    # nx.connected_component_subgraphs was removed in NetworkX 2.4; only the
    # size of the giant component is needed, so the node sets are enough.
    # default=() covers a graph with no component at all.
    GC_size = len(max(nx.connected_components(G), key=len, default=()))

    # Both averages are undefined for a graph without nodes
    # (2*n_edges/n_nodes would divide by zero), so report 0 in that case.
    if n_nodes:
        avg_degree = 2*n_edges/n_nodes
        avg_cluster_coef = nx.average_clustering(G)
    else:
        avg_degree = 0.0
        avg_cluster_coef = 0.0

    labels = np.array(['N edges', 'N nodes', 'N connected\ncomponents', 'Giant Comp. \nsize', 'avg degree', 'avg clustering\ncoefficient'])
    data = np.array([f'{n_edges:.0f}', f'{n_nodes:.0f}', f'{n_cc:.0f}', f'{GC_size:.0f}', f'{avg_degree:.3f}', f'{avg_cluster_coef:.3f}'])
    return data, labels

In [None]:
# Network shown in the figure, and the name it gets in the summary table
network_name = 'Giant Component'
the_graph = G_large

# Node positions computed once with the spring layout
pos = nx.spring_layout(the_graph)

In [None]:
# Table content: one column of formatted properties for the whole network and
# one for the displayed network; the row labels are the same for both.
data_whole, labels = get_graph_properties(G)
data_sub, _ = get_graph_properties(the_graph)
data = np.column_stack((data_whole, data_sub))

In [None]:
import os

# Figure layout: a thin colorbar column, the network in the middle (both rows)
# and, on the right, the degree histogram above the properties table.
fig = plt.figure(figsize=(15,10))
gs = plt.GridSpec(2, 3, wspace=0.2, hspace=0.1, width_ratios=[0.02, 0.63, 0.35], height_ratios=[0.5, 0.5])
title_fs = 12

# Network visualization
atr = 'Oscars'
cmap = matplotlib.colors.LinearSegmentedColormap.from_list('node_cmap', ['cornsilk', 'goldenrod'])
# values below the lower bound of the color scale are drawn in gray
cmap.set_under(color='darkgray')
node_colors = list(nx.get_node_attributes(the_graph, atr).values())
# A single normalization is shared by the nodes and the colorbar so that the
# colorbar describes exactly the colors drawn on the network. The upper bound
# is the largest plotted value, i.e. what the automatic scaling would pick.
nrm = matplotlib.colors.Normalize(vmin=0.01, vmax=float(np.nanmax(node_colors)))
ax_net = fig.add_subplot(gs[:,1])
nx.draw_networkx(the_graph, pos, ax=ax_net,
                 with_labels=False,
                 node_size=10,
                 node_color=node_colors,
                 vmin=nrm.vmin, vmax=nrm.vmax, cmap=cmap,
                 alpha=1,
                 linewidths=0.15,
                 width=0.5,
                 edge_color='lightgray')
# black outline on the first drawn collection (presumably the node markers)
ax_net.collections[0].set_edgecolor("#000000")
ax_net.set_title(f'Movie {network_name} Network', loc='left', fontsize=title_fs)

# colorbar
color_ax = fig.add_subplot(gs[:,0])
fig.colorbar(matplotlib.cm.ScalarMappable(norm=nrm, cmap=cmap),
             cax=color_ax, extend='min')
color_ax.yaxis.set_ticks_position('left')
color_ax.yaxis.set_label_position('left')
color_ax.set_ylabel(atr)

# degree distribution
ax_deg = fig.add_subplot(gs[0,2])
# materialize the degrees as a list: hist expects an array-like sequence
degrees = [k for _, k in the_graph.degree()]

ax_deg.hist(degrees, color='goldenrod', bins=80, linewidth=0.2, edgecolor='black', log=True)
ax_deg.set_title('Degree distribution', loc='left', fontsize=title_fs)
ax_deg.set_xlabel('k')
ax_deg.set_ylabel('number of nodes')

# table info
ax_table = fig.add_subplot(gs[1,2])
ax_table.set_axis_off()
col_labels = ['Whole Network', network_name]
# colColours needs one color per column (not one per row label)
table = ax_table.table(cellText=data, rowLabels=labels, cellLoc='center',
                       colColours=['gainsboro'] * len(col_labels), colLabels=col_labels, loc='center',
                       bbox=[0.3, 0, 0.7, 0.9])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1,1)

# make sure the output folder exists before saving
os.makedirs('Figures', exist_ok=True)
fig.savefig('Figures/network_summary.png', dpi=200, bbox_inches='tight')
plt.show()