# Load libraries

In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Create a network object with NetworkX

As a first example, we will load and explore a network extracted from the email data of a large European research institution [1,2].

In [None]:
# Read the edge list: a space-separated text file without a header row,
# so pandas labels the columns 0, 1, ...
network_data_frame = pd.read_csv(
    'data/email-Eu-core.txt',
    sep=' ',
    header=None,
)

# Start from an empty undirected graph and add one edge per row of the table.
network = nx.Graph()
edge_rows = network_data_frame.values
network.add_edges_from(edge_rows)

Let's display the network to see what it looks like.

In [None]:
# First look at the whole graph: small nodes and gray edges keep it legible.
nx.draw(
    network,
    node_size=10,
    edge_color='gray',
)

In [None]:
# connected_components() is consumed lazily here: it hands back the
# components one at a time, each as a collection of node labels.
parts_generator = nx.connected_components(network)

# Materialise the components so they can be inspected and indexed below.
parts = [component for component in parts_generator]
parts

In [None]:
# Keep only the largest connected component so that path-based measures
# (diameter, average shortest path length) are well defined further down.
#
# The components in `parts` are not sorted by size, so select the largest one
# explicitly instead of assuming it is parts[0]. Every node of every other
# component is removed, not just one node per component, so the result is
# connected even if some leftover component has more than one node.
giant_component = max(parts, key=len, default=set())

# Name kept for compatibility: all nodes outside the giant component.
singles = [
    node
    for part in parts
    if part is not giant_component
    for node in part
]

# remove_nodes_from() ignores nodes that are already gone, so re-running this
# cell is harmless.
network.remove_nodes_from(singles)

In [None]:
# Redraw the graph now that the nodes listed in `singles` have been removed.
nx.draw(
    network,
    node_size=5,
    edge_color='gray',
)

In [None]:
# Show the degree of every node as (node, degree) pairs.
[(node, degree) for node, degree in network.degree]

In [None]:
# Keep only the degree from each (node, degree) pair.
degree_list = [degree for _node, degree in network.degree]

# Histogram of the degree distribution (matplotlib's default binning).
plt.hist(degree_list)
plt.show()

In [None]:
# Diameter: the length of the longest shortest path between any two nodes.
# This needs a connected graph, which is why the extra components were
# removed from `network` above.
nx.diameter(network)

In [None]:
# Clustering coefficient of every node; the result is used below as a
# mapping whose values are the per-node coefficients.
clustering_coeff = nx.clustering(network)
clustering_coeff

In [None]:
# Average of the per-node clustering coefficients.
np.asarray(list(clustering_coeff.values())).mean()

In [None]:
# Distribution of the per-node clustering coefficients.
# Materialise the dict view into a list first (as done for np.mean above):
# a dict view is not a real sequence, and not every matplotlib version can
# turn one into the array that hist() needs.
plt.hist(list(clustering_coeff.values()))
plt.show()

In [None]:
# Mean shortest-path length over all pairs of nodes. Like the diameter, this
# is only meaningful on a connected graph.
nx.average_shortest_path_length(network)

In [None]:
# Spanning tree of the network. NOTE(review): no edge weights appear to be
# set when the graph is built, so presumably all edges count equally here and
# this is simply *a* spanning tree -- confirm.
tree = nx.minimum_spanning_tree(network)

nx.draw(
    tree,
    node_size=5,
    edge_color='gray',
)

In [None]:
# Tree produced by a depth-first search started at node 0.
# `tree` is deliberately overwritten: only the drawing is of interest.
tree = nx.dfs_tree(network, 0)
nx.draw(
    tree,
    node_size=5,
    edge_color='gray',
)

In [None]:
# Tree produced by a breadth-first search started at node 0, for comparison
# with the depth-first tree.
tree = nx.bfs_tree(network, 0)
nx.draw(
    tree,
    node_size=5,
    edge_color='gray',
)

# Community detection

In [None]:
# Use NetworkX's built-in karate club graph as a small example for
# community detection, and draw it with the default settings.
community_network = nx.karate_club_graph()
nx.draw(community_network)

In [None]:
# Girvan-Newman community detection. The result is an iterable of
# partitions; nothing is materialised until it is turned into a list below.
partitions = nx.algorithms.community.girvan_newman(community_network)

In [None]:
# Collect every partition produced by Girvan-Newman so they can be indexed.
part_list = [partition for partition in partitions]
part_list

In [None]:
# Pick one partition to visualise: index 3 is the fourth one produced.
# NOTE(review): presumably each successive partition has one more community
# than the previous one, which would make this the 5-community split -- confirm.
communities = part_list[3]
communities

In [None]:
# Map every node to the index of the community that contains it.
community_of = {
    node: index
    for index, community in enumerate(communities)
    for node in community
}

# One colour value per node, listed in the graph's own node order, which is
# the order nx.draw() uses to match colours to nodes. Looking the community up
# by node label (rather than using the label as an array index) keeps this
# correct even if the labels are not exactly 0..n-1. A node that is in no
# community falls back to 0, matching the original zero-initialised array.
colors = np.array(
    [community_of.get(node, 0) for node in community_network.nodes],
    dtype=float,
)

nx.draw(community_network, node_color = colors)


# Final task

In the data/ directory, there are three files (N1.txt, N2.txt, N3.txt) that represent three different networks (all are undirected and unweighted). 

One is a social network of email exchanges at a Spanish university [3], one is the Western States power grid [4], and one is the (largest cluster of the) protein interaction network in yeast [5].

Your task now is to use the tools above to decide which network corresponds to which file. The more information you gather to support your conclusions, the better.




### References


[1] Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. "Local Higher-order Graph Clustering." In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017.

[2] J. Leskovec, J. Kleinberg and C. Faloutsos. Graph Evolution: Densification and Shrinking Diameters. ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007.

[3] R. Guimerà, L. Danon, A. Díaz-Guilera, F. Giralt, and A. Arenas. Self-similar community structure in a network of human interactions. Physical Review E, 68(6):065103, December 2003.

[4] Duncan J. Watts and Steven H. Strogatz. Collective dynamics of ‘small-world’ networks. Nature, 393(6684):440–442, June 1998.

[5] H. Jeong, S. P. Mason, A.-L. Barabási, and Z. N. Oltvai. Lethality and centrality in protein networks. Nature, 411(6833):41–42, May 2001.
