In [None]:
import os

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Fixed seed so that the random node orderings drawn from `rng` are the same
# on every Restart Kernel -> Run All. Change SEED to get another realization.
SEED = 42
rng = np.random.default_rng(SEED)

## Error and attack tolerance of networks

In this exercise, we will numerically investigate the error and attack tolerance of networks by removing nodes in a specified or random order and monitoring what happens to the size of the network's largest connected component (LCC). We will first replicate the main results of [Albert, Jeong, and Barabási, Nature 406, 2000](https://www.nature.com/articles/35019019). For this, we will generate Erdős-Rényi networks and scale-free networks, comparing how their LCC sizes depend on random or targeted node removal.

You'll need to do some sorting of lists in this exercise, so you might want to check out https://wiki.python.org/moin/HowTo/Sorting

### a) Calculating the largest connected component size (1 pt)

First, modify the function `calc_largest_component_size(G)` to return the size of the largest connected component of the network G. Then generate an Erdős-Rényi network and a Barabási-Albert scale-free network, each with 5000 nodes and 9996 edges, calculate their largest component sizes, and answer the MyCourses quiz. 

Hint: In the Barabási-Albert model, you can start from a star graph with 3 nodes and 2 edges, and add $m=2$ edges at every time step. 


In [None]:
def calc_largest_component_size(G):
    '''
    Returns the number of nodes in the largest connected component of the graph G.

    Returns 0 if nx.connected_components(G) yields no components.
    '''
    # nx.connected_components(G) yields one collection of nodes per component;
    # only the sizes matter here.
    component_sizes = (len(members) for members in nx.connected_components(G))
    return max(component_sizes, default=0)

In [None]:
N = 5000  # number of nodes for both networks
E = 9996  # number of links for both networks
m = 2  # parameter for Barabási-Albert model (new node comes with 2 links)
NETWORK_SEED = 42  # fixed seed: the same two networks are generated on every run

# gnm_random_graph() is used for the E-R network to get exactly the same
# number of links as the B-A network has.
# Different seeds are passed so the two generators do not share a random stream.
ER_network = nx.gnm_random_graph(N, E, seed=NETWORK_SEED)
BA_network = nx.barabasi_albert_graph(N, m, seed=NETWORK_SEED + 1)

print('LCC size in the E-R network: '+str(calc_largest_component_size(ER_network))+' nodes')
print('LCC size in the Barabási-Albert network: '+str(calc_largest_component_size(BA_network))+' nodes')

print('Links in the E-R network: '+str(len(ER_network.edges)))
print('Links in the Barabási-Albert network: '+str(len(BA_network.edges)))


### b) Determining the order of node removal (2 pts)

We will remove nodes in three ways: i) in increasing order of degree, from lowest to highest, ii) in decreasing order of degree, from highest to lowest, and iii) entirely randomly. Let us start by writing code for ordering the nodes. Please complete the function below, and then test it using the code block further down. Then answer the MyCourses quiz. 

In [None]:

def order_of_removal(G, order='increasing'):
    '''Return the nodes of G in the order in which they are to be removed.

       Inputs:
                G=nx.Graph() object
                order='increasing','decreasing','random'

       Returns a list containing the nodes of graph G ordered as follows:
       order='increasing' — from the lowest to the highest degree
       order='decreasing' — from the highest to the lowest degree
       order='random' — in a random order (drawn from the module-level rng)

       The returned nodes are the original node objects of G.
       If order is not recognized, a message is printed and None is returned.'''

    node_degrees = list(G.degree())  # list of tuples [(node_i,degree_i),(node_j,degree_j),...]

    if order == 'increasing':
        # sorted() is stable: nodes of equal degree keep the order of G.degree()
        ranked = sorted(node_degrees, key=lambda pair: pair[1])
        return [pair[0] for pair in ranked]

    if order == 'decreasing':
        ranked = sorted(node_degrees, key=lambda pair: pair[1], reverse=True)
        return [pair[0] for pair in ranked]

    if order == 'random':
        # Permute positions, not the (node, degree) tuples themselves: handing the
        # tuples to rng.permutation() would coerce them into a numpy array, which
        # turns nodes into numpy scalars and fails for nodes that are not plain
        # numbers (e.g. tuples).
        shuffled_positions = rng.permutation(len(node_degrees))
        return [node_degrees[position][0] for position in shuffled_positions]

    print("Order not recognized, use 'increasing','decreasing',or 'random'")
    return None
    

In [None]:
# NO NEED TO TOUCH THIS

def orders_to_degrees(G,order):
    '''Looks up the degree in network 'G' of every node listed in 'order'.
       Returns these degrees as a list, in the same sequence as 'order'.'''
    return [G.degree(node) for node in order]

In [None]:
# Node removal orders for the Barabási-Albert network ...
BA_low_to_high = order_of_removal(BA_network, order='increasing')
BA_high_to_low = order_of_removal(BA_network, order='decreasing')
BA_random = order_of_removal(BA_network, order='random')

# ... and for the Erdős-Rényi network
ER_low_to_high = order_of_removal(ER_network, order='increasing')
ER_high_to_low = order_of_removal(ER_network, order='decreasing')
ER_random = order_of_removal(ER_network, order='random')

# Degrees of the nodes, listed in each of the removal orders
degrees_BA_low_to_high = orders_to_degrees(BA_network, BA_low_to_high)
degrees_BA_high_to_low = orders_to_degrees(BA_network, BA_high_to_low)
degrees_BA_random = orders_to_degrees(BA_network, BA_random)

degrees_ER_low_to_high = orders_to_degrees(ER_network, ER_low_to_high)
degrees_ER_high_to_low = orders_to_degrees(ER_network, ER_high_to_low)
degrees_ER_random = orders_to_degrees(ER_network, ER_random)

fig = plt.figure(figsize=(10, 5), layout='constrained')

# Left panel: BA network. The series are drawn in this order (random first),
# so the sorted series are plotted on top of the random one.
ax = fig.add_subplot(1, 2, 1)
for series_degrees, series_color, series_label in (
    (degrees_BA_random, '#ff7f0e', 'BA random'),
    (degrees_BA_low_to_high, '#1f77b4', 'BA low to high'),
    (degrees_BA_high_to_low, '#d62728', 'BA high to low'),
):
    ax.plot(series_degrees, marker='.', linestyle='none', color=series_color, label=series_label)
ax.set_xlabel('node')
ax.set_ylabel('degree')
ax.legend(loc=0)

# Right panel: ER network, same layout
ax = fig.add_subplot(1, 2, 2)
for series_degrees, series_color, series_label in (
    (degrees_ER_random, '#ff7f0e', 'ER random'),
    (degrees_ER_low_to_high, '#1f77b4', 'ER low to high'),
    (degrees_ER_high_to_low, '#d62728', 'ER high to low'),
):
    ax.plot(series_degrees, marker='.', linestyle='none', color=series_color, label=series_label)
ax.set_xlabel('node')
ax.set_ylabel('degree')
ax.legend(loc=0)

In [None]:
path='./' # replace with your own
# os.path.join() inserts the directory separator when `path` has no trailing
# slash, so a value such as 'figures' still saves inside that directory.
fig.savefig(os.path.join(path, 'BA_ER_degree_orders.pdf'))

### c) LCC size vs removed nodes (3 pts)

Next, we'll use the node orders generated above to remove nodes from our networks one by one, while keeping track of the largest connected component size. First, complete the code below to produce the two requested lists (fraction of nodes removed, LCC size). Then compute these lists for both networks and different node removal orders using the code block further below, and answer the questions in MyCourses.

In [None]:
def LCC_sizes_sequential_node_removal(original_G, order_removal):
    """
    Tracks the size of the largest connected component (LCC) while nodes
    are removed one by one.

    Parameters
    ----------
    original_G : networkx.Graph
        The original network. It is left untouched; nodes are removed from a copy.
    order_removal : list
        Nodes of original_G sorted in the order in which they will be removed.
        If the list is shorter than the network, only those nodes are removed.

    Returns
    -------
    LCC_fractions : list of float
        Fraction of the original nodes removed at each sample point.
        LCC_fractions[0] is 0.0 (no node removed yet).
    LCC_sizes : list of int
        LCC size at each sample point; LCC_sizes[0] is the LCC size of the
        original network. A sample is taken every time an additional 1% of
        the original nodes has been removed. For networks with fewer than
        100 nodes this amounts to a sample after every single removal.
    """
    G = original_G.copy()
    n_nodes = G.number_of_nodes()

    # Sample point for the intact network (0% removed)
    LCC_fractions = [0.0]
    LCC_sizes = [calc_largest_component_size(G)]

    next_percent = 1  # next whole percentage of removed nodes at which to sample
    for n_removed, node in enumerate(order_removal, start=1):
        G.remove_node(node)
        # Integer form of n_removed / n_nodes >= next_percent / 100: avoids
        # float rounding and never divides by n_nodes // 100, which is 0 for
        # networks with fewer than 100 nodes.
        if 100 * n_removed >= next_percent * n_nodes:
            LCC_fractions.append(n_removed / n_nodes)
            LCC_sizes.append(calc_largest_component_size(G))
            # Skip any percentages already passed by this removal
            next_percent = 100 * n_removed // n_nodes + 1

    return LCC_fractions, LCC_sizes

In [None]:

# For both model networks and all three orderings (high to low, low to high, random),
# compute two lists with the function defined above: the fractions of nodes removed
# and the corresponding LCC sizes.

(LCC_BA_high_to_low_fractions,
 LCC_BA_high_to_low_sizes) = LCC_sizes_sequential_node_removal(BA_network, BA_high_to_low)
(LCC_BA_low_to_high_fractions,
 LCC_BA_low_to_high_sizes) = LCC_sizes_sequential_node_removal(BA_network, BA_low_to_high)
(LCC_BA_random_fractions,
 LCC_BA_random_sizes) = LCC_sizes_sequential_node_removal(BA_network, BA_random)

(LCC_ER_high_to_low_fractions,
 LCC_ER_high_to_low_sizes) = LCC_sizes_sequential_node_removal(ER_network, ER_high_to_low)
(LCC_ER_low_to_high_fractions,
 LCC_ER_low_to_high_sizes) = LCC_sizes_sequential_node_removal(ER_network, ER_low_to_high)
(LCC_ER_random_fractions,
 LCC_ER_random_sizes) = LCC_sizes_sequential_node_removal(ER_network, ER_random)

fig = plt.figure(figsize=(10, 5), layout='constrained')

# Left panel: BA network (random series drawn first, sorted series on top)
ax = fig.add_subplot(1, 2, 1)
for series_fractions, series_sizes, series_color, series_label in (
    (LCC_BA_random_fractions, LCC_BA_random_sizes, '#ff7f0e', 'BA random'),
    (LCC_BA_low_to_high_fractions, LCC_BA_low_to_high_sizes, '#1f77b4', 'BA low to high'),
    (LCC_BA_high_to_low_fractions, LCC_BA_high_to_low_sizes, '#d62728', 'BA high to low'),
):
    ax.plot(series_fractions, series_sizes, marker='.', linestyle='none',
            color=series_color, label=series_label)
ax.set_xlabel('fraction of nodes removed')
ax.set_ylabel('LCC size')
ax.legend(loc=0)

# Right panel: ER network, same layout
ax = fig.add_subplot(1, 2, 2)
for series_fractions, series_sizes, series_color, series_label in (
    (LCC_ER_random_fractions, LCC_ER_random_sizes, '#ff7f0e', 'ER random'),
    (LCC_ER_low_to_high_fractions, LCC_ER_low_to_high_sizes, '#1f77b4', 'ER low to high'),
    (LCC_ER_high_to_low_fractions, LCC_ER_high_to_low_sizes, '#d62728', 'ER high to low'),
):
    ax.plot(series_fractions, series_sizes, marker='.', linestyle='none',
            color=series_color, label=series_label)
ax.set_xlabel('fraction of nodes removed')
ax.set_ylabel('LCC size')
ax.legend(loc=0);

In [None]:
path="./" # replace
# os.path.join() inserts the directory separator when `path` has no trailing
# slash, so a value such as 'figures' still saves inside that directory.
fig.savefig(os.path.join(path, 'error_attack_tolerance_models.pdf'))

### d) Error and attack tolerance of a real-world network (2 pts)

Finally, let's check how the different node removal strategies work with a real-world network. This network is from a Facebook-like online service (source: https://toreopsahl.com/datasets/). First, load the network and visualize it. 

In [None]:
# Select data directory
import os

# Use the first candidate directory that exists. If none does, fall back to the
# current directory -- change the fallback to the data location on your machine.
course_data_dir = next(
    (candidate for candidate in ('/coursedata', '../data') if os.path.isdir(candidate)),
    '.',
)

print('The data directory is %s' % course_data_dir)

# Load the network from the weighted edge list, reading node labels as integers
network_path = os.path.join(course_data_dir, 'OClinks_w_undir.edg')
FBnet_raw = nx.read_weighted_edgelist(network_path, nodetype=int)
print('The number of nodes in the network is: {}'.format(FBnet_raw.number_of_nodes()))

In [None]:
# Start from the largest connected component of the raw network; .copy() gives
# an independent graph rather than a view onto FBnet_raw.
largest_component_nodes = max(nx.connected_components(FBnet_raw), key=len)
FBnet = FBnet_raw.subgraph(largest_component_nodes).copy()

fig, ax = plt.subplots(figsize=(8, 8))
nx.draw(FBnet, ax=ax, node_size=8)

Next, we'll do the same as above: generate node orders from high to low degree, low to high degree, and randomly. Then, we'll reuse the code above to compute the LCC size. 

In [None]:
# Node removal orders for the Facebook-like network, reusing order_of_removal()
FBnet_low_to_high = order_of_removal(FBnet, order='increasing')
FBnet_high_to_low = order_of_removal(FBnet, order='decreasing')
FBnet_random = order_of_removal(FBnet, order='random')

# Fractions of nodes removed and the corresponding LCC sizes for each order
(LCC_FBnet_high_to_low_fractions,
 LCC_FBnet_high_to_low_sizes) = LCC_sizes_sequential_node_removal(FBnet, FBnet_high_to_low)
(LCC_FBnet_low_to_high_fractions,
 LCC_FBnet_low_to_high_sizes) = LCC_sizes_sequential_node_removal(FBnet, FBnet_low_to_high)
(LCC_FBnet_random_fractions,
 LCC_FBnet_random_sizes) = LCC_sizes_sequential_node_removal(FBnet, FBnet_random)

fig = plt.figure(figsize=(6, 5), layout='constrained')
ax = fig.add_subplot(1, 1, 1)

# Random series is drawn first so that the sorted series are plotted on top of it
for series_fractions, series_sizes, series_color, series_label in (
    (LCC_FBnet_random_fractions, LCC_FBnet_random_sizes, '#ff7f0e', 'FBnet random'),
    (LCC_FBnet_low_to_high_fractions, LCC_FBnet_low_to_high_sizes, '#1f77b4', 'FBnet low to high'),
    (LCC_FBnet_high_to_low_fractions, LCC_FBnet_high_to_low_sizes, '#d62728', 'FBnet high to low'),
):
    ax.plot(series_fractions, series_sizes, marker='.', linestyle='none',
            color=series_color, label=series_label)
ax.set_xlabel('fraction of nodes removed')
ax.set_xticks(np.arange(0, 1.1, 0.1))
ax.set_ylabel('LCC size')
ax.legend(loc=0)

In [None]:
path="./" # replace with your own
# os.path.join() inserts the directory separator when `path` has no trailing
# slash, so a value such as 'figures' still saves inside that directory.
fig.savefig(os.path.join(path, 'error_attack_tolerance_fblike.pdf'))