# Lab3 (Teacher version): robustness of a graph 

We can use the following libraries.

In [None]:
import matplotlib.pyplot as plt
import math
import sys
import json
from random import *
from collections import deque


In this lab session, we investigate the notion of robustness of a graph: how well a graph remains connected when nodes disappear following random failures or degree-based failures.

## Exercise 1: preliminary work

### Question 1

Using the code seen in previous labs, load the following graph as a dictionary of lists:

http://lioneltabourier.fr/documents/inet.txt


In [None]:
def load_clean_graph(input_name, output_name = None):
    '''
    load_clean_graph(input_name, output_name = None)

    Load an undirected graph from an edge-list text file, ignoring self-loops and duplicated edges,
    and optionally write the cleaned edge list to a new text file.

    Each data line must start with two integer node ids separated by whitespace. Blank lines and
    lines whose first token starts with "#" are skipped. An edge "a b" and an edge "b a" are
    considered to be the same edge.
        Parameters:
            input_name (string) : name of the file storing the graph with self loops and duplicated edges
            output_name (string) (optional) : name of the file that will store the graph without self
            loops and duplicated edges, one "node1 node2" line per distinct edge, in order of first
            appearance. If no output is defined the new graph will not be stored in a file
        Returns:
            my_graph (dictionary of lists) : the graph without self loops and duplicated edges
    '''
    my_graph = {}
    # Distinct edges in order of first appearance, kept so the output file can be written in one go.
    unique_edges = []
    # Orientation-independent keys of the edges already stored, for O(1) duplicate detection.
    seen_edges = set()
    with open(input_name, "r") as input_file:
        for line in input_file:
            fields = line.split()
            # Skip blank lines and comment lines.
            if not fields or fields[0].startswith("#"):
                continue
            node1 = int(fields[0])
            node2 = int(fields[1])
            if node1 == node2:
                continue
            edge_key = (node1, node2) if node1 < node2 else (node2, node1)
            if edge_key in seen_edges:
                continue
            seen_edges.add(edge_key)
            unique_edges.append((node1, node2))
            my_graph.setdefault(node1, []).append(node2)
            my_graph.setdefault(node2, []).append(node1)
    if output_name is not None:
        # The file is only opened once the input has been parsed, and `with` closes it even on error.
        with open(output_name, "w") as output_file:
            output_file.writelines(f"{node1} {node2}\n" for node1, node2 in unique_edges)
    return my_graph

In [None]:
# Path of the edge-list file holding the example graph.
inet = "graphs/inet.txt"
# Runs only when this is the main module and `__file__` is not defined in globals
# (presumably an interactive/notebook session rather than a script run -- confirm).
if __name__ == '__main__'  and '__file__' not in globals():
    inet_graph = load_clean_graph(inet)
    # Pretty-print the adjacency lists as JSON.
    print(json.dumps(inet_graph, indent = 4))

### Question 2

Determine the size of the largest connected component (LCC) of a graph, and use the code to determine the size of the LCC of the example graph.

Suggested implementation:

- Create a function that takes a graph as input and outputs a dictionary of the connected component that each node belongs to. (This function is derived from a BFS).

- Then, create another function which takes the dictionary of the connected component as input and computes the size of the largest connected component of the graph.


In [None]:
def bfs(my_graph):
    '''
    bfs(my_graph)

    Evaluate the size of the largest connected component of the graph 'my_graph' by running a
    breadth-first search from every node that has not yet been assigned to a component.
        Parameters:
            my_graph (dictionary of lists) : maps each node to the list of its neighbours; every
            neighbour must itself be a key of the dictionary
        Returns:
            my_lcc (int) : the size of the largest connected component of 'my_graph', 0 if the
            graph has no node
    '''
    # my_cc maps every node to the index of the component it was assigned to.
    my_cc = {}
    cc_index = 0
    for source in my_graph:
        if source in my_cc:
            continue
        # A deque gives O(1) pops from the left and a set gives O(1) membership tests.
        my_queue = deque([source])
        marked_node = {source}
        while my_queue:
            node1 = my_queue.popleft()
            my_cc[node1] = cc_index
            for node2 in my_graph[node1]:
                if node2 not in marked_node:
                    marked_node.add(node2)
                    my_queue.append(node2)
        cc_index += 1

    # Count how many nodes ended up in each component.
    cc_sizes = {}
    for component in my_cc.values():
        cc_sizes[component] = cc_sizes.get(component, 0) + 1
    # default=0 covers the empty graph, for which there is no component at all.
    return max(cc_sizes.values(), default=0)
# Runs only when this is the main module and `__file__` is not defined in globals.
if __name__ == '__main__'  and '__file__' not in globals():
    # inet_graph is the module-level graph loaded from the file named by `inet`.
    lcc_size = bfs(inet_graph)
    print(f"Size of largest connected component in the graph 'inet' : {lcc_size}")

## Exercise 2: robustness to random failures

### Question 3

In this question, we plot the size of the LCC as a function of the number of nodes which are removed. This is a way to evaluate the robustness of the network to random failures.

Suggested implementation:

- create a function that deletes $n_s$ nodes from the original graph

- use the function of question 2 to compute the size of the LCC

- combine these two functions and iterate to get a dictionary whose keys are $n_s$ and whose values are the corresponding sizes of the LCC



In [None]:
def delete_node(my_graph, nodes):
    '''
    delete_node(my_graph, nodes)

    Deletes the nodes 'nodes' from the graph 'my_graph', together with every reference to them
    in the adjacency lists of their neighbours. The graph is modified in place.

    Nodes that are not (or no longer) in the graph are ignored, so the list may contain
    duplicates or unknown nodes.
        Parameters:
            my_graph (dictionary of lists)
            nodes (list) : list of nodes to be deleted
        Return:
            None
    '''
    for node1 in nodes:
        # Detach the node first: a self-loop then cannot modify the list being iterated below.
        neighbours = my_graph.pop(node1, None)
        if neighbours is None:
            continue
        for node2 in neighbours:
            adjacency = my_graph.get(node2)
            if adjacency is None:
                # node2 is node1 itself (self-loop) or a neighbour that is not a key of the graph.
                continue
            # In-place filtering removes every occurrence and tolerates a missing back-reference.
            adjacency[:] = [node for node in adjacency if node != node1]
def random_failure(my_graph, limit, step):
    '''
    random_failure(my_graph, limit, step)

    Return the evolution of the largest connected component size according to the number of nodes that are
    deleted from the graph randomly, 'step' nodes at a time. The graph 'my_graph' is modified in place:
    the deleted nodes are removed from it.

    Measurements are taken after step, 2*step, ... deletions, for every such count strictly below 'limit'.
    The process stops early when fewer than 'step' nodes are left in the graph.
        Parameters:
            my_graph (dictionary of lists)
            limit (int) : exclusive upper bound on the total number of nodes that will be deleted
            step (int) : number of nodes to be deleted at each step
        Returns
            my_data (dictionary) : dictionary which key are the numbers of nodes removed and the value the
            size of the largest connected component when the corresponding number of nodes have been deleted
            from the graph (0 once the graph is empty)

    '''
    my_data = {}
    nodes_left = list(my_graph.keys())
    for key in range(step, limit, step):
        # Not enough nodes left for a full batch: stop rather than draw from an empty list.
        if len(nodes_left) < step:
            break
        # Draw 'step' distinct nodes uniformly at random among the nodes still present.
        to_delete = [nodes_left.pop(randint(0, len(nodes_left) - 1)) for _ in range(step)]
        delete_node(my_graph, to_delete)
        # An empty graph has no connected component, so its largest one has size 0.
        my_data[key] = bfs(my_graph) if my_graph else 0
    return my_data
    
    

In [None]:
# Runs only when this is the main module and `__file__` is not defined in globals.
if __name__ == '__main__'  and '__file__' not in globals():
    # Load the graph from the file again: random_failure deletes nodes from the graph it is given.
    inet_graph = load_clean_graph(inet)
    # 10 nodes are deleted per step; range(step, limit, step) gives the keys 10, 20, ..., 90.
    my_data = random_failure(inet_graph, 100, 10)
    print(json.dumps(my_data, indent = 4))


## Exercise 3: robustness to targeted (degree-based) failures 

### Question 4

In this question, we do the same as in the previous question, except for the fact that nodes are not chosen randomly, but by decreasing degree order.

Suggested implementation:

- create a function that outputs a list of nodes ordered by decreasing degree

- then follow the same principle as in the previous question

In [None]:
def create_order_nodes(my_graph):
    '''
    create_order_nodes(my_graph)

    Rank the nodes of 'my_graph' from the highest degree to the lowest one, the degree of a node
    being the length of its adjacency list. Nodes with the same degree keep the order in which
    they appear in the dictionary.
        Parameters:
            my_graph (dictionary of lists)
        Return
            nodes_ordered (list of int): list of the nodes sorted by decreasing degree
    '''
    def degree_of(node):
        return len(my_graph[node])

    nodes_ordered = list(my_graph)
    # list.sort is stable, also with reverse=True, so ties keep the dictionary order.
    nodes_ordered.sort(key=degree_of, reverse=True)
    return nodes_ordered

def target_failure(my_graph, limit, step):
    '''
    target_failure(my_graph, limit, step)

    Return the evolution of the largest connected component size according to the number of nodes that are
    deleted from the graph by decreasing degree, 'step' nodes at a time. The graph 'my_graph' is modified
    in place: the deleted nodes are removed from it.

    The deletion order is computed once, from the degrees in the initial graph; it is not updated as nodes
    are deleted. Measurements are taken after step, 2*step, ... deletions, for every such count strictly
    below 'limit'. The process stops early when fewer than 'step' nodes are left in the graph.
        Parameters:
            my_graph (dictionary of lists)
            limit (int) : exclusive upper bound on the total number of nodes that will be deleted
            step (int) : number of nodes to be deleted at each step
        Returns
            my_data (dictionary) : dictionary which key are the numbers of nodes removed and the value the size
            of the largest connected component when the corresponding number of nodes have been deleted from
            the graph (0 once the graph is empty)
    '''
    my_data = {}
    # A deque makes taking the next highest-degree node an O(1) operation.
    nodes_left = deque(create_order_nodes(my_graph))
    for key in range(step, limit, step):
        # Not enough nodes left for a full batch: stop rather than pop from an empty queue.
        if len(nodes_left) < step:
            break
        # The next 'step' nodes in decreasing order of initial degree.
        to_delete = [nodes_left.popleft() for _ in range(step)]
        delete_node(my_graph, to_delete)
        # An empty graph has no connected component, so its largest one has size 0.
        my_data[key] = bfs(my_graph) if my_graph else 0
    return my_data

In [None]:
# Runs only when this is the main module and `__file__` is not defined in globals.
if __name__ == '__main__'  and '__file__' not in globals():
    # Load the graph from the file again: target_failure deletes nodes from the graph it is given.
    inet_graph = load_clean_graph(inet)
    # One node is deleted per step; range(step, limit, step) gives the keys 1, 2, 3, 4.
    my_data = target_failure(inet_graph, 5, 1)
    print(json.dumps(my_data, indent = 4))
    

### Question 5

Compare the two curves (random deletions and targeted deletions): are they different? What does it mean?

In [None]:
def plot_curves(file_name, limit, step):
    '''
    plot_curves(file_name, limit, step)

    Draw, on a single figure, how the size of the largest connected component of the graph stored in
    'file_name' evolves while nodes are deleted, then display the figure. Two curves are drawn: a red
    one for random deletions and a blue one for deletions by decreasing degree. Both are sampled every
    'step' deleted nodes, with 'limit' as the upper bound of the number of deletions.
        Parameters :
            file_name (string) : the name of the file that contain the graph
            limit (int) : the upper bound limit of nodes deletion
            step (int) : the sampling rate i.e how many nodes are deleted at each step
        Returns:
    '''
    # Each experiment deletes nodes from the graph it receives, so each one gets a graph freshly
    # loaded from the file.
    targeted_curve = target_failure(load_clean_graph(file_name), limit, step)
    random_curve = random_failure(load_clean_graph(file_name), limit, step)

    # x axis: number of deleted nodes, y axis: size of the largest connected component.
    plt.plot(random_curve.keys(), random_curve.values(), color='red')
    plt.plot(targeted_curve.keys(), targeted_curve.values(), color='blue')
    plt.show()

In [None]:
# Runs only when this is the main module and `__file__` is not defined in globals.
if __name__ == '__main__'  and '__file__' not in globals():
    # Compare both deletion strategies on the example graph: limit 8000, 100 nodes per step.
    plot_curves(inet, 8000,100)