# Lab3 (Teacher version): robustness of a graph 

We can use the following libraries.

In [None]:
import matplotlib.pyplot as plt
import math
import sys
from random import *
from collections import deque
import copy, random
# Print the version string of the Python interpreter running this notebook.
print(sys.version)

In this lab session, we investigate the notion of robustness of a graph: how well a graph remains connected when nodes disappear following random failures or degree-based failures.

## Exercise 1: preliminary work

### Question 1

Using the code seen in previous labs, load the following graph as a dictionary of lists:

http://lioneltabourier.fr/documents/inet.txt


In [None]:
def remove_loop_dupes(graph):
    """Clean every adjacency list of *graph* in place.

    For each node, repeated neighbours are collapsed to their first
    occurrence (order is kept) and the node itself is dropped from its own
    list, which removes self-loops. Nothing is returned.
    """
    for vertex in graph:
        # dict.fromkeys keeps the first occurrence of each neighbour, in order.
        graph[vertex] = [
            neighbour
            for neighbour in dict.fromkeys(graph[vertex])
            if neighbour != vertex
        ]

def graph_from_file(file_name):
    """Load an undirected graph from an edge-list text file.

    Each usable line holds exactly two whitespace-separated integers
    ``node1 node2``; the link is stored in both adjacency lists. Lines that
    cannot be parsed that way (blank lines, text, wrong number of fields)
    are skipped. Self-loops and duplicate links are removed before
    returning.

    Args:
        file_name: path of the edge-list file to read.

    Returns:
        A dictionary mapping each node (int) to the list of its neighbours.

    Raises:
        OSError: if the file cannot be opened.
    """
    graph = {}
    with open(file_name, "r") as graph_file:
        for line in graph_file:
            try:
                node1, node2 = [int(node) for node in line.split()]
            except ValueError:
                # Raised both by int() on non-numeric text and by unpacking
                # a line that does not contain exactly two fields.
                continue
            graph.setdefault(node1, []).append(node2)
            graph.setdefault(node2, []).append(node1)
    remove_loop_dupes(graph)
    return graph


def graph_to_file(graph, file_name):
    """Write *graph* to *file_name* as an edge list.

    One line ``node neighbour`` is written for every entry of every
    adjacency list, in dictionary order. The file is overwritten.
    """
    with open(file_name, "w") as out:
        out.writelines(
            "{} {}\n".format(source, target)
            for source, targets in graph.items()
            for target in targets
        )
def count_links(graph):
    """Return the total number of entries over all adjacency lists.

    This is the sum of the node degrees, so a link that is stored in the
    lists of both of its endpoints contributes two to the result.
    """
    return sum(len(neighbours) for neighbours in graph.values())
def compute_degree_dist(graph):
    """Return the degree distribution of *graph*.

    The result maps each degree value that occurs in the graph to the number
    of nodes having that degree (the length of their adjacency list).
    """
    histogram = {}
    for neighbours in graph.values():
        size = len(neighbours)
        histogram[size] = histogram.get(size, 0) + 1
    return histogram

### Question 2

Determine the size of the largest connected component (LCC) of a graph, and use the code to determine the size of the LCC of the example graph.

Suggested implementation:

- Create a function that takes a graph as input and outputs a dictionary mapping each node to the index of the connected component it belongs to. (This function is derived from a BFS.)

- Then, create another function which takes the dictionary of the connected component as input and computes the size of the largest connected component of the graph.


In [None]:
def bfs(graph, node_start):
    """Breadth-first search from *node_start*.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.
        node_start: node from which the exploration starts; must be a key
            of *graph*.

    Returns:
        The list of nodes reachable from *node_start* (itself included), in
        the order in which they were discovered.

    Raises:
        KeyError: if a visited node is not a key of *graph*.
    """
    queue = deque([node_start])
    # The set gives O(1) membership tests; the list keeps discovery order.
    seen = {node_start}
    marked = [node_start]
    while queue:
        node1 = queue.popleft()
        for node2 in graph[node1]:
            if node2 not in seen:
                seen.add(node2)
                marked.append(node2)
                queue.append(node2)
    return marked

def compute_size_lcc(graph):
    """Return the size of the largest connected component (LCC) of *graph*.

    Every node not yet assigned to a component is used as the start of a
    BFS; all nodes reached are labelled with the index of that component
    and the component size is recorded.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.

    Returns:
        The number of nodes in the largest connected component, or 0 when
        the graph has no node.
    """
    nodes_cc_index = {}
    cc_sizes = []
    for node in graph:
        if node in nodes_cc_index:
            # Already reached by a BFS started from an earlier node.
            continue
        cc = bfs(graph, node)
        cc_index = len(cc_sizes)
        for node_marked in cc:
            nodes_cc_index[node_marked] = cc_index
        cc_sizes.append(len(cc))
    # default=0 avoids a ValueError when the graph is empty.
    return max(cc_sizes, default=0)
    

## Exercise 2: robustness to random failures

### Question 3

In this question, we plot the size of the LCC as a function of the number of nodes removed. This is a way to evaluate the robustness of the network to random failures.

Suggested implementation:

- create a function that deletes $n_s$ nodes from the original graph

- use the function of question 2 to compute the size of the LCC

- combine these two functions and iterate to get a dictionary whose keys are $n_s$ and whose values are the corresponding sizes of the LCC



In [None]:
def remove_nodes(graph, nodes_deleted):
    """Return a copy of *graph* without the nodes in *nodes_deleted*.

    The input graph is left untouched. The deleted nodes disappear both as
    keys and from every remaining adjacency list; the relative order of the
    remaining nodes and neighbours is preserved. Nodes listed several times
    or absent from the graph are simply ignored.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.
        nodes_deleted: iterable of nodes to remove.

    Returns:
        A new dictionary of new lists describing the reduced graph.
    """
    deleted = set(nodes_deleted)
    # Rebuilding by filtering avoids mutating a list while iterating over it
    # (which happens with self-loops) and is linear in the size of the graph.
    return {
        node: [neighbour for neighbour in neighbours if neighbour not in deleted]
        for node, neighbours in graph.items()
        if node not in deleted
    }

def random_failure(graph, max_deleted=8000, step=100):
    """Measure the LCC size after removing randomly chosen nodes.

    For every ``n`` in ``range(0, max_deleted, step)``, the node list is
    reshuffled and its first ``n`` nodes are removed from a copy of the
    graph, so each value of ``n`` uses an independent random draw.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.
        max_deleted: exclusive upper bound on the number of removed nodes.
        step: increment between two successive numbers of removed nodes.

    Returns:
        A dictionary mapping each number of removed nodes to the size of the
        largest connected component of the remaining graph (0 once every
        node has been removed).
    """
    robust_dic = {}
    for n in range(0, max_deleted, step):
        nodes = list(graph)
        random.shuffle(nodes)
        tmp = remove_nodes(graph, nodes[:n])
        # When n reaches the number of nodes nothing is left: record 0
        # instead of asking for the LCC of an empty graph.
        robust_dic[n] = compute_size_lcc(tmp) if tmp else 0
    return robust_dic
        
            

In [None]:
# Load the example graph from the edge-list file "res/inet.txt"
# (presumably a local copy of the file linked in Question 1 - confirm).
inet_graph = graph_from_file("res/inet.txt")
# Compute the LCC size under random failures; the result is not stored here.
random_failure(inet_graph)

## Exercise 3: robustness to targeted (degree-based) failures 

### Question 4

In this question, we do the same as in the previous question, except that nodes are not chosen randomly: they are removed in decreasing order of degree.

Suggested implementation:

- create a function that outputs a list of nodes ordered by decreasing degree

- then follow the same principle as in the previous question

In [None]:
def order_nodes(graph):
    """Return the nodes of *graph* sorted by decreasing degree.

    The degree of a node is the length of its adjacency list. Nodes with the
    same degree keep the relative order they have in the dictionary.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.

    Returns:
        A list of all nodes, highest degree first.
    """
    # sorted() is stable, including with reverse=True, so ties keep
    # dictionary order.
    return sorted(graph, key=lambda node: len(graph[node]), reverse=True)

def targeted_failure(graph, max_deleted=8000, step=100):
    """Measure the LCC size after removing the highest-degree nodes.

    Nodes are ranked once by decreasing degree in the original graph; for
    every ``n`` in ``range(0, max_deleted, step)`` the first ``n`` nodes of
    that ranking are removed from a copy of the graph.

    Args:
        graph: dictionary mapping each node to the list of its neighbours.
        max_deleted: exclusive upper bound on the number of removed nodes.
        step: increment between two successive numbers of removed nodes.

    Returns:
        A dictionary mapping each number of removed nodes to the size of the
        largest connected component of the remaining graph (0 once every
        node has been removed).
    """
    # The ranking depends only on the input graph, so compute it once.
    nodes = order_nodes(graph)
    robust_dic = {}
    for n in range(0, max_deleted, step):
        tmp = remove_nodes(graph, nodes[:n])
        # When n reaches the number of nodes nothing is left: record 0
        # instead of asking for the LCC of an empty graph.
        robust_dic[n] = compute_size_lcc(tmp) if tmp else 0
    return robust_dic

In [None]:
# Compute the LCC size under degree-based removals; the result is not stored here.
targeted_failure(inet_graph)

### Question 5

Compare the two curves (random deletions and targeted deletions): are they different? What does it mean?

In [None]:
# Compute both robustness curves on the same graph.
random_dic = random_failure(inet_graph)
targeted_dic = targeted_failure(inet_graph)
# Turn the dictionary views into plain lists before handing them to matplotlib.
x_random, y_random = list(random_dic.keys()), list(random_dic.values())
x_target, y_target = list(targeted_dic.keys()), list(targeted_dic.values())
# Label each curve so the two removal strategies can be told apart.
plt.plot(x_random, y_random, label="random failures")
plt.plot(x_target, y_target, label="targeted failures (decreasing degree)")
plt.xlabel("number of nodes removed")
plt.ylabel("size of the LCC")
plt.legend()
plt.show()