# Lab4-5 (Student version): standard graph models

We can use the following libraries.

In [None]:
import json
import math
import random
import sys
from collections import deque

import matplotlib.pyplot as plt

This lab work will spread over sessions 4 and 5. 

Session 4 should focus on making sure that the code from the previous sessions works correctly (ex.1) and on testing it on an Erdös-Rényi model (ex.2).

Session 5 should focus on the two other models (ex.3 and ex.4).

## Exercise 1: Preliminary work

### Question 1

Download the graph http://lioneltabourier.fr/documents/as_caida.txt and load it in memory as a dictionary of lists (as usual). This graph is a partial map of the Internet at the AS level as obtained using BGP tables during the CAIDA project in 2007. It will be used during the rest of this practical work. 

Apply the codes seen in the previous labs to:
- count its number of nodes and links, 
- plot its degree distribution,
- compute its number of triangles,
- give an approximation of its diameter.


In [None]:
def load_clean_graph(input_name, output_name = None):
    '''
    load_clean_graph(input_name, output_name = None)
    
    Load an undirected graph from an edge-list file, dropping self-loops and duplicated edges,
    and optionally write the cleaned edge list to a new text file.
        Parameters:
            input_name (string) : name of the file storing the graph, one "node1 node2" pair per line.
            Blank lines and lines starting with '#' are ignored.
            output_name (string) (optional) : name of the file that will store the graph without self 
            loops and duplicated edges, one line per undirected link (in order of first appearance).
            If no output is defined the new graph will not be stored in a file
        Returns:
            my_graph (dictionary of lists) : the graph without self loops and duplicated edges
    '''
    my_graph = {}
    with open(input_name, "r") as input_file:
        output_file = open(output_name, "w") if output_name is not None else None
        try:
            for line in input_file:
                fields = line.split()
                if not fields or fields[0].startswith("#"):
                    continue # blank line or comment
                node1 = int(fields[0])
                node2 = int(fields[1])
                if node1 == node2:
                    continue # self-loop
                neighbours1 = my_graph.setdefault(node1, [])
                neighbours2 = my_graph.setdefault(node2, [])
                # Both directions are always stored together, so checking one side is enough
                # to detect a link that was already seen (in either orientation).
                if node2 in neighbours1:
                    continue
                neighbours1.append(node2)
                neighbours2.append(node1)
                if output_file is not None:
                    output_file.write(f"{node1} {node2}\n")
        finally:
            # Close the output even if a malformed line raises while parsing.
            if output_file is not None:
                output_file.close()
    return my_graph
    
def node_link(file_name):
    '''
    node_link(file_name)
    
    Returns the number of nodes and the number of links in the graph stored in 'file_name'.
    The file is read as-is: a link that appears several times (or in both directions) is counted
    as many times, and self-loops are counted too.
        Parameters:
            file_name(string) : the name of the file storing the graph, one "node1 node2" pair per
            line. Blank lines and lines starting with '#' are ignored.
        Returns:
            node_count, link_count (int, int) : a tuple containing the number of nodes and the number of links in 
            the graph
    '''
    node_set = set()
    link_count = 0
    with open(file_name, "r") as my_file:
        for line in my_file:
            fields = line.split() # supposing whitespace between two nodes
            if not fields or fields[0].startswith("#"):
                continue # blank line or comment
            node1 = int(fields[0]) # supposing that nodes are numbers formated in file
            node2 = int(fields[1])
            link_count += 1
            node_set.add(node1)
            node_set.add(node2)
    return len(node_set), link_count

def degree_dist(my_graph):
    '''
    degree_dist(my_graph)
    
    Builds the degree histogram of a graph
        Parameters:
            my_graph (dictionary of lists)
        Returns:
            my_degree_dist (dictionary) : maps every degree present in the graph to the number of
            nodes having that degree
    '''
    histogram = {}
    for neighbours in my_graph.values():
        degree = len(neighbours)
        histogram[degree] = histogram.get(degree, 0) + 1
    return histogram

def plot_degree_dist(my_graph, log = True, limits = (0.5, 10000, 0.5, 10000), my_legend = "Curve"):
    '''
    plot_degree_dist(my_graph)
    
    Draw the degree distribution of 'my_graph' as a scatter plot on the current matplotlib figure
        Parameters:
            my_graph (dictionary of lists)
            log (boolean) (default = True): when True both axes use a log scale and 'limits' is applied
            limits (xmin, xmax, ymin, ymax) (default = (0.5,10000,0.5,10000)) : axis limits, only used
            when 'log' is True
            my_legend (string) (default = "Curve") : label of the scatter series shown in the legend
        Returns
    '''
    distribution = degree_dist(my_graph)
    x_low, x_high, y_low, y_high = limits
    if log:
        plt.xscale('log')
        plt.yscale('log')
        plt.xlim([x_low, x_high])
        plt.ylim([y_low, y_high])
    plt.scatter(distribution.keys(), distribution.values(), label = my_legend)
    plt.legend()

def triangle(my_graph):
    '''
    triangle(my_graph)
    
    Returns the number of triangles in the graph 'my_graph'. Each triangle is counted once, through
    its ordered form node1 < node2 < node3.
        Parameters:
            my_graph (dictionary of lists)
        Returns:
            triangle_count (int) : the number of triangles in the graph
    '''
    # Membership tests on the adjacency lists are linear in the degree; on graphs with large hubs
    # this dominates the running time, so build one set per node up front.
    neighbour_sets = {node: set(neighbours) for node, neighbours in my_graph.items()}
    triangle_count = 0
    for node1, neighbours1 in my_graph.items():
        for node2 in neighbours1:
            if node1 < node2:
                neighbours2 = neighbour_sets[node2]
                for node3 in neighbours1:
                    # node3 in N(node1) and N(node2)
                    if node2 < node3 and node3 in neighbours2:
                        triangle_count += 1
    return triangle_count

def distances(my_graph, source_node):
    '''
    distances(my_graph, source_node)
    
    Returns the distances of each node to the source node as a dictionary, computed with a
    breadth-first search
        Parameters:
            my_graph (dictionary of lists)
            source_node (int) : the node from which we will compute the distances
        Returns:
            my_distances : A dictionary which keys are the nodes and the values are the distances from the 
            key to 'source_node'. Nodes that cannot be reached from 'source_node' keep the value -1
    '''
    my_distances = {node: -1 for node in my_graph}
    my_distances[source_node] = 0
    # deque gives O(1) pops from the left; list.pop(0) shifts the whole queue every time
    my_queue = deque([source_node])
    while my_queue:
        node1 = my_queue.popleft()
        for node2 in my_graph[node1]:
            if my_distances[node2] == -1: # not visited yet
                my_distances[node2] = my_distances[node1] + 1
                my_queue.append(node2)
    return my_distances

def diameter(my_graph, sample_size):
    '''
    diameter(my_graph, sample_size)
    
    Compute an approximate diameter of the  graph 'my_graph' by running a bfs algorithm on a sample of nodes 
    and taking the max of the distances. Unreachable nodes have distance -1 and therefore never
    raise the result.
        Parameters:
            my_graph (dictionary of lists)
            sample_size (int) : the number of nodes in the graph that will be used to find the diameter.
            If it is larger than the number of nodes, every node is used once.
        Returns:
            my_diameter (int) : the approximate maximum distance in the graph, or -1 if no node was sampled
    '''
    my_diameter = -1
    nodes = list(my_graph)
    # Sources are drawn without replacement, so there cannot be more draws than nodes.
    for _ in range(min(sample_size, len(nodes))):
        source_node = nodes.pop(random.randint(0, len(nodes) - 1))
        my_distances = distances(my_graph, source_node)
        my_diameter = max(my_diameter, max(my_distances.values()))
    return my_diameter

def add_link(my_link, my_graph):
    '''
    add_link((node1, node2), my_graph)
    
    Insert the undirected link ('node1', 'node2') into 'my_graph', in place. Nodes missing from the
    graph are created; no check is made for an already existing link.
        Parameters:
            (node1, node2) (int, int) : the link  that will be added
            my_graph (dictionary of lists) : the graph that will contain the link
        Returns:    
    '''
    first, second = my_link
    my_graph.setdefault(first, []).append(second)
    my_graph.setdefault(second, []).append(first)

# Path (relative to the working directory) of the CAIDA edge-list file loaded by the cells below.
caida = 'graphs/as_caida.txt'

In [None]:
# Runs only as the main module and when '__file__' is undefined (presumably: inside the notebook,
# not when this file is executed or imported as a script).
if __name__ == '__main__'  and '__file__' not in globals():
    # Graph without self-loops / duplicated links, kept in memory for the measurements below.
    caida_graph = load_clean_graph(caida)
    # NOTE(review): node_link reads the raw file, so count_link includes any repeated or reversed
    # links that load_clean_graph dropped — confirm this is the count wanted for the models.
    count_node, count_link = node_link(caida)
    print(f"Caida \n\tNodes : {count_node}, Links : {count_link}")
    plot_degree_dist(caida_graph, log = True)
    print(f"\tTriangles : {triangle(caida_graph)}")
    # Approximate diameter from 100 randomly chosen BFS sources.
    print(f"\tDiameter : {diameter(caida_graph,100)}")
    
    

## Exercise 2: Erdös-Rényi model

### Question 2

Create an Erdös-Rényi graph with the same number of nodes and links as the original graph.

In [None]:
def erdos(node_count, link_count):
    '''
    erdos(node_count, link_count)
    
    Generates a random simple graph (no self-loop, no duplicated link) with 'node_count' nodes,
    labelled 0 .. node_count - 1, and exactly 'link_count' links drawn uniformly at random
        Parameters:
            node_count (int)
            link_count (int)
        Returns:
            my_graph (dictionary of lists) : the generated graph; isolated nodes map to an empty list
        Raises:
            ValueError : if 'link_count' is larger than node_count * (node_count - 1) / 2, the
            maximum number of links of a simple graph (the rejection loop would never end)
    '''
    usable_nodes = max(node_count, 0)
    max_link_count = usable_nodes * (usable_nodes - 1) // 2
    if link_count > max_link_count:
        raise ValueError(
            f"cannot place {link_count} links between {node_count} nodes (max {max_link_count})"
        )
    my_graph = {node: [] for node in range(node_count)}
    current_link_count = 0
    while current_link_count < link_count:
        node1 = random.randint(0, node_count - 1)
        node2 = random.randint(0, node_count - 1)
        # Reject self-loops and links that already exist; adjacency is kept symmetric, so
        # checking one direction is enough.
        if node1 == node2 or node2 in my_graph[node1]:
            continue
        # Both endpoints are guaranteed to be keys already, so the link is appended directly.
        my_graph[node1].append(node2)
        my_graph[node2].append(node1)
        current_link_count += 1
    return my_graph

In [None]:
# Guarded like the other driver cells: count_node and count_link only exist when the guarded
# CAIDA cell has run, so running this unconditionally would raise NameError.
if __name__ == '__main__' and '__file__' not in globals():
    erdos_graph = erdos(count_node,count_link)

    print(json.dumps(erdos_graph , indent = 4))

### Question 3

Compare its degree distribution, its number of triangles, its approximate diameter (of the largest component) to the one of the original graph.

In [None]:
# Guarded like the other driver cells: erdos_graph is only meaningful once the CAIDA counts
# (defined in a guarded cell) are available.
if __name__ == '__main__' and '__file__' not in globals():
    print("Erdos")
    plot_degree_dist(erdos_graph, log = False, my_legend = "erdos")
    print(f"\tTriangles : {triangle(erdos_graph)}")
    print(f"\tDiameter : {diameter(erdos_graph,100)}")

## Exercise 3: Barabasi-Albert model


### Question 4

Create a Barabasi-Albert graph with a number of links and nodes comparable to the original graph. We remind that in a BA model with $n$ nodes, the number of links $m$ is roughly equal to $\alpha n$ where $ \alpha $ is the parameter of the model. 

In [None]:
import copy
def sum_degree(my_graph):
    '''
    sum_degree(my_graph)
    
    Add up the degrees (adjacency list lengths) of all the nodes of 'my_graph'
        Parameters:
            my_graph (dictionary of lists)
        Returns:
            my_sum_degree (int) : the sum of the degree of the nodes
    '''
    return sum(len(neighbours) for neighbours in my_graph.values())
def barabasi(graph_size, my_graph, alpha):
    '''
    barabasi(graph_size, my_graph, alpha)
    
    Generates a Barabasi-Albert graph starting from a copy of 'my_graph' and adding nodes until
    the graph contains 'graph_size' nodes. Each added node is linked to 'alpha' distinct existing
    nodes, each chosen with a probability proportional to its current degree.
        Parameters:
            graph_size (int) : the number of nodes of the graph that will be generated
            my_graph (dictionary of list): the graph used as the base; it is not modified
            alpha (int): the degree given to each node when it is added
        Returns
            base_graph (dictionary of lists) : the generated graph. New nodes are labelled with
            integers starting at len(my_graph) + 1, skipping any label already used in the graph
        Raises:
            ValueError : if nodes have to be added with alpha > 0 but the base graph has fewer than
            'alpha' nodes with at least one link (no valid set of neighbours exists)
    '''
    base_graph = copy.deepcopy(my_graph)
    origin_size = len(base_graph)
    new_node_count = graph_size - origin_size
    if new_node_count <= 0:
        return base_graph

    # Each node appears in this list once per link end it owns, so a uniform draw from the list
    # is a degree-proportional draw from the nodes. It is updated incrementally instead of
    # recomputing the whole distribution for every new node.
    link_ends = [node for node, neighbours in base_graph.items() for _ in neighbours]
    if alpha > 0 and len(set(link_ends)) < alpha:
        raise ValueError(
            f"the base graph needs at least {alpha} nodes with a link to attach new nodes to"
        )

    new_node = origin_size
    for _ in range(new_node_count):
        new_node += 1
        while new_node in base_graph: # never overwrite a node that already exists
            new_node += 1
        print(f"\r Loading the barabasi graph : {len(base_graph) + 1} / {graph_size}",end = "", flush = True)
        # Draw the neighbours before inserting the new node so it cannot pick itself.
        chosen = []
        while len(chosen) < alpha:
            neighbour_node = random.choice(link_ends)
            if neighbour_node not in chosen:
                chosen.append(neighbour_node)
        base_graph[new_node] = list(chosen)
        for neighbour_node in chosen:
            base_graph[neighbour_node].append(new_node)
            link_ends.append(new_node)
            link_ends.append(neighbour_node)
    return base_graph

In [None]:
# Runs only as the main module and when '__file__' is undefined (presumably: inside the notebook).
if __name__ == '__main__' and '__file__' not in globals():
    # Small random graph (100 nodes, 100 links requested) used as the seed of the BA process.
    my_erdos = erdos(100,100)
    print("Base of the graph generated")
    # NOTE(review): 2647 is roughly a tenth of the node count hard-coded for the Watts-Strogatz
    # experiment further down (26450) — presumably reduced to keep generation fast; confirm.
    n_barabasi = 2647
    # Every added node gets 'alpha' links, so the final graph has about alpha * n links.
    alpha = 2
    my_barabasi = barabasi(n_barabasi, my_erdos, alpha)
    print(json.dumps(my_barabasi, indent = 4))


### Question 5

Compare its degree distribution, its number of triangles, its approximate diameter (of the largest component) to the one of the original graph.

In [None]:
# Runs only as the main module and when '__file__' is undefined (presumably: inside the notebook).
# Relies on 'my_barabasi' built by the previous guarded cell.
if __name__ == '__main__' and '__file__' not in globals():
    #print(json.dumps(degree_dist(my_barabasi), indent = 4))
    # Default arguments are used: log-log axes with the default limits.
    plot_degree_dist(my_barabasi, my_legend = "Barabasi")
    
    print(f"\t Triangles : {triangle(my_barabasi)}")
    # Approximate diameter from 100 randomly chosen BFS sources.
    print(f"\t Diameter : {diameter(my_barabasi, 100)}")
    

## Exercise 4: Watts-Strogatz model

### Question 6

Create a regular graph whose number of nodes $n$ is equal to that of the initial CAIDA graph. We have these constraints:

* all nodes of a regular graph have the same degree $k$, choose $k$ so that the number $m$ of edges is close to the one of the CAIDA graph,

* each node is connected to the nodes with the closest index, for example, if $k=6$, node $i$ will be connected to nodes $ i-1 $, $ i-2 $, $ i-3$ and $ i+1 $, $ i+2 $, $ i+3 $.  

In [None]:
def regular_graph(node_count, node_degree):
    '''
    regular_graph(node_count, node_degree)

    Generates a ring lattice with 'node_count' nodes labelled 0 .. node_count - 1, where each node
    is linked to its node_degree // 2 closest indices on each side (indices wrap around).
    Every node therefore has the same, even, degree: an odd 'node_degree' is rounded down.
        Parameters:
            node_count (int): the number of nodes in the generated graph
            node_degree (int) : the degree of each nodes in the generated graph
        Returns:
            my_graph (dictionary of lists): a graph with 'node_count' nodes where each node has the
            same degree; neighbours are listed from index node - k/2 to node + k/2
        Raises:
            ValueError : if the requested degree is not smaller than 'node_count'; wrapping around
            the ring would then create self-loops or duplicated links
    '''
    # Floor division: round() rounds halves to the nearest even integer, which made odd degrees
    # behave inconsistently (3 -> 4 neighbours, 5 -> 4, 7 -> 8).
    half_degree = int(node_degree) // 2
    if node_count > 0 and half_degree > 0 and 2 * half_degree >= node_count:
        raise ValueError(
            f"a degree of {node_degree} needs more than {2 * half_degree} nodes, got {node_count}"
        )
    my_graph = {}
    for node in range(node_count):
        my_graph[node] = [
            (node + offset) % node_count
            for offset in range(-half_degree, half_degree + 1)
            if offset != 0
        ]
    return my_graph
        

In [None]:
# Runs only as the main module and when "__file__" is undefined (presumably: inside the notebook).
if __name__ == "__main__" and "__file__" not in globals():
    # 10000-node lattice of degree 4, dumped in full as a visual check.
    my_watts = regular_graph(10000, 4)
    print(json.dumps(my_watts, indent = 4))

### Question 7

Starting from the graph created in the previous question, generate Watts-Strogatz models with several values of the parameter $p$: 0.01, 0.1, 0.3.

In [None]:
import copy
def watts(base_graph, p):
    '''
    watts(base_graph, p)
    
    Generates a graph according to a Watts-Strogatz style rewiring of 'base_graph': every link is
    visited once and each of its two endpoints is, independently and with probability 'p',
    replaced by a node drawn uniformly at random. A replacement is redrawn until the resulting
    link is neither a self-loop nor a link already present, so the number of links is preserved
    and a simple graph stays simple.
        Parameters:
            base_graph (dictionary of lists) : the starting graph, expected to be simple (no
            self-loop, no duplicated link) and symmetric; it is not modified
            p (float) : probability of rewiring each endpoint of a link
        Returns
            new_graph (dictionary of lists) : the rewired copy of 'base_graph'
    '''
    new_graph = copy.deepcopy(base_graph)
    nodes = list(base_graph) # draw from the actual labels rather than assuming 0 .. n-1
    for node1 in base_graph:
        for node2 in base_graph[node1]: # going through all links in the graph
            if node1 < node2:
                # Free the slot first: putting the link back unchanged is then always valid,
                # which guarantees that the redraw loop below terminates.
                new_graph[node1].remove(node2)
                new_graph[node2].remove(node1)
                # Decide once which endpoints move, so retries do not bias the probability.
                rewire_i = random.uniform(0,1) < p
                rewire_j = random.uniform(0,1) < p
                ri_node = node1
                rj_node = node2
                while rewire_i or rewire_j:
                    if rewire_i:
                        ri_node = random.choice(nodes)
                    if rewire_j:
                        rj_node = random.choice(nodes)
                    self_loop = ri_node == rj_node
                    multi_edge = rj_node in new_graph[ri_node]
                    if not self_loop and not multi_edge:
                        break
                # Both endpoints are keys of the graph, so the link is appended directly.
                new_graph[ri_node].append(rj_node)
                new_graph[rj_node].append(ri_node)
    return new_graph
    

In [None]:
# Runs only as the main module and when "__file__" is undefined (presumably: inside the notebook).
if __name__ == "__main__" and "__file__" not in globals():
    my_watts = regular_graph(10000, 4)
    # watts() works on a copy and returns it; keep the result, otherwise the unrewired
    # lattice is what gets printed.
    my_watts = watts(my_watts, 0.01)
    print(json.dumps(my_watts, indent = 4))

### Question 8

Compare their degree distribution, their number of triangles, their approximate diameter (of the largest component) to the one of the original graph.

In [None]:
# Runs only as the main module and when "__file__" is undefined (presumably: inside the notebook).
if __name__ == "__main__" and "__file__" not in globals():
    #p1 = 0.01; p2 = 0.1; p3 = 0.3; 
    # NOTE(review): node_count and k are hard-coded, presumably to approximate the CAIDA graph
    # (a lattice of degree k has about k * node_count / 2 links) — confirm against the counts
    # printed by the CAIDA cell.
    node_count = 26450; k = 6; diameter_sample = 100
    probs = [0.01, 0.1, 0.3]
    # The same lattice is the starting point for every value of p.
    my_regular = regular_graph(node_count, k)

    for p in probs:
        print(f"Model Watts p = {p}")
        my_watts = watts(my_regular, p)
        print(f"\tTriangle : {triangle(my_watts)}")
        print(f"\tDiameter : {diameter(my_watts,diameter_sample)}")
        # All three distributions are drawn on the same axes, one legend entry per p.
        plot_degree_dist(my_graph = my_watts, log = False, my_legend = f"p = {p}")
    
    
    #print(my_watts)