

# Solver for Problem 1 (Clique Clustering Model - G=(V,E))

### Ítalo Gomes Santana, Rafael Azevedo e Rodrigo Laigner

### Pontifical Catholic University of Rio de Janeiro (PUC-RIO) 2018.2



In [None]:

import numpy as np

import sys
import math
import random
import time as tm
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from gurobipy import *




### Loading and Initialization of the Input Graph for Problem 1 (Clique Clustering Model - G=(V,E)) 


In [None]:
# Read Instance
def read_graph(file_name):
    """Load an undirected, unit-weight graph from a whitespace-separated edge list.

    Lines starting with '#' and lines that do not contain exactly two tokens
    are skipped. Every vertex label found in the file is mapped, in order of
    first appearance, to a consecutive integer index starting at 1; the graph
    is built over these indices. Self-loops are not added as edges, although
    their vertex still receives an index.

    Parameters:
        file_name: path of the edge-list file.

    Returns:
        A tuple (n, m, vmap, graph, V) where n is the number of mapped
        vertices, m the number of edges, vmap maps original label -> index,
        graph is the networkx graph over the indices and V maps
        index -> original label.

    Raises:
        ValueError: if a two-token line contains a non-integer token.
    """
    print("\nLoading instance from file: ", file_name)

    vmap = {}
    graph = nx.Graph()

    # The file is only read, so open it read-only ('r+' would fail on
    # write-protected instance files); the context manager closes the handle
    # even when a malformed line makes int() raise.
    with open(file_name, 'r') as f:
        for line in f:

            # Ignore comment lines and blocks in the file.
            if line.startswith("#"):
                continue

            # A valid line holds the two end points of one edge.
            item_data = line.split()
            if len(item_data) != 2:
                continue

            i = int(item_data[0])
            j = int(item_data[1])

            # Give each newly seen vertex the next free integer index.
            if i not in vmap:
                vmap[i] = len(vmap) + 1
            if j not in vmap:
                vmap[j] = len(vmap) + 1

            # Store the edge with the smaller index first.
            source = min(vmap[i], vmap[j])
            sink = max(vmap[i], vmap[j])

            # Loops are skipped because their distance is 0. Re-adding an
            # existing edge only rewrites the same weight, so no membership
            # test is needed before add_edge.
            if source != sink:
                graph.add_edge(source, sink, weight=1)

    V = {index: vertex for vertex, index in vmap.items()}

    print("N vértices: ", len(V))
    print("M edges: ", len(graph.edges()))

    return len(V), len(graph.edges()), vmap, graph, V

In [None]:
def compute_distances(V,E):
    """Compute hop distances between all mutually reachable vertex pairs.

    Runs one breadth-first search per vertex over the undirected edges in E.

    Parameters:
        V: iterable of vertex identifiers (must be re-iterable and hashable).
        E: iterable of edges; only the first two items of each edge are used,
           and both must be members of V.

    Returns:
        A dict keyed by (v, s) tuples holding the minimum number of edges
        between v and s. Pairs that are not connected have no entry.
    """
    from collections import deque

    # Adjacency list of every vertex.
    adjs = {v: [] for v in V}
    for e in E:
        adjs[e[0]].append(e[1])
        adjs[e[1]].append(e[0])

    dist = {}

    for v in V:
        dist[v, v] = 0
        # A deque gives O(1) removal from the front of the BFS frontier;
        # removing the head of a list is O(len(list)) on every step.
        frontier = deque([v])
        while frontier:
            c = frontier.popleft()
            for s in adjs[c]:
                if (v, s) not in dist:
                    dist[v, s] = dist[v, c] + 1
                    frontier.append(s)
    return dist

In [None]:
def getEdgeMapping(source, sink, vmap):
    """Return the pair (vmap[source], vmap[sink]) for one edge."""
    mapped_source = vmap[source]
    mapped_sink = vmap[sink]
    return mapped_source, mapped_sink

def remap_edges(E,V):
    """Translate every edge (i, j) of E into (V[i], V[j]) and return them as a list."""
    translated = []
    for tail, head in E:
        translated.append((V[tail], V[head]))
    return translated

In [None]:
def visualizeGraph(graph, layout_attr='circular', color_map=None, scale=10, enable_edges=True, node_size=30):
    """Draw the graph with matplotlib using the requested node layout.

    Parameters:
        graph: networkx graph to draw.
        layout_attr: one of 'bipartite', 'circular', 'kamada_kawai', 'random',
            'rescale', 'shell', 'spring' or 'spectral'. Any other value falls
            back to a random layout.
        color_map: optional sequence of node colors, one per node in graph
            iteration order. When None the default node color is used.
        scale: scale factor forwarded to the layouts that accept one.
        enable_edges: when False only the nodes are drawn.
        node_size: marker size of the nodes.

    Notes:
        The 'bipartite' layout derives the two node sets with
        nx.bipartite.sets, which presumably raises for graphs that are not
        bipartite or are disconnected - confirm against the networkx docs.
    """
    if layout_attr == "bipartite":
        # Position nodes in two straight lines; the layout needs one of the
        # two node sets explicitly.
        print("\nBipartite Graph: ")
        top_nodes = nx.bipartite.sets(graph)[0]
        layout = nx.bipartite_layout(graph, top_nodes, scale=scale)
    elif layout_attr == "circular":
        # Position nodes on a circle.
        print("\nCircular Graph: ")
        layout = nx.circular_layout(graph, scale=scale)
    elif layout_attr == "kamada_kawai":
        # Position nodes using Kamada-Kawai path-length cost-function.
        print("\nKamada-kawai Graph: ")
        layout = nx.kamada_kawai_layout(graph, scale=scale)
    elif layout_attr == "random":
        # Position nodes uniformly at random in the unit square.
        print("\nRandom Graph: ")
        layout = nx.random_layout(graph)
    elif layout_attr == "rescale":
        # rescale_layout works on a position array, not on a graph: start
        # from random positions and rescale them to (-scale, scale).
        print("\nRescale Graph: ")
        raw_positions = nx.random_layout(graph)
        ordered_nodes = list(raw_positions)
        if ordered_nodes:
            coords = nx.rescale_layout(
                np.array([raw_positions[node] for node in ordered_nodes]), scale=scale)
            layout = dict(zip(ordered_nodes, coords))
        else:
            layout = {}
    elif layout_attr == "shell":
        # Position nodes in concentric circles.
        print("\nShell Graph: ")
        layout = nx.shell_layout(graph, scale=scale)
    elif layout_attr == "spring":
        # Position nodes using Fruchterman-Reingold force-directed algorithm.
        print("\nSpring Graph: ")
        layout = nx.spring_layout(graph, scale=scale)
    elif layout_attr == "spectral":
        # Position nodes using the eigenvectors of the graph Laplacian.
        print("\nSpectral Graph: ")
        layout = nx.spectral_layout(graph, scale=scale)
    else:
        # random_layout has no 'scale' keyword.
        layout = nx.random_layout(graph)

    # Create the figure before drawing so everything lands on the same canvas.
    plt.figure(3, figsize=(12, 12))
    plt.clf()

    node_kwargs = {'node_size': node_size}
    if color_map is not None:
        node_kwargs['node_color'] = color_map
    nx.draw_networkx_nodes(graph, layout, **node_kwargs)

    # Edges are drawn separately so that enable_edges is actually honoured.
    if enable_edges:
        nx.draw_networkx_edges(graph, layout)

    plt.axis('off')
    plt.show()

In [None]:
def loadGraphInstance(file_name):
    """Read a graph instance and derive the data used by the solvers.

    Parameters:
        file_name: path of the edge-list file handed to read_graph.

    Returns:
        A tuple (n, m, V, E, vmap, graph, dist): the vertex and edge counts,
        the index -> original-label map V, the edge list E expressed with
        original labels, the original-label -> index map vmap, the networkx
        graph over the integer indices, and the pairwise distances computed
        over those integer indices.
    """
    n, m, vmap, graph, V = read_graph(file_name)

    # Distances are computed on the integer indices used inside the graph.
    vertex_indices = list(V.keys())
    dist = compute_distances(vertex_indices, graph.edges())

    # Edges are reported with the labels found in the input file.
    E = remap_edges(graph.edges(), V)

    return n, m, V, E, vmap, graph, dist

In [None]:

# ######################################### FOR DEBUG PURPOSES ONLY #################################################

# n, m, V, E, vmap, graph, dist = loadGraphInstance("tvshow_edges_shorter.txt")

# print("\nEncoded N: ", n, "- Vertices:", graph.nodes())
# print("\nEncoded M: ", m, "- Edges:", graph.edges())

# print("\nN:", n, "- V:", V)
# print("\nM:", m, "- E:", E)

# scale = 1000
# en_edges = False

# print("\nCircular Layout: ")
# visualizeGraph(graph, layout_attr='circular', scale=scale, enable_edges=en_edges)
# print("\nKamada Kawai Layout: ")
# visualizeGraph(graph, layout_attr='kamada_kawai', scale=scale, enable_edges=en_edges)
# print("\nRandom Layout: ")
# visualizeGraph(graph, layout_attr='random', scale=scale, enable_edges=en_edges)
# print("\nShell Layout: ")
# visualizeGraph(graph, layout_attr='shell', scale=scale, enable_edges=en_edges)
# print("\nSpring Layout: ")
# visualizeGraph(graph, layout_attr='spring', scale=scale, enable_edges=en_edges)
# print("\nSpectral Layout: ")
# visualizeGraph(graph, layout_attr='spectral', scale=scale, enable_edges=en_edges)


## Formulation for Problem 1 (Clique Clustering Model - G=(V,E))

$$
{\large
\begin{array}{rll}
\min  &  \sum\limits_{k=1}^{n} z_k &\\
s.t. & & \\
 & y_{vk} + y_{wk} \leq 1  &\quad  d(v,w) > H + 1 \quad \forall k= 1,\ldots, n \\
 & y_{vk} \leq z_k & \quad \forall v \in V \quad \forall k= 1,\ldots, n \> = \> |V| \\
 & \sum\limits_{k=1}^{n} y_{vk} = 1 & \quad \forall v \in V \\
 &  y_{vk} \in \{ 0,1 \} & \\
 &  z_{k} \in \{ 0,1 \} &
\end{array}
}
$$





### Variables for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_yvk_vars(model, V):
    """Create the binary membership variables y_vk.

    y_vk[v, k] == 1 means vertex v belongs to cluster k, 0 otherwise. One
    variable is created for every vertex index v and cluster index k in
    1..len(V), each with objective coefficient 0.

    Parameters:
        model: Gurobi model that receives the variables.
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping (v, k) to the created variable.
    """
    n = len(V)

    y_vk = {}

    for k in range(1, n+1):
        for v in range(1, n+1):
            # Separate v and k with '_' so names stay unique: without it
            # (v=1, k=12) and (v=11, k=2) would both be named 'y_vk_112'.
            y_vk[v, k] = model.addVar(obj=0.0, vtype=GRB.BINARY,
                                      name='y_vk_' + str(v) + '_' + str(k))
    model.update()
    return y_vk


In [None]:
def create_zk_vars(model, V):
    """Create the binary cluster-usage variables z_k.

    z_k[k] == 1 means cluster k is part of the solution; with z_k[k] == 0 no
    vertex may be placed in cluster k. Each variable has objective
    coefficient 1, so the objective counts the clusters in use.

    Parameters:
        model: Gurobi model that receives the variables.
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping k in 1..len(V) to the created variable.
    """
    cluster_ids = range(1, len(V) + 1)
    z_k = {
        k: model.addVar(obj=1.0, vtype=GRB.BINARY, name='z_' + str(k))
        for k in cluster_ids
    }
    model.update()
    return z_k





### Constraints for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_conf_constraints(model, y_vk, V, dist, H):
    """Add the conflict constraints y_vk[v, k] + y_vk[w, k] <= 1.

    A pair v < w is in conflict when dist has no entry for (v, w) or when
    dist[v, w] > H. For every conflicting pair and every cluster k at most
    one of the two vertices may be placed in cluster k.

    Parameters:
        model: model that receives the constraints.
        y_vk: dict of membership variables keyed by (v, k).
        V: collection of vertices; only its length is used.
        dist: dict of pairwise distances keyed by (v, w).
        H: distance threshold.

    Returns:
        A dict mapping (v, w, k) to the created constraint.
    """
    n = len(V)

    # The set of conflicting pairs does not depend on the cluster index.
    conflicting_pairs = [
        (v, w)
        for v in range(1, n)
        for w in range(v + 1, n + 1)
        if not ((v, w) in dist and dist[v, w] <= H)
    ]

    conf_constr = {}
    for k in range(1, n + 1):
        for v, w in conflicting_pairs:
            label = 'conf_constr_{}_{}_{}'.format(v, w, k)
            conf_constr[v, w, k] = model.addConstr(y_vk[v, k] + y_vk[w, k] <= 1, name=label)

    model.update()
    print("\nTotal conflict constraints created = ", len(conf_constr))
    return conf_constr


In [None]:
def create_up_constraints(model, y_vk, z_k, V):
    """Add the linking constraints y_vk[v, k] - z_k[k] <= 0.

    Vertex v can be placed in cluster k only when cluster k is part of the
    solution (z_k[k] == 1).

    Parameters:
        model: model that receives the constraints.
        y_vk: dict of membership variables keyed by (v, k).
        z_k: dict of cluster-usage variables keyed by k.
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping (v, k) to the created constraint.
    """
    indices = range(1, len(V) + 1)
    up_constrs = {}

    for k in indices:
        for v in indices:
            label = 'up_constr_{}_{}'.format(v, k)
            up_constrs[v, k] = model.addConstr(y_vk[v, k] - z_k[k] <= 0, name=label)

    model.update()
    print("\nTotal up constraints created = ", len(up_constrs))
    return up_constrs

In [None]:
def create_asgn_constraints(model, y_vk, V):
    """Add the assignment constraints sum_k y_vk[v, k] == 1.

    Every vertex must belong to exactly one cluster, which makes the clusters
    a partition of the vertex set.

    Parameters:
        model: Gurobi model that receives the constraints.
        y_vk: dict of membership variables keyed by (v, k).
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping v to the created constraint.
    """
    n = len(V)
    asgn_constrs = {}

    for v in range(1, n + 1):
        membership = quicksum(y_vk[v, k] for k in range(1, n + 1))
        asgn_constrs[v] = model.addConstr((membership == 1), name='asgn_constr' + str(v))

    model.update()
    print("\nTotal assignment constraints created = ", len(asgn_constrs))
    return asgn_constrs
    





### Primal-Dual Solver for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def solveMinClustersPrimalModel(V, E, vmap, graph, dist, H, timeLimit):
    """Build and solve the compact integer model that minimises the number of clusters.

    The model uses the y_vk / z_k variables and the conflict, linking and
    assignment constraints created by the helper functions of this file. The
    model is written to 'mf_clust.lp' before being solved.

    Parameters:
        V: collection of vertices; only its length is used here.
        E, vmap, graph: not used by this function; kept for interface
            compatibility with the callers.
        dist: dict of pairwise distances keyed by (v, w) index pairs.
        H: maximum diameter; pairs farther apart than H cannot share a cluster.
        timeLimit: maximum number of seconds given to the solver.

    Returns:
        A tuple (objective_value, vertex_cluster, clusters) where
        vertex_cluster maps each vertex index to its cluster index and
        clusters maps each used cluster index to the list of its vertices.
        When the solver stops without any solution, (None, {}, {}) is
        returned.
    """
    n = len(V)

    # Creates the model mp.
    mp = Model()

    print("\nH (Maximum Diameter) = ", H)

    # Creating and initializing model variables.
    vars_tstart = tm.time()

    # y_vk == 1 => vertex v belongs to cluster k, 0 otherwise.
    y_vk = create_yvk_vars(mp, V)

    # z_k == 1 => cluster k exists in the solution, 0 otherwise.
    z_k = create_zk_vars(mp, V)

    vars_exec_time = tm.time() - vars_tstart
    print("\nVariables created (execution time = {:.4f}s).".format(vars_exec_time))

    # Creating and initializing model constraints.
    constrs_tstart = tm.time()

    # Vertices farther apart than H must be in different clusters.
    create_conf_constraints(mp, y_vk, V, dist, H)

    # Upper bound constraints (y_vk <= z_k).
    create_up_constraints(mp, y_vk, z_k, V)

    # Each vertex belongs to exactly one cluster.
    create_asgn_constraints(mp, y_vk, V)

    constrs_exec_time = tm.time() - constrs_tstart
    print("\nConstraints created (execution time = {:.4f}s).\n".format(constrs_exec_time))

    # Time limit for searching an optimal solution.
    print("\nTimeLimit: {}\n".format(timeLimit))

    # Output the mp model definition, including variables and constraints.
    mp.write("mf_clust.lp")

    # Set parameters for the mp model.
    mp.setParam('TimeLimit', timeLimit)
    mp.setParam('OutputFlag', 1)

    print("\n\n####### SOLVER START #######\n")
    solver_tstart = tm.time()
    mp.optimize()
    solver_exec_time = tm.time() - solver_tstart
    print("\nSolver finished (execution time = {:.4f}s)\n".format(solver_exec_time))
    print("\n####### SOLVER END #######\n\n")

    print("TOTAL EXECUTION TIME = {:.4f}s\n".format(vars_exec_time + constrs_exec_time + solver_exec_time))

    # The solver may hit the time limit before finding any incumbent; reading
    # variable values would then fail, so report it and return empty results.
    if mp.SolCount == 0:
        print("\nNo feasible solution found (solver status = {}).\n".format(mp.Status))
        return None, {}, {}

    # Get model current objective function.
    zp = mp.getObjective()

    v_sol = mp.getAttr('X', y_vk)

    # Retrieve for each vertex the cluster to which it belongs.
    v_sol_r = {v: k for v in range(1, n + 1) for k in range(1, n + 1) if v_sol[v, k] > 0.001}

    # Group the vertices by cluster according to the solution found.
    clusters = {}
    for v, k in v_sol_r.items():
        clusters.setdefault(k, []).append(v)

    return zp.getValue(), v_sol_r, clusters






### Output of Results for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def computeColorMap(graph, clusters, vertex_cluster):
    """Build one color per graph node so that nodes of a cluster share a shade.

    Clusters are ranked 1, 2, ... in the order in which they are first met
    while iterating the graph; rank r is mapped to the 'autumn' colormap at
    position r / len(clusters).

    Parameters:
        graph: iterable of nodes (for example a networkx graph).
        clusters: dict of clusters; only its size is used.
        vertex_cluster: dict mapping every node to its cluster identifier.

    Returns:
        A list of colors in graph iteration order, or None when either dict
        is empty.

    Raises:
        AssertionError: if clusters or vertex_cluster is not a dict.
        KeyError: if a node of graph has no entry in vertex_cluster.
    """
    assert(isinstance(clusters, dict))
    assert(isinstance(vertex_cluster, dict))

    if not (clusters and vertex_cluster):
        return None

    palette = cm.autumn
    scaler = Normalize(vmin=0, vmax=1)
    step = 1.0 / len(clusters)

    shade_rank = {}
    colors = []
    for node in graph:
        cluster_id = vertex_cluster[node]
        # First time a cluster is seen it receives the next rank (1-based).
        rank = shade_rank.setdefault(cluster_id, len(shade_rank) + 1)
        colors.append(palette(scaler(rank * step)))

    return colors

In [None]:
def printSolution(V,zp, vertex_cluster, clusters):
    """Print the number of clusters, the vertex assignment and each cluster's members.

    Parameters:
        V: mapping from vertex index to the label printed for that vertex.
        zp: value printed as the number of clusters.
        vertex_cluster: dict mapping vertex index to cluster index; printed
            as a whole when not empty.
        clusters: dict mapping cluster index to its list of vertex indices;
            members are printed three per line.

    Raises:
        AssertionError: if vertex_cluster or clusters is not a dict.
    """
    assert(isinstance(vertex_cluster, dict))
    assert(isinstance(clusters, dict))
    print ("\nNúmero de Clusters: ", zp)

    if vertex_cluster:
        print ("\nSolution (vertex_index, k_cluster_index): \n\n\t{}\n".format(vertex_cluster))

    if not clusters:
        return

    per_line = 3
    for k, vertices in clusters.items():
        print("\tCluster #{} contains the following vertices: \n".format(k))
        for position, v in enumerate(vertices):
            column = position % per_line
            # The first entry of every row gets an extra tab of indentation.
            indent = "\t\t" if column == 0 else "\t"
            print(indent + "INDEX = {} ; vertex[{}] = {}".format(v, v, V[v]), end = '')
            if column == per_line - 1:
                print()
        print("\n")





### Execution of Primal-Dual Algorithm for Problem 1 (Clique Clustering Model - G=(V,E)) 


In [None]:

def executeMinClustersAlgorithm(file_path, list_H, timeLimit, enableVis):
    """Solve the compact clustering model on one instance for several diameters.

    The graph is loaded once; the model is then solved and its solution
    printed for every value of H in list_H.

    Parameters:
        file_path: path of the file containing the edges of the graph.
        list_H: iterable of maximum cluster diameters to solve for.
        timeLimit: maximum number of seconds given to the solver per value of H.
        enableVis: if true, plot the graph with the vertices colored by cluster.
    """
    # Load graph from file and treat data.
    n, m, V, E, vmap, graph, dist = loadGraphInstance(file_path)

    for H in list_H:
        # Minimize the number of clusters with diameter less than or equal to H.
        opt_val, vertex_cluster, clusters = solveMinClustersPrimalModel(V, E, vmap, graph, dist, H, timeLimit)

        printSolution(V,opt_val, vertex_cluster, clusters)

        if enableVis:
            # Keep the computed colors: they are what visualizeGraph needs.
            color_map = computeColorMap(graph, clusters, vertex_cluster)

            # Skip the plot when no colors are available (empty solution).
            # Disable enableVis for large graphs.
            if color_map:
                visualizeGraph(graph, layout_attr='random', color_map=color_map, scale=1000, enable_edges=True, node_size=50)




### Execute the cell below to use the Primal-Dual algorithm for Problem 1 (Clique Clustering Model - G=(V,E)) 


In [None]:
# Run this cell to solve the compact formulation above on the selected instance.

# Set to True to plot the clustered graph after each solve.
enableVis = False

# Maximum cluster diameters (largest shortest-path distance allowed inside a cluster).
list_H = [1, 2, 3, 5, 6]

# Instance file (edge list) and solver time limit in seconds.
file_path = "instances/as19990829.txt"
timeLimit = 4 * 60 * 60

# Alternative instances:
# file_path = "instances/as19981229.txt"
# timeLimit = 60*60*6

# file_path = "instances/as19981230.txt"
# timeLimit = 60*60*6

executeMinClustersAlgorithm(file_path, list_H, timeLimit, enableVis)



## ColGen Formulation - Master Problem

$$
{\large
\begin{array}{rll}
\min  &  \sum\limits_{p \in \cal{P}} \lambda_p &\\
s.t. & & \\
(\mbox{Dual } \pi_v) & \sum\limits_{p \in \cal{P}} a_{vp} . \lambda_p = 1 & \quad \forall v \in V \\
 &  \lambda_{p} \in \{ 0,1 \} &
\end{array}
}
$$

## ColGen Subproblem

> Column reduced cost: $\overline{c}_p = 1 - \sum\limits_{v \in V} \pi_v . a_v$

$$
{\large
\begin{array}{rll}
\max  &  \sum\limits_{v \in V} \pi_v . a_v &\\
s.t. & & \\
 & a_{v} + a_{w} \leq 1  &\quad  d(v,w) > H + 1 \\
 &  a_{v} \in \{ 0,1 \} & \quad \forall v \in V
\end{array}
}
$$





### Variables of the Master Problem for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_lmbda_vars(model, V):
    """Create the initial lambda variables of the column-generation master problem.

    One continuous variable with objective coefficient 1 is created for every
    index p in 1..len(V).

    Parameters:
        model: Gurobi model of the master problem.
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping p to the created variable.
    """
    column_ids = range(1, len(V) + 1)
    lmbda = {
        p: model.addVar(obj=1.0, vtype=GRB.CONTINUOUS, name='lamb_' + str(p))
        for p in column_ids
    }
    model.update()
    return lmbda
    





### Constraints of the Master Problem for Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_setpart_constraints(model, V, lmbda):
    """Add the initial set-partitioning rows lmbda[v] == 1 of the master problem.

    Each row v starts with the single column v, i.e. every vertex is
    initially covered by its own column.

    Parameters:
        model: model of the master problem.
        V: collection of vertices; only its length is used.
        lmbda: dict of lambda variables keyed by 1..len(V).

    Returns:
        A dict mapping v to the created constraint.
    """
    set_part_constrs = {}

    for v in range(1, len(V) + 1):
        label = 'setpart_constr_{}'.format(v)
        set_part_constrs[v] = model.addConstr(lmbda[v] == 1, name=label)

    model.update()
    return set_part_constrs





### Variables for the Subproblem of the Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_a_vars(model, V):
    """Create the binary a_v variables of the column-generation subproblem.

    One binary variable with objective coefficient 0 is created for every
    vertex index v in 1..len(V).

    Parameters:
        model: Gurobi model of the subproblem.
        V: collection of vertices; only its length is used.

    Returns:
        A dict mapping v to the created variable.
    """
    vertex_ids = range(1, len(V) + 1)
    a = {
        v: model.addVar(obj=0.0, vtype=GRB.BINARY, name='a_' + str(v))
        for v in vertex_ids
    }
    model.update()
    return a






### Constraints for the Subproblem of the Problem 1 (Clique Clustering Model - G=(V,E))   >>>  IMPLEMENTATION


In [None]:
def create_conf_sub_constraints(model, H, V, dist, a):
    """Add the conflict constraints a[v] + a[w] <= 1 of the subproblem.

    A pair v < w is in conflict when dist has no entry for (v, w) or when
    dist[v, w] > H; at most one vertex of such a pair may be selected.

    Parameters:
        model: model of the subproblem.
        H: distance threshold.
        V: collection of vertices; only its length is used.
        dist: dict of pairwise distances keyed by (v, w).
        a: dict of subproblem variables keyed by v.

    Returns:
        A dict mapping (v, w) to the created constraint.
    """
    n = len(V)
    conf_constr = {}

    for v in range(1, n):
        for w in range(v + 1, n + 1):
            compatible = (v, w) in dist and dist[v, w] <= H
            if compatible:
                continue
            label = 'conf_constr_{}_{}'.format(v, w)
            conf_constr[v, w] = model.addConstr(a[v] + a[w] <= 1, name=label)

    model.update()
    return conf_constr






### Creating and Solving the Subproblem (Problem 1 Column Generation)  >>>  IMPLEMENTATION


In [None]:
def create_sub(msub, H, V, dist):
    """Populate the subproblem model and return (a variables, conflict constraints)."""
    selection_vars = create_a_vars(msub, V)
    return selection_vars, create_conf_sub_constraints(msub, H, V, dist, selection_vars)

In [None]:
def solvesub(msub, V, a, pi_v):
    """Set the subproblem objective from pi_v, solve it and return its value.

    Parameters:
        msub: model of the subproblem.
        V: collection of vertices; only its length is used.
        a: dict of subproblem variables keyed by 1..len(V).
        pi_v: dict of objective coefficients keyed by 1..len(V).

    Returns:
        A tuple (objective value after optimisation, a).
    """
    for v in range(1, len(V) + 1):
        a[v].setAttr('Obj', pi_v[v])

    msub.optimize()
    return msub.getObjective().getValue(), a






### Creating and Solving the Master Problem (Problem 1 Column Generation)  >>>  IMPLEMENTATION


In [None]:
# ColGen Solve Linear Relaxation

def solve_ColGen_lp(V, E, vmap, graph, dist, H, timeLimit, return_clusters):
    """Solve the linear relaxation of the set-partitioning master by column generation.

    The master starts with one singleton column per vertex. At every
    iteration the master LP is solved, the negated duals of the partition
    rows become the objective of the pricing subproblem, and the column found
    by the subproblem is added to the master while its value is at least
    1 + Epsilon. The master is written to 'amm.lp' before the loop and to
    'mm.lp' after it.

    Parameters:
        V: collection of vertices; only its length is used here.
        E, vmap, graph: not used by this function; kept for interface
            compatibility with the callers.
        dist: dict of pairwise distances keyed by (v, w) index pairs.
        H: maximum diameter used by the subproblem conflict constraints.
        timeLimit: maximum total number of seconds for the column-generation
            loop. It is checked between iterations, so a single iteration may
            overrun it; None disables the check. When the limit stops the
            loop, the returned value is the objective of the restricted
            master, not a proven LP optimum.
        return_clusters: when True, the columns whose lambda value is 1 are
            returned as clusters.

    Returns:
        A tuple (objective_value, vertex_cluster, clusters). vertex_cluster
        maps a vertex index to the column covering it and clusters maps a
        column index to its list of vertices; both only contain columns with
        lambda equal to 1 (within a small tolerance), so they may be partial
        or empty when the LP solution is fractional or return_clusters is
        False.

    Raises:
        AssertionError: if return_clusters is not a bool.
    """
    assert(isinstance(return_clusters, bool))

    n = len(V)
    pi_v = {}

    Epsilon = 0.001
    # LP values are floating point: compare lambda with 1 up to a tolerance.
    int_tol = 1e-6

    master = Model()

    print("\nH (Maximum Diameter) = ", H)
    vars_tstart = tm.time()

    # Create variable LAMBDA of the Master ColGen Problem.
    lmbda = create_lmbda_vars(master, V)

    print("\nVariable LAMBDA (Master) created (execution time = {:.4f}s).".format(tm.time() - vars_tstart))

    vars_tstart = tm.time()
    # Create set partition constraints of the ColGen master problem.
    part = create_setpart_constraints(master, V, lmbda)

    print("\nConstraint Set Partition (Master) created (execution time = {:.4f}s).".format(tm.time() - vars_tstart))

    # Create subproblem model.
    msub = Model()

    vars_tstart = tm.time()
    a, _ = create_sub(msub, H, V, dist)
    print("\nSubproblem model created (execution time = {:.4f}s).".format(tm.time() - vars_tstart))

    master.setParam('OutputFlag', 0)
    msub.setParam('OutputFlag', 0)

    # columns[p] lists the vertices covered by column p. The initial column
    # p covers only vertex p (see create_setpart_constraints).
    columns = {p: [p] for p in range(1, n + 1)}
    n_columns = n
    iter_n = 0
    total_time = 0.0

    master.write("amm.lp")
    while True:

        vars_tstart = tm.time()
        master.optimize()

        print ('\nIteration #{}'.format(iter_n))
        print ('Number of Columns = ', n_columns)
        zd = master.getObjective()
        print ("Master Objective Value = {}".format(zd.getValue()))

        # The subproblem is a minimisation, so the duals are negated here
        # and the sign of its optimum is flipped back below.
        for v in range(1, n + 1):
            pi_v[v] = -part[v].getAttr("Pi")

        redcost, a = solvesub(msub, V, a, pi_v)
        redcost = - redcost

        print ("Redcost: ", redcost)
        improving = redcost >= 1 + Epsilon
        if improving:
            v_sol = msub.getAttr('X', a)
            v_sol_r = [v for v in range(1,n+1) if v_sol[v] > 0.01]

            # Add the new column to the master, with coefficient 1 in the
            # partition row of every vertex it covers.
            n_columns += 1
            lmbda[n_columns] = master.addVar(obj=1.0, vtype=GRB.CONTINUOUS, name='lmbda'+'_'+str(n_columns))
            master.update()

            for v in v_sol_r:
                master.chgCoeff(part[v],lmbda[n_columns], 1.0)

            master.update()
            columns[n_columns] = v_sol_r
        else:
            print("\nEND OF ITERATION\n")

        inc = tm.time() - vars_tstart
        print("\nTime duration = {:.4f}s.\n".format(inc))
        print("-------------------------------------------")
        total_time = total_time + inc
        iter_n = iter_n + 1

        if not improving:
            break

        if timeLimit is not None and total_time >= timeLimit:
            print("\nTime limit of {}s reached: stopping column generation early.\n".format(timeLimit))
            # Re-solve so that the solution read below includes the column
            # that was just added.
            master.optimize()
            break

    print("\nAlgorithm total time duration = {:.4f}s).".format(total_time))
    master.write("mm.lp")
    print('Number of Columns:', n_columns)

    lmbda_sol = master.getAttr('X', lmbda)
    obj_value = sum((lmbda_sol[ind] for ind in lmbda_sol), 0.0)

    print ("Final Master Objective Value = {}".format(obj_value))
    zd_value = master.getObjective().getValue()
    if zd_value:
        print ("Master Objective Value = {}".format(zd_value))

    print("-------------------------------------------\n")

    clusters = {}
    vertex_cluster = {}

    # A column is reported as a cluster only when its lambda equals 1; the
    # recorded column contents avoid querying every (row, column) coefficient.
    if return_clusters:
        for p in range(1, n_columns + 1):
            if abs(lmbda_sol[p] - 1.0) <= int_tol:
                clusters[p] = list(columns[p])
                for v in columns[p]:
                    vertex_cluster[v] = p

    if zd_value:
        return zd_value, vertex_cluster, clusters
    else:
        return obj_value, vertex_cluster, clusters





### Execution of Column Generation Algorithm for Problem 1 (Clique Clustering Model - G=(V,E)) 


In [None]:

def executeMinClustersGCAlgorithm(file_path, list_H, timeLimit, return_clusters, enableVis):
    """Run the column-generation algorithm on one instance for several diameters.

    The graph is loaded once; the relaxation is then solved and its solution
    printed for every value of H in list_H.

    Parameters:
        file_path: path of the file containing the edges of the graph.
        list_H: iterable of maximum cluster diameters to solve for.
        timeLimit: time limit forwarded to solve_ColGen_lp.
        return_clusters: forwarded to solve_ColGen_lp; when True the solution
            is also returned grouped by cluster.
        enableVis: if true, plot the graph with the vertices colored by cluster.
    """
    # Load graph from file and parse data.
    n, m, V, E, vmap, graph, dist = loadGraphInstance(file_path)

    for H in list_H:
        opt_val, vertex_cluster, clusters = solve_ColGen_lp(V, E, vmap, graph, dist, H, timeLimit, return_clusters)

        printSolution(V,opt_val, vertex_cluster, clusters)

        if not enableVis:
            continue

        color_map = computeColorMap(graph, clusters, vertex_cluster)

        # Nothing to plot when no colors could be computed.
        if not color_map:
            continue

        # Disable enableVis for large graphs or when the plot is not wanted.
        visualizeGraph(graph, layout_attr='random', color_map=color_map, scale=1000, enable_edges=True, node_size=50)




### Execute the cell below to use the Column Generation Algorithm for Problem 1 


In [None]:
# Run this cell to solve the formulation with the Column Generation algorithm.

# Set to True to plot the clustered graph after each solve.
enableVis = False

# Ask the solver to also return the solution grouped by cluster.
return_clusters = True

# Maximum cluster diameters (largest shortest-path distance allowed inside a cluster).
list_H = [1, 2, 3, 5, 6]

# Instance file (edge list) and time limit in seconds.
file_path = "instances/as19990829.txt"
timeLimit = 4 * 60 * 60

# Alternative instances:
# file_path = "instances/as19981229.txt"
# timeLimit = 60*60*6

# file_path = "instances/as19981230.txt"
# timeLimit = 60*60*6

executeMinClustersGCAlgorithm(file_path, list_H, timeLimit, return_clusters , enableVis)