In [2]:
from z3 import *

from itertools import combinations
from utils import *
import shutil
import numpy as np
import copy

In [27]:
def clustering(D):
    '''
    Pair up mutually-closest items into clusters and build the reduced distance matrix.

    Two items a and b are merged into a new cluster when b is the closest item reachable from a
    and vice versa (D is not necessarily symmetric). Every other item becomes a cluster of its own.
    The last row/column of D (index len(D)-1) is the deposit: it is never a candidate for pairing
    and is always returned as the last, single-element cluster.

    In the new distance matrix each cluster is represented as a single item: the distance from/to
    a cluster is the mean of the distances from/to the items of the cluster, and the diagonal is 0.

    input   - D: list of lists; The (n+1)x(n+1) distance matrix, deposit last. It is not modified.
    output  - D_new: list of lists; The distance matrix after the new clusters are made
            - clusters: list of lists; The list of every cluster (indices refer to rows of D)
    '''
    n = len(D) - 1  # number of items; index n is the deposit

    # closest[i] is the index of the item closest to the i-th item (deposit column excluded).
    closest = []
    for item in range(n):
        row = np.array(D[item][:-1], dtype=float)  # a copy, so D itself is left untouched
        # Mask the item itself with +inf instead of a fixed large number, so the result does not
        # depend on the scale of the distances.
        row[item] = np.inf
        closest.append(int(np.argmin(row)))

    # Check what items can be paired up.
    clusters = []
    already_in_cluster = set()
    for item in range(n):
        partner = closest[item]
        # partner == item can only happen when there is no other item at all (n == 1);
        # an item must never be paired with itself.
        if partner != item and closest[partner] == item:
            if item not in already_in_cluster:
                clusters.append([item, partner])
                already_in_cluster.update((item, partner))
            # else: this item was already emitted together with its partner
        else:
            clusters.append([item])
    clusters.append([n])  # the deposit is always the last cluster

    # Build the new distance matrix: entry (row, column) is the mean of the distances from every
    # member of cluster 'row' to every member of cluster 'column' (1, 2 or 4 values). Since the
    # deposit is the last cluster, this also produces the deposit row and column.
    size = len(clusters)
    D_new = [
        [
            0 if row == column
            else np.mean([D[i][j] for i in clusters[row] for j in clusters[column]])
            for column in range(size)
        ]
        for row in range(size)
    ]

    return D_new, clusters

In [12]:
# Instances are numbered 01..21. For each one we need the name of the raw .dat file,
# of the generated .dzn file and of the companion .dzn file describing the clusters.
instance_stems = ['inst' + str(number).zfill(2) for number in range(1, 22)]

file_names = [stem + '.dat' for stem in instance_stems]
file_names_mzn = [stem + '.dzn' for stem in instance_stems]
file_names_clustering_mzn = [stem + '_clusters.dzn' for stem in instance_stems]

In [36]:
# For every instance file decide if the clustering is necessary and build the necessary .dzn files.
# Instances with fewer than NO_CLUSTERING items are only converted to .dzn; larger ones are first
# reduced with repeated calls to clustering() and get an extra '<name>_clusters.dzn' file.
NO_CLUSTERING = 35


def _dzn_list(name, values):
    '''Return the .dzn text of a one-dimensional array: "name = [v0, v1, ...];" plus a newline.'''
    return name + ' = [' + ', '.join(str(value) for value in values) + '];\n'


def _dzn_matrix(name, rows):
    '''Return the .dzn text of a two-dimensional array, one "| v0, v1, ..." line per row.'''
    body = ''.join('| ' + ', '.join(str(value) for value in row) + '\n' for row in rows)
    return name + ' = [' + body + '|];\n'


def _clustering_iterations(n):
    '''Return how many clustering passes are applied to an instance with n items.'''
    if n < 60:
        return 2
    if n < 100:
        return 3
    if n < 130:
        return 4
    return 5


for fn, file_name in enumerate(file_names):
    # Read the instance. Layout by line: m, n, the m values written as 'l', the n item sizes,
    # then the (n+1) rows of the distance matrix.
    with open('./Instances/' + file_name, 'r') as instance_file:
        splitted_file = instance_file.read().split('\n')

    m = int(splitted_file[0])
    n = int(splitted_file[1])
    # split() without arguments tolerates repeated or trailing blanks on every line.
    cpt = [int(token) for token in splitted_file[2].split()]
    sz = [int(token) for token in splitted_file[3].split()]
    D = [[int(token) for token in line.split()] for line in splitted_file[4:(n + 5)]]

    file_name_mzn = file_names_mzn[fn]

    #No clustering needed:
    if n < NO_CLUSTERING:
        with open('./Preprocessed_Instances/' + file_name_mzn, 'w') as dzn_file:
            dzn_file.write('m = ' + str(m) + ';\n')
            dzn_file.write('n = ' + str(n) + ';\n')
            dzn_file.write(_dzn_list('l', cpt))
            dzn_file.write(_dzn_list('s', sz))
            dzn_file.write(_dzn_matrix('D', D))

        print('No Clustering for file: '+ file_name +'; Builded '+file_name_mzn+'; n='+str(n))

    #Clustering:
    else:
        clustering_iterations = _clustering_iterations(n)

        #Build D_new (the distance matrix after the clustering)
        D_new = D
        clusters = [[i] for i in range(n + 1)]  # last entry is the deposit
        for _ in range(clustering_iterations):
            D_new, new_clusters = clustering(D_new)
            # new_clusters indexes the clusters of the previous pass: expand them back to
            # the original item indices.
            old_clusters = clusters
            clusters = [
                [old_item for item in clus for old_item in old_clusters[item]]
                for clus in new_clusters
            ]

        # Turn D_new from a matrix of floats into a matrix of ints (int() truncates the means).
        D_new = [[int(value) for value in row] for row in D_new]

        n_new = len(clusters) - 1

        #Build sz_new (the list of the items' sizes after the clustering)
        sz_new = [sum(sz[i] for i in clusters[item]) for item in range(n_new)]

        #write the .dzn file
        with open('./Preprocessed_Instances/' + file_name_mzn, 'w') as dzn_file:
            dzn_file.write('m = ' + str(m) + ';\n')
            dzn_file.write('n = ' + str(n_new) + ';\n')
            dzn_file.write(_dzn_list('l', cpt))
            dzn_file.write(_dzn_list('s', sz_new))
            dzn_file.write(_dzn_matrix('D', D_new) + '\n')

        #write the clustering.dzn file
        file_name_cluster_mzn = file_names_clustering_mzn[fn]
        real_clusters = [clus for clus in clusters if len(clus) > 1]
        # NOTE(review): with no multi-item cluster this now writes k= 0 and empty 'c'/'C' arrays
        # instead of raising; confirm the consuming model accepts that.
        max_len = max((len(clus) for clus in real_clusters), default=0)

        with open('./Preprocessed_Instances/' + file_name_cluster_mzn, 'w') as clusters_file:
            clusters_file.write('n = ' + str(n) + ';\n')
            clusters_file.write('k= ' + str(len(real_clusters)) + ';\n')
            clusters_file.write(_dzn_list('c', [len(clus) for clus in real_clusters]))
            clusters_file.write(_dzn_matrix('D', D_new))
            # Every row of C lists the members of one cluster, padded with -1 up to max_len.
            clusters_file.write(_dzn_matrix(
                'C', [clus + [-1] * (max_len - len(clus)) for clus in real_clusters]))

        print('Clustering for file: '+ file_name +'; Builded '+file_name_mzn+' and '+file_name_cluster_mzn+'; n='+str(n)+'; n_new='+str(n_new))

No Clustering for file: inst01.dat; Builded inst01.dzn; n=6
No Clustering for file: inst02.dat; Builded inst02.dzn; n=9
No Clustering for file: inst03.dat; Builded inst03.dzn; n=7
No Clustering for file: inst04.dat; Builded inst04.dzn; n=10
No Clustering for file: inst05.dat; Builded inst05.dzn; n=3
No Clustering for file: inst06.dat; Builded inst06.dzn; n=8
No Clustering for file: inst07.dat; Builded inst07.dzn; n=17
No Clustering for file: inst08.dat; Builded inst08.dzn; n=10
No Clustering for file: inst09.dat; Builded inst09.dzn; n=13
No Clustering for file: inst10.dat; Builded inst10.dzn; n=13
Clustering for file: inst11.dat; Builded inst11.dzn and inst11_clusters.dzn; n=143; n_new=40
Clustering for file: inst12.dat; Builded inst12.dzn and inst12_clusters.dzn; n=95; n_new=43
Clustering for file: inst13.dat; Builded inst13.dzn and inst13_clusters.dzn; n=47; n_new=26
Clustering for file: inst14.dat; Builded inst14.dzn and inst14_clusters.dzn; n=215; n_new=61
Clustering for file: inst