In [None]:
import pandas as pd
import numpy as np
import  sklearn.cluster
from sklearn.cluster import KMeans
from scipy.spatial import distance_matrix
from scipy.spatial.distance import cdist
from scipy.spatial.distance import squareform, pdist
from random import randint
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.cluster import DBSCAN
from sklearn import metrics
from geopy.distance import great_circle
import time
import seaborn as sn

## 1. ACO | K-Means
        1.1 Centroids Path 
        1.2 First Path
        1.3 Second Path
## 2. ACO | DBSCAN
        2.1 Centroids Path
        2.2 First Path
        2.3 Second Path

In [None]:
# Dataset: read the Santa cities table from its CSV file
santa_cities_csv = 'C:\\Users\\polly\\Desktop\\Decision_model\\santa_cities.csv'
cities = pd.read_csv(santa_cities_csv)
cities.head()

## Algorithm

In [None]:
import random as rn
import numpy as np
from numpy.random import choice as np_choice

class AntColony(object):
    """Ant Colony Optimisation solver for a closed tour over a distance matrix.

    Every ant starts at node 0, visits each node exactly once and returns to
    node 0. A path is a list of ``(from, to)`` index pairs.
    """

    def __init__(self, distances, n_ants, n_best, n_iterations, decay, alpha=1, beta=1):
        """
        Args:
            distances (2D numpy.array): Square matrix of distances. Diagonal is assumed to be np.inf.
            n_ants (int): Number of ants running per iteration
            n_best (int): Number of best ants who deposit pheromone
            n_iterations (int): Number of iterations
            decay (float): Rate at which pheromone decays. The pheromone value is multiplied by decay
                after every iteration, so 0.95 will lead to slow decay, 0.5 to much faster decay.
            alpha (int or float): exponent on pheromone, higher alpha gives pheromone more weight. Default=1
            beta (int or float): exponent on distance, higher beta gives distance more weight. Default=1
        Example:
            ant_colony = AntColony(german_distances, 100, 20, 2000, 0.95, alpha=1, beta=2)
        """
        self.distances  = distances
        # uniform initial pheromone on every edge
        self.pheromone = np.ones(self.distances.shape) / len(distances)
        self.all_inds = range(len(distances))
        self.n_ants = n_ants
        self.n_best = n_best
        self.n_iterations = n_iterations
        self.decay = decay
        self.alpha = alpha
        self.beta = beta

    def run(self):
        """Run the colony and return the best ``(path, distance)`` seen.

        Returns:
            tuple: ``(path, total_distance)`` of the shortest tour found. With
            zero iterations the ``("placeholder", np.inf)`` tuple is returned.
        """
        shortest_path = None
        all_time_shortest_path = ("placeholder", np.inf)
        found_any = False
        for _ in range(self.n_iterations):
            all_paths = self.gen_all_paths()
            self.spread_pheronome(all_paths, self.n_best, shortest_path=shortest_path)
            shortest_path = min(all_paths, key=lambda x: x[1])
            print(shortest_path)
            # Accept the first tour unconditionally so that a real path is
            # returned even when every tour has infinite length.
            if not found_any or shortest_path[1] < all_time_shortest_path[1]:
                all_time_shortest_path = shortest_path
                found_any = True
            # Evaporate in place; a bare `self.pheromone * self.decay` would
            # discard the product and leave the pheromone untouched.
            self.pheromone *= self.decay
        return all_time_shortest_path

    def spread_pheronome(self, all_paths, n_best, shortest_path):
        """Deposit pheromone along the ``n_best`` shortest paths.

        Each move receives ``1 / distance`` of that move. ``shortest_path`` is
        accepted for interface compatibility and is not used.
        """
        sorted_paths = sorted(all_paths, key=lambda x: x[1])
        for path, dist in sorted_paths[:n_best]:
            for move in path:
                self.pheromone[move] += 1.0 / self.distances[move]

    def gen_path_dist(self, path):
        """Return the total length of ``path`` (sum of the distances of its moves)."""
        total_dist = 0
        for ele in path:
            total_dist += self.distances[ele]
        return total_dist

    def gen_all_paths(self):
        """Let every ant build one tour from node 0; return ``[(path, distance), ...]``."""
        all_paths = []
        for _ in range(self.n_ants):
            path = self.gen_path(0)
            all_paths.append((path, self.gen_path_dist(path)))
        return all_paths

    def gen_path(self, start):
        """Build one closed tour beginning and ending at ``start``."""
        path = []
        visited = set()
        visited.add(start)
        prev = start
        for _ in range(len(self.distances) - 1):
            move = self.pick_move(self.pheromone[prev], self.distances[prev], visited)
            path.append((prev, move))
            prev = move
            visited.add(move)
        path.append((prev, start)) # going back to where we started
        return path

    def pick_move(self, pheromone, dist, visited):
        """Pick the next node at random, weighted by pheromone and inverse distance.

        Args:
            pheromone (1D numpy.array): pheromone on the edges leaving the current node.
            dist (1D numpy.array): distances from the current node.
            visited (set): indices already on the tour; they are never chosen.
        Returns:
            Index of the chosen node.
        """
        pheromone = np.copy(pheromone)
        pheromone[list(visited)] = 0

        row = pheromone ** self.alpha * (( 1.0 / dist) ** self.beta)

        total = row.sum()
        if not np.isfinite(total) or total <= 0:
            # No usable weights (for example every remaining edge is blocked
            # with np.inf): fall back to a uniform choice among unvisited nodes
            # instead of handing NaN probabilities to the sampler.
            candidates = [i for i in self.all_inds if i not in visited]
            return np_choice(candidates, 1)[0]

        norm_row = row / total
        move = np_choice(self.all_inds, 1, p=norm_row)[0]
        return move

# ACO | K-Means

The dataset is split into 50 clusters with the K-Means method.

In [None]:
# Split the cities into 50 K-Means clusters on their (x, y) coordinates
coordinate_citta = cities[['x','y']]
kmeans = KMeans(n_clusters=50, random_state=0)
kmeans = kmeans.fit(coordinate_citta)

### 1.1 Centroids path
The ACO algorithm is used to find the best path among the clusters, i.e. the path that minimizes the total distance between their centroids.

In [None]:
#Centroids
centroidi = kmeans.cluster_centers_
centro = pd.DataFrame(centroidi)
cities['cluster'] = kmeans.labels_
# NOTE(review): row i of cluster_centers_ is the centre of label i, while
# unique() returns the labels in order of first appearance in `cities`, so a
# centroid may be paired with another cluster's label. Left as is because the
# later cells hard-code the resulting first label (29); confirm before changing.
centro['cluster'] = cities['cluster'].unique()
# rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
centro = centro.rename(columns={0:'x',1:'y'})
centro.head()

In [None]:
# Pairwise Euclidean distances between centroids, labelled by cluster, as a numpy array
etichette_cluster = centro['cluster'].unique()
matrice_centroidi = squareform(pdist(centro[['x','y']]))
distanza = pd.DataFrame(matrice_centroidi, index=etichette_cluster, columns=etichette_cluster)
# zero distances (the diagonal) become inf so that an ant never stays on the same node
distanza = distanza.replace(0, np.inf)
dist = np.array(distanza)
dist

In [None]:
# Run ACO on the centroid distance matrix
parametri_aco = dict(n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
ant_colony = AntColony(dist, **parametri_aco)
shortest_path = ant_colony.run()

In [None]:
#from the position to the cluster's name
path = pd.DataFrame(shortest_path[0])
# column 0 of `path` holds the position each move starts from
colonne = [[elem, distanza.columns[elem]] for elem in path[0]]
# rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
cluster_percorso = pd.DataFrame(colonne).rename(columns={0:'posizione',1:'cluster'})
cluster_percorso.head()

In [None]:
# clusters in the order of the centroid tour, and the same list without its first element
ordine_cluster = cluster_percorso['cluster']
lista_cluster = ordine_cluster.tolist()
lista_cluster_menouno = lista_cluster[1:]

### 1.2 First Path
ACO is first applied to the first cluster of the centroid path, with an arbitrarily chosen starting node. The last point of the resulting path (pointx) is used to link this cluster to the following one: pointx is added to the second cluster, the distance matrix among the points of this enlarged cluster is computed, and ACO is run on that matrix. In the same way, the last point of the second cluster's path is used to link the third cluster, and so on. The idea is to treat the C clusters as sets and to create C-1 intersections between consecutive sets through pointx.

In [None]:
def _aco_tour(points):
    """Run ACO over the cities in ``points`` and return the tour artefacts.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    ant_colony = AntColony(np.array(distanza), n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first cluster of the centroid tour (29 for this dataset)
first_cluster = lista_cluster[0]
print('start!')
cl = cities.loc[cities['cluster'] == first_cluster]
path, percorso, distance = _aco_tour(cl)
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova/distanza{}.csv'.format(first_cluster))
px = percorso['punti'].iloc[-1]
print('puntox29!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova/percorso{}.csv'.format(first_cluster))
path.to_csv('C:/Users/polly/Desktop/prova/{}.csv'.format(first_cluster))

# Walk the chain of consecutive clusters exactly once. Nesting this loop inside
# a loop over every cluster repeats the whole chain, and each repetition starts
# with a px that does not belong to the first cluster, breaking the first link.
for a, b in zip(lista_cluster, lista_cluster_menouno):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = cities.loc[cities['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = cities.loc[cities['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_tour(DF)
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova/distanza{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova/percorso{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova/{}.csv'.format(b))

### 1.3 Second Path
To prevent the ants from retracing the steps of the first path, the distance matrix is modified: the distance between every pair of points that were connected in the first path is set to infinity. This is justified by the way the ants choose their path. As already said, the ants make their choice using heuristic information and pheromone. If the distance is infinite, both the heuristic value and the pheromone deposit are close to zero (1/∞), so the ants will avoid connecting node i and node j, and only a negligible amount of pheromone is deposited on that edge.

In [None]:
def modifica_dm(df, dista):
    """Block the edges of a previously found path in a distance matrix.

    Args:
        df (pandas.DataFrame): saved path; columns '0' and '1' hold the two
            end points of each move, expressed as labels of ``dista``.
        dista (pandas.DataFrame): square distance matrix, modified in place.
    Returns:
        pandas.DataFrame: ``dista`` itself, with both directions of every
        move set to np.inf.
    Raises:
        KeyError: if an end point is not a row and column label of ``dista``.
    """
    for riga, colonna in zip(df['0'], df['1']):
        for label in (riga, colonna):
            # .loc would silently enlarge the matrix on an unknown label
            if label not in dista.index or label not in dista.columns:
                raise KeyError(label)
        print('prima', riga, colonna, dista.loc[colonna, riga])
        # Single-step .loc assignment: chained `dista[riga][colonna] = ...`
        # may write to a temporary copy and leave `dista` unchanged.
        dista.loc[colonna, riga] = np.inf
        dista.loc[riga, colonna] = np.inf
        print('dopo', riga, colonna, dista.loc[colonna, riga])

    return dista

In [None]:
def _aco_second_tour(points, old_path_csv):
    """Run ACO over ``points`` with the edges of a saved path blocked.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
        old_path_csv (str): ';'-separated CSV of the earlier path, read with
            columns '0' and '1' holding the positions joined by each move.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    # positional copy of the matrix, the form modifica_dm expects
    dista = pd.DataFrame(np.array(distanza))
    # old path
    df = pd.read_csv(old_path_csv, sep = ';')
    # NOTE(review): the saved positions index the matrix of the earlier run;
    # confirm they refer to the same cities in this matrix.
    new_dist = np.array(modifica_dm(df, dista))
    ant_colony = AntColony(new_dist, n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the (unblocked) distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first cluster of the centroid tour (29 for this dataset)
first_cluster = lista_cluster[0]
print('start!')
cl = cities.loc[cities['cluster'] == first_cluster]
path, percorso, distance = _aco_second_tour(
    cl, 'C:/Users/polly/Desktop/Decision/Percorsi1/{}.csv'.format(first_cluster))
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova/distanza{}.csv'.format(first_cluster))
px = percorso['punti'].iloc[-1]
print('puntox29!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova/percorso{}.csv'.format(first_cluster))
path.to_csv('C:/Users/polly/Desktop/prova/{}.csv'.format(first_cluster))

# Walk the chain of consecutive clusters exactly once. Nesting this loop inside
# a loop over every cluster repeats the whole chain, and each repetition starts
# with a px that does not belong to the first cluster, breaking the first link.
for a, b in zip(lista_cluster, lista_cluster_menouno):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = cities.loc[cities['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = cities.loc[cities['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_second_tour(
        DF, 'C:/Users/polly/Desktop/Decision/Percorsi1/{}.csv'.format(b))
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova/distanza{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova/percorso{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova/{}.csv'.format(b))

# ACO | DBSCAN
DBSCAN separates the data into two groups: Santa's face, which is then split into 50 clusters with the K-Means method, and the background (noise), which is split into 3 clusters with a second DBSCAN run.

In [None]:
# Label every city with DBSCAN (label -1 marks noise) and plot the result
dbscan = DBSCAN(eps=180).fit(cities[['x','y']])
cities['dbscan'] = dbscan.labels_
# x and y are passed by keyword: current seaborn rejects them as positional arguments
sn.lmplot(x='x', y='y', data=cities, fit_reg=False, hue='dbscan')

In [None]:
#separate the noise from Santa's face
# .copy() gives independent frames: both get a new 'cluster' column later, and
# assigning a column on a bare slice of `cities` triggers SettingWithCopyWarning
rumore = cities.loc[cities['dbscan'] == -1].copy()
dati = cities.loc[cities['dbscan'] != -1].copy()

In [None]:
# Santa's face is split into 50 clusters with K-Means, the noise into clusters with DBSCAN
# (kmeans_rumore holds a DBSCAN model despite its name)
clusterer_rumore = DBSCAN(eps = 700)
kmeans_rumore = clusterer_rumore.fit(rumore[['x','y']])
clusterer_dati = KMeans(n_clusters=50, random_state=0, algorithm='elkan')
kmeans_dati = clusterer_dati.fit(dati[['x','y']])

In [None]:
# assign() returns new frames with the sub-cluster labels, so nothing is
# written into a slice of `cities` (which triggers SettingWithCopyWarning)
rumore = rumore.assign(cluster=kmeans_rumore.labels_)
dati = dati.assign(cluster=kmeans_dati.labels_)

In [None]:
#Some plots
# x and y are passed by keyword: current seaborn rejects them as positional arguments
sn.lmplot(x='x', y='y', data=rumore, fit_reg=False, hue='cluster')
sn.lmplot(x='x', y='y', data=dati, fit_reg=False, hue='cluster')

### 2.1 Centroids Path
Create the path among the centroids of Santa's face. The noise clusters will be attached manually afterwards.

In [None]:
#Centroids
centroidi = kmeans_dati.cluster_centers_
centro_dati = pd.DataFrame(centroidi)
# NOTE(review): these labels come from cities['cluster'], not from
# kmeans_dati / dati['cluster'], and in order of first appearance rather than
# centroid order, so a centroid may be paired with another cluster's label.
# Left as is because the later cells hard-code the first label (29); confirm.
centro_dati['cluster'] = cities['cluster'].unique()
# rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
centro_dati = centro_dati.rename(columns={0:'x',1:'y'})
centro_dati.head()

In [None]:
# Pairwise Euclidean distances between the face centroids, labelled by cluster, as a numpy array
etichette_dati = centro_dati['cluster'].unique()
matrice_centroidi_dati = squareform(pdist(centro_dati[['x','y']]))
distanza = pd.DataFrame(matrice_centroidi_dati, index=etichette_dati, columns=etichette_dati)
# zero distances (the diagonal) become inf so that an ant never stays on the same node
distanza = distanza.replace(0, np.inf)
dist = np.array(distanza)
dist

In [None]:
# Run ACO on the face-centroid distance matrix
parametri_aco = dict(n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
ant_colony = AntColony(dist, **parametri_aco)
shortest_path = ant_colony.run()

In [None]:
#from the position to the cluster's name
path = pd.DataFrame(shortest_path[0])
# column 0 of `path` holds the position each move starts from
colonne = [[elem, distanza.columns[elem]] for elem in path[0]]
# rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
cluster_percorso = pd.DataFrame(colonne).rename(columns={0:'posizione',1:'cluster'})
cluster_percorso.head()

In [None]:
# face clusters in the order of the centroid tour, and the same list without its first element
ordine_cluster_dati = cluster_percorso['cluster']
lista_cluster_dati = ordine_cluster_dati.tolist()
lista_cluster_menouno_dati = lista_cluster_dati[1:]

### 2.2 First Path

In [None]:
#clusters inside Santa's face
def _aco_tour(points):
    """Run ACO over the cities in ``points`` and return the tour artefacts.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    ant_colony = AntColony(np.array(distanza), n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first cluster of the centroid tour (29 for this dataset)
first_cluster = lista_cluster_dati[0]
print('start!')
cl = dati.loc[dati['cluster'] == first_cluster]
path, percorso, distance = _aco_tour(cl)
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova2/distanza{}.csv'.format(first_cluster))
px = percorso['punti'].iloc[-1]
print('puntox29!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN{}.csv'.format(first_cluster))
path.to_csv('C:/Users/polly/Desktop/prova2/{}.csv'.format(first_cluster))

# Walk the chain of consecutive clusters exactly once. Nesting this loop inside
# a loop over every cluster repeats the whole chain, and each repetition starts
# with a px that does not belong to the first cluster, breaking the first link.
for a, b in zip(lista_cluster_dati, lista_cluster_menouno_dati):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = dati.loc[dati['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = dati.loc[dati['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_tour(DF)
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova2/distanzaDBSCAN{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova2/{}.csv'.format(b))

In [None]:
# visiting order of the noise clusters, and the same order without its first element
lista_rumore = [2, 0, 1]
lista_rumore_menouno = lista_rumore[1:]

In [None]:
#path inside the noise clusters
def _aco_tour(points):
    """Run ACO over the cities in ``points`` and return the tour artefacts.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    ant_colony = AntColony(np.array(distanza), n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first noise cluster
first_noise = lista_rumore[0]
print('start!')
# NOTE(review): `puntox` is whatever the previous (face) cell left behind; it
# is used here as the link between the face path and the first noise cluster.
insert_line = pd.DataFrame(puntox)
cl1 = rumore.loc[rumore['cluster'] == first_noise]
DF = pd.concat([insert_line, cl1])
path, percorso, distance = _aco_tour(DF)
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova2/distanzar{}.csv'.format(first_noise))
px = percorso['punti'].iloc[-1]
print('puntox2!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN_r{}.csv'.format(first_noise))
path.to_csv('C:/Users/polly/Desktop/prova2/r{}.csv'.format(first_noise))

# Walk the chain of consecutive noise clusters exactly once. Nesting this loop
# inside a loop over every cluster repeats the whole chain, and the repetition
# starts with a px that does not belong to the first cluster.
for a, b in zip(lista_rumore, lista_rumore_menouno):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = rumore.loc[rumore['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = rumore.loc[rumore['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_tour(DF)
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova2/distanzaDBSCAN_r{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN_r{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova2/r{}.csv'.format(b))

### 2.3 Second Path

In [None]:
def modifica_dm(df, dista):
    """Block the edges of a previously found path in a distance matrix.

    Args:
        df (pandas.DataFrame): saved path; columns '0' and '1' hold the two
            end points of each move, expressed as labels of ``dista``.
        dista (pandas.DataFrame): square distance matrix, modified in place.
    Returns:
        pandas.DataFrame: ``dista`` itself, with both directions of every
        move set to np.inf.
    Raises:
        KeyError: if an end point is not a row and column label of ``dista``.
    """
    for riga, colonna in zip(df['0'], df['1']):
        for label in (riga, colonna):
            # .loc would silently enlarge the matrix on an unknown label
            if label not in dista.index or label not in dista.columns:
                raise KeyError(label)
        print('prima', riga, colonna, dista.loc[colonna, riga])
        # Single-step .loc assignment: chained `dista[riga][colonna] = ...`
        # may write to a temporary copy and leave `dista` unchanged.
        dista.loc[colonna, riga] = np.inf
        dista.loc[riga, colonna] = np.inf
        print('dopo', riga, colonna, dista.loc[colonna, riga])

    return dista

In [None]:
#clusters inside Santa's face
def _aco_second_tour(points, old_path_csv):
    """Run ACO over ``points`` with the edges of a saved path blocked.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
        old_path_csv (str): ';'-separated CSV of the earlier path, read with
            columns '0' and '1' holding the positions joined by each move.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    # positional copy of the matrix, the form modifica_dm expects
    dista = pd.DataFrame(np.array(distanza))
    # old path
    df = pd.read_csv(old_path_csv, sep = ';')
    # NOTE(review): the saved positions index the matrix of the earlier run;
    # confirm they refer to the same cities in this matrix.
    new_dist = np.array(modifica_dm(df, dista))
    ant_colony = AntColony(new_dist, n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the (unblocked) distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first cluster of the centroid tour (29 for this dataset)
first_cluster = lista_cluster_dati[0]
print('start!')
cl = dati.loc[dati['cluster'] == first_cluster]
# The matrix is built from `cl`, the first cluster itself, not from a `DF`
# left over from an earlier cell.
path, percorso, distance = _aco_second_tour(
    cl, 'C:/Users/polly/Desktop/Decision/Percorsi1_DBSCAN/{}.csv'.format(first_cluster))
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova2/distanza{}.csv'.format(first_cluster))
px = percorso['punti'].iloc[-1]
print('puntox29!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN{}.csv'.format(first_cluster))
path.to_csv('C:/Users/polly/Desktop/prova2/{}.csv'.format(first_cluster))

# Walk the chain of consecutive clusters exactly once. Nesting this loop inside
# a loop over every cluster repeats the whole chain, and each repetition starts
# with a px that does not belong to the first cluster, breaking the first link.
for a, b in zip(lista_cluster_dati, lista_cluster_menouno_dati):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = dati.loc[dati['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = dati.loc[dati['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_second_tour(
        DF, 'C:/Users/polly/Desktop/Decision/Percorsi1_DBSCAN/{}.csv'.format(b))
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova2/distanzaDBSCAN{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova2/{}.csv'.format(b))

In [None]:
# visiting order of the noise clusters, and the same order without its first element
lista_rumore = [2, 0, 1]
lista_rumore_menouno = lista_rumore[1:]

In [None]:
#path inside the noise clusters
def _aco_second_tour(points, old_path_csv):
    """Run ACO over ``points`` with the edges of a saved path blocked.

    Args:
        points (pandas.DataFrame): cities with ``id``, ``x`` and ``y`` columns.
            AntColony starts every tour at position 0, i.e. the first row.
        old_path_csv (str): ';'-separated CSV of the earlier path, read with
            columns '0' and '1' holding the positions joined by each move.
    Returns:
        tuple: ``(path, percorso, distance)``. ``path`` holds the raw
        (from, to) positional moves, ``percorso`` maps each position to its
        city id, and ``distance`` holds the tour length with (column 0) and
        without (column 'distanza') the closing edge back to the start.
    """
    ids = points['id'].unique()
    distanza = pd.DataFrame(squareform(pdist(points[['x','y']])), columns=ids, index=ids)
    # zero distances (the diagonal) become inf so that an ant never stays put
    distanza = distanza.replace(0, np.inf)
    # positional copy of the matrix, the form modifica_dm expects
    dista = pd.DataFrame(np.array(distanza))
    # old path
    df = pd.read_csv(old_path_csv, sep = ';')
    # NOTE(review): the saved positions index the matrix of the earlier run;
    # confirm they refer to the same cities in this matrix.
    new_dist = np.array(modifica_dm(df, dista))
    ant_colony = AntColony(new_dist, n_ants=60, n_best=60, n_iterations=5, decay=0.1, alpha=2, beta=8)
    shortest_path = ant_colony.run()
    path = pd.DataFrame(shortest_path[0])
    # subtract the (unblocked) distance between the last point and the start point
    number = path[0].iloc[-1]
    t = distanza.iloc[number, 0]
    distance = pd.DataFrame([shortest_path[1]])
    distance['distanza'] = distance[0] - t
    # from position to point's name
    percorso = pd.DataFrame([[elem, distanza.columns[elem]] for elem in path[0]])
    # rename() relabels the columns; rename_axis() with a dict no longer does in current pandas
    percorso = percorso.rename(columns={0: 'posizione', 1: 'punti'})
    return path, percorso, distance


# first noise cluster
first_noise = lista_rumore[0]
print('start!')
# NOTE(review): `puntox` is whatever the previous (face) cell left behind; it
# is used here as the link between the face path and the first noise cluster.
insert_line = pd.DataFrame(puntox)
cl1 = rumore.loc[rumore['cluster'] == first_noise]
DF = pd.concat([insert_line, cl1])
path, percorso, distance = _aco_second_tour(
    DF, 'C:/Users/polly/Desktop/Decision/Percorsi1_DBSCAN/r{}.csv'.format(first_noise))
#save it!
distance.to_csv('C:/Users/polly/Desktop/prova2/distanzar{}.csv'.format(first_noise))
px = percorso['punti'].iloc[-1]
print('puntox2!!!!', px)
#save it!
percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN_r{}.csv'.format(first_noise))
path.to_csv('C:/Users/polly/Desktop/prova2/r{}.csv'.format(first_noise))

# Walk the chain of consecutive noise clusters exactly once. Nesting this loop
# inside a loop over every cluster repeats the whole chain, and the repetition
# starts with a px that does not belong to the first cluster.
for a, b in zip(lista_rumore, lista_rumore_menouno):
    print(a)
    # add puntox to the following cluster, to create the connection among groups
    cl = rumore.loc[rumore['cluster'] == a]
    print('inizio',cl)
    print('verifica px', px)
    puntox = cl.loc[cl['id'] == px]
    insert_line = pd.DataFrame(puntox)
    cl1 = rumore.loc[rumore['cluster'] == b]
    DF = pd.concat([insert_line, cl1])
    print('dataframe',DF.head())
    DF.drop_duplicates(inplace=True)
    path, percorso, distance = _aco_second_tour(
        DF, 'C:/Users/polly/Desktop/Decision/Percorsi1_DBSCAN/r{}.csv'.format(b))
    #save it!
    distance.to_csv('C:/Users/polly/Desktop/prova2/distanzaDBSCAN_r{}.csv'.format(b))
    px = percorso['punti'].iloc[-1]
    print('px',px)
    #save it!
    percorso.to_csv('C:/Users/polly/Desktop/prova2/percorsoDBSCAN_r{}.csv'.format(b))
    path.to_csv('C:/Users/polly/Desktop/prova2/r{}.csv'.format(b))