# Steps:

1. Clustering on cities through K-means;
2. GA on each cluster: obtain provinces (use normal total length);
3. GA for sorting clusters: obtain candidate trace as concatenation of cluster routes (use normal total length);
4. SA on the obtained candidate trace, with a special mutation and fitness function for primes.

# Imports

In [1]:
from datetime import datetime
import json
import numpy as np

In [2]:
# Load the city table, skipping the CSV header row.
# Column 0 is used throughout the notebook as the city id; columns 1-2 are
# presumably the X/Y coordinates (they are what the k-means cell clusters on) — TODO confirm.
cities = np.genfromtxt('cities.csv', delimiter=',', skip_header = 1)

# Clustering

In [3]:
from sklearn.cluster import KMeans

We partition the cities into 1000 clusters, using k-means.

In [4]:
n = 1000

We have already run the following code and saved the resulting cluster labels as a txt file, so we simply load that file now.

In [None]:
# kmeans = KMeans(n_clusters=n, random_state=0)
# kmeans.fit(cities[:, 1:3])

In [5]:
clusters = np.genfromtxt('files/1000subsets.txt', skip_header = 0).astype(int)

In [6]:
clusters

array([894, 654, 614, ..., 263, 305,  80])

We add the information regarding the cluster to the cities array.

In [7]:
kcities = np.concatenate((cities, clusters[:, np.newaxis]), 1)

In [8]:
# Group the city rows by cluster label (column 3 of kcities), keeping only the
# three original city columns for each of the n clusters.
subs = [kcities[kcities[:, 3] == label, :3] for label in range(n)]

In [9]:
# The clusters have different sizes, so `subs` is a ragged list of 2-D arrays.
# NumPy >= 1.24 raises ValueError when asked to infer an array from ragged
# input, so build the 1-D object array explicitly and fill it one cluster at a
# time (this also prevents NumPy from stacking clusters of equal size).
_subs_arr = np.empty(len(subs), dtype=object)
for _k, _sub in enumerate(subs):
    _subs_arr[_k] = _sub
subs = _subs_arr

In [10]:
# Number of cities in each cluster; preview the first 50 sizes.
lens = np.array([len(cluster) for cluster in subs])
lens[:50]

array([230, 118, 121, 168,  89, 163, 310, 288, 277, 156, 289, 228, 172,
       273, 151, 156, 261, 236, 190, 234, 183,  73, 242, 222, 228, 161,
       244, 282, 265, 198, 138, 288, 268, 181, 274, 158, 160, 203, 127,
       152, 216, 313, 222, 192, 218, 145, 241, 296, 151, 137])

In [11]:
sum(lens)

197769

# Running GA in each cluster

In [12]:
from ga import GA, route_fitness, shift_mutation, roulette_selection2
from ga import two_point_crossover2, total_length_loop, total_length_w_penalties, not_prime

In [13]:
# One placeholder slot per cluster for each of the five values returned by GA
# (five independent lists, each holding n zeros).
a, b, c, d, e = ([0] * n for _ in range(5))

After running the following...

In [None]:
# np.random.seed(4)
# start = datetime.now()
# startl = datetime.now()
# p = 0 
# for i in range(n):
#     if i % 100 == 0:
#         print('\nStart Loop {} at {}'.format(i, startl))
#     a[i], b[i], c[i], d[i], e[i] = GA(subs[i], np.inf, 20, 6, route_fitness, [shift_mutation], 0.1,
#                        roulette_selection2, cross_fun=two_point_crossover2, max_no_change = 500,
#                                      pop_include_zero=True, length_fun = total_length_loop)
#     if (i % (99 + p) == 0) and i > 0:
#         p += 100
#         endl = datetime.now()
#         print('End loop {} at {}: {} seconds'.format(i, endl, (endl-startl).total_seconds()))
#         startl = datetime.now()
# end = datetime.now()
# print('\nTotal time: {}'.format((end-start).total_seconds()))

We have created a list of lists, each consisting of the city ids in each cluster:

In [None]:
# c_ids = [el[:,0] for el in subs]

Running the algorithm we have created a list 'e'. Each element e[i] in this list is the best route found by running GA on subs[i]. We now order each list c_ids[i] by e[i] in order to obtain the routes specified by their city ids. 

In [None]:
# routes = []
# for i in range(n):
#     routes.append(c_ids[i][e[i]])

We can now concatenate these routes in order to find a route on the full set of cities.

In [None]:
# full_route = np.concatenate(routes)

We order the route so that it starts and ends from 0.

In [None]:
# zi = np.where(full_route == 0)[0][0]

In [None]:
# full_route = np.concatenate((full_route[zi:], full_route[:zi]))

In [None]:
# full_route = full_route.astype(int)

In [None]:
# np.savetxt('files/8291272_after_kmeans.txt', full_route)

We load the saved file containing this route:

In [14]:
full_route = np.genfromtxt('files/8291272_after_kmeans.txt').astype(int)

As a check, we can see that the length of the full route is correct.

In [15]:
len(full_route)

197769

Also, every city in the path is the id of a city:

In [16]:
all(np.isin(full_route, cities[:,0])) 

True

... and every city is in the path:

In [17]:
all(np.isin(cities[:,0], full_route)) 

True

The total length through this route:

In [18]:
total_length_loop(full_route, cities)

8214645.568977318

Let us now see what would be the length of this route if we penalize every 10th step not starting from a prime city.

We create a masking list for non-prime numbers:

In [19]:
# Evaluate `not_prime` element-wise on every index 0 .. len(cities)-1; the
# resulting array is the mask handed to total_length_w_penalties.
nums = np.arange(len(cities))
np_not_prime = np.vectorize(not_prime)
not_primes_bool = np_not_prime(nums)

We now use the list to find the total length, where we penalize each 10th step originating from a city that is not prime.

In [20]:
total_length_w_penalties(full_route, cities, not_primes_bool)

8291272.454521399

#### Save subroutes as json

We also save the subroutes as a json file:

In [None]:
# routes_dict = dict()
# for i in range(len(routes)):
#     routes_dict[i] = list(routes[i])

In [None]:
# with open('files/1000_subroutes_2.json', 'w') as fp:
#     json.dump(routes_dict, fp)

# Running GA to find order of subroutes

Reload the routes, in case they are no longer in memory:

In [21]:
with open('files/1000_subroutes_2.json', 'r') as fp:
    loaded_json = json.load(fp)

In [22]:
# Rebuild the list of subroutes in cluster order. JSON object keys are always
# strings, hence str(i). Using len(loaded_json) instead of a hard-coded 1000
# keeps this cell valid for any number of saved subroutes (the save cell
# writes keys 0 .. len(routes)-1).
json_routes = [loaded_json[str(i)] for i in range(len(loaded_json))]

In [23]:
full_json_route = np.concatenate(json_routes)
len(full_json_route)

197769

In [24]:
# Position of city 0 (first occurrence) in the concatenated route; nothing is deleted here.
zij = np.where(full_json_route == 0)[0][0]
zij

179270

In [25]:
# Rotate the route so that it starts at city 0. No element is removed: the
# length printed below is still the full number of cities.
full_json_route = np.concatenate((full_json_route[zij:], full_json_route[:zij]))
len(full_json_route) # original note: the 0 will be added during length computation (presumably the closing return to city 0 — TODO confirm in ga.py)

197769

In [26]:
total_length_w_penalties(full_json_route.astype(int), cities, not_primes_bool)

8291272.454521399

In [27]:
routes = json_routes

We now apply GA for ordering the clusters:

In [28]:
# Convert every subroute to an integer NumPy array, updating the list in place
# (any other name bound to the same list object sees the converted arrays too).
for idx, subroute in enumerate(routes):
    routes[idx] = np.array(subroute).astype(int)

In [29]:
# The subroutes have different lengths, so `routes` is a ragged list of 1-D
# arrays. NumPy >= 1.24 raises ValueError when asked to infer an array from
# ragged input, so build the 1-D object array explicitly. An array (rather
# than a list) is needed so the routes can later be reordered by fancy
# indexing with the cluster order found by the GA.
_routes_arr = np.empty(len(routes), dtype=object)
for _k, _route in enumerate(routes):
    _routes_arr[_k] = _route
routes = _routes_arr

After running the following ...

In [30]:
# np.random.seed(4)
# s1, s2, s3, s4, s5 = GA(cities, np.inf, 20, 10, subset_fitness, [reverse_mutation, swap_mutation], 0.1,
#                         roulette_selection2, cross_fun=two_point_crossover2, max_no_change = 500,
#                         pop_include_zero=1, length_fun = total_length_loop,
#                         on_subsets= True, subs = routes)

We have obtained s5, which is the order of clusters

In [31]:
# new_full_route = np.concatenate(routes[s5])

We order it so that it starts and ends at 0:

In [32]:
# zin = np.where(new_full_route == 0)[0][0]
# new_full_route = np.concatenate((new_full_route[zin:], new_full_route[:zin]))

We save it:

In [33]:
# np.savetxt('files/6926557_from_1000_ordered_clusters.txt', new_full_route)

We load the route:

In [34]:
new_full_route = np.genfromtxt('files/6926557_from_1000_ordered_clusters.txt').astype(int)

In [35]:
new_full_route

array([     0, 153911,  48908, ...,  81234,  60843,  42251])

In [36]:
total_length_loop(new_full_route, cities)

6864862.674741199

In [37]:
all(np.isin(new_full_route, cities[:,0])) # every city in the path is the id of a city

True

In [38]:
all(np.isin(cities[:,0], new_full_route)) # every city is in the path

True

In [39]:
total_length_loop(new_full_route, cities)

6864862.674741199

In [40]:
total_length_w_penalties(new_full_route[1:], cities, not_primes_bool)

6926557.37104381

# Improving the route by moving prime cities

We start from the route with length 6 926 557.37104381

Reload route in case:

In [41]:
start_route = np.genfromtxt('files/6926557_from_1000_ordered_clusters.txt').astype(int)

In [42]:
total_length_w_penalties(start_route, cities, not_primes_bool)

6926557.37104381

In [43]:
# Re-partition the ordered route into consecutive chunks of 200 cities:
# position p of the route (one label per city except the starting city, hence
# len(cities) - 1 labels) gets chunk label p // 200. Integer division yields
# the same labels as repeating 0..999 two hundred times each, but does not
# silently run out of labels when there are more than 200 000 cities.
clusters = np.arange(len(cities) - 1) // 200
len(clusters)

197768

In [44]:
# Rows of `cities` taken in route order, skipping the first entry of the route.
# NOTE(review): indexing by city id assumes row i of `cities` holds city id i — confirm.
sorted_cities = cities[start_route[1:]]

In [45]:
kscities = np.concatenate((sorted_cities, clusters[:, np.newaxis]), 1)

In [46]:
n = len(np.unique(clusters))
n

989

In [47]:
subs = [0]*n

In [48]:
# Split the route-ordered cities into their chunks (label in column 3),
# dropping the label column from each chunk.
subs = [kscities[kscities[:, 3] == chunk_id, :3] for chunk_id in range(n)]

In [49]:
# Chunk sizes; show the last 20 (only the final chunk can be shorter).
lens = [len(chunk) for chunk in subs]
np.array(lens[-20:])

array([200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
       200, 200, 200, 200, 200, 200, 168])

We apply SA...

In [50]:
# One placeholder slot per chunk for each of the three values returned by SA
# (three independent lists, each holding n zeros).
a, b, c = ([0] * n for _ in range(3))

In [51]:
# Start every chunk from the identity permutation, i.e. its cities in their
# current order.
for i, chunk in enumerate(subs):
    a[i] = np.arange(len(chunk))

In [52]:
start_route_w0 = start_route[1:]
start_route_w0

array([153911,  48908, 182416, ...,  81234,  60843,  42251])

In [53]:
total_length_w_penalties(start_route_w0, cities, not_primes_bool)

6926557.37104381

In [54]:
c_ids = [el[:,0] for el in subs]

After running the following...

In [55]:
from sa import SA, reverse_primes_mutation, total_length_w_penalties_unordered_straigth

In [56]:
# np.random.seed(4)
# start = datetime.now()
# startl = datetime.now()
# p = 0
# last_v = total_length_w_penalties(start_route_w0, cities, not_primes_bool)
# initial_v = last_v
# print('Initial distance: {}'.format(last_v))
# for i in range(n):
#     if (i % 100 == 0) and (i > 0):
#         print('\nStart Loop {} at {}'.format(i, startl))
#     
#     perm_init = np.arange(len(subs[i]))
#     a[i], b[i], c[i] = SA(subs[i], total_length_w_penalties_unordered_straigth, np.inf, reverse_primes_mutation,
#                           black_list = not_primes_bool, scale = 10000000, n_to_mute= 10,
#                           perm_init = perm_init, maxIterNoChange=1000)
#     if ((i % (99 + p) == 0) or (i == n - 1)) and i > 0:
#         p += 100
#         endl = datetime.now()
#         print('End loop {} at {}: {} seconds'.format(i, endl, (endl-startl).total_seconds()))
#         startl = datetime.now()
#     if i % 10 == 0:
#         temp_routes = []
#         for j in range(n):
#             temp_routes.append(c_ids[j][a[j]])
#         temp_full_route = np.concatenate(temp_routes).astype(int)
#         temp_full_route = np.concatenate(([0], temp_full_route))
#         temp_tot = total_length_w_penalties(temp_full_route[1:], cities, not_primes_bool)
#         print('\nLoop {}, Total distance so far: {}'.format(i, temp_tot))   
#         print(all(np.isin(temp_full_route, cities[:,0])), all(np.isin(cities[:,0], temp_full_route)) )
#         print('Improvement: {}'.format(temp_tot - last_v))
#         print('Total Improvement: {}'.format(temp_tot - initial_v))
#         last_v = temp_tot        
# end = datetime.now()
# print('\nTotal time: {}'.format((end-start).total_seconds()))

In [57]:
# c_ids = [el[:,0] for el in subs]
# routes = []
# for i in range(n):
#     routes.append(c_ids[i][a[i]])
# 
# final_full_route = np.concatenate(routes).astype(int)
# final_full_route = np.concatenate(([0], final_full_route))

In [58]:
# np.savetxt('files/6794694_after_sa.txt', final_full_route)

We have obtained the following route:

In [59]:
final_full_route = np.genfromtxt('files/6794694_after_sa.txt').astype(int)

In [60]:
final_full_route

array([     0, 153911,  48908, ...,  81234,  60843,  42251])

In [61]:
total_length_w_penalties(final_full_route, cities, not_primes_bool)

6794694.175852176

The length of the final route is 6 794 694.175852176