In [1]:
from datetime import datetime
import nbimporter
import numpy as np
from sklearn.cluster import KMeans

## Importing stuff from ga

In [2]:
from ga import *

Importing Jupyter notebook from ga.ipynb


In [3]:
# Read the city table into a float array; the CSV header row is skipped.
# NOTE(review): the rest of the notebook treats column 0 as the city id and
# columns 1-2 as coordinates — confirm against cities.csv.
cities = np.genfromtxt('cities.csv', skip_header=1, delimiter=',')

In [4]:
len(cities)

197769

Redefine the helpers from `ga` that depend on variables defined in this notebook:

In [5]:
# Element-wise wrapper so that `not_prime` (from ga) can be applied to a whole array.
np_not_prime = np.vectorize(not_prime)

# One candidate id per row of `cities`: 0 .. len(cities) - 1.
nums = np.arange(len(cities))

# Lookup table read by np_total_length: entry k is np_not_prime(k).
# Presumably True when k is not a prime — confirm against ga.not_prime.
not_primes_bool = np_not_prime(nums)

In [6]:
def np_total_length(r, c, not_prime_mask=None):
    """Length of the closed tour that starts at row 0 of ``c``, visits ``r`` and returns.

    Parameters
    ----------
    r : sequence of int
        Row indices into ``c`` in visiting order, without the starting row 0
        (it is prepended here). May be empty.
    c : np.ndarray
        2-D array; column 0 is read as an integer id, columns 1 and 2 as the
        coordinates used for the Euclidean distance.
    not_prime_mask : array-like of bool, optional
        Lookup indexed by id; a True entry makes the matching 10th step 10%
        longer. Defaults to the notebook-level ``not_primes_bool``.

    Returns
    -------
    float
        Sum of all step lengths, the closing step back to row 0 included,
        after the penalty has been applied.
    """
    # v8
    mask = not_primes_bool if not_prime_mask is None else np.asarray(not_prime_mask)

    # Force an integer index array: concatenating [0] with an empty (or float)
    # route would otherwise yield a float array that cannot be used to index `c`.
    order = np.concatenate(([0], np.asarray(r, dtype=int)))
    path = c[order, :]
    # Row i of `arrival` is the stop reached by step i; the last row wraps to the start.
    arrival = np.roll(path, -1, axis=0)
    steps = np.sqrt((path[:, 1] - arrival[:, 1])**2 + (path[:, 2] - arrival[:, 2])**2)

    # 0-based positions of every 10th step (steps 10, 20, ...).
    tenth = np.arange(9, len(path), 10)
    # NOTE(review): the id tested here is the one of the stop the step arrives at
    # (it is read from the rolled array) — confirm this is the intended city.
    tested_ids = arrival[tenth, 0].astype(int)
    steps[tenth[mask[tested_ids]]] *= 1.1

    return np.sum(steps)

In [7]:
def route_fitness(r, c):
    """Fitness of route ``r`` over cities ``c``: the reciprocal of its np_total_length.

    Shorter tours therefore get a larger fitness value.
    """
    tour_length = np_total_length(r, c)
    return 1 / tour_length

## Test on full cities set

In [9]:
n = 100  # number of k-means clusters, i.e. of sub-problems the city set is split into

**You can load the saved cluster labels (`files/100subsets.txt`) instead of running the following two cells**

In [10]:
# K-means model with one cluster per sub-problem; the fixed random_state pins the initialisation.
kmeans = KMeans(random_state=0, n_clusters=n)

In [11]:
# Cluster on columns 1 and 2 of `cities` only; column 0 is left out of the fit.
kmeans.fit(cities[:, 1:3])

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=100, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [58]:
kmeans.labels_

array([80,  7, 11, ..., 54,  1, 20])

In [59]:
# Persist the cluster label of every city, one label per line.
# fmt='%d' stores the integer labels as integers; the default '%.18e' would
# write each of them in floating-point scientific notation.
np.savetxt('files/100subsets.txt', kmeans.labels_, fmt='%d', delimiter=',')

In [60]:
# Read the saved labels back to check the round trip (the file has no header row).
sub_test = np.genfromtxt('files/100subsets.txt', skip_header=0, delimiter=',')

In [61]:
sub_test

array([80.,  7., 11., ..., 54.,  1., 20.])

In [26]:
# Append each city's cluster label as an extra last column (index 3).
label_column = kmeans.labels_.reshape(-1, 1)
kcities = np.concatenate((cities, label_column), axis=1)

In [27]:
# One placeholder slot per cluster; filled with the cluster's city rows afterwards.
subs = [0 for _ in range(n)]

In [28]:
# Split the labelled table by cluster, keeping only the first three columns
# (the appended label column is dropped again).
for i in range(n):
    members = kcities[:, 3] == i
    subs[i] = kcities[members][:, :3]

In [29]:
# Number of cities in each of the n clusters.
lens = [len(subset) for subset in subs[:n]]

Each subset seems more or less balanced:

In [30]:
np.array(lens)

array([2064, 2932, 2682, 1864, 1642, 1839, 1832, 2957, 1975, 2405, 2053,
       2542, 1732, 2101, 2446, 2098, 2419, 2541, 1707, 1690, 2177, 1164,
       1654, 2050, 2449, 2819, 2756, 1461, 2509, 1021, 1671, 1489, 1520,
       2509, 2197, 1811, 2715, 1796, 1355, 2620, 2180, 1833, 1461, 1552,
       1809, 1480, 2969, 1188, 2146, 2761, 2424, 1405, 2059, 2493, 2903,
       1383, 1307, 2348, 1810, 2391, 1612, 1619, 2051, 1816, 2531, 1944,
       1403, 2105, 2099, 2271, 2509, 2261, 1622, 2577, 1678, 1825, 1603,
       1807, 1649, 1379, 1471, 1528,  402, 1917, 1874, 1674, 3331, 1729,
       1875, 2780, 1396, 2472, 1459, 1490, 1754, 1870, 2377, 2375, 1214,
       1254])

In [31]:
sum(lens)

197769

In [32]:
# Six independent placeholder lists with one slot per cluster;
# a..e receive the five values returned by GA for each subset.
routes, a, b, c, d, e = ([0] * n for _ in range(6))

In [33]:
# Seed NumPy's global RNG — presumably so that GA's random choices are repeatable.
np.random.seed(4)
start = datetime.now()
# Solve every cluster independently and report the wall-clock time of each run.
for i in range(n):
    startl = datetime.now()
    print('\nStart Loop {} at {}'.format(i, startl))
    # GA comes from ga; route_fitness is passed as the objective.
    # NOTE(review): the meaning of the positional arguments (np.inf, 20, 6, 0.1) and
    # of the five returned values is defined in ga — confirm there. In this notebook
    # only e[i] is used afterwards, as the visiting order of the cluster's cities.
    a[i], b[i], c[i], d[i], e[i] = GA(subs[i], np.inf, 20, 6, route_fitness, [shift_mutation], 0.1,
                       roulette_selection2, cross_fun=mod_two_point_crossover2, max_no_change = 500)
    endl = datetime.now()
    print('End loop {} at {}: {} seconds'.format(i, endl, (endl-startl).total_seconds()))
end = datetime.now()
print('\nTotal time: {}'.format((end-start).total_seconds()))


Start Loop 0 at 2019-07-01 20:27:18.328951
End loop 0 at 2019-07-01 20:28:26.295942: 67.966991 seconds

Start Loop 1 at 2019-07-01 20:28:26.295942
End loop 1 at 2019-07-01 20:30:03.589171: 97.293229 seconds

Start Loop 2 at 2019-07-01 20:30:03.589171
End loop 2 at 2019-07-01 20:31:24.790348: 81.201177 seconds

Start Loop 3 at 2019-07-01 20:31:24.790348
End loop 3 at 2019-07-01 20:32:26.380366: 61.590018 seconds

Start Loop 4 at 2019-07-01 20:32:26.380366
End loop 4 at 2019-07-01 20:33:19.860917: 53.480551 seconds

Start Loop 5 at 2019-07-01 20:33:19.860917
End loop 5 at 2019-07-01 20:34:24.968724: 65.107807 seconds

Start Loop 6 at 2019-07-01 20:34:24.968724
End loop 6 at 2019-07-01 20:35:23.083365: 58.114641 seconds

Start Loop 7 at 2019-07-01 20:35:23.084370
End loop 7 at 2019-07-01 20:37:01.447640: 98.36327 seconds

Start Loop 8 at 2019-07-01 20:37:01.447640
End loop 8 at 2019-07-01 20:38:21.280299: 79.832659 seconds

Start Loop 9 at 2019-07-01 20:38:21.280299
End loop 9 at 2019-07

End loop 78 at 2019-07-01 21:51:11.440031: 42.674396 seconds

Start Loop 79 at 2019-07-01 21:51:11.440031
End loop 79 at 2019-07-01 21:51:47.556362: 36.116331 seconds

Start Loop 80 at 2019-07-01 21:51:47.556362
End loop 80 at 2019-07-01 21:52:25.608572: 38.05221 seconds

Start Loop 81 at 2019-07-01 21:52:25.608572
End loop 81 at 2019-07-01 21:53:14.864024: 49.255452 seconds

Start Loop 82 at 2019-07-01 21:53:14.864024
End loop 82 at 2019-07-01 21:53:21.903617: 7.039593 seconds

Start Loop 83 at 2019-07-01 21:53:21.903617
End loop 83 at 2019-07-01 21:54:16.358605: 54.454988 seconds

Start Loop 84 at 2019-07-01 21:54:16.358605
End loop 84 at 2019-07-01 21:55:01.952300: 45.593695 seconds

Start Loop 85 at 2019-07-01 21:55:01.952300
End loop 85 at 2019-07-01 21:56:01.478546: 59.526246 seconds

Start Loop 86 at 2019-07-01 21:56:01.478546
End loop 86 at 2019-07-01 21:58:19.963443: 138.484897 seconds

Start Loop 87 at 2019-07-01 21:58:19.963443
End loop 87 at 2019-07-01 21:59:26.930331: 66.9

In [34]:
# Column 0 (the city id) of every cluster's rows.
cds = [subset[:, 0] for subset in subs]

In [35]:
# Stitch the per-cluster tours together in cluster order. Each piece is the
# cluster's first city id followed by the remaining ids in the order given by e[k].
full_route = np.concatenate(
    [np.concatenate(([cds[k][0]], cds[k][e[k]])) for k in range(n)]
)

In [36]:
len(full_route)

197769

In [37]:
# Every id on the route is the id of an existing city.
bool(np.isin(full_route, cities[:, 0]).all())

True

In [38]:
# Every city id appears somewhere on the route.
bool(np.isin(cities[:, 0], full_route).all())

True

In [39]:
# Positions at which the route holds city id 0 (a one-element tuple of index arrays).
zi = np.nonzero(full_route == 0)

In [40]:
# Reduce the index tuple to the first position where id 0 occurs.
# Not idempotent: `zi` is rebound to a scalar, so this cell cannot be run twice in a row.
zi = zi[0][0]

In [56]:
zi

161527

In [41]:
# Drop city 0 from its position in the stitched route. np_total_length prepends
# index 0 and closes the loop, so the scored tour starts and ends at city 0.
# The remaining cities keep their order: nothing is rotated.
full_route = np.delete(full_route, zi)

In [42]:
len(full_route)

197768

In [44]:
np_total_length(full_route.astype(int), cities)

20709214.287331678

Final result 20 709 214.287331678

#### Save routes as json

In [45]:
full_route

array([1.04000e+02, 2.62550e+04, 1.66034e+05, ..., 5.42550e+04,
       1.87021e+05, 1.95521e+05])

In [46]:
# One id sequence per cluster: its first city followed by the rest in the order e[k].
routes_list = [np.concatenate(([cds[k][0]], cds[k][e[k]])) for k in range(n)]

In [47]:
routes_list[0]

array([1.04000e+02, 2.62550e+04, 1.66034e+05, ..., 1.21417e+05,
       3.85940e+04, 2.22160e+04])

In [48]:
# Map cluster number -> plain list of city ids, ready to be serialised as JSON.
routes_dict = {k: list(route) for k, route in enumerate(routes_list)}

In [62]:
import json

# Write the per-cluster routes to disk; the integer keys become strings in the JSON file.
with open('files/100_subroutes.json', 'w') as out_file:
    json.dump(routes_dict, fp=out_file)

### load test

In [63]:
# Read the saved sub-routes back from disk.
with open('files/100_subroutes.json', 'r') as in_file:
    loaded_json = json.load(fp=in_file)

Create route

In [64]:
# Rebuild the list of sub-routes in cluster order. JSON object keys are strings,
# hence str(k). The count comes from the loaded data rather than a hard-coded 100,
# so the load test keeps working when the number of clusters changes.
json_routes = [loaded_json[str(k)] for k in range(len(loaded_json))]

In [65]:
# Flatten the per-cluster lists into a single route and show how many cities it holds.
full_json_route = np.concatenate(json_routes)
full_json_route.shape[0]

197769

In [66]:
# Locate city 0 in the reloaded route: take the first position whose id is 0.
zero_positions = np.where(full_json_route == 0)[0]
zij = zero_positions[0]
zij

161527

In [67]:
# Remove city 0 using the index found in the reloaded route (zij). The earlier
# code used `zi`, a leftover from the in-memory route, which only worked because
# both indices happen to be equal. The other cities keep their order.
full_json_route = np.concatenate((full_json_route[:zij], full_json_route[zij+1:]))
len(full_json_route) # the 0 will be added during length computation

197768

In [68]:
np_total_length(full_json_route.astype(int), cities)

20709214.287331678

### Other results:

Full set of cities, 40 partitions: 34 068 538.50736396 total length

10k cities, divided into subsets: 3 441 365.0848769355

10k cities, no subsets: 13 683 004.521273606