In [1]:
import math
import numpy as np
import pandas as pd
import timeit

In [2]:
cities = pd.read_csv('cities.csv')

In [3]:
cities.head()

Unnamed: 0,CityId,X,Y
0,0,316.836739,2202.340707
1,1,4377.405972,336.602082
2,2,3454.158198,2820.053011
3,3,4688.099298,2935.898056
4,4,1010.696952,3236.750989


In [4]:
cities.tail()

Unnamed: 0,CityId,X,Y
197764,197764,149.828018,3134.756986
197765,197765,2615.299239,2267.979286
197766,197766,4775.889874,3103.846228
197767,197767,2994.230955,1931.764344
197768,197768,1354.764778,3218.100625


In [5]:
coors = cities[['X', 'Y']]

## Euclidean Distance between two coordinates:

In [6]:
coors.head()

Unnamed: 0,X,Y
0,316.836739,2202.340707
1,4377.405972,336.602082
2,3454.158198,2820.053011
3,4688.099298,2935.898056
4,1010.696952,3236.750989


We can compute the distance between two consecutive rows as follows:

In [7]:
# shift() moves each column down by one row, so "row - shifted row" is the
# offset from the previous city to the current one. Row 0 has no predecessor,
# which is why its distance is NaN.
dist = np.sqrt((coors.X - coors.X.shift())**2 + (coors.Y - coors.Y.shift())**2)
dist.head()

0            NaN
1    4468.691432
2    2649.512214
3    1239.367061
4    3689.688402
dtype: float64

**TODO: Maybe there is a faster way?**

And the total distance:

In [8]:
np.sum(dist)

443429408.0060866

Random route:

In [9]:
# Shuffle the city ids 1..len(coors)-1 (city 0 is left out on purpose).
# NOTE(review): no random seed has been set at this point, so this route and
# the outputs derived from it change on every run.
idx = np.random.permutation(range(1, len(coors)))

The permutation deliberately leaves out city 0, because city 0 has to be both the starting point and the end point of the route.

Then we can complete the route by:

In [10]:
idx = np.concatenate(([0], idx, [0]))

In [11]:
idx[:5]

array([     0, 188716,  53776, 166809,  37161])

In [12]:
idx[-5:]

array([141441, 100091,  14358, 112109,      0])

The sequence of coordinates ordered by the route:

In [13]:
# coors already holds only the X and Y columns, so the rows can be looked up
# by city id directly, in route order.
coors.loc[idx].head()

Unnamed: 0,X,Y
0,316.836739,2202.340707
188716,3064.902293,1623.252727
53776,991.836962,2195.344794
166809,2502.990754,2736.047191
37161,406.567917,3264.463355


In [14]:
def total_length(coors, route):
    """Return the Euclidean length of a closed tour that starts and ends at city 0.

    Parameters
    ----------
    coors : pandas.DataFrame
        Frame with ``X`` and ``Y`` columns. Rows are looked up by index label
        (``.loc``), so the index labels act as the city ids.
    route : sequence of int
        City ids to visit, in order, *without* city 0; city 0 is added at
        both ends here.

    Returns
    -------
    float
        Sum of the straight-line distances between consecutive stops.
    """
    # Close the tour: 0 -> route[0] -> ... -> route[-1] -> 0.
    route = np.concatenate(([0], route, [0]))

    # Selecting a list of labels with .loc already produces a new frame in
    # route order, so the input does not need to be copied first.
    df = coors.loc[route]

    # diff() is "row i minus row i-1"; the first row has no predecessor and
    # yields NaN, which the sum skips.
    return np.sum(np.sqrt(df.X.diff()**2 + df.Y.diff()**2))

In [15]:
# Fresh shuffle of the city ids 1..len(coors)-1; city 0 is not included here.
# NOTE(review): still no random seed set, so the total below is not reproducible.
idx = np.random.permutation(range(1, len(coors)))

In [16]:
total_length(coors, idx)

443501276.6192398

In [17]:
total_length(coors, range(1, len(coors)))

443430860.2673667

## Modified total distance: add 10% to every tenth step that arrives at a non-prime city

Let's just begin by testing with a smaller dataset

In [18]:
coors_sub = coors.loc[0:25] 

In [19]:
coors_sub.head()

Unnamed: 0,X,Y
0,316.836739,2202.340707
1,4377.405972,336.602082
2,3454.158198,2820.053011
3,4688.099298,2935.898056
4,1010.696952,3236.750989


In [20]:
np.random.seed(3)
idx = np.random.permutation(range(1,len(coors_sub)))
idx = np.concatenate(([0], idx, [0])) # add start and end

In [21]:
coors_sub = coors_sub.loc[idx]

In [22]:
coors_sub = coors_sub.reset_index()

In [23]:
coors_sub = coors_sub.rename(columns = {'index' : 'CityId'})

In [24]:
coors_sub['dist'] = np.sqrt((coors_sub.X - coors_sub.X.shift())**2 + (coors_sub.Y - coors_sub.Y.shift())**2)

In [25]:
coors_sub.head()

Unnamed: 0,CityId,X,Y,dist
0,0,316.836739,2202.340707,
1,19,3033.179607,515.217613,3197.64021
2,18,2352.743647,2489.939529,2088.664583
3,13,965.611152,1067.734281,1986.65657
4,24,3694.082279,734.949757,2748.69064


Drop the first row:

In [26]:
coors_sub = coors_sub.drop(0)

In [27]:
coors_sub.head()

Unnamed: 0,CityId,X,Y,dist
1,19,3033.179607,515.217613,3197.64021
2,18,2352.743647,2489.939529,2088.664583
3,13,965.611152,1067.734281,1986.65657
4,24,3694.082279,734.949757,2748.69064
5,16,4944.059453,2326.338189,2023.600771


`dist[1]` is the distance between row 0 and row 1, and so on, so that `dist[10]` is the length of the 10th step, etc.

Every 10th step:

In [28]:
coors_sub[coors_sub.index % 10 == 0]

Unnamed: 0,CityId,X,Y,dist
10,23,3633.815728,2889.995167,192.791943
20,25,4646.266998,2884.589219,2008.1251


Check if not prime:

**TODO: look for a faster version**

In [29]:
def is_prime(n):
    """Return True when ``n`` is a prime number, False otherwise.

    Uses trial division by the odd numbers from 3 up to the integer square
    root of ``n``; everything at or below 1 and every even number other than
    2 is rejected up front.
    """
    if n <= 1:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False

    # A composite n must have a divisor no larger than sqrt(n).
    upper = int(math.sqrt(n)) + 1
    return all(n % candidate != 0 for candidate in range(3, upper, 2))

In [30]:
def not_prime(n):
    """Return True when ``n`` is NOT a prime number, False when it is prime.

    Same trial-division scheme as a primality test with the answer inverted:
    2 is prime, anything at or below 1 and any other even number is not, and
    an odd number is non-prime as soon as one odd divisor up to its square
    root is found.
    """
    if n == 2:
        return False
    if n <= 1 or n % 2 == 0:
        return True

    upper = int(math.sqrt(n)) + 1
    return any(n % candidate == 0 for candidate in range(3, upper, 2))

Find every tenth step whose city is not prime:

In [31]:
coors_sub[coors_sub.CityId.apply(not_prime).values & (coors_sub.index % 10 == 0)]

Unnamed: 0,CityId,X,Y,dist
20,25,4646.266998,2884.589219,2008.1251


Find every tenth step whose city is not prime and increase its `dist` by 10%.

In [32]:
idx2 = coors_sub.CityId.apply(not_prime).values & (coors_sub.index % 10 == 0)

In [33]:
idx2

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False])

In [34]:
coors_sub[idx2]

Unnamed: 0,CityId,X,Y,dist
20,25,4646.266998,2884.589219,2008.1251


In [35]:
coors_sub.loc[idx2, 'dist'] = coors_sub.loc[idx2, 'dist'] + coors_sub.loc[idx2, 'dist']/10

In [36]:
np.sum(coors_sub.dist)

60566.12398696876

In [37]:
def total_length_mod(coors, route):
    """Return the tour length with a 10% surcharge on selected tenth steps.

    The tour is ``0 -> route... -> 0``. Step ``k`` is the move onto the k-th
    stop after the start. Every step whose number is a multiple of 10 and
    whose row carries a non-prime ``CityId`` counts as 1.1 times its
    Euclidean length.

    NOTE(review): the surcharge is decided by the city on the row where the
    step *ends*; confirm against the problem statement that the arrival city
    (rather than the departure city) is the intended one.

    Parameters
    ----------
    coors : pandas.DataFrame
        Frame with ``X`` and ``Y`` columns, looked up by index label.
        Assumes the index is unnamed, so that ``reset_index()`` produces an
        ``index`` column that can be renamed to ``CityId``.
    route : sequence of int
        City ids to visit, in order, without city 0.

    Returns
    -------
    float
        Total (penalised) tour length.
    """
    route = np.concatenate(([0], route, [0]))

    # .loc with a list of labels returns a new frame, so no copy of the input
    # is needed; reset_index() turns the row position into the step number.
    df = coors.loc[route].reset_index()
    df = df.rename(columns={'index': 'CityId'})
    df['dist'] = np.sqrt(df.X.diff()**2 + df.Y.diff()**2)

    # Row 0 is the starting city and has no incoming step.
    df = df.drop(0)

    # Only every tenth step can be penalised, so the primality test is run on
    # those rows alone instead of on the whole route.
    tenth = np.asarray(df.index % 10 == 0)
    penalised = tenth.copy()
    penalised[tenth] = [not_prime(city) for city in df.loc[tenth, 'CityId']]

    if penalised.any():
        df.loc[penalised, 'dist'] = df.loc[penalised, 'dist'] + df.loc[penalised, 'dist'] / 10
    return np.sum(df['dist'])

Alternative:

In [38]:
def total_length_mod2(coors, route):
    route = np.concatenate(([0], route, [0]))   
    df = coors.copy()
    df = df.loc[route].reset_index()
    df = df.rename(columns = {'index' : 'CityId'})
    df['dist'] = np.sqrt((df.X - df.X.shift())**2 + (df.Y - df.Y.shift())**2)
    df = df.drop(0)
    idx = (df.index % 10 == 0)
    idx = df.loc[idx].CityId.apply(not_prime)
    idx = idx.index[idx.values]
    df.loc[idx, 'dist'] = df.loc[idx, 'dist'] + df.loc[idx, 'dist'] / 10
    # print(df)
    return np.sum(df['dist'])

Check using same subset as before:

In [39]:
coors_sub = coors.loc[0:25] 

In [40]:
np.random.seed(3)
idx = np.random.permutation(range(1,26))

In [41]:
total_length_mod(coors_sub, idx)

60566.12398696876

In [42]:
total_length_mod2(coors_sub, idx)

60566.12398696876

## Functions from other notebook:

In [43]:
def cities_distance(cities_path, coors):
    """Return the length of ``cities_path`` with a 10% surcharge on selected steps.

    Walks the path step by step. Step ``index`` is the move from
    ``cities_path[index - 1]`` to ``cities_path[index]``. A step whose number
    is a multiple of 10 and whose destination city is not prime counts as 1.1
    times its Euclidean length.

    NOTE(review): the surcharge is decided by the destination city of the
    step; confirm against the problem statement that this (rather than the
    departure city) is the intended one.

    Parameters
    ----------
    cities_path : sequence of int
        Complete path, including the start and end city.
    coors : pandas.DataFrame
        Coordinates; converted to nested lists and indexed by *position*, so
        a city id is used as a row position and the first two columns are
        read as the coordinates.

    Returns
    -------
    number
        Total (penalised) path length; ``0`` when the path has no steps.
    """
    # .values.tolist() already builds fresh Python lists, so the frame does
    # not need to be copied beforehand.
    coors = coors.values.tolist()
    distance = 0
    for index in range(1, len(cities_path)):
        city_a = cities_path[index - 1]
        city_b = cities_path[index]
        a_b_dist = euclidean_dist(city_a, city_b, coors)
        # `and` short-circuits, so the primality test only runs on every
        # tenth step instead of on every step of the path.
        if index % 10 == 0 and not_prime(city_b):
            distance = distance + a_b_dist * 1.1
        else:
            distance = distance + a_b_dist
    return distance

In [44]:
def euclidean_dist(id_a, id_b, coors):
    """Return the straight-line distance between two entries of ``coors``.

    ``coors`` is indexed with ``id_a`` and ``id_b``; elements 0 and 1 of each
    entry are read as its two coordinates.
    """
    start, end = coors[id_a], coors[id_b]
    # Squared offset along each of the two axes.
    squared_gaps = [np.power(start[axis] - end[axis], 2) for axis in (0, 1)]
    return np.sqrt(squared_gaps[0] + squared_gaps[1])

# Performance comparisons

### On subset of data:

In [45]:
coors_sub = coors.loc[0:25] 

In [46]:
np.random.seed(3)
idx = np.random.permutation(range(1,26))

In [47]:
route = np.concatenate(([0], idx, [0]))   

In [48]:
total_length_mod2(coors_sub, idx)

60566.12398696876

In [49]:
cities_distance(route, coors_sub)

60566.12398696877

In [50]:
timeit.timeit('total_length_mod2(coors_sub, idx)', number=2000, globals=globals())

15.597050603386785

In [51]:
timeit.timeit('cities_distance(route, coors_sub)', number=2000, globals=globals())

0.5741499124057121

### On full data:

In [52]:
np.random.seed(3)
idx = np.random.permutation(range(1, len(coors)))

In [53]:
total_length_mod2(coors, idx)

447277748.2547363

In [54]:
route = np.concatenate(([0], idx, [0]))   

In [55]:
cities_distance(route, coors)

447277748.2547359

In [56]:
timeit.timeit('total_length_mod2(coors, idx)', number=100, globals=globals())

13.625909310375508

In [57]:
timeit.timeit('cities_distance(route, coors)', number=100, globals=globals())

288.1246263932305

In [58]:
timeit.timeit('total_length_mod2(coors, idx)', number=1, globals=globals())

0.16677375876486167

In [59]:
timeit.timeit('cities_distance(route, coors)', number=1, globals=globals())

2.70982332356499

In [60]:
timeit.timeit('total_length_mod2(coors, idx)', number=10, globals=globals())

1.3118655613692454

In [61]:
timeit.timeit('cities_distance(route, coors)', number=10, globals=globals())

29.782806284616527