### 1. Define scoring functions

#### Import dependencies

In [1]:
import numpy as np
import pandas as pd
import numba
from sympy import isprime
from math import sqrt

#### Read the cities

In [2]:
cities = pd.read_csv('../data/raw/cities.csv', index_col=['CityId'])

#### Define scoring functions

In [3]:
# Lookup tables read as globals by the jitted scoring functions below.
# XY: one row per city, columns (X, Y), stored as float32.
XY = np.stack(
    [cities[column].astype(np.float32) for column in ('X', 'Y')],
    axis=1,
)
# is_not_prime: 1 where the CityId is not prime, 0 where it is prime (int32 flags).
is_not_prime = np.array(
    [0 if isprime(city_id) else 1 for city_id in cities.index],
    dtype=np.int32,
)

@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def pure_score(path):
    '''Sum of Euclidean step lengths along 'path', ignoring penalties.

    'path' holds city ids that are used as row indices into the global
    XY array. A path with fewer than two entries scores 0.0.
    '''
    total = 0.0
    n_steps = path.shape[0] - 1
    # With parallel=False, numba.prange iterates exactly like range.
    for step in numba.prange(n_steps):
        src = path[step]
        dst = path[step + 1]
        dx = XY[src, 0] - XY[dst, 0]
        dy = XY[src, 1] - XY[dst, 1]
        total += sqrt(dx * dx + dy * dy)
    return total


@numba.jit('f8(i4, i8[:])', nopython=True, parallel=False)
def chunk_score(start_offset, chunk):
    '''Penalized length of a slice of a path.

    'chunk' holds consecutive city ids of a path and 'start_offset' is the
    index of chunk[0] within that full path. Every step whose 0-based
    position in the full path is 9, 19, 29, ... costs an extra 10% of its
    length when the city it departs from has a non-prime id.
    Returns the plain length plus 0.1 times the penalized step lengths.
    '''
    length = 0.0
    penalized = 0.0
    # Local step index (mod 10) that corresponds to a penalized global step.
    penalized_slot = 9 - start_offset % 10
    n_steps = chunk.shape[0] - 1
    # With parallel=False, numba.prange iterates exactly like range.
    for i in numba.prange(n_steps):
        src = chunk[i]
        dst = chunk[i + 1]
        dx = XY[src, 0] - XY[dst, 0]
        dy = XY[src, 1] - XY[dst, 1]
        step_len = sqrt(dx * dx + dy * dy)
        length += step_len
        if i % 10 == penalized_slot and is_not_prime[src]:
            penalized += step_len
    return length + 0.1 * penalized


@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def path_score(path):
    '''Penalized score of a whole path (chunk_score starting at index 0).'''
    total = chunk_score(0, path)
    return total


### 2. Test scoring functions' performance

#### Define a silly path for Rudolph

In [4]:
# Visit every city in CityId order, then append city 0 as the final stop.
path = np.append(cities.index, 0)

In [7]:
path[42:100]

array([42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
       59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
       76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
       93, 94, 95, 96, 97, 98, 99])

#### Measure functions' performance


In [5]:
%timeit pure_score(path)

1.41 ms ± 154 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [6]:
%timeit path_score(path)

2.82 ms ± 94.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%timeit chunk_score(42, path[42:2019])

28.4 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
