In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sympy
from sympy import isprime
import numba

%matplotlib inline

In [2]:
# Starting tour; its 'Path' column is used below to order the cities.
initial_path = pd.read_csv('submission.151557248.csv')
# City coordinates indexed by CityId, loaded as float32.
df = pd.read_csv('../data/raw/cities.csv', index_col=['CityId'], dtype={'X': np.float32, 'Y': np.float32})
# primerange(a, b) yields the primes of the half-open interval [a, b), so the
# upper bound has to be max id + 1; otherwise the largest CityId is silently
# left out whenever it happens to be prime.
primes = list(sympy.primerange(0, max(df.index) + 1))

# Cities re-ordered to follow the tour. reset_index() moves the CityIds into a
# regular column, so the frame's index becomes the row position in the tour.
df_path = df.reindex(initial_path['Path']).reset_index()

In [3]:
from math import sqrt

# (n_cities, 2) float32 table of coordinates: column 0 is X, column 1 is Y.
XY = np.stack([df[axis].astype(np.float32) for axis in ('X', 'Y')], axis=1)
# One int32 flag per entry of df.index: 1 when the id is NOT prime, 0 when it is.
is_not_prime = np.array([0 if isprime(city_id) else 1 for city_id in df.index], dtype=np.int32)

@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def pure_score(path):
    '''Total Euclidean length of `path`, with no penalty applied.

    `path` is a 1-D int64 array whose entries are used as row indices into
    the module-level `XY` coordinate table. The result is the sum of the
    distances between each pair of consecutive entries; a path with fewer
    than two entries scores 0.0.
    '''
    total = 0.0
    n_legs = path.shape[0] - 1
    # parallel=False, so prange iterates sequentially like range.
    for k in numba.prange(n_legs):
        src = XY[path[k]]
        dst = XY[path[k + 1]]
        dx = src[0] - dst[0]
        dy = src[1] - dst[1]
        total += sqrt(dx * dx + dy * dy)
    return total


@numba.jit('f8(i4, i8[:])', nopython=True, parallel=False)
def chunk_score(start_offset, chunk):
    '''Penalised length of a slice of the path.

    start_offset -- index in the full path at which `chunk` begins.
    chunk        -- 1-D int64 array; entries are used as row indices into the
                    module-level `XY` and `is_not_prime` arrays.

    Returns the sum of the leg lengths plus 10% of every penalised leg. A leg
    is penalised when its local index k satisfies
    k % 10 == 9 - start_offset % 10 (for a non-negative offset this is the
    same as (start_offset + k) % 10 == 9) and the city it departs from is
    flagged in `is_not_prime`.
    '''
    # Residue of the local leg index that lines up with the penalised legs.
    penalised_residue = 9 - start_offset % 10
    length = 0.0
    surcharge = 0.0
    # parallel=False, so prange iterates sequentially like range.
    for k in numba.prange(chunk.shape[0] - 1):
        origin = chunk[k]
        src = XY[origin]
        dst = XY[chunk[k + 1]]
        dx = src[0] - dst[0]
        dy = src[1] - dst[1]
        leg = sqrt(dx * dx + dy * dy)
        length += leg
        if k % 10 == penalised_residue and is_not_prime[origin]:
            surcharge += leg
    return length + 0.1 * surcharge


@numba.jit('f8(i8[:])', nopython=True, parallel=False)
def path_score(path):
    '''Score of `path` as computed by chunk_score with a start offset of 0.'''
    return chunk_score(0, path)

In [4]:
def write_tsp(cities, filename, name='traveling-santa-2018-prime-paths'):
    '''Write `cities` to `filename` as a TSP problem with EUC_2D edge weights.

    cities   -- object supporting len() and itertuples(), whose rows expose
                `Index`, `X` and `Y` (e.g. a DataFrame with X/Y columns).
    filename -- path of the file to create or overwrite.
    name     -- text used for both the NAME and the COMMENT header fields.

    Each node line is "<Index> <X> <Y>" with coordinates printed to 11
    decimal places; the file is terminated by an EOF line.
    '''
    header = (
        'NAME : %s\n' % name,
        'COMMENT : %s\n' % name,
        'TYPE : TSP\n',
        'DIMENSION : %d\n' % len(cities),
        'EDGE_WEIGHT_TYPE : EUC_2D\n',
        'NODE_COORD_SECTION\n',
    )
    with open(filename, 'w') as out:
        out.writelines(header)
        out.writelines(
            '%d %.11f %.11f\n' % (node.Index, node.X, node.Y)
            for node in cities.itertuples()
        )
        out.write('EOF\n')

In [5]:
def read_link(filename):
    '''Read a file of whitespace-separated integers laid out as "N v1 ... vN".

    The first integer is a count and must equal the number of integers that
    follow it (the values may be spread over any number of lines).

    Returns the N values after the count as an int32 array.
    Raises Exception('Unrecognized format in <filename>') when the file holds
    no numbers at all or when the count does not match.
    '''
    # Context manager so the handle is closed even if reading fails.
    with open(filename, 'r') as f:
        data = f.read()
    data = data.replace('\n', ' ')
    # An empty/blank file has no leading count; report it as a format error
    # instead of letting data[0] fail with an IndexError below.
    if not data.strip():
        raise Exception('Unrecognized format in %s' % filename)
    data = np.fromstring(data, sep=' ', dtype=np.int32)
    if len(data) == 0 or len(data) != data[0] + 1:
        raise Exception('Unrecognized format in %s' % filename)
    return data[1:]

In [None]:
def calc_chunk(df_chunk):
    write_tsp(df_chunk, 'temp_chunk.tsp')
    !./linkern -s 42 -S temp_chunk.tour -R 999999999 -t 60 ./temp_chunk.tsp >linkern.log
    tour_data = read_link('temp_chunk.tour')
    tour_data_2 = np.array([df_chunk.index[x] for x in tour_data])
    return tour_data_2

In [None]:
# Copy of the current path. NOTE(review): nothing in the visible part of this
# cell writes to it -- a better-scoring chunk is only reported, not stored.
df_path_best = df_path.copy()

# Slide a 20-row window over the first 1000 start positions of the path.
for i in range(1000):
    chunk = df_path[i:i+20]
    # Solver's ordering of this window, expressed as df_path index labels.
    new_chunk = calc_chunk(chunk)
    # Both orderings are scored with the same offset i so the every-10th-leg
    # penalty is applied at the window's real position in the full path.
    # NOTE(review): nothing visible forces new_chunk to keep the window's
    # first/last city, so a lower chunk score may not mean a lower full-path
    # score -- confirm.
    score = chunk_score(i, chunk['Path'].values)
    score_new = chunk_score(i, df_path['Path'].loc[new_chunk].values)
    if score_new < score:
        # Marker printed whenever the reordered window scores lower.
        print("!!")