In [None]:
# MinMax CTSP
# https://www.kaggle.com/kostyaatarik/minmax-ctsp


In [None]:
%%writefile /kaggle/working/Penalty_CTSP.c

#include "LKH.h"
#include "Segment.h"

// Custom penalty function combining a colour-constraint (CTSP) penalty with
// a MinMax route-cost penalty.
//
// Walks the current tour route by route (a route runs from one node with a
// non-zero DepotId to the next such node) and returns
//     P + MaxCost
// where P adds 100000 for every node whose non-zero Color differs from the
// DepotId of the node that starts its route, and MaxCost is the largest
// per-route cost found (after division by Precision).
//
// As soon as the running value is already no better than the current one
// (see the two early-exit tests below) the function returns
// CurrentPenalty + (CurrentGain > 0) without finishing the walk.
//
// Depot, Reversed, CurrentPenalty, CurrentGain, Precision, DimensionSaved and
// the SUCC / PREDD / C macros are not defined in this file; presumably they
// come from LKH.h / Segment.h -- confirm against the LKH-3 sources.
GainType Penalty_CTSP()
{
    // Route start at which the previous call bailed out early; kept across
    // calls (static) so the next call begins its walk at that route.
    static Node *StartRoute = 0;
    Node *N, *N1, *N2, *CurrentRoute, *NextN;
    GainType P = 0;
    GainType Cost, MaxCost = MINUS_INFINITY;
    int Forward;

    // Find the first node with a non-zero DepotId reached from Depot via
    // SUCC (N1) and via PREDD (N2).
    N1 = Depot;
    while ((N1 = SUCC(N1))->DepotId == 0);
    N2 = Depot;
    while ((N2 = PREDD(N2))->DepotId == 0);
    // Pick the walking direction: when the two nodes differ, go forward iff
    // N1 has the smaller DepotId; when they coincide, fall back to !Reversed.
    Forward = N1 != N2 ? N1->DepotId < N2->DepotId : !Reversed;

    // First call ever: start the walk at Depot.
    if (!StartRoute)
        StartRoute = Depot;
    N = StartRoute;
    do {
        // N is the first node of the route handled in this iteration.
        CurrentRoute = N;
        // Pass 1 over the route: accumulate colour violations into P.
        // The loop stops when N reaches the next node with DepotId != 0.
        do {
            if (N->Color != 0 && N->Color != CurrentRoute->DepotId)
                P += 100000;  // valid CTSP solution is the priority over MinMax objective
        } while ((N = Forward ? SUCC(N) : PREDD(N))->DepotId == 0);
        // Early exit: the colour penalty alone is already no improvement.
        // Remember this route so the next call starts here.
        if (P > CurrentPenalty ||
            (P == CurrentPenalty && CurrentGain <= 0)) {
            StartRoute = CurrentRoute;
            return CurrentPenalty + (CurrentGain > 0);
        }
        // Pass 2 over the same route: sum the edge costs with the Pi values
        // of both end points subtracted.
        Cost = 0;
        N = CurrentRoute;
        do {
            NextN = Forward ? SUCC(N) : PREDD(N);
            Cost += C(N, NextN) - N->Pi - NextN->Pi;
            // Nodes with Id > DimensionSaved are stepped over without adding
            // the edge that leaves them.
            // NOTE(review): presumably these are the extra copy nodes LKH
            // creates for asymmetric instances -- confirm.
            if (NextN->Id > DimensionSaved)
                NextN = Forward ? SUCC(NextN) : PREDD(NextN);
        } while ((N = NextN)->DepotId == 0);
        Cost /= Precision;
        if (Cost > MaxCost) {
            // Early exit on the MinMax part.
            // NOTE(review): Cost alone (not P + Cost) is compared with
            // CurrentPenalty here -- confirm this is intended.
            if (Cost > CurrentPenalty ||
                (Cost == CurrentPenalty && CurrentGain <= 0)) {
                StartRoute = CurrentRoute;
                return CurrentPenalty + (CurrentGain > 0);
            }
            MaxCost = Cost;
        }
    // After pass 2, N is the first node of the next route; stop once the
    // walk is back at the route it started from.
    } while (N != StartRoute);
    return P + MaxCost;  // sum of CTSP and MinMax penalties
}


In [None]:
import functools
import glob
import itertools
import numpy as np
import pandas as pd
import os
import stat
import time
import threading

# ---- paths ----
# Directory that receives every generated file (scripts, LKH inputs, results).
WRK_DIR = '/kaggle/working/'
# Existing submission used to seed the initial tour.
SRC_FILE = '../input/st-21d-baseline/ST_21-d-baseline/submission_no_wildcards_2483_2483_2483.csv'

# ---- LKH run settings ----
TIME_LIMIT = 8 * 3600  # time limit for the LKH run, in seconds
SEED = 0  # LKH seed value

# ---- problem size ----
SIZE = 5040 + 2 * 120  # total number of permutation nodes (= 5280)
INF = 999_999_999  # "infinite" edge weight (10**9 - 1)

# ---- shared state ----
# Read by build_results(); the main script sets it to True after the solver returns.
ENDED = False

def build_lkh():
    """Download LKH-3.0.7, replace its Penalty_CTSP.c and compile the solver.

    Writes ``build_lkh.sh`` into WRK_DIR, makes it executable for the owner
    and runs it.  The script downloads and unpacks the LKH sources, copies
    ``{WRK_DIR}Penalty_CTSP.c`` over the stock file, runs ``make`` and copies
    the resulting ``LKH`` binary into WRK_DIR.

    A non-zero exit status of the script is reported with a warning; no
    exception is raised, so callers behave as before.
    """
    script_path = WRK_DIR + 'build_lkh.sh'
    # The context manager closes the script even if a write fails.
    with open(script_path, 'w') as f:
        f.write('#!/bin/sh\n')
        f.write(f'cd {WRK_DIR}\n')
        f.write('wget http://webhotel4.ruc.dk/~keld/research/LKH-3/LKH-3.0.7.tgz >/dev/null\n')
        f.write('tar xvfz LKH-3.0.7.tgz >/dev/null\n')
        f.write(f'cp -r {WRK_DIR}Penalty_CTSP.c LKH-3.0.7/SRC/\n')
        f.write(f'cd LKH-3.0.7; make >/dev/null; cp LKH {WRK_DIR}\n')
        f.write('\n')

    os.chmod(script_path, stat.S_IRWXU)
    status = os.system(script_path)
    if status != 0:
        # The script's status is that of its last command (the final `cp`),
        # so a non-zero value means the binary was not produced/copied.
        print(f'WARNING: {script_path} exited with status {status}; the LKH binary may be missing.')
    
def perm_dist(p, q):
    """Return the number of symbols to append to ``p`` so that it ends with ``q``.

    ``p`` and ``q`` are equal-length sequences of the same type (tuples in
    this notebook) over the same symbols.  If the suffix of ``p`` that starts
    at the position of ``q[0]`` equals the matching prefix of ``q``, the
    result is that position (0 when ``p == q``); otherwise the sequences do
    not overlap and the result is ``len(p)``.

    The length is taken from ``p`` instead of being hard-coded, so the
    function behaves exactly as before for permutations of 7 symbols and
    also works for other lengths.

    Raises:
        ValueError: if ``q[0]`` does not occur in ``p``.
    """
    n = len(p)
    i = p.index(q[0])
    return i if p[i:] == q[:n - i] else n

def distances_matrix():
    """Build the SIZE x SIZE int32 matrix of edge weights between permutation nodes.

    The node list is the first 120 permutations of ``range(7)`` repeated
    twice, followed by all permutations.  Entry ``[i, j]`` is
    ``10 * perm_dist(node_i, node_j)``; off-diagonal entries whose distance
    is zero are replaced by INF and the main diagonal is zero.
    """
    every_perm = list(itertools.permutations(range(7), 7))
    node_perms = every_perm[:120] * 2 + every_perm

    weights = np.zeros((SIZE, SIZE), dtype='int32')
    indexed_pairs = itertools.product(enumerate(node_perms), repeat=2)
    for (row, src), (col, dst) in indexed_pairs:
        weights[row, col] = perm_dist(src, dst)
    weights *= 10

    # A zero distance means the two nodes carry the same permutation: forbid
    # hopping between such copies ...
    weights[weights == 0] = INF
    # ... but keep the zero weight of every node to itself.
    np.fill_diagonal(weights, 0)
    return weights

def write_params_file(initial_tour=None):
    """Write the LKH parameter file ``santa.par`` into WRK_DIR.

    Args:
        initial_tour: when truthy, an ``INITIAL_TOUR_FILE`` entry pointing at
            ``initial_tour.txt`` is included.
    """
    settings = [
        'SPECIAL',
        'PROBLEM_FILE = santa.ctsp',
        'TOUR_FILE = best_tour_$.txt',  # $ will be replaced with the tour cost
        'OUTPUT_TOUR_FILE = output_tour_$.txt',  # save each improvement
    ]
    if initial_tour:
        settings.append('INITIAL_TOUR_FILE = initial_tour.txt')
    settings.extend([
        'INITIAL_TOUR_ALGORITHM = CTSP',
        'MTSP_OBJECTIVE = MINMAX',
        'GAIN23 = YES',
        'PATCHING_C = 3',
        'PATCHING_A = 2',
        f'SEED = {SEED}',
        'MAX_TRIALS = 100000',
        f'TIME_LIMIT = {TIME_LIMIT}',  # seconds
        'TRACE_LEVEL = 2',
        'PRECISION = 1',
    ])
    with open(f'{WRK_DIR}santa.par', 'w') as f:
        for entry in settings:
            print(entry, file=f)
        
def write_problem_file():
    """Write the CTSP instance ``santa.ctsp`` into WRK_DIR.

    Node numbering used in the file (1-based):
      * 1 .. SIZE            -- real permutation nodes,
      * SIZE+1 .. 2*SIZE     -- complementary virtual nodes (node k pairs with k + SIZE),
      * 2*SIZE + 1           -- the depot.

    The full weight matrix has one row per node.  Real-node rows are INF
    towards every real node and carry a column of ``distances_matrix()``
    towards the virtual nodes; virtual-node rows carry a row of the matrix
    towards the real nodes and INF towards the virtual ones.  Every node is
    35 away from the depot.
    NOTE(review): this looks like the usual asymmetric-to-symmetric TSP
    doubling -- confirm.
    """
    with open(f'{WRK_DIR}santa.ctsp', 'w', buffering=-1) as f:
        emit = functools.partial(print, file=f)

        header = (
            'TYPE: CTSP',
            f'DIMENSION: {SIZE * 2 + 1}',
            'SALESMEN : 3',
            'EDGE_WEIGHT_TYPE: EXPLICIT',
            'EDGE_WEIGHT_FORMAT: FULL_MATRIX',
            'EDGE_WEIGHT_SECTION',
        )
        for line in header:
            emit(line)

        # ---- weight matrix ----
        inf_block = ' '.join([str(INF)] * SIZE)
        dist = distances_matrix()
        # rows of the real nodes: INF block, one matrix column, depot distance
        for column in dist.T:
            emit(inf_block, ' '.join(str(w) for w in column), 35)
        # rows of the virtual nodes: one matrix row, INF block, depot distance
        for row in dist:
            emit(' '.join(str(w) for w in row), inf_block, 35)
        # row of the depot itself
        emit(' '.join(['35'] * (SIZE * 2)), INF)

        # ---- "private city sets": 120 real nodes plus their virtual twins per set ----
        emit('CTSP_SET_SECTION')
        for set_no in range(1, 4):
            base = (set_no - 1) * 120
            members = []
            for real in range(base + 1, base + 121):
                members.append(real)         # real node of mandatory permutations
                members.append(real + SIZE)  # complementary virtual node
            emit(set_no, *members, -1)

        # ---- every real node is tied to its virtual twin ----
        emit('FIXED_EDGES_SECTION')
        twins = ' '.join(f'{real} {real + SIZE}' for real in range(1, SIZE + 1))
        emit(twins, -1)

        emit('DEPOT_SECTION')
        emit(2 * SIZE + 1)
        emit(-1)
        emit('EOF')

def write_initial_tour_file(initial_tour=None):
    """Write ``initial_tour.txt`` into WRK_DIR; does nothing for a falsy tour.

    The file holds a ``TOUR_SECTION`` line followed by one line with the
    space-separated node numbers terminated by ``-1``.
    """
    if not initial_tour:
        return
    with open(f'{WRK_DIR}initial_tour.txt', 'w') as f:
        f.write('TOUR_SECTION\n')
        print(*initial_tour, -1, file=f)
    
def solve_ctsp(initial_tour=None, verbose=False):
    """Write all LKH input files and run the solver (blocks until it exits).

    Args:
        initial_tour: optional list of node numbers used as the starting
            tour; passed on to the parameter and initial-tour writers.
        verbose: when true LKH prints to the notebook output, otherwise its
            output is appended to ``lkh.log`` in WRK_DIR.
    """
    write_params_file(initial_tour)
    write_problem_file()
    write_initial_tour_file(initial_tour)

    script_path = WRK_DIR + 'run_lkh.sh'
    # The context manager closes the script even if a write fails.
    with open(script_path, 'w') as f:
        f.write('#!/bin/sh\n')
        f.write(f'cd {WRK_DIR}\n')

        # run LKH-3 to solve CTSP instance
        if verbose:
            f.write('./LKH santa.par\n')
        else:
            f.write('touch lkh.log\n')
            f.write('./LKH santa.par >> lkh.log\n')

        f.write('\n')
    os.chmod(script_path, stat.S_IRWXU)
    os.system(script_path)
        
def find_strings_perms(strings, verbose=False):
    """List, per string, every length-7 window that is a permutation of 1..7.

    Args:
        strings: sequences of symbols (lists of ints in this notebook).
        verbose: when true, print total and unique permutation counts.

    Returns:
        One list of permutation tuples per input string, in order of
        appearance and with repeats kept.
    """
    valid = set(itertools.permutations(range(1, 8), 7))

    def windows(s):
        # every window of 7 consecutive symbols, as a hashable tuple
        return (tuple(s[start:start + 7]) for start in range(len(s) - 6))

    perms = [[w for w in windows(s) if w in valid] for s in strings]

    if verbose:
        totals = [len(found) for found in perms]
        print(f'There are {totals} permutations in strings, {sum(totals)} in total.')
        uniques = [len(set(found)) for found in perms]
        print(f'There are {uniques} unique permutations in strings, {sum(uniques)} in total.')
    return perms

def rebalance_perms(strings_perms, verbose=False):
    """Deduplicate permutations within and across the three strings.

    Each string keeps the first occurrence of every permutation.  A
    permutation that does not start with ``(1, 2)`` is then removed from
    string 0 if it also occurs in string 1 or 2, and from string 1 if it
    also occurs in string 2.  Permutations starting with ``(1, 2)`` are
    never removed.

    Returns:
        A list of lists with the surviving permutations in original order.
    """
    # dicts give O(1) membership tests and remember insertion order
    ordered = [dict.fromkeys(group) for group in strings_perms]

    def removable(perm):
        return perm[:2] != (1, 2)

    # list(...) snapshots the keys so entries can be deleted while looping
    for perm in list(ordered[0]):
        if removable(perm) and (perm in ordered[1] or perm in ordered[2]):
            del ordered[0][perm]
    for perm in list(ordered[1]):
        if removable(perm) and perm in ordered[2]:
            del ordered[1][perm]

    if verbose:
        lens = [len(group) for group in ordered]
        print(f'There are {lens} permutations left in strings after rebalancing, {sum(lens)} in total.')
    return [list(group) for group in ordered]
        
def ctsp_initial_tour(strings_perms):
    """Translate per-string permutation lists into a tour of node numbers.

    Each string contributes its own depot node (``2*SIZE + 1 + string index``)
    followed, for every permutation, by the real node and its complementary
    virtual node (real + SIZE).  Permutations starting with ``(1, 2)`` use
    the node block of their own string (offset ``120 * string index``); all
    other permutations use offset 240.
    """
    rank = {
        perm: position
        for position, perm in enumerate(itertools.permutations(range(1, 8), 7), start=1)
    }
    tour = []
    for string_no, perms in enumerate(strings_perms):
        tour.append(SIZE*2 + string_no + 1)  # depot node for each string
        for perm in perms:
            offset = string_no * 120 if perm[:2] == (1, 2) else 240
            real_node = offset + rank[perm]
            tour.extend((real_node, real_node + SIZE))
    return tour
    
def read_strings(file_name):
    """Read a tour file and rebuild the three symbol strings it encodes.

    The tour is taken from the first line equal to ``2*SIZE + 1`` up to, but
    not including, the last two lines of the file (presumably the ``-1`` and
    ``EOF`` trailer -- confirm against an actual tour file).  It is split at
    the three depot nodes, virtual nodes are dropped, and for each part the
    permutations are merged into a string in both walking directions; the
    shorter result is kept (the forward one on a tie).

    Returns:
        A list of three lists of symbols.
    """
    every_perm = list(itertools.permutations(range(1, 8), 7))
    node_perms = every_perm[:120] * 2 + every_perm

    with open(file_name, 'r') as f:
        lines = [raw.strip() for raw in f.readlines()]
    lines = lines[lines.index(f'{SIZE*2 + 1}'):-2]
    tour = [int(token) - 1 for token in lines]  # switch to 0-based node ids

    # positions of the three depots, in tour order
    i0, i1, i2 = sorted(tour.index(depot) for depot in range(SIZE*2, SIZE*2 + 3))
    segments = [tour[i0+1:i1], tour[i1+1:i2], tour[i2+1:]]

    def merge(perms):
        # start from the first permutation, then append only the symbols
        # each following permutation adds
        merged = list(perms[0])
        for prev, cur in zip(perms, perms[1:]):
            merged.extend(cur[-perm_dist(prev, cur):])
        return merged

    strings = []
    for segment in segments:
        perms = [node_perms[node] for node in segment if node < SIZE]  # leave only real nodes
        forward = merge(perms)
        backward = merge(perms[::-1])
        strings.append(min(forward, backward, key=len))
    return strings

def check_solution(strings):
    """Tell whether ``strings`` form a valid solution.

    Valid means: every string contains all permutations of 1..7 that start
    with ``(1, 2)``, and together the strings contain every permutation of
    1..7.  When a string lacks mandatory permutations, the missing ones are
    printed before returning False.
    """
    every_perm = set(itertools.permutations(range(1, 8), 7))
    required = {perm for perm in every_perm if perm[:2] == (1, 2)}
    found_sets = [set(found) for found in find_strings_perms(strings)]

    for found in found_sets:
        missing = required - found
        if missing:
            print(missing)
            return False

    uncovered = every_perm - set.union(*found_sets)
    return not uncovered

def contain_wildcards(strings):
    """Return True if any of the strings contains the wildcard symbol 8."""
    return any(8 in s for s in strings)

def write_submission_csv(strings):
    """Save the strings as a one-column (``schedule``) CSV in WRK_DIR.

    Symbols are mapped through LETTERS.  The file name records whether the
    wildcard symbol is used and the length of every string.

    Returns:
        The path of the written file.
    """
    schedules = [''.join(LETTERS[symbol] for symbol in s) for s in strings]
    sub = pd.DataFrame({'schedule': schedules})
    sub_name = (
        f'{WRK_DIR}submission_wildcards_{"_".join(str(len(_)) for _ in strings)}.csv'
        if contain_wildcards(strings) else
        f'{WRK_DIR}submission_no_wildcards_{"_".join(str(len(_)) for _ in strings)}.csv'
    )
    sub.to_csv(sub_name, index=False)
    return sub_name
    
def build_results():
    """Watch WRK_DIR for LKH tour files and turn each new one into a submission.

    Meant to run in a background thread next to the solver.  Every 30 seconds
    it looks for ``output_tour_*.txt`` and ``best_tour_*.txt`` files that have
    not been handled yet, rebuilds the strings, validates them and, when
    valid, writes a submission CSV.  Polling stops when ENDED becomes true or
    after ``TIME_LIMIT`` plus a 30-minute allowance, whichever comes first.

    Changes compared with the previous version:
      * one more scan is done after polling stops, so tour files written
        right before the solver exits are not lost;
      * a file that cannot be parsed (e.g. caught while still being written)
        no longer kills the thread; it is retried on later scans, up to
        ``max_attempts`` times;
      * handled files are kept in a set, and the dead first assignment of
        the start-up allowance is removed.
    """
    init_time = 3600 * 0.5  # allowance on top of TIME_LIMIT, seconds
    max_attempts = 3        # parse attempts per file before giving up on it
    start_time = time.time()
    used = set()            # files already handled (or given up on)
    failures = {}           # file -> number of failed parse attempts

    def process_new_tours():
        tour_files = glob.glob(f'{WRK_DIR}output_tour_*.txt') + glob.glob(f'{WRK_DIR}best_tour_*.txt')
        for f in tour_files:
            if f in used:
                continue
            try:
                strings = read_strings(f)
            except (OSError, ValueError, IndexError) as err:
                failures[f] = failures.get(f, 0) + 1
                if failures[f] >= max_attempts:
                    used.add(f)
                    print(f'File {f} could not be parsed ({err!r}); giving up on it.')
                else:
                    print(f'File {f} could not be parsed ({err!r}); will retry.')
                continue
            used.add(f)
            strings.sort(key=len, reverse=True)
            print("\n\n")
            print("=" * 70)
            print(f'File {f}, strings lenghts are {[len(s) for s in strings]}.')
            if check_solution(strings):
                print(f'The solution is written to {write_submission_csv(strings)}')
            else:
                print('The solution is invalid.')
            print("=" * 70)
            print("\n\n")

    while (time.time() - start_time < TIME_LIMIT + init_time) and not ENDED:
        process_new_tours()
        time.sleep(30)

    # Final scan: picks up whatever appeared during the last sleep.
    process_new_tours()
    
# Download and compile LKH with the custom penalty function.
build_lkh()

# Symbol number <-> emoji used in the submission files.
LETTERS = {
    1: '🎅',  # father christmas
    2: '🤶',  # mother christmas
    3: '🦌',  # reindeer
    4: '🧝',  # elf
    5: '🎄',  # christmas tree
    6: '🎁',  # gift
    7: '🎀',  # ribbon
    8: '🌟',  # star
}
INV_LETTERS = {v: k for k, v in LETTERS.items()}

# Load the baseline submission and convert it to lists of symbol numbers,
# longest string first.
solution = pd.read_csv(SRC_FILE)
strings = [[INV_LETTERS[c] for c in s] for s in solution.schedule]
strings.sort(key=len, reverse=True)
print(f'Strings lengths are {[len(_) for _ in strings]}.')

strings_perms = find_strings_perms(strings, verbose=True)
strings_perms = rebalance_perms(strings_perms, verbose=True)

initial_tour = ctsp_initial_tour(strings_perms)

# Convert tour files into submissions in the background while LKH runs.
result_thread = threading.Thread(target=build_results, name="BuildResults", args=[])
result_thread.start()

try:
    solve_ctsp(initial_tour, True)
finally:
    # Always signal the watcher, even if the solver step raised; otherwise it
    # would keep polling until its own timeout.
    ENDED = True
    # Wait for the watcher to notice the flag and finish its work.
    result_thread.join()