# **REFERENCES**
* https://www.kaggle.com/starohub/st-21-a-minmax-ctsp
* https://www.kaggle.com/yamqwe/permutations-rebalancing-multiprocessing

If you find this helpful, don't forget to upvote it!

If you fork this notebook, please consider leaving an upvote as well; it only takes one click and is much appreciated.

In [None]:
import os
import random
import itertools
import numpy as np
import pandas as pd
from tqdm.contrib.concurrent import process_map

# Notebook shell commands: download the LKH-3.0.7 source tarball, unpack it,
# build it with `make`, and copy the resulting `LKH` binary into the working
# directory (solve_atsp below invokes it as ./LKH).
!wget http://webhotel4.ruc.dk/~keld/research/LKH-3/LKH-3.0.7.tgz
!tar xvfz LKH-3.0.7.tgz
!cd LKH-3.0.7; make; cp LKH ..

In [None]:
# Integer code -> emoji used in the submission strings.
LETTERS = {
    1: '🎅',  # father christmas
    2: '🤶',  # mother christmas
    3: '🦌',  # reindeer
    4: '🧝',  # elf
    5: '🎄',  # christmas tree
    6: '🎁',  # gift
    7: '🎀',  # ribbon
    8: '🌟',  # star
}
# Reverse lookup: emoji -> integer code.
INV_LETTERS = {v: k for k, v in LETTERS.items()}

# Load the starting solution and convert each entry of its `schedule` column
# into a list of integer codes.
solution = pd.read_csv('../input/2448-no-wild/submission_no_wildcards_2448_2448_2447.csv')
strings = [[INV_LETTERS[c] for c in s] for s in solution.schedule]
# Longest string first; the rest of the notebook compares length lists in this order.
strings.sort(key=len, reverse=True)
print(f'Strings lengths are {[len(_) for _ in strings]}.')

# Peek closely at the permutations in each string

In [None]:
def find_strings_perms(strings, verbose=False):
    """Extract the permutations of 1..7 contained in each string.

    Every window of seven consecutive items is inspected; windows that are a
    permutation of the codes 1..7 are kept in order of appearance (duplicates
    included).

    Args:
        strings: iterable of sequences of integer codes.
        verbose: when true, print total and unique permutation counts.

    Returns:
        A list with one list of 7-tuples per input string.
    """
    valid = set(itertools.permutations(range(1, 8), 7))
    perms = [
        [
            window
            for window in (tuple(s[k:k + 7]) for k in range(len(s) - 6))
            if window in valid
        ]
        for s in strings
    ]
    if verbose:
        totals = [len(found) for found in perms]
        print(f'There are {totals} permutations in strings, {sum(totals)} in total.')
        uniques = [len(set(found)) for found in perms]
        print(f'There are {uniques} unique permutations in strings, {sum(uniques)} in total.')
    return perms

# Permutations found in each loaded string (duplicates kept), with counts printed.
strings_perms = find_strings_perms(strings, verbose=True)

In [None]:
def rebalance_perms(strings_perms, verbose=False):
    """Drop redundant non-mandatory permutations from the earlier strings.

    A permutation that does not start with (1, 2) only has to be kept once, so
    it is removed from a string whenever any *later* string also contains it.
    Permutations starting with (1, 2) are kept in every string. Duplicates
    inside one string are collapsed, keeping first-seen order.

    Works for any number of strings. For exactly three strings the result is
    identical to the previous hard-coded implementation, which raised
    IndexError for fewer than three strings and ignored any beyond the third.

    Args:
        strings_perms: list (one entry per string) of iterables of 7-tuples.
        verbose: when true, print the remaining counts.

    Returns:
        A new list of lists; the input is not modified.
    """
    # dicts give O(1) membership tests while preserving permutation order
    remaining = [dict.fromkeys(perms) for perms in strings_perms]
    for idx, current in enumerate(remaining):
        # Later strings have not been pruned yet when they are consulted here.
        later = remaining[idx + 1:]
        for p in list(current):  # iterate over a snapshot so deletion is safe
            if p[:2] != (1, 2) and any(p in other for other in later):
                del current[p]
    if verbose:
        lens = [len(_) for _ in remaining]
        print(f'There are {lens} permutations left in strings after rebalancing, {sum(lens)} in total.')
    return [list(_) for _ in remaining]

# Replace the per-string permutation lists by their rebalanced versions.
strings_perms = rebalance_perms(strings_perms, verbose=True)

In [None]:
def perm_dist(p, q):
    """Return how many items must be appended after p so that q follows it.

    The candidate shift is the position of q's first item inside p. If the
    tail of p from that position equals the matching head of q the shift is
    returned, otherwise 7 (no overlap). Raises ValueError when q[0] is not
    in p.
    """
    shift = p.index(q[0])
    if p[shift:] == q[:7 - shift]:
        return shift
    return 7

def perms_to_string(perms):
    """Concatenate a sequence of permutations into one overlapping string.

    Starts from the first permutation and, for each following one, appends
    only the items not already provided by the overlap with its predecessor
    (as measured by perm_dist).

    Args:
        perms: iterable of permutations (tuples of integer codes).

    Returns:
        A list of integer codes; an empty list when `perms` is empty.
    """
    perms = list(perms)
    if not perms:
        # Nothing to concatenate; previously this raised IndexError.
        return []
    s = [*perms[0]]
    for p, q in zip(perms, perms[1:]):
        d = perm_dist(p, q)
        # A distance of 0 means q adds nothing. The guard is needed because
        # q[-0:] is the whole of q, not an empty slice.
        if d:
            s.extend(q[-d:])
    return s

def distances_matrix(perms):
    """Build the square int8 matrix of pairwise perm_dist values.

    Entry [i, j] is perm_dist(perms[i], perms[j]).
    """
    size = len(perms)
    m = np.zeros((size, size), dtype='int8')
    # Fill one row at a time; every row holds the distances from perms[i].
    for i, p in enumerate(perms):
        m[i, :] = [perm_dist(p, q) for q in perms]
    return m

def write_params_file(uid):
    """Write the LKH parameter file `santa_<uid>.par` in the working directory.

    The file points LKH at the problem, initial-tour and output-tour files
    that share the same `uid`.
    """
    settings = [
        'PROBLEM_FILE = santa_%s.atsp' % uid,
        'TOUR_FILE = best_tour_%s.txt' % uid,
        'INITIAL_TOUR_FILE = initial_tour_%s.txt' % uid,
        'PATCHING_C = 4',
        'PATCHING_A = 3',
        'GAIN23 = YES',
        'SEED = 42',
        'MAX_TRIALS = 100000',
        'TIME_LIMIT = 600',  # seconds
        'TRACE_LEVEL = 1',
    ]
    with open('santa_%s.par' % uid, 'w') as f:
        f.write(''.join(line + '\n' for line in settings))

def write_problem_file(uid, distances):
    """Write the ATSP instance `santa_<uid>.atsp` with an explicit full matrix.

    Args:
        uid: identifier embedded in the file name.
        distances: square matrix (iterable of rows) of edge weights.
    """
    with open('santa_%s.atsp' % uid, 'w') as f:
        header = [
            'TYPE: ATSP',
            f'DIMENSION: {len(distances)}',
            'EDGE_WEIGHT_TYPE: EXPLICIT',
            'EDGE_WEIGHT_FORMAT: FULL_MATRIX\n',  # embedded newline leaves a blank line
            'EDGE_WEIGHT_SECTION',
        ]
        for line in header:
            f.write(line + '\n')
        for row in distances:
            f.write(' '.join(map(str, row)) + '\n')

def write_initial_tour_file(uid, perms):
    """Write `initial_tour_<uid>.txt` listing nodes 1..len(perms) in order.

    The node list is written on one line and terminated by -1.
    """
    node_ids = ' '.join(map(str, range(1, len(perms) + 1)))
    with open('initial_tour_%s.txt' % uid, 'w') as f:
        f.write('TOUR_SECTION\n')
        f.write(f'{node_ids} -1\n')

def read_output_tour(uid, perms):
    """Read `best_tour_<uid>.txt` and return `perms` reordered along the tour.

    The node numbers are the lines after `TOUR_SECTION`, excluding the last
    two lines of the file; they are 1-based indices into `perms`.
    """
    perms = list(perms)
    with open('best_tour_%s.txt' % uid) as f:
        lines = list(f)
    first_node = lines.index('TOUR_SECTION\n') + 1
    return [perms[int(node) - 1] for node in lines[first_node:-2]]
    
def solve_atsp(perms, verbose=False):
    """Reorder `perms` with LKH-3 and return the resulting overlapped string.

    Writes the parameter, problem and initial-tour files for a fresh run id,
    runs the `./LKH` binary on them, reads the tour it produced and
    concatenates the permutations in that order.

    Args:
        perms: list of permutations (nodes of the ATSP instance).
        verbose: when true, LKH output goes to the process's standard output;
            otherwise it is appended to `lkh_<uid>.log`.

    Returns:
        The list of integer codes built by perms_to_string from the tour.
    """
    import subprocess
    import uuid

    # A random 128-bit id instead of random.randint(1, 9999): this function
    # runs in several worker processes at once and across loop iterations, so
    # a four-digit id can collide and make one run read or overwrite another
    # run's files (including a stale best_tour file).
    uid = uuid.uuid4().hex
    write_params_file(uid)
    distances = distances_matrix(perms)
    write_problem_file(uid, distances)
    write_initial_tour_file(uid, perms)

    # Run LKH-3 to solve ATSP instance (argument list, no shell involved)
    command = ['./LKH', 'santa_%s.par' % uid]
    if verbose:
        subprocess.run(command)
    else:
        with open('lkh_%s.log' % uid, 'a') as log:
            subprocess.run(command, stdout=log)
    tour = read_output_tour(uid, perms)
    return perms_to_string(tour)

In [None]:
# Keep re-solving all strings with LKH for as long as the sorted list of
# string lengths gets smaller.
old_lens = [len(s) for s in strings]
improved = True
while improved:
    print('=' * 91)
    # Serial alternative: new_strings = [solve_atsp(i) for i in strings_perms]
    new_strings = sorted(process_map(solve_atsp, strings_perms), key=len, reverse=True)
    new_lens = [len(s) for s in new_strings]
    # List comparison is lexicographic: the longest string is compared first.
    improved = new_lens < old_lens
    if improved:
        print(f'Improved strings lengths from {old_lens} to {new_lens}.')
        strings, old_lens = new_strings, new_lens
        strings_perms = find_strings_perms(strings, verbose=True)
        strings_perms = rebalance_perms(strings_perms, verbose=True)

Wow, we improved the solution quite a bit. Let's check that it is still valid and that we haven't lost any permutations along the way.

In [None]:
# Every permutation of 1..7, and the subset starting with (1, 2) that each
# string must contain on its own.
all_perms = set(itertools.permutations(range(1, 8), 7))
mandatory_perms = {(1, 2) + tail for tail in itertools.permutations(range(3, 8), 5)}

strings_perms = [set(found) for found in find_strings_perms(strings)]
for i, s in enumerate(strings_perms):
    missing = mandatory_perms - s
    if missing:
        print(f'String #{i} is missing {missing}.')
uncovered = all_perms - set.union(*strings_perms)
if uncovered:
    print(f'Strings are missing {uncovered}.')

As everything is ok, now we'll save the found solution and then try to improve it even more using wildcards.

In [None]:
# Convert the integer strings back to emoji and save them; the file name
# carries the three string lengths.
emoji_schedule = [''.join(LETTERS[code] for code in s) for s in strings]
sub = pd.DataFrame({'schedule': emoji_schedule})
lengths_tag = '_'.join(str(len(s)) for s in strings)
sub_name = f'submission_no_wildcards_{lengths_tag}.csv'
sub.to_csv(sub_name, index=False)

# Wildcards optimization

We'll use the code from the [notebook](https://www.kaggle.com/yosshi999/wildcard-postprocessing-using-dynamic-programming) created by [Yosshi999](https://www.kaggle.com/yosshi999) to improve our solution with wildcards.

In [None]:
import numpy as np
import pandas as pd
import itertools

# Directories and files used by the wildcard stage.
WRK_DIR = '/kaggle/working/'
DAT_DIR = '../input/santa-2021/'
# NOTE(review): SRC_FILE is not referenced anywhere else in the visible code.
SRC_FILE = '../input/2448-no-wild/submission_no_wildcards_2448_2448_2447.csv'
RMV_FILE = WRK_DIR + 'removed.csv'  # output of the removal pass
ADD_FILE = WRK_DIR + 'add.csv'      # output of the wildcard pass

# When a flag is False the corresponding pass is skipped and its CSV is read
# from disk instead.
RUN_RMV = True
RUN_ADD = True

def check_if_good(a):
    """Check that the emoji strings in `a` form a valid submission.

    Reads the module-level DataFrames `start`, `other` and `wildcard` and the
    module-level `symbols` string. Three rules are checked, in this order:

    1. every permutation in `start` (those beginning with 🎅🤶) is covered by
       EACH string, either literally or through one of its wildcard spellings
       (`wildcard` rows whose 'Permutation' matches, column 'Factor');
    2. every permutation in `other` is covered by at least one string, again
       literally or through a wildcard spelling;
    3. no string contains more than two 🌟.

    The previous implementation accepted a 🎅🤶 permutation as soon as a
    wildcard spelling appeared in ANY string, even if one of the strings
    contained neither the permutation nor a spelling of it.

    Args:
        a: sequence of emoji strings (the schedules).

    Returns:
        True when all rules hold; otherwise prints the first violation and
        returns False.
    """
    to_digits = str.maketrans(symbols, "12345678")

    def wildcard_factors(permu):
        # Wildcard spellings listed for this permutation.
        return wildcard.loc[wildcard['Permutation'] == permu, 'Factor'].tolist()

    # If all the combinations beginning with 🎅🤶 are in every string
    for permu in start['Permutation'].tolist():
        factors = None  # looked up lazily, only when a literal match is missing
        for idx, string in enumerate(a):
            if permu in string:
                continue
            if factors is None:
                factors = wildcard_factors(permu)
            if not any(factor in string for factor in factors):
                print("Not all 🎅🤶, missing:", permu, str(permu).translate(to_digits), "; string:", idx)
                return False

    # If all the combinations are in the submissions
    for permu in other['Permutation'].tolist():
        if any(permu in string for string in a):
            continue
        # For wildcards
        factors = wildcard_factors(permu)
        if not any(factor in string for factor in factors for string in a):
            print("Not all the combinaison, missing:", permu, str(permu).translate(to_digits))
            return False

    # If there are 2 stars or less
    for idx, string in enumerate(a):
        if string.count('🌟') > 2:
            print(f"Too many stars in string: {idx}")
            return False
    return True

def hamming_distance(str1, str2):
    """Count positions at which the two sequences differ.

    Only the common prefix length is compared (zip stops at the shorter one).
    """
    mismatches = 0
    for left, right in zip(str1, str2):
        if left != right:
            mismatches += 1
    return mismatches

def offset(s1, s2):
    """Return the smallest shift k at which s2 can overlap the tail of s1.

    For each k the tail s1[k:] is compared with s2[:7-k]; the first k with no
    mismatch is returned. When no shift matches, len(s1) is returned. Both
    arguments must have the same length (checked with an assert).
    """
    assert len(s1) == len(s2)
    size = len(s1)
    matching_shifts = (
        k for k in range(size)
        if hamming_distance(s1[k:], s2[:7 - k]) == 0
    )
    return next(matching_shifts, size)

def is_perm(s):
    """Return True when each of the digits '1'..'7' occurs in `s`."""
    return all(str(digit) in s for digit in range(1, 8))

def str2perms(best):
    """List the distinct permutations found in `best`, in order of appearance.

    Every 7-character window accepted by is_perm is kept the first time it is
    seen.

    Args:
        best: digit string to scan.

    Returns:
        A list of 7-character strings without duplicates.
    """
    gp = []
    seen = set()  # O(1) duplicate test instead of scanning the result list
    for k in range(len(best) - 6):
        s = best[k:k + 7]
        if s not in seen and is_perm(s):
            seen.add(s)
            gp.append(s)
    return gp

def rebalance_perms(strings_perms, verbose=False):
    """Drop redundant non-mandatory permutations from the earlier strings.

    String-based counterpart of the tuple-based rebalance_perms defined
    earlier in this file (which this definition shadows). A permutation that
    does not start with "12" is removed from a string whenever any later
    string also contains it; permutations starting with "12" are always kept.

    Unlike the previous implementation this one
      * does not modify the caller's lists (it used to remove items from the
        inner lists in place, which also changed any shallow copy's source),
      * accepts any number of strings instead of exactly three,
      * tests membership against sets instead of scanning lists.

    Args:
        strings_perms: list (one entry per string) of lists of 7-char strings.
        verbose: when true, print the remaining counts.

    Returns:
        A new list of new lists with the surviving permutations in order.
    """
    # Snapshot of the original contents; later strings are consulted unpruned.
    contents = [set(perms) for perms in strings_perms]
    rebalanced = []
    for idx, perms in enumerate(strings_perms):
        later = contents[idx + 1:]
        rebalanced.append([
            p for p in perms
            if p[:2] == "12" or not any(p in other for other in later)
        ])
    if verbose:
        lens = [len(_) for _ in rebalanced]
        print(f'There are {lens} permutations left in strings after rebalancing, {sum(lens)} in total.')
    return rebalanced

# Emoji alphabet; position i corresponds to the digit i+1 in "12345678".
symbols = "🎅🤶🦌🧝🎄🎁🎀🌟"
# Start from the no-wildcard submission held in `sub`.
schedule = sub.schedule.tolist()

# Work on digit strings from here on.
strings = [s.translate(str.maketrans(symbols, "12345678")) for s in schedule]
print(f'Strings lengths are {[len(_) for _ in strings]}.')

# Distinct permutations per string, in order of appearance.
perms = [str2perms(x) for x in strings]
size_perms = [len(x) for x in perms]
print(f'Size perms: {size_perms}')

# NOTE: list.copy() is shallow, so the inner per-string lists are shared with `perms`.
rb_perms = rebalance_perms(perms.copy(), True)

permus = pd.read_csv(DAT_DIR + "permutations.csv") # Import of permutations
wildcard = pd.read_csv(DAT_DIR + "wildcards.csv") # Import of wildcard permutations

start = permus[permus.Permutation.str[:2] =='🎅🤶'] # DataFrame of permutations starting with 🎅🤶
other = permus[permus.Permutation.str[:2] !='🎅🤶'].reset_index(drop=True) # DataFrame of all other permutations

def score_matrix(perm_list):
    """Return the per-permutation cost of laying out `perm_list` in order.

    The first permutation costs 7; every other one costs its offset from the
    permutation before it. An empty list gives an empty result.
    """
    if len(perm_list) == 0:
        return []
    following = [offset(prev, cur) for prev, cur in zip(perm_list, perm_list[1:])]
    return [7] + following

def fill_wildcard(p, p2, wi):
    """Put the wildcard '8' at position `wi` of p2 and at the aligned spot in p.

    The character at p2[wi] is replaced by '8'. If the part of the masked p2
    before `wi` occurs in p, the character of p right after that occurrence is
    replaced by '8' as well and both masked strings are returned; otherwise
    the two inputs are returned unchanged.
    """
    masked_p2 = p2.replace(p2[wi:wi + 1], '8')
    prefix = masked_p2[:wi]
    if prefix not in p:
        return p, p2

    after_prefix = p.index(prefix) + len(prefix)
    masked_p = p.replace(p[after_prefix:after_prefix + 1], '8')
    return masked_p, masked_p2

def add_wildcard(perm_list, pm):
    """Apply the first wildcard placement that `lookup` finds for `pm`.

    Slides a window of three consecutive permutations over a copy of
    `perm_list`; at the first window for which lookup reports success the
    three entries are replaced by the returned ones and the copy is returned.
    Returns None when no window succeeds.
    """
    updated = perm_list.copy()
    for i in range(1, len(updated) - 1):
        hit, ro, rp, rq = lookup(pm, updated[i - 1], updated[i], updated[i + 1])
        if hit:
            updated[i - 1:i + 2] = [ro, rp, rq]
            return updated
    return None
        
def add_wildcard_all(perm_list, pm):
    """Collect every single-window wildcard placement `lookup` finds for `pm`.

    For each window of three consecutive permutations for which lookup
    reports success, a separate copy of `perm_list` with just that window
    replaced is added to the result. Returns an empty list when nothing fits.
    """
    snapshot = perm_list.copy()
    variants = []
    for i in range(1, len(snapshot) - 1):
        hit, ro, rp, rq = lookup(pm, snapshot[i - 1], snapshot[i], snapshot[i + 1])
        if not hit:
            continue
        variant = perm_list.copy()
        variant[i - 1:i + 2] = [ro, rp, rq]
        variants.append(variant)
    return variants
    
def remove_perm(perm_list, p, equals = False):
    """Return `perm_list` without `p` if that does not hurt the total score.

    The score is the sum of score_matrix. With `equals` False the removal is
    accepted only when the score strictly decreases; with `equals` True an
    unchanged score is accepted too. The outcome is printed either way.

    Returns:
        A new list without the first occurrence of `p`, or None when the
        removal is rejected. Raises ValueError if `p` is not in `perm_list`.
    """
    cur_score = sum(score_matrix(perm_list))

    candidate = perm_list.copy()
    candidate.remove(p)
    new_score = sum(score_matrix(candidate))

    accepted = new_score <= cur_score if equals else new_score < cur_score
    if not accepted:
        print(f'Not removed {p} with {cur_score} ...')
        return None
    print(f'Removed {p} with {new_score} ...')
    return candidate

def perms2str(ngp):
    """Concatenate permutation strings, overlapping each with its predecessor.

    Starts with the first permutation and appends, for every following one,
    its last `offset(previous, current)` characters.
    """
    result = ngp[0]
    for prev, cur in zip(ngp, ngp[1:]):
        result += cur[-offset(prev, cur):]
    return result

def perms2str_w(ngp):
    """Concatenate permutation strings that may contain the wildcard '8'.

    Works like perms2str but measures overlaps with offset_wc. When a
    permutation with a wildcard follows one without, one character of the
    text built so far is first either dropped or overwritten with '8', at the
    position derived from the overlap and the length of the part of the new
    permutation that precedes its wildcard.
    """
    result = ngp[0]
    for prev, cur in zip(ngp, ngp[1:]):
        d = offset_wc(prev, cur)
        if '8' not in prev and '8' in cur:
            before_wc, after_wc = cur.split('8')[0], cur.split('8')[1]
            i = len(result) - (7 - d - len(before_wc))
            rest = result[i + 1:]
            if len(rest) == 0 and len(after_wc) > 1:
                # nothing follows position i: drop that character
                result = result[:i] + rest
            else:
                # overwrite position i with the wildcard
                result = result[:i] + '8' + rest
        result += cur[-d:]

    return result
    
def tostr(src):
    """Return `src` converted to a string, rebuilt character by character."""
    text = str(src)
    return ''.join(map(str, text))

def offset_wc(p1, p2):
    """Overlap shift between p1 and p2 when either may contain wildcard '8'.

    * Both or neither contain '8': plain offset(p1, p2).
    * Only p2 contains '8': the part of p2 before the wildcard must occur in
      p1 and what follows in p1 (skipping one character) must be a prefix of
      the part of p2 after the wildcard; the position in p1 is returned.
    * Only p1 contains '8': the wildcard is tried as '1'..'7' in turn and the
      first offset below 7 is returned.

    Returns 7 when no overlap is found.
    """
    wild1, wild2 = '8' in p1, '8' in p2
    if wild1 == wild2:
        return offset(p1, p2)

    if wild2:
        pieces = p2.split('8')
        head, tail = pieces[0], pieces[1]
        if head not in p1:
            return 7
        pos = p1.index(head)
        # skip the p1 character that lines up with the wildcard
        remainder = p1[pos + len(head) + 1:]
        return pos if tail.startswith(remainder) else 7

    for digit in '1234567':
        d = offset(p1.replace('8', digit), p2)
        if d < 7:
            return d
    return 7
    
def offset_w(p1, p2, wp):
    """Overlap shift of p2 on p1, ignoring the character of p2 at index `wp`.

    The part of p2 before `wp` must occur in p1, and what follows in p1 after
    skipping one character must be a prefix of the part of p2 after `wp`.
    Returns the position of the match in p1, or 7 when there is none.
    """
    head, tail = p2[:wp], p2[wp + 1:]
    if head not in p1:
        return 7
    pos = p1.index(head)
    remainder = p1[pos + len(head) + 1:]
    return pos if tail.startswith(remainder) else 7

def lookup(pm, o, p, q):
    """Look for a wildcard position in o+p+q that also spells `pm`.

    The three permutations are joined with perms2str. Each position of the
    joined text is masked with '8' in turn (the text is used as is, and only
    once, if it already contains '8'). If any single-character masking of
    `pm` occurs in the masked text, the search stops.

    Returns:
        (found, ro, rp, rq): `found` tells whether a position was found;
        ro/rp/rq are o/p/q, each replaced by its first single-character
        masking that occurs in the masked text when there is one.
    """
    def masked(text, k):
        # text with the character at index k replaced by the wildcard
        return text[0:k] + '8' + text[k + 1:]

    def first_masking_in(perm, haystack):
        for k in range(7):
            candidate = masked(perm, k)
            if candidate in haystack:
                return candidate
        return perm

    rs = perms2str([o, p, q])
    already_wild = '8' in rs
    for j in range(len(rs)):
        c = rs if already_wild else masked(rs, j)
        if any(masked(pm, i) in c for i in range(7)):
            return True, first_masking_in(o, c), first_masking_in(p, c), first_masking_in(q, c)
        if already_wild:
            break

    return False, o, p, q

# Removal pass: for every group (string) find the permutations not starting
# with '12' whose removal strictly lowers the group's score, and store them
# in RMV_FILE. With RUN_RMV False the file from an earlier run is loaded.
if RUN_RMV:
    rows = []
    for i in range(len(rb_perms)):
        group = i + 1  # groups are numbered from 1 in the CSV
        perm_list_a = rb_perms[i].copy()
        rsa = perms2str(perm_list_a)
        sa = len(rsa)
        for p in perm_list_a:
            # permutations starting with '12' are never removal candidates
            if p[0:2] == '12':
                continue
            new_perm_list_a = remove_perm(perm_list_a, p, False)
            if new_perm_list_a is None:
                continue
            # length of the group's string once p is dropped
            nrsa = perms2str(new_perm_list_a)
            nsa = len(nrsa)
            rw = {'group': group, 'perm': p, 'score': nsa}
            rows.append(rw)
    # NOTE(review): when `rows` is empty nothing is written, so the read below
    # picks up whatever RMV_FILE already exists (or fails if there is none).
    if len(rows) > 0:
        df = pd.DataFrame(rows)
        df = df.sort_values(by=['perm'], ascending=[True])
        df.to_csv(RMV_FILE, index=False)
    rmv_df = pd.read_csv(RMV_FILE)
else:
    rmv_df = pd.read_csv(RMV_FILE)
    
# Wildcard pass: for every group and every ordered pair (p, p2) of distinct
# removal candidates, drop both permutations from the group, try to cover
# each of them with a wildcard via add_wildcard_all, rebuild the string with
# perms2str_w and record the variants that are shorter than the group's
# current string. With RUN_ADD False the file from an earlier run is loaded.
if RUN_ADD:
    rows = []
    for group in range(len(rb_perms)):
        rdf = rmv_df[rmv_df['group'] == group + 1]
        rdf = rdf.sort_values(by=['perm'], ascending=[True])
        for i in range(len(rdf)):
            # tostr: presumably needed because read_csv parses the digit-only
            # 'perm' column as numbers; TODO confirm
            p = tostr(rdf['perm'].iloc[i])
            if p[0:2] == '12':
                continue
            for i2 in range(len(rdf)):
                p2 = tostr(rdf['perm'].iloc[i2])
                if p2[0:2] == '12':
                    continue
                if p == p2:
                    continue
                perm_list_a = rb_perms[group].copy()
                # baseline length of the group's string (same for every pair)
                rsa = perms2str_w(perm_list_a)
                sa = len(rsa)
                perm_list_a.remove(p)
                perm_list_a.remove(p2)
                
                result = []
                # first place a wildcard for p, then one for p2 in each variant
                new_result = add_wildcard_all(perm_list_a, p)
                if len(new_result) == 0:
                    continue
                for new_perm_list_a in new_result:
                    new_result_2 = add_wildcard_all(new_perm_list_a, p2)
                    if len(new_result_2) == 0:
                        continue
                    for npla in new_result_2:
                        result.append(npla)
                if len(result) == 0:
                    continue

                count = 0
                for new_perm_list_a in result:
                    count += 1
                    nrsa = perms2str_w(new_perm_list_a)
                    nsa = len(nrsa)
                
                    print(f'{p}, {p2}: {sa}, {nsa} <- {group + 1}, {count}')
                    # keep only variants strictly shorter than the baseline
                    if nsa < sa:
                        rw = {'group': group + 1, 'perm': p, 'perm_2': p2, 'part_no': count, 'score': nsa, 'string': nrsa}
                        rows.append(rw)

    # NOTE(review): when `rows` is empty nothing is written, so the read below
    # picks up whatever ADD_FILE already exists (or fails if there is none).
    if len(rows) > 0:
        df = pd.DataFrame(rows)
        df = df.sort_values(by=['group', 'score'], ascending=[True, True])
        df.to_csv(ADD_FILE, index=False)
    add_df = pd.read_csv(ADD_FILE)
else:
    add_df = pd.read_csv(ADD_FILE)
    
# Per group, order the recorded candidates by score (string length) and take
# the best one.
adf1 = add_df[add_df['group'] == 1]
adf1 = adf1.sort_values(by=['score'], ascending=[True])

adf2 = add_df[add_df['group'] == 2]
adf2 = adf2.sort_values(by=['score'], ascending=[True])

adf3 = add_df[add_df['group'] == 3]
adf3 = adf3.sort_values(by=['score'], ascending=[True])

# NOTE(review): .iloc[0] raises IndexError when a group has no candidate row.
s1 = adf1['string'].iloc[0]
s2 = adf2['string'].iloc[0]
s3 = adf3['string'].iloc[0]
schedule = [s1, s2, s3]

# Digit -> emoji mapping used to turn the digit strings back into emoji.
replace_dict = {
 '1': '🎅',
 '2': '🤶',
 '8': '🌟',
 '3': '🦌',
 '4': '🧝',
 '5': '🎄',
 '6': '🎁',
 '7': '🎀'}

for k,v in replace_dict.items():
    schedule[0] = schedule[0].replace(k, v)
    schedule[1] = schedule[1].replace(k, v)
    schedule[2] = schedule[2].replace(k, v)
    
# NOTE(review): the result of check_if_good is ignored, so the submission
# below is written even when the check prints a problem and returns False.
check_if_good(schedule)

# String lengths (used in the file name) and number of wildcards per string.
scores = [len(schedule[0]), len(schedule[1]), len(schedule[2])]
nws = [len(schedule[0].split('🌟')) - 1, len(schedule[1].split('🌟')) - 1, len(schedule[2].split('🌟')) - 1]
print(f'Number of wildcards: {nws}')

# WRITE SUBMISSION CSV
sub = pd.DataFrame()
sub['schedule'] = schedule
sub.to_csv(f'{WRK_DIR}submission_wildcards_{str(scores[0])}_{str(scores[1])}_{str(scores[2])}.csv',index=False)
sub.head()