# Processing Codenames Clues

In [1]:
from itertools import chain, combinations
import gensim

In [2]:
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True, limit=500000
)

In [3]:
def powerset(iterable):
    s = list(iterable)
    pwrset = list(chain.from_iterable(combinations(s, r) for r in range(len(s))))
    pwrset.remove(())
    return pwrset

In [4]:
def find_grouping_optimum(grouping, red, bomb):
    if len(grouping) == 1:
        grouping_options = model.similar_by_word(grouping[0], topn=10)
    else:
        try:
            grouping_options = model.most_similar(positive=grouping, negative=(red+bomb), restrict_vocab=50000)
        except:
            print(grouping)
    
    for option in grouping_options:
        if is_valid_option(grouping, option):
            return (grouping, option)
        
    return (grouping, ("null", 0))

def is_valid_option(grouping, option):
    chars = set('0123456789#$,')
    for clue in grouping:
        if clue.lower() in option[0].lower() or option[0].lower() in clue.lower() or any((c in option[0]) for c in chars):
            return False
    return True

def apply_weight(grouping_option):
    clue = grouping_option[1][0]
    score = grouping_option[1][1] * len(grouping_option[0])
    return (grouping_option[0], (clue, score))

def sort_clue_options(clue_options):
    return list(sorted(clue_options, key=lambda r: r[1][1], reverse=True))

def find_best_clues(clues, opposing_clues, bomb):
    groupings = []
    while len(clues) > 0:
        if len(clues) == 1:
            combination_clues = [(clues[0],)]
        else:
            combination_clues = powerset(clues)[len(clues):]
            
        clue_options = [find_grouping_optimum(grouping, red, bomb) for grouping in combination_clues]
        clue_options = list(map(apply_weight, clue_options))
        sorted_clue_options = sort_clue_options(clue_options)
    
        groupings.append(clue_options[0])
        for clue in clue_options[0][0]:
            clues.remove(clue)
    return groupings

In [5]:
blue = ['piano', 'ambulance', 'bugle', 'missile', 'bond', 'heart', 'crane', 'smuggler', 'bank']
red = ['shadow', 'vacuum', 'root', 'ham', 'head', 'march', 'ray', 'air']
bomb = ['Beijing']

find_best_clues(blue, red, bomb)

[(('piano', 'ambulance'), ('EMT', 0.33964040875434875)),
 (('bugle', 'missile'), ('Cephalon', 0.252603143453598)),
 (('bond', 'heart'), ('Relationships', 0.3557929992675781)),
 (('crane', 'smuggler'), ('Cephalon', 0.2714196741580963)),
 (('bank',), ('lender', 0.6342284679412842))]