# Processing Codenames Clues

In [1]:
from itertools import chain, combinations
import gensim

In [2]:
# Load pretrained word vectors from a word2vec-format binary file.
# `limit=500000` caps how many vectors are read from the file (see gensim's
# load_word2vec_format documentation).
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True, limit=500000
)

In [3]:
def powerset(iterable):
    """Return every non-empty combination of the items in *iterable*.

    Combinations are listed by increasing size: all 1-tuples first (in input
    order), then all 2-tuples, and so on up to the single tuple holding every
    item. The empty tuple is never included, so an empty input yields ``[]``.

    Args:
        iterable: Any iterable of items; it is materialised into a list once.

    Returns:
        A list of tuples, one per non-empty subset.
    """
    s = list(iterable)
    # Start at size 1 (skip the empty set) and go up to len(s) inclusive so
    # the full set is part of the result.
    sizes = range(1, len(s) + 1)
    return list(chain.from_iterable(combinations(s, r) for r in sizes))

In [4]:
def find_grouping_optimum(grouping, red, bomb):
    """Pick the first acceptable clue candidate for a grouping of board words.

    Candidates come from the module-level ``model``: ``similar_by_word`` (top
    10) for a single word, ``most_similar`` (restricted to the first 50000
    vocabulary entries) for several words. They are checked in the order the
    model returns them and the first one accepted by ``is_valid_option`` wins.

    Args:
        grouping: Sequence of board words the clue should cover.
        red: Accepted for interface compatibility; not used by this function.
        bomb: Accepted for interface compatibility; not used by this function.

    Returns:
        ``(grouping, (word, similarity))`` for the chosen candidate, or
        ``(grouping, ("null", 0))`` when no candidate is acceptable or the
        multi-word lookup fails.
    """
    fallback = (grouping, ("null", 0))

    if len(grouping) == 1:
        grouping_options = model.similar_by_word(grouping[0], topn=10)
    else:
        try:
            # Hand the model a plain list; groupings may arrive as tuples.
            grouping_options = model.most_similar(
                positive=list(grouping), restrict_vocab=50000
            )
        except KeyError:
            # Presumably a word missing from the model's vocabulary. Report the
            # grouping and fall back instead of continuing with an unbound
            # candidate list.
            print(grouping)
            return fallback

    for option in grouping_options:
        if is_valid_option(grouping, option):
            return (grouping, option)
    return fallback

def is_valid_option(grouping, option):
    """Tell whether a candidate clue may be used for the given board words.

    ``option`` is a ``(word, score)`` pair; only the word is inspected. The
    candidate is rejected when, for any board word in ``grouping``, one of the
    two (compared case-insensitively) contains the other, or the candidate
    contains a digit or one of ``#``, ``$``, ``,``.

    Returns:
        ``True`` if no board word rules the candidate out, else ``False``.
    """
    forbidden = frozenset('0123456789#$,')

    def conflicts(board_word):
        candidate = option[0]
        lowered_board = board_word.lower()
        lowered_candidate = candidate.lower()
        return (
            lowered_board in lowered_candidate
            or lowered_candidate in lowered_board
            or not forbidden.isdisjoint(candidate)
        )

    return not any(conflicts(board_word) for board_word in grouping)

def apply_weight(grouping_option):
    """Scale a grouping's clue score by the number of words it covers.

    ``grouping_option`` is ``(words, (clue, score))``. The score is multiplied
    by ``len(words) ** 0.2`` so larger groupings get a mild boost.

    Returns:
        ``(words, (clue, weighted_score))``.
    """
    words = grouping_option[0]
    candidate = grouping_option[1]
    size_bonus = len(words) ** 0.2
    weighted_score = candidate[1] * size_bonus
    return (words, (candidate[0], weighted_score))

def sort_clue_options(clue_options):
    """Return the clue options as a new list ordered by score, best first.

    Each option is ``(words, (clue, score))``; ordering uses the score only.
    The input is not modified, and options with equal scores keep their
    relative input order.
    """
    ranked = list(clue_options)
    ranked.sort(key=lambda entry: entry[1][1], reverse=True)
    return ranked

def find_best_clues(clues, opposing_clues, bomb):
    """Greedily split the team's words into groupings, each with one clue.

    On every round all multi-word combinations of the words still uncovered
    are scored (``find_grouping_optimum`` then ``apply_weight``), the highest
    scoring grouping is kept, and its words are dropped from the pool. A
    single leftover word is scored on its own.

    Args:
        clues: The team's board words. The list is copied; the caller's list
            is left untouched.
        opposing_clues: The other team's words, forwarded to
            ``find_grouping_optimum``.
        bomb: The bomb word(s), forwarded to ``find_grouping_optimum``.

    Returns:
        A list of ``(words, (clue, weighted_score))`` tuples in the order the
        groupings were chosen.
    """
    remaining = list(clues)
    groupings = []
    while remaining:
        if len(remaining) == 1:
            combination_clues = [(remaining[0],)]
        else:
            # powerset() lists the single-word tuples first, one per word;
            # slicing them off leaves only multi-word groupings.
            combination_clues = powerset(remaining)[len(remaining):]
            if not combination_clues:
                # Nothing but singletons came back; score the remaining words
                # together so there is always a candidate to pick.
                combination_clues = [tuple(remaining)]

        # Forward the caller's opposing words rather than a module-level name.
        clue_options = [
            apply_weight(find_grouping_optimum(grouping, opposing_clues, bomb))
            for grouping in combination_clues
        ]
        best = sort_clue_options(clue_options)[0]

        groupings.append(best)
        for clue in best[0]:
            remaining.remove(clue)
    return groupings

In [5]:
# Example board: the words to find clues for, the opposing team's words, and
# the bomb word.
blue = ['piano', 'ambulance', 'bugle', 'missile', 'bond', 'heart', 'crane', 'smuggler', 'bank']
red = ['shadow', 'vacuum', 'root', 'ham', 'head', 'march', 'ray', 'air']
bomb = ['Beijing']

# Compute clue groupings for the blue words.
find_best_clues(blue, red, bomb)

[(('piano', 'bugle', 'smuggler'), ('violin', 0.8513782321951723)),
 (('ambulance', 'missile', 'crane'), ('helicopter', 0.7761688168828)),
 (('bond', 'bank'), ('bail', 0.6054254601644118)),
 (('heart',), ('cardiac', 0.5147262215614319))]

In [12]:
# Difference between the vectors for 'Beijing' and 'China'.
a = model['Beijing'] - model['China']

In [13]:
# Difference between the vectors for 'Paris' and 'France'.
b = model['Paris'] - model['France']

array([ 0.00756836,  0.00439453,  0.35836792, -0.05883789, -0.08227539,
       -0.00756836, -0.00878906,  0.02612305, -0.08984375,  0.03088379,
       -0.04296875, -0.10681152, -0.19433594,  0.16113281, -0.11035156,
        0.28999805, -0.0090332 ,  0.10070801, -0.07598877, -0.06298828,
        0.13671875,  0.1586914 , -0.00842285, -0.18676758, -0.10412598,
       -0.11132812,  0.01489258, -0.2557068 , -0.03894043,  0.24230957,
        0.09667969,  0.02050781,  0.1953125 ,  0.01318359,  0.29125977,
       -0.04589844, -0.06640625, -0.06640625,  0.04003906, -0.19396973,
        0.06317139, -0.01086426,  0.06668854,  0.02880859, -0.07055664,
        0.046875  , -0.10351562,  0.04907227, -0.0168457 , -0.05615234,
        0.02392578, -0.09692383, -0.07202148,  0.11621094,  0.04971313,
       -0.02050781, -0.34960938, -0.11523438, -0.21875   ,  0.2159729 ,
        0.01702881,  0.02246094,  0.05310059, -0.16210938,  0.01855469,
       -0.08087158,  0.27657318, -0.05664062,  0.04107666,  0.12