In [1]:
import numpy as np
import pandas as pd
from scipy import spatial
from random import randint

In [None]:
# Import English words from https://github.com/jbowens/codenames
card_words = pd.read_json('./processing/data/words_v1.json')
# 400 words in this dataset
card_words.count()

In [None]:
# Generate pseudo random state
state = randint(1, 1000)

# Get sample from dataframe
friend_series = card_words.sample(n=8, random_state=state)
# Drop so there are no duplicates
card_words = card_words.drop(friend_series.index)
# Convert to set
friend = [fr.lower() for fr in friend_series[0].tolist()]

foe_series = card_words.sample(n=9, random_state=state)
card_words = card_words.drop(foe_series.index)
foe = [f.lower() for f in foe_series[0].tolist()]

neutral_series = card_words.sample(n=6, random_state=state)
card_words = card_words.drop(neutral_series.index)
neutral = [n.lower() for n in neutral_series[0].tolist()]

assassin_series = card_words.sample(n=1, random_state=state)
assassin = [a.lower() for a in assassin_series[0].tolist()]

In [141]:
print(friend)
print(foe)
print(neutral)
print(assassin)
# Empty intersection set = no duplicates
print(set(friend).intersection(set(foe), set(neutral), set(assassin)))

['alps', 'kiwi', 'model', 'tablet', 'bank', 'tick', 'lemon', 'degree']
['nail', 'amazon', 'court', 'hook', 'robot', 'watch', 'hand', 'missile', 'temple']
['penguin', 'code', 'club', 'giant', 'mint', 'soul']
['note']
set()


In [146]:
# friend = ['apple', 'banana', 'moon']
# foe = ['phone', 'text', 'call']
# assassin = ['president']

In [136]:
# Read in processed data as dataframe
vectors = pd.read_pickle('./processing/data/all_vectors.pkl')

In [165]:
card_words[0]

0         AFRICA
1          AGENT
2            AIR
3          ALIEN
6      AMBULANCE
         ...    
395         WHIP
396         WIND
397        WITCH
398         WORM
399         YARD
Name: 0, Length: 346, dtype: object

In [171]:
# game_words = [w.lower() for w in card_words[0].tolist() if w.lower() in all_words]
all_words = list(vectors.index.get_level_values(level=0).drop_duplicates())

In [224]:
friends = ['apple', 'screen', 'bear', 'superhero', 'cook', 'cricket', 'file', 'calf']
foes = ['fence', 'cable', 'pitch', 'nut', 'mountain', 'straw', 'millionaire', 'boot', 'washer']
neutrals = ['mug', 'fence', 'skyscraper', 'cliff', 'brush']
assassin = ['rabbit']

friends = ['apple', 'screen', 'bear', ]
foes = ['fence', 'cable', 'pitch', ]
neutrals = ['mug', 'fence']
assassin = ['rabbit']
bad = foes + neutral + assassin

In [229]:
constant = 4
def distance(source, target, embedding_name='glove'):
    return spatial.distance.cosine(vectors.loc[source, embedding_name].to_numpy(), vectors.loc[target, embedding_name].to_numpy())

def closest_words(target, embedding_name):
    return sorted(vectors.index.get_level_values(0), key=lambda source: distance(source, target, embedding_name))

def goodness(word, friends, bad):
    if word in friends + bad:
        return -999
    return sum([distance(word, b) for b in bad]) - constant * sum([distance(word, f) for f in friends])

def minimax(word, friends, bad):
    if word in friends + bad:
        return -999
    return min([distance(word, b) for b in bad]) - max([distance(word, f) for f in friends])

def candidates(friends, bad, size=100):
    best = sorted(all_words, key=lambda word: -1 * goodness(word, friends, bad))
    res = [minimax(w, friends, bad) for i, w in enumerate(sorted(best[:250], key=lambda word: -1 * minimax(word, friends, bad))[:size])]
    return res

In [230]:
print(candidates(friends, bad))

[0.08798060766080895, 0.07244539010492557, 0.07128717310646349, 0.0641496564904771, 0.05807675681051261, 0.057284568660745494, 0.04954947077142158, 0.04648014051389315, 0.03871736844897278, 0.037356249130429675, 0.03568568980197262, 0.03289462735035997, 0.03259771160511715, 0.02561001472506197, 0.025206115957602693, 0.019483697502809982, 0.01940151406011792, 0.01694502680470167, 0.015515167394504537, 0.01486229428589525, 0.014779386037062747, 0.014323239417787348, 0.01377921884543365, 0.013583296321639904, 0.012661700036340817, 0.00971613284539019, 0.007610876141773915, 0.006785498954120395, 0.006679273580125322, 0.005420367011520932, 0.005407154240274492, 0.0027208125782041126, 0.0016848568991487678, 0.0006772895976365767, -0.0006313179342891662, -0.0009953547066351431, -0.0011714270055168674, -0.0014158518557150224, -0.0022614286959988794, -0.002606448897446656, -0.004978826253277058, -0.00525220681637828, -0.005319993048385818, -0.006963210184908086, -0.007148446689157817, -0.007238

In [253]:
friends = ['apple', 'screen', 'bear']
foes = ['fence', 'cable', 'pitch']
neutrals = ['mug', 'fence']
assassin = 'rabbit'

def get_candidates_df():
    columns = ['word', 'rank', 'assassin_dist', 'friends_dist', 'foes_dist', 'neutrals_dist']
    candidates = pd.DataFrame({'word': all_words, 'rank': [i for i in range(1, len(all_words) + 1)]}, columns=columns)
    candidates.assassin_dist = candidates.word.apply(lambda word: distance(word, 'rabbit'))
    candidates.friends_dist = candidates.word.apply(lambda word: sum([distance(word, friend) for friend in friends]))
    candidates.foes_dist = candidates.word.apply(lambda word: sum([distance(word, foe) for foe in foes]))
    candidates.neutrals_dist = candidates.word.apply(lambda word: sum([distance(word, n) for n in neutrals]))
    return candidates

In [254]:
candidates = get_candidates_df()

In [258]:
candidates.sort_values(['assassin_dist', 'friends_dist', 'foes_dist', 'rank', 'neutrals_dist'], ascending=[False, True, False, True, False])

Unnamed: 0,word,rank,assassin_dist,friends_dist,foes_dist,neutrals_dist
25846,Randell,25847,1.231181,3.644422,3.384413,2.248203
20287,Mohegan,20288,1.225998,3.250139,3.185816,2.149471
28382,Fultz,28383,1.205677,3.521329,3.059045,2.132518
27656,Ismaili,27657,1.201379,3.396440,3.376065,2.216903
27056,Hibernia,27057,1.200992,3.530811,3.081055,2.121074
...,...,...,...,...,...,...
12794,hamster,12795,0.334515,2.260171,2.644559,1.593182
9616,squirrel,9617,0.326396,2.142533,2.336007,1.331845
5196,pig,5197,0.280290,2.109287,2.438909,1.364644
7393,bunny,7394,0.261032,1.973688,2.527244,1.298891
