In [None]:
import numpy as np
import random
import gensim
from nltk.corpus import words
from functools import reduce
import heapq


In [None]:
# Read the word list as a nested list of strings
# (presumably one card per CSV row with one word per side -- confirm against wordlist.csv).
wrds = np.genfromtxt('wordlist.csv', dtype=str, delimiter=',').tolist()

# Draw 25 distinct cards first, then flip a coin per card to decide which of its
# two entries (index 0 or 1) ends up on the board; board words are lower-cased.
board = [card[int(random.random() > 0.5)].lower() for card in random.sample(wrds, 25)]

# Deal the roles by position on the shuffled board:
# 9 words for player 1, 8 for player 2, 7 neutral words and a single assassin.
player1, player2, neutral, assassin = board[:9], board[9:17], board[17:24], board[24]


In [None]:

# load model pre-trained on Google News corpus (downloaded from https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit)
# Later cells call clue(model_gnews, ...), so the name has to exist after a fresh
# "Restart & Run All"; leaving the load commented out makes those cells fail with NameError.
try:
    model_gnews = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)
except FileNotFoundError:
    # The Google News vectors are a separate, large download. Keep the notebook usable
    # for the wiki-only cells, but say clearly why the Google News cells cannot run.
    model_gnews = None
    print('GoogleNews-vectors-negative300.bin not found: model_gnews is None, '
          'cells that use model_gnews will not work until the file is downloaded.')
model_wiki = gensim.models.KeyedVectors.load_word2vec_format("wikipedia-vectors.bin", binary=True)
# choose candidate clues from nltk's words corpus, which itself is drawn from the UNIX words file
dct = words.words('en')


In [None]:
# returns potential clues which are closer to all members of targets than all members of avoids, sorted in order of relevance to targets
def clue (model, targets, avoids, similarity_f, score_f, n=1):
    """Return the `n` best clue words for `targets`, best first.

    Every word of the module-level candidate list `dct` that the model knows is
    considered. A candidate is rejected when

    * it contains a target or is contained in one (compared case-insensitively,
      so "Apple" is not offered as a clue for the board word "apple"), or
    * `avoids` is non-empty and the candidate is more similar to some avoid word
      than it is to its least similar target.

    Parameters
    ----------
    model : word-vector model; its vocabulary is read from `key_to_index`
        (gensim >= 4) or, when that attribute is absent, from `vocab`.
    targets : list of str
        Words the clue should point at.
    avoids : list of str
        Words the clue must not point at; an empty list disables this filter.
    similarity_f : callable(model, word, other) -> number
        Similarity between a candidate and a board word.
    score_f : callable(list of numbers) -> number
        Collapses the per-target similarities into one score (e.g. `sum`, `min`).
    n : int, default 1
        Maximum number of clues to return; `n <= 0` yields an empty list.

    Returns
    -------
    list of (score, word) tuples sorted by descending score.
    """
    # Nothing was asked for; without this guard the heap logic below would
    # index into an empty heap.
    if n <= 0:
        return []

    # gensim 4 renamed KeyedVectors.vocab to key_to_index; accept either.
    vocab = getattr(model, 'key_to_index', None)
    if vocab is None:
        vocab = model.vocab

    lowered_targets = [target.lower() for target in targets]

    topclues = []  # min-heap: topclues[0] is the weakest clue currently kept
    for word in dct:
        if word not in vocab:
            continue

        # Cheap string test first: a clue may not overlap any target word.
        lowered_word = word.lower()
        if any(t in lowered_word or lowered_word in t for t in lowered_targets):
            continue

        # Computed once and reused for both the avoid filter and the score.
        target_similarities = [similarity_f(model, word, target) for target in targets]
        if avoids:
            closest_avoid = max(similarity_f(model, word, avoid) for avoid in avoids)
            if closest_avoid > min(target_similarities):
                continue

        score = score_f(target_similarities)
        if len(topclues) < n:
            heapq.heappush(topclues, (score, word))
        elif score > topclues[0][0]:
            heapq.heapreplace(topclues, (score, word))
    return sorted(topclues, key=lambda x: x[0], reverse=True)

In [None]:
def fast_clue (model, targets, avoids, similarity_f):
    """Find the single clue word that best links any two of `targets`.

    For every word of the module-level candidate list `dct` that the model
    knows, the two most similar targets are determined (targets that contain
    the word or are contained in it, compared case-insensitively, are not
    counted). The word is rated by the lower of those two similarities and is
    discarded when some word in `avoids` is more similar to it than that.

    Parameters
    ----------
    model : word-vector model; its vocabulary is read from `key_to_index`
        (gensim >= 4) or, when that attribute is absent, from `vocab`.
    targets : list of str
        Words of which the clue should hit two.
    avoids : list of str
        Words the clue must not be closer to than to its two targets.
    similarity_f : callable(model, word, other) -> number

    Returns
    -------
    tuple
        `(score, word, [target, target])` for the best clue, the two targets
        listed with the less similar one first; `(-1, None)` when no word
        qualifies.
    """
    # gensim 4 renamed KeyedVectors.vocab to key_to_index; accept either.
    vocab = getattr(model, 'key_to_index', None)
    if vocab is None:
        vocab = model.vocab

    lowered_targets = [(target, target.lower()) for target in targets]

    best_clue = (-1, None)
    for word in dct:
        if word not in vocab:
            continue

        lowered_word = word.lower()
        closest_targets = []  # min-heap holding the two most similar targets
        for target, lowered_target in lowered_targets:
            if lowered_target in lowered_word or lowered_word in lowered_target:
                continue
            score = similarity_f(model, word, target)
            if len(closest_targets) < 2:
                heapq.heappush(closest_targets, (score, target))
            elif score > closest_targets[0][0]:
                heapq.heapreplace(closest_targets, (score, target))

        if len(closest_targets) < 2:
            continue
        weakest_link = closest_targets[0][0]
        if weakest_link <= best_clue[0]:
            continue

        # Only scan the avoid words for candidates that would otherwise win.
        # Same rule as clue(): reject when an avoid word is strictly closer
        # than the weakest of the chosen targets.
        if any(similarity_f(model, word, avoid) > weakest_link for avoid in avoids):
            continue

        best_clue = (weakest_link, word, [x[1] for x in closest_targets])
    return best_clue

In [None]:
def model_sim(model, word, target):
    """Similarity function for clue()/fast_clue(): defers to `model.similarity(word, target)`."""
    similarity = model.similarity(word, target)
    return similarity


In [None]:
# Player 1 has to steer clear of every other card: player 2's words, the neutral words and the assassin.
print ('Clue for board player 1:', player1, '\navoids:', player2 + neutral + [assassin])
# clue() has no default for score_f, so the call fails with TypeError unless one is passed.
# min rates a clue by its weakest target, the same scoring the pairwise searches further down use.
print (clue(model_wiki, player1, player2 + neutral + [assassin], model_sim, min))

In [None]:
# Top five clues for the first two board words using the Google News vectors
# (model_gnews has to be loaded), once scored by the sum of the target
# similarities and once by the smaller of the two. No avoid words are used.
candidates = clue(model_gnews, board[:2], avoids=[], similarity_f=model_sim, score_f=sum, n=5)
candidates_1 = clue(model_gnews, board[:2], avoids=[], similarity_f=model_sim, score_f=min, n=5)
print("SUM")
print('Top clues for pair', board[:2], ':', candidates)
print("MIN")
print('Top clues for pair', board[:2], ':', candidates_1)


In [None]:
# Top five clues for the first two board words using the Wikipedia vectors,
# once scored by the sum of the target similarities and once by the smaller
# of the two. No avoid words are used.
candidates = clue(model_wiki, board[:2], avoids=[], similarity_f=model_sim, score_f=sum, n=5)
candidates_1 = clue(model_wiki, board[:2], avoids=[], similarity_f=model_sim, score_f=min, n=5)
print("SUM")
print('Top clues for pair', board[:2], ':', candidates)
print("MIN")
print('Top clues for pair', board[:2], ':', candidates_1)


In [None]:
# Min-heap of the 15 best (score, clue, first target, second target) entries
# found so far; clues[0] is always the weakest entry still kept.
clues = []

# Search every unordered pair of player 2's words with the Google News vectors
# (model_gnews has to be loaded), treating player 1's words as the ones to avoid.
for i in range(len(player2)):
    print(clues)  # show the heap as it stands before each outer step
    for j in range(i + 1, len(player2)):
        targets = [player2[i], player2[j]]
        candidates = clue(model_gnews, targets, player1, model_sim, min, n=5)
        for candidate in candidates:
            if len(clues) >= 15:
                # heap is full: only a strictly higher score evicts the weakest entry
                if candidate[0] > clues[0][0]:
                    heapq.heapreplace(clues, (candidate[0], candidate[1], targets[0], targets[1]))
            else:
                heapq.heappush(clues, (candidate[0], candidate[1], targets[0], targets[1]))

In [None]:
# Print the kept clues from lowest to highest score, then both players' words for reference.
clues_sorted = sorted(clues, key=lambda entry: entry[0])
for c in clues_sorted:
    print(c)
print(player2)
print(player1)

In [None]:
# Min-heap of the 15 best (score, clue, first target, second target) entries
# found so far; clues_wiki[0] is always the weakest entry still kept.
clues_wiki = []

# Search every unordered pair of player 2's words with the Wikipedia vectors,
# treating player 1's words as the ones to avoid.
for i in range(len(player2)):
    print(clues_wiki)  # show the heap as it stands before each outer step
    for j in range(i + 1, len(player2)):
        targets = [player2[i], player2[j]]
        candidates = clue(model_wiki, targets, player1, model_sim, min, n=5)
        for candidate in candidates:
            if len(clues_wiki) >= 15:
                # heap is full: only a strictly higher score evicts the weakest entry
                if candidate[0] > clues_wiki[0][0]:
                    heapq.heapreplace(clues_wiki, (candidate[0], candidate[1], targets[0], targets[1]))
            else:
                heapq.heappush(clues_wiki, (candidate[0], candidate[1], targets[0], targets[1]))

In [None]:
# Print the kept clues from lowest to highest score, then both players' words for reference.
clues_wiki_sorted = sorted(clues_wiki, key=lambda entry: entry[0])
for c in clues_wiki_sorted:
    print(c)
print(player2)
print(player1)