# Proposal algorithm

Let's make a proposal algorithm for the Spymaster.

In [5]:
import numpy as np
import spacy
import nltk
from string import ascii_lowercase
from itertools import combinations

The `nltk` package provides an English word list (the `words` corpus), which can be downloaded through the package itself.

In [6]:
# Download the NLTK 'words' corpus (an English word list); it is read later
# through nltk.corpus.words to restrict the set of candidate clue words.
nltk.download('words')

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\mattg\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [162]:
class SpyMaster:
    """Spymaster bot that proposes a clue word for a subset of its team's board words.

    Every candidate clue ("proposal") word and every board word is embedded with
    the spaCy model, the embeddings are L2-normalised, and the cosine similarity
    between each proposal and each board word is precomputed at construction.
    ``make_proposal`` then searches all combinations of the team's words for the
    proposal / combination pair with the best score.

    Class attributes (shared by all instances, built once at class definition):
        nlp           : the loaded spaCy pipeline ('en_core_web_lg')
        english_words : upper-cased words of the NLTK 'words' corpus
        vocab         : upper-cased spaCy vocabulary strings that are also in
                        ``english_words``; the pool of candidate proposal words
    """

    nlp = spacy.load('en_core_web_lg')
    english_words = {word.upper() for word in nltk.corpus.words.words()}
    vocab = {word.upper() for word in nlp.vocab.strings}.intersection(english_words)

    @classmethod
    def set_vocab(cls, possible_words: set) -> None:
        """Restrict the shared proposal vocabulary to ``possible_words``.

        Arguments
        ------------
        possible_words : collection of words allowed as proposals; matching
                         against the spaCy vocabulary is case-insensitive, the
                         same way the default class-level vocabulary is built

        The vocabulary is stored as a set (not a list) because ``__init__``
        relies on set difference to exclude the words on the board.
        """
        allowed = {word.upper() for word in possible_words}
        cls.vocab = {word.upper() for word in cls.nlp.vocab.strings}.intersection(allowed)

    def __init__(self, board_dict: dict, my_team: str, alpha: float = 0.01) -> None:
        """Embed the board and all candidate proposals and precompute their similarities.

        Arguments
        ------------
        board_dict : dictionary mapping each team name to the list of board
                     words belonging to that team, e.g.
                     {'blue': [...], 'orange': [...], 'white': [...], 'black': [...]}
        my_team : string, which team the bot is on (must be a key in board_dict)
        alpha : scoring model parameter; weight of the bonus given per
                additional target word in a proposal

        Raises
        ------------
        ValueError : if ``my_team`` is not a key of ``board_dict``
        """
        if my_team not in board_dict:
            raise ValueError('Argument "my_team" should be a key in the dictionary argument "board_dict"')

        # convert all letters to uppercase when setting instance board words variable
        self.board_dict = {team: [word.upper() for word in words] for team, words in board_dict.items()}

        # concatenate all board words into a single list, remembering which
        # positions of that list hold the words of the bot's own team
        self.board_words = []
        for team, words in self.board_dict.items():
            if team == my_team:
                first_index = len(self.board_words)
                # the stop must be offset by the start, otherwise any team that
                # is not listed first gets empty / wrong indices
                self.team_word_indices = np.arange(first_index, first_index + len(words))
            self.board_words.extend(words)

        # all possible proposal words: the vocabulary minus the words on the board
        # (set(...) keeps this working even if vocab was replaced by a list)
        self.proposal_words = list(set(self.vocab).difference(self.board_words))

        # run both word lists through the spaCy pipeline (lazy generators)
        board_docs = SpyMaster.nlp.pipe(self.board_words)
        proposal_docs = SpyMaster.nlp.pipe(self.proposal_words)

        # get embeddings for words on the board and scale them to unit L2 norm
        self.board_embeddings = np.array([doc.vector for doc in board_docs])
        board_embedding_norms = np.linalg.norm(self.board_embeddings, axis=1, ord=2)
        # a board word without an embedding has norm 0; dividing by 1 instead
        # keeps its vector at zero (similarity 0) rather than producing NaNs
        board_embedding_norms[board_embedding_norms == 0] = 1.0
        self.board_embeddings = self.board_embeddings / board_embedding_norms[:, None]

        # get proposal word embeddings, calculate norms
        self.proposal_embeddings = np.array([doc.vector for doc in proposal_docs])
        proposal_embedding_norms = np.linalg.norm(self.proposal_embeddings, axis=1, ord=2)
        # drop proposal words with zero-norm embeddings, then scale the rest to unit norm
        nonzero_norm_mask = proposal_embedding_norms != 0
        self.proposal_embeddings = self.proposal_embeddings[nonzero_norm_mask]
        self.proposal_embeddings = self.proposal_embeddings / proposal_embedding_norms[nonzero_norm_mask, None]
        self.proposal_words = np.array(self.proposal_words)[nonzero_norm_mask].tolist()

        # cosine similarity between every proposal (rows) and every board word (columns)
        self.proposal_board_similarities = self.proposal_embeddings @ self.board_embeddings.T

        self.alpha = alpha

        self.my_team_score = 0
        self.other_team_score = 0

    def make_proposal(self) -> tuple:
        """Find the best-scoring proposal word and the team words it targets.

        Every non-empty combination of the team's board words is tried. For a
        combination of ``k`` words each proposal scores

            mean cosine similarity to the k words
            + k * alpha * (other_team_score + 1) / (my_team_score + 1)

        and the overall maximum over combinations and proposals is returned.
        The number of combinations grows as 2 ** (number of team words).

        Returns
        ------------
        (target_words, proposal_word) : the list of targeted board words and
                                        the proposed clue word

        Raises
        ------------
        ValueError : if the team has no board words or there are no proposal words
        """
        if len(self.team_word_indices) == 0 or len(self.proposal_words) == 0:
            raise ValueError('Cannot make a proposal without team words and proposal words')

        # score ratio component of the final proposal score; it does not depend
        # on the combination, so it is computed once
        team_score_ratio = self.alpha * (self.other_team_score + 1) / (self.my_team_score + 1)

        # start below any attainable score so that a best candidate is always
        # recorded, even when every score is <= 0
        best_combination_score = -np.inf
        target_words = []
        highest_score_word = None

        # loop through possible numbers of words to propose
        for num_words in range(1, len(self.team_word_indices) + 1):
            # loop through unique combinations of those words
            for comb in combinations(self.team_word_indices, num_words):
                # similarities between all proposal words and the words in the combination
                combination_similarities = self.proposal_board_similarities[:, list(comb)]
                # mean similarity between each proposal word and all words in the combination
                combination_mean_similarities = combination_similarities.mean(axis=1)
                # scores for each proposal
                proposal_scores = combination_mean_similarities + (team_score_ratio * num_words)
                highest_score_idx = np.argmax(proposal_scores)
                highest_score = proposal_scores[highest_score_idx]
                # keep the best (combination, proposal) pair seen so far
                if highest_score > best_combination_score:
                    best_combination_score = highest_score
                    # comb is in ascending index order, so this preserves board order
                    target_words = [self.board_words[i] for i in comb]
                    highest_score_word = self.proposal_words[highest_score_idx]

        return target_words, highest_score_word

In [163]:
# Example board: maps each team/colour name to the list of board words it owns.
# NOTE(review): 'white' and 'black' are presumably the neutral and assassin
# words — the SpyMaster class does not treat them specially; confirm intent.
board = {'blue': ['STEEL', 'CHURCHILL', 'OPERA', 'BULB', 'NOTRE DAME', 'PASTE', 'HEAD', 'FIELD'],
         'orange': ['COVER', 'SATURN', 'COLOSSEUM', 'PEARL', 'CLEOPATRA', 'NYLON', 'MARIE CURIE', 'GEAR'],
         'white': ['UNICORN', 'HELMET', 'GATES', 'SPRAY', 'SPINE', 'SILK', 'ROAD', 'COUNTRY'],
         'black': ['STRING']}

In [164]:
# Build a spymaster for the blue team (alpha keeps its default value); this
# embeds the board words and every candidate proposal word.
spymaster = SpyMaster(board, my_team = 'blue')

In [165]:
# Returns a pair: (list of targeted board words, proposed clue word).
spymaster.make_proposal()

(['STEEL'], 'STAINLESS')