# Perfect Hangman Algorithm

In [None]:
import nltk
nltk.download('words')

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [None]:
import time
from nltk.corpus import words as wordlist
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as graph
from collections import Counter


words = list(set(w.lower() for w in wordlist.words()))
df_words = pd.DataFrame({'word': words})
df_words['len'] = df_words['word'].apply(lambda w: len(w))
df_words['set'] = df_words['word'].apply(lambda w: set(w))


def is_letter_in_word(word, letter):
    return letter in word

# Basic Algorithm

```
0. Start with the what words have the n letters in them

remaining_words = complete_dictionary
while len(potential != 1):
    most_common_letter = get_common_letter(remaining_words)
    if most_common_letter in remaining_words:
        remaining_words = {word for word in remaining_words if most_common_letter in word}
    else:
        do opposite
DONE
```

In [None]:
class BlindGuesser:
    def __init__(self, ans_word_len: int) -> None:
        self.memory = set()
        self.df_remain_ = df_words[df_words['len'] == ans_word_len]

    def guess(self, hidden_word: HiddenWord):
        if len(self.df_remain_) == 1:
            return self.df_remain_.iloc[0]['word']
        else:
            counts = Counter()
            self.df_remain_['set'].apply(lambda s: counts.update(s))
            for letter in self.memory:
                del counts[letter]
            most_common_letter = counts.most_common(1)[0][0]
            self.memory.add(most_common_letter)

            time.sleep(1)

            # Check if the letter is in the word
            hidden_word.check_letter(most_common_letter)
            response = most_common_letter in hidden_word.letters
            selector = self.df_remain_['set'].apply(lambda s: most_common_letter in s)
            self.df_remain_ = self.df_remain_[selector if response else ~selector]
            print(f'-> Number of remaining words: {len(self.df_remain_):,}')
            return None


class HiddenWord:
    def __init__(self, word: str) -> None:
        assert word in df_words['word'].values, f'`{word}` is not a valid word'
        self.word = word
        self.letters = set(word)
        self.past_inputs = set()

    def __repr__(self) -> str:
        return f'HiddenWord("{self.word}")'

    def __len__(self) -> int:
        return len(self.word)

    def check_letter(self, letter, should_print=True):
        self.past_inputs.add(letter)
        matches = self.letters.intersection(self.past_inputs)
        if should_print:
            print(' '.join([l if l in matches else '_' for l in self.word]))
        return {f'{i}:{l}' for i, l in enumerate(self.word) if l in matches}

In [None]:
target_word = HiddenWord('mitochondria')
print(target_word)

game = BlindGuesser(len(target_word))

ans = None
while ans is None:
    ans = game.guess(target_word)
    print()
print(' '.join(ans))
print(f'Your word is `{ans}`')

HiddenWord("mitochondria")
_ i _ _ _ _ _ _ _ _ i _
-> Number of remaining words: 15,474

_ i _ _ _ _ _ _ _ _ i _
-> Number of remaining words: 4,345

_ i _ o _ _ o _ _ _ i _
-> Number of remaining words: 3,472

_ i _ o _ _ o _ _ _ i a
-> Number of remaining words: 2,664

_ i t o _ _ o _ _ _ i a
-> Number of remaining words: 1,849

_ i t o _ _ o n _ _ i a
-> Number of remaining words: 1,153

_ i t o c _ o n _ _ i a
-> Number of remaining words: 592

_ i t o c _ o n _ r i a
-> Number of remaining words: 280

_ i t o c _ o n _ r i a
-> Number of remaining words: 179

_ i t o c _ o n _ r i a
-> Number of remaining words: 118

_ i t o c h o n _ r i a
-> Number of remaining words: 49

_ i t o c h o n _ r i a
-> Number of remaining words: 29

m i t o c h o n _ r i a
-> Number of remaining words: 14

m i t o c h o n _ r i a
-> Number of remaining words: 11

m i t o c h o n d r i a
-> Number of remaining words: 3

m i t o c h o n d r i a
-> Number of remaining words: 2



IndexError: ignored

In [None]:
game.df_remain_

Unnamed: 0,word,len,set
104235,mitochondria,12,"{a, c, d, o, i, h, t, r, n, m}"
125148,machairodont,12,"{a, c, d, o, i, h, t, r, n, m}"


# Location Aware Strategy

In [None]:
class LocationAwareGuesser:
    def __init__(self, ans_word_len: int) -> None:
        self.memory = None
        self.past_guesses = set()
        self.df_remain_ = df_words[df_words['len'] == ans_word_len]
        self.df_remain_['encoded'] = self.df_remain_['word'].apply(lambda word: {f'{i}:{l}' for i, l in enumerate(word)})

    def guess(self, hidden_word: HiddenWord):
        if len(self.df_remain_) == 1:
            return self.df_remain_.iloc[0]['word']
        else:
            # Guess the most common letter not currently guessed
            counts = Counter()
            self.df_remain_['set'].apply(lambda s: counts.update(s))
            for letter in self.past_guesses:
                del counts[letter]
            most_common_letter = counts.most_common(1)[0][0]
            self.past_guesses.add(most_common_letter)

            time.sleep(1)

            # Check if the letter is in the word
            response = hidden_word.check_letter(most_common_letter)
            selector = self.df_remain_['encoded'].apply(lambda s: response.issubset(s))
            self.df_remain_ = self.df_remain_[selector]
            print(f'-> Number of remaining words: {len(self.df_remain_):,}')
            return None


In [None]:
target_word = HiddenWord('jazz')  # ('mitochondria')
print(target_word)

perfect_algo = LocationAwareGuesser(len(target_word))
ans = None
while ans is None:
    ans = perfect_algo.guess(target_word)
    print()
print(' '.join(ans))
print(f'Your word is `{ans}`')

HiddenWord("jazz")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df_remain_['encoded'] = self.df_remain_['word'].apply(lambda word: {f'{i}:{l}' for i, l in enumerate(word)})


_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

_ a _ _
-> Number of remaining words: 961

j a _ _
-> Number of remaining words: 29

j a z z
-> N