# Trung Vo NLP Project (The Hangman Game)

In [10]:
import time
import os
import collections
import string

# Model 1: based on the n-gram concept from NLP

In [22]:
class HangmanAPI(object):
    """Hangman letter guesser driven by character n-gram statistics.

    The model counts every 1- to ``MAX_GRAM``-gram of the dictionary words
    (each word padded with ``^`` in front and ``*`` behind) and, for every
    still-available letter, scores how likely that letter is to fill the
    blanks of the masked word given the neighbouring characters.

    Attributes:
        letter_avail: set of letters that have not been guessed yet.  The
            caller removes a letter from it after each guess.
        full_dictionary: every word read from the dictionary file.
        current_dictionary: the words the model is currently trained on.
        ngrams: ``{order: [ngram, ...]}`` with every n-gram occurrence.
        freqs: ``{order: Counter}`` with the occurrence count of each n-gram.
    """

    # Highest n-gram order that is trained and consulted (orders 1..MAX_GRAM).
    MAX_GRAM = 6

    def __init__(self, dictionary_location="dictionary.txt"):
        """Load the dictionary and train the n-gram model on it.

        Args:
            dictionary_location: path of a text file with one word per line.
        """
        self.letter_avail = set(string.ascii_lowercase)
        self.full_dictionary = self.build_dictionary(dictionary_location)
        self.current_dictionary = self.full_dictionary
        self.ngrams_model(self.current_dictionary)

    def ngrams_model(self, current_dictionary):
        """(Re)build ``self.ngrams`` and ``self.freqs`` from the given words.

        For order ``g`` each word is padded with ``g - 1`` ``^`` characters
        in front and ``g - 1`` ``*`` characters behind, so that n-grams also
        capture how words start and end.

        Args:
            current_dictionary: iterable of words to train on.
        """
        self.ngrams = collections.defaultdict(list)
        self.freqs = collections.defaultdict(dict)

        orders = range(1, self.MAX_GRAM + 1)
        for word in current_dictionary:
            for gram in orders:
                padded = "^" * (gram - 1) + word + "*" * (gram - 1)
                # Every window of length `gram`; the range is empty when the
                # padded word is shorter than the window.
                self.ngrams[gram].extend(
                    padded[start:start + gram]
                    for start in range(len(padded) - gram + 1)
                )

        for gram in orders:
            self.freqs[gram] = collections.Counter(self.ngrams[gram])

    def guess(self, word):
        """Return the next letter to guess for the masked ``word``.

        Args:
            word: the current pattern, ``_`` for unknown letters.  Letters
                may be separated by spaces (``"_ a _"``); spaces are ignored.

        Returns:
            A single letter taken from ``self.letter_avail``.

        Raises:
            ValueError: if ``self.letter_avail`` is empty.
        """
        pattern = word.replace(" ", "")

        # The padded pattern depends only on the n-gram order, not on the
        # candidate letter, so build it once per order.
        padded_by_gram = {
            gram: "^" * (gram - 1) + pattern + "*" * (gram - 1)
            for gram in range(2, self.MAX_GRAM + 1)
        }

        best_letter = None
        best_score = 0
        # Sorted iteration makes ties resolve the same way on every run
        # (plain set iteration order depends on string hashing).
        for ch in sorted(self.letter_avail):
            scores = self._backoff_scores(ch, padded_by_gram)
            if not scores:
                continue  # no n-gram evidence for this letter
            score = sum(scores) / len(scores)
            if score > best_score:
                best_score = score
                best_letter = ch

        if best_letter is not None:
            return best_letter
        # No pattern matched at any order (typical for the very first guess).
        return self._unigram_guess(len(pattern))

    def _backoff_scores(self, ch, padded_by_gram):
        """Score ``ch`` with the highest n-gram order that matches anything.

        Orders are tried from ``MAX_GRAM`` down to 2.  For every blank the
        letter is tried as the last symbol of the n-gram ending at the blank
        and as the first symbol of the n-gram starting at it; each hit
        contributes ``Count(ngram) / Count(context)``.

        Returns:
            The list of ratios found at the first order with a hit, or an
            empty list when no order matches.
        """
        for gram in range(self.MAX_GRAM, 1, -1):
            padded = padded_by_gram[gram]
            size = len(padded)
            model = self.freqs[gram]
            context_model = self.freqs[gram - 1]
            scores = []

            for i, symbol in enumerate(padded):
                if symbol != "_":
                    continue
                # The bounds below keep at least one real word character in
                # the context.  An all-padding context has no count in the
                # lower-order model and would make the denominator zero.
                if i - gram >= 0:
                    key = padded[i - gram + 1:i] + ch
                    if key in model:
                        # chance of abc[ch] = Count(abc[ch]) / Count(abc)
                        scores.append(model[key] / context_model[key[:-1]])
                if i + gram < size:
                    key = ch + padded[i + 1:i + gram]
                    if key in model:
                        # chance of [ch]abc = Count([ch]abc) / Count(abc)
                        scores.append(model[key] / context_model[key[1:]])

            if scores:
                return scores
        return []

    def _unigram_guess(self, length):
        """Pick the available letter that occurs in the most words.

        Words of exactly ``length`` letters are preferred; if they offer no
        available letter, the whole current dictionary is used, and as a
        last resort the alphabetically first available letter is returned.
        Each word counts a letter once, however often it repeats in it.
        """
        same_length = [w for w in self.current_dictionary if len(w) == length]
        for candidates in (same_length, self.current_dictionary):
            counts = collections.Counter()
            for candidate in candidates:
                counts.update(set(candidate))
            usable = [ch for ch in counts if ch in self.letter_avail]
            if usable:
                # Highest count first, alphabetical order between equals.
                return min(usable, key=lambda ch: (-counts[ch], ch))

        if self.letter_avail:
            return min(self.letter_avail)
        raise ValueError("no letters left to guess")

    def build_dictionary(self, dictionary_file_location):
        """Read the dictionary file and return its lines as a list of words."""
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

# Play here

In [31]:
# Change secret_word to play; its letters are separated by single spaces.
secret_word = "m i c r o c e p h a l o u s"
# The masked word is derived from the secret so both always share the same
# layout: one '_' per letter, spaces kept in place.
word = ''.join(' ' if c == ' ' else '_' for c in secret_word)

attempts_remains = 6
game = HangmanAPI()

while attempts_remains > 0:
    print("# of tries remaining: {0}. Word: {1}.".format(attempts_remains, word))
    guess_letter = game.guess(word)
    print('Guessing letter: {0}'.format(guess_letter))
    game.letter_avail.remove(guess_letter)

    if guess_letter in secret_word:
        # Reveal every position of the secret that holds the guessed letter.
        word = ''.join(
            secret_ch if secret_ch == guess_letter else shown_ch
            for shown_ch, secret_ch in zip(word, secret_word)
        )
        print('Good guess!, updated Word:', word)
    else:
        print('Wrong guess!, updated Word:', word)
        attempts_remains -= 1
        # A wrong guess rules out every word containing that letter, so
        # retrain the model on the remaining words only.
        game.current_dictionary = [
            voca for voca in game.current_dictionary if guess_letter not in voca
        ]
        game.ngrams_model(game.current_dictionary)

    if '_' not in word:
        print('Congratulations, you won with {0} attempts remains.'.format(attempts_remains))
        break
else:
    # Reached only when the loop ends without `break`, i.e. the attempts ran
    # out; a win no longer prints the losing message as well.
    print('0 attempts left, you lost!')

# of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ _ _ _ _.
Guessing letter: i
Good guess!, updated Word: _ i _ _ _ _ _ _ _ _ _ _ _ _
# of tries remaining: 6. Word: _ i _ _ _ _ _ _ _ _ _ _ _ _.
Guessing letter: n
Wrong guess!, updated Word: _ i _ _ _ _ _ _ _ _ _ _ _ _
# of tries remaining: 5. Word: _ i _ _ _ _ _ _ _ _ _ _ _ _.
Guessing letter: t
Wrong guess!, updated Word: _ i _ _ _ _ _ _ _ _ _ _ _ _
# of tries remaining: 4. Word: _ i _ _ _ _ _ _ _ _ _ _ _ _.
Guessing letter: l
Good guess!, updated Word: _ i _ _ _ _ _ _ _ _ l _ _ _
# of tries remaining: 4. Word: _ i _ _ _ _ _ _ _ _ l _ _ _.
Guessing letter: e
Good guess!, updated Word: _ i _ _ _ _ e _ _ _ l _ _ _
# of tries remaining: 4. Word: _ i _ _ _ _ e _ _ _ l _ _ _.
Guessing letter: r
Good guess!, updated Word: _ i _ r _ _ e _ _ _ l _ _ _
# of tries remaining: 4. Word: _ i _ r _ _ e _ _ _ l _ _ _.
Guessing letter: a
Good guess!, updated Word: _ i _ r _ _ e _ _ a l _ _ _
# of tries remaining: 4. Word: _ i _ r _ _ e _ _ a l _ _ _.


# Model2

Work in progress: planning to train a deep-learning model based on the Transformer architecture.