In [1]:
import json
import requests
import random
import string
import secrets
import time
import re
import collections
import numpy as np
try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

In [2]:
HANGMAN_URL = "https://www.trexsim.com/trexsim/hangman"

In [3]:
class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []
        
        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)  # done      
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()
        
        self.current_dictionary = [] # done
        
        # newly added variables
        self.incorrect_guesses = [] # done
        self.letter_set = [i[0] for i in self.full_dictionary_common_letter_sorted] # done 
        self.probabilities = [0] * len(self.letter_set)  # done
        self.unigram, self.bigram, self.trigram, self.fourgram, self.fivegram, self.sixgram, self.sevengram, self.eightgram, self.ninegram  = self.build_n_grams(self.full_dictionary) # done
        self.practice_wins = 0
        
        
    def guess(self, word):
        '''
        output guess for the given configuration of guessed and unguessed letters
        '''
        # keep track of incorrect guesses to update the n-grams
        self.incorrect_guesses = list(set(self.guessed_letters) - set(word))
        
        # recalibrate if <=3 lives left and last guess is wrong
        if len(self.guessed_letters) > 0 and self.guessed_letters[-1] in self.incorrect_guesses and self.tries_remaining <= 3:
            self.recalibrate_n_grams()
        
        # set initial probabilities to zero
        self.probabilities = [0] * len(self.letter_set)

        # replace "_" with "." as "." indicates any character in regular expressions
        clean_word = word[::2]
        
        # run through 9-gram function
        return self.ninegram_probs(clean_word)
    
    
    def build_n_grams(self, dictionary):
        '''
        create nested dictionary for each n gram using the list of words available
        bigram also depends on the length of word
        '''
        unigram = collections.defaultdict(lambda: collections.defaultdict(int))
        bi_gram = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int)))
        tri_gram = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int)))
        four_gram = collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int))))
        five_gram = collections.defaultdict(lambda: collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int)))))
        six_gram = collections.defaultdict(lambda: collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int))))))
        seven_gram = collections.defaultdict(lambda: collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int)))))))
        eight_gram = collections.defaultdict(lambda: collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int))))))))
        nine_gram = collections.defaultdict(lambda: collections.defaultdict(lambda:collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(int)))))))))
        
        # go through each word in the dictionary
        for word in dictionary:
            for i in range(len(word) - 8):
                bi_gram[len(word)][word[i]][word[i+1]] += 1
                tri_gram[word[i]][word[i+1]][word[i+2]] += 1
                four_gram[word[i]][word[i+1]][word[i+2]][word[i+3]] += 1
                five_gram[word[i]][word[i+1]][word[i+2]][word[i+3]][word[i+4]] += 1
                six_gram[word[i]][word[i+1]][word[i+2]][word[i+3]][word[i+4]][word[i+5]] += 1
                seven_gram[word[i]][word[i+1]][word[i+2]][word[i+3]][word[i+4]][word[i+5]][word[i+6]] += 1
                eight_gram[word[i]][word[i+1]][word[i+2]][word[i+3]][word[i+4]][word[i+5]][word[i+6]][word[i+7]] += 1
                nine_gram[word[i]][word[i+1]][word[i+2]][word[i+3]][word[i+4]][word[i+5]][word[i+6]][word[i+7]][word[i+8]] += 1
                
            i = len(word) - 8
            
            # fill out the rest of the n-grams for words too short
            if len(word) == 2:
                bi_gram[len(word)][word[0]][word[1]] += 1
                
            elif len(word) == 3:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                
            elif len(word) == 4:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                bi_gram[len(word)][word[2]][word[3]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                tri_gram[word[1]][word[2]][word[3]] += 1
                four_gram[word[0]][word[1]][word[2]][word[3]] += 1
            
            elif len(word) == 5:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                bi_gram[len(word)][word[2]][word[3]] += 1
                bi_gram[len(word)][word[3]][word[4]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                tri_gram[word[1]][word[2]][word[3]] += 1
                tri_gram[word[2]][word[3]][word[4]] += 1
                four_gram[word[0]][word[1]][word[2]][word[3]] += 1
                four_gram[word[1]][word[2]][word[3]][word[4]] += 1
                five_gram[word[0]][word[1]][word[2]][word[3]][word[4]] += 1
                
            elif len(word) == 6:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                bi_gram[len(word)][word[2]][word[3]] += 1
                bi_gram[len(word)][word[3]][word[4]] += 1
                bi_gram[len(word)][word[4]][word[5]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                tri_gram[word[1]][word[2]][word[3]] += 1
                tri_gram[word[2]][word[3]][word[4]] += 1
                tri_gram[word[3]][word[4]][word[5]] += 1
                four_gram[word[0]][word[1]][word[2]][word[3]] += 1
                four_gram[word[1]][word[2]][word[3]][word[4]] += 1
                four_gram[word[2]][word[3]][word[4]][word[5]] += 1
                five_gram[word[0]][word[1]][word[2]][word[3]][word[4]] += 1
                five_gram[word[1]][word[2]][word[3]][word[4]][word[5]] += 1
                six_gram[word[0]][word[1]][word[2]][word[3]][word[4]][word[5]] += 1
            
            elif len(word) == 7:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                bi_gram[len(word)][word[2]][word[3]] += 1
                bi_gram[len(word)][word[3]][word[4]] += 1
                bi_gram[len(word)][word[4]][word[5]] += 1
                bi_gram[len(word)][word[5]][word[6]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                tri_gram[word[1]][word[2]][word[3]] += 1
                tri_gram[word[2]][word[3]][word[4]] += 1
                tri_gram[word[3]][word[4]][word[5]] += 1
                tri_gram[word[4]][word[5]][word[6]] += 1
                four_gram[word[0]][word[1]][word[2]][word[3]] += 1
                four_gram[word[1]][word[2]][word[3]][word[4]] += 1
                four_gram[word[2]][word[3]][word[4]][word[5]] += 1
                four_gram[word[3]][word[4]][word[5]][word[6]] += 1
                five_gram[word[0]][word[1]][word[2]][word[3]][word[4]] += 1
                five_gram[word[1]][word[2]][word[3]][word[4]][word[5]] += 1
                five_gram[word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                six_gram[word[0]][word[1]][word[2]][word[3]][word[4]][word[5]] += 1
                six_gram[word[1]][word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                seven_gram[word[0]][word[1]][word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                
            # fill out rest of the (1-4)-grams
            elif len(word) >= 8:
                bi_gram[len(word)][word[0]][word[1]] += 1
                bi_gram[len(word)][word[1]][word[2]] += 1
                bi_gram[len(word)][word[2]][word[3]] += 1
                bi_gram[len(word)][word[3]][word[4]] += 1
                bi_gram[len(word)][word[4]][word[5]] += 1
                bi_gram[len(word)][word[5]][word[6]] += 1
                bi_gram[len(word)][word[6]][word[7]] += 1
                tri_gram[word[0]][word[1]][word[2]] += 1
                tri_gram[word[1]][word[2]][word[3]] += 1
                tri_gram[word[2]][word[3]][word[4]] += 1
                tri_gram[word[3]][word[4]][word[5]] += 1
                tri_gram[word[4]][word[5]][word[6]] += 1
                tri_gram[word[5]][word[6]][word[7]] += 1
                four_gram[word[0]][word[1]][word[2]][word[3]] += 1
                four_gram[word[1]][word[2]][word[3]][word[4]] += 1
                four_gram[word[2]][word[3]][word[4]][word[5]] += 1
                four_gram[word[3]][word[4]][word[5]][word[6]] += 1
                four_gram[word[4]][word[5]][word[6]][word[7]] += 1
                five_gram[word[0]][word[1]][word[2]][word[3]][word[4]] += 1
                five_gram[word[1]][word[2]][word[3]][word[4]][word[5]] += 1
                five_gram[word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                five_gram[word[3]][word[4]][word[5]][word[6]][word[7]] += 1
                six_gram[word[0]][word[1]][word[2]][word[3]][word[4]][word[5]] += 1
                six_gram[word[1]][word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                six_gram[word[2]][word[3]][word[4]][word[5]][word[6]][word[7]] += 1
                seven_gram[word[0]][word[1]][word[2]][word[3]][word[4]][word[5]][word[6]] += 1
                seven_gram[word[1]][word[2]][word[3]][word[4]][word[5]][word[6]][word[7]] += 1
                eight_gram[word[1]][word[1]][word[2]][word[3]][word[4]][word[5]][word[6]][word[7]] += 1
            # fill out unigrams
            for letter in set(word):
                unigram[len(word)][letter] += 1
                    
        return unigram, bi_gram, tri_gram, four_gram, five_gram, six_gram, seven_gram, eight_gram, nine_gram
                    
        
    def recalibrate_n_grams(self):
        '''
        recreate the n gram dictionaries by removing the words which are not relevant to the given word
        '''
        # updates the dictionary to remove words containing incorrectly guessed letters
        new_dict = [word for word in self.full_dictionary if not set(word).intersection(set(self.incorrect_guesses))]
        self.unigram, self.bigram, self.trigram, self.fourgram, self.fivegram, self.sixgram, self.sevengram, self.eightgram, self.ninegram  = self.build_n_grams(new_dict)

        
        
        
    def ninegram_probs(self, word):
        ''' 
        perform 9 gram: update the probabilities of letters using ninegram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 8):
                        
            # case 1: "1 2 3 4 5 6 7 8 b"
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                anchor_letter_7 = word[i+6]
                anchor_letter_8 = word[i+7]
                
                # "1 2 3 4 5 6 7 8 b"
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8][letter] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 2: "1 2 3 4 5 6 7 b 8"
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] == '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                anchor_letter_7 = word[i+6]
                anchor_letter_8 = word[i+8]
                
                # "1 2 3 4 5 6 7 b 8"
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][letter][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 3: 1 2 3 4 5 6 b 7 8
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] == '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                # 1 2 3 4 5 6 b 7 8
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][letter][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 4: 1 2 3 4 5 b 6 7 8
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] == '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][letter][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 5: 1 2 3 4 b 5 6 7 8
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] == '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 6: 1 2 3 b 4 5 6 7 8
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 7: 1 2 b 3 4 5 6 7 8
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 8: 1 b 2 3 4 5 6 7 8
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 9: b 1 2 3 4 5 6 7 8
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_' and word[i+8] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                anchor_letter_8 = word[i+8]
                
                for j, letter in enumerate(self.letter_set):
                    if self.ninegram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][anchor_letter_8] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
                        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.5821205455345358)
        
        # run the next level down
        return self.eightgram_probs(word)

        
        
        
    def eightgram_probs(self, word):
        ''' 
        perform 8 gram: update the probabilities of letters using eightgram model
        '''
       
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 7):
                        
            # case 1: "1 2 3 4 5 6 7 b"
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                anchor_letter_7 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7][letter] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 2: "1 2 3 4 5 6 b 7"
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] == '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][letter][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 3: 1 2 3 4 5 b 6 7
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] == '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][letter][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 4: 1 2 3 4 b 5 6 7
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] == '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter][anchor_letter_5][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 5: 1 2 3 b 4 5 6 7
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 6: 1 2 b 3 4 5 6 7
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 7: 1 b 2 3 4 5 6 7
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 8: b 1 2 3 4 5 6 7
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_' and word[i+7] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                anchor_letter_7 = word[i+7]
                
                for j, letter in enumerate(self.letter_set):
                    if self.eightgram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][anchor_letter_7] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
                        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.13397272327320722)
        
        # run the next level down
        return self.sevengram_probs(word)
        

    def sevengram_probs(self, word):
        ''' 
        perform 7 gram: update the probabilities of letters using sevengram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 6):
                        
            # case 1: "1 2 3 4 5 6 b"
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6][letter] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 2: "1 2 3 4 5 b 6"
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] == '_' and word[i+6] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][letter][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 3: 1 2 3 4 b 5 6
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] == '_' and word[i+5] != '_' and word[i+6] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter][anchor_letter_5][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 4: 1 2 3 b 4 5 6
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4][anchor_letter_5][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 5: 1 2 b 3 4 5 6
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 6: 1 b 2 3 4 5 6
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 7: b 1 2 3 4 5 6
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_' and word[i+6] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                anchor_letter_6 = word[i+6]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sevengram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][anchor_letter_6] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
                        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.11415750109986801)
        
        # run the next level down
        return self.sixgram_probs(word)
    
        
    def sixgram_probs(self, word):
        ''' 
        perform 6 gram: update the probabilities of letters using sixgram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 5):
                        
            # case 1: "1 2 3 4 5 b"
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+4]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5][letter] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 2: "1 2 3 4 b 5"
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] == '_' and word[i+5] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                anchor_letter_5 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter][anchor_letter_5] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 3: 1 2 3 b 4 5
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_' and word[i+4] != '_' and word[i+5] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4][anchor_letter_5] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
               
            # case 4: 1 2 b 3 4 5
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4][anchor_letter_5] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
        
            # case 5: 1 b 2 3 4 5
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1

            # case 5: b 1 2 3 4 5
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_' and word[i+5] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                anchor_letter_5 = word[i+5]
                
                for j, letter in enumerate(self.letter_set):
                    if self.sixgram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][anchor_letter_5] > 0 and letter not in self.guessed_letters:
                        total_count += 1
                        letter_count[j] += 1
                        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.0859700835899692)
        
        # run the next level down
        return self.fivegram_probs(word)
    
        
    def fivegram_probs(self, word):
        ''' 
        perform 5 gram: update the probabilities of letters using fivegram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 4):
                        
            # case 1: 1 2 3 4 b
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+3]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter]
                        letter_count[j] += self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4][letter]
        
            # case 2: 1 2 3 b 4
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_' and word[i+4] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                anchor_letter_4 = word[i+4]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4] > 0 and letter not in self.guessed_letters:
                        total_count += self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4]
                        letter_count[j] += self.fivegram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter][anchor_letter_4]
               
            # case 3: 1 2 b 3 4
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_' and word[i+4] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fivegram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4] > 0 and letter not in self.guessed_letters:
                        total_count += self.fivegram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4]
                        letter_count[j] += self.fivegram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3][anchor_letter_4]
               
            # case 4: 1 b 2 3 4
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fivegram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4] > 0 and letter not in self.guessed_letters:
                        total_count += self.fivegram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4]
                        letter_count[j] += self.fivegram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3][anchor_letter_4]
        
            # case 5: b 1 2 3 4
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_' and word[i+4] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                anchor_letter_4 = word[i+4]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fivegram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4] > 0 and letter not in self.guessed_letters:
                        total_count += self.fivegram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4]
                        letter_count[j] += self.fivegram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3][anchor_letter_4]
        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.049599648042234934)
        
        # run the next level down
        return self.fourgram_probs(word)
    
    def fourgram_probs(self, word):
        ''' 
        perform 4 gram: update the probabilities of letters using fourgram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 3):
                        
            # case 1: 1 2 3 b 
            if word[i] != '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+2]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fourgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.fourgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter]
                        letter_count[j] += self.fourgram[anchor_letter_1][anchor_letter_2][anchor_letter_3][letter]
        
            # case 2: 1 2 b 3
            elif word[i] != '_' and word[i+1] != '_' and word[i+2] == '_' and word[i+3] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                anchor_letter_3 = word[i+3]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fourgram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3] > 0 and letter not in self.guessed_letters:
                        total_count += self.fourgram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3]
                        letter_count[j] += self.fourgram[anchor_letter_1][anchor_letter_2][letter][anchor_letter_3]
               
            # case 3: 1 b 2 3
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_' and word[i+3] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                
                for j, letter in enumerate(self.letter_set):
                    if self.fourgram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3] > 0 and letter not in self.guessed_letters:
                        total_count += self.fourgram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3]
                        letter_count[j] += self.fourgram[anchor_letter_1][letter][anchor_letter_2][anchor_letter_3]
               
            # case 4: b 1 2 3
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_' and word[i+3] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                anchor_letter_3 = word[i+3]
                
                # calculate occurences of "blank anchor_letter_1 anchor_letter_2" and for each letter not guessed yet
                for j, letter in enumerate(self.letter_set):
                    if self.fourgram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3] > 0 and letter not in self.guessed_letters:
                        total_count += self.fourgram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3]
                        letter_count[j] += self.fourgram[letter][anchor_letter_1][anchor_letter_2][anchor_letter_3]
        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.02326000879894413)
        
        # run the next level down
        return self.trigram_probs(word)

    def trigram_probs(self, word):
        ''' 
        perform 3 gram: update the probabilities of letters using trigram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)

        # traverse the word and find patterns that have three consecutive letters where one of them is blank
        for i in range(len(word) - 2):
                        
            # case 1: 1 2 b
            if word[i] != '_' and word[i+1] != '_' and word[i+2] == '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+1]
                
                for j, letter in enumerate(self.letter_set):
                    if self.trigram[anchor_letter_1][anchor_letter_2][letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.trigram[anchor_letter_1][anchor_letter_2][letter]
                        letter_count[j] += self.trigram[anchor_letter_1][anchor_letter_2][letter]
        
            # case 2: 1 b 2
            elif word[i] != '_' and word[i+1] == '_' and word[i+2] != '_':
                anchor_letter_1 = word[i]
                anchor_letter_2 = word[i+2]
                
                for j, letter in enumerate(self.letter_set):
                    if self.trigram[anchor_letter_1][letter][anchor_letter_2] > 0 and letter not in self.guessed_letters:
                        total_count += self.trigram[anchor_letter_1][letter][anchor_letter_2]
                        letter_count[j] += self.trigram[anchor_letter_1][letter][anchor_letter_2]
               
            # case 3: b 1 2
            elif word[i] == '_' and word[i+1] != '_' and word[i+2] != '_':
                anchor_letter_1 = word[i+1]
                anchor_letter_2 = word[i+2]
                
                for j, letter in enumerate(self.letter_set):
                    if self.trigram[letter][anchor_letter_1][anchor_letter_2] > 0 and letter not in self.guessed_letters:
                        total_count += self.trigram[letter][anchor_letter_1][anchor_letter_2]
                        letter_count[j] += self.trigram[letter][anchor_letter_1][anchor_letter_2]
        
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
        
        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.009683238011438628)
        
        # run the next level down
        return self.bigram_probs(word)
    
    
    def bigram_probs(self, word):
        ''' 
        perform 2 gram: update the probabilities of letters using bigram model
        '''
        
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)
        
        # traverse the word and find either patterns of "letter blank" or "blank letter"
        for i in range(len(word) - 1):
            # case 1: 1 b
            if word[i] != '_' and word[i+1] == '_':
                anchor_letter = word[i]
                
                # calculate occurences of "anchor_letter blank" and each letter not guessed yet
                for j, letter in enumerate(self.letter_set):
                    if self.bigram[len(word)][anchor_letter][letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.bigram[len(word)][anchor_letter][letter]
                        letter_count[j] += self.bigram[len(word)][anchor_letter][letter]
                            
            # case 2: b 1
            elif word[i] == '_' and word[i+1]!= '_':
                anchor_letter = word[i+1]
                
                # calculate occurences of "blank anchor_letter" and each letter not guessed yet
                for j, letter in enumerate(self.letter_set):
                    if self.bigram[len(word)][letter][anchor_letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.bigram[len(word)][letter][anchor_letter]
                        letter_count[j] += self.bigram[len(word)][letter][anchor_letter]
                                                                    
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count

        # interpolate probabilities between trigram and bigram
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * (0.001161460624725033) 
        
        # return letter associated with highest probability
        return self.unigram_probs(word)
    
    
    def unigram_probs(self, word):
        ''' 
        perform 1 gram: update the probabilities of letters using unigram model
        '''
                
        # vector of probabilities for each letter
        probs = [0] * len(self.letter_set)
        
        total_count = 0
        letter_count = [0] * len(self.letter_set)
        
        # traverse the word and find blank spaces
        for i in range(len(word)):
            # case 1: "letter blank"
            if word[i] == '_':
                                
                # calculate occurences of pattern and each letter not guessed yet
                for j, letter in enumerate(self.letter_set):
                    if self.unigram[len(word)][letter] > 0 and letter not in self.guessed_letters:
                        total_count += self.unigram[len(word)][letter]
                        letter_count[j] += self.unigram[len(word)][letter]
                       
        # calculate the probabilities of each letter appearing
        if total_count > 0:
            for i in range(len(self.letter_set)):
                probs[i] = letter_count[i] / total_count
                
        # interpolate probabilities
        for i, p in enumerate(self.probabilities):
            self.probabilities[i] = p + probs[i] * float(17/227300)
        
        # adjust probabilities so they sum to one (not necessary but looks better)
        final_probs = [0] * len(self.letter_set)
        if sum(self.probabilities) > 0:
            for i in range(len(self.probabilities)):
                final_probs[i] = self.probabilities[i] / sum(self.probabilities)
            
        self.probabilities = final_probs
        
        # find letter with largest probability
        max_prob = 0
        guess_letter = ''
        for i, letter in enumerate(self.letter_set):
            if self.probabilities[i] > max_prob:
                max_prob = self.probabilities[i]
                guess_letter = letter
        
        # if no letter chosen from above, pick a random one (extra weight on vowels)
        if guess_letter == '':
            letters = self.letter_set.copy()
            random.shuffle(letters)
            letters_shuffled = ['e','a','i','o','u'] + letters
            for letter in letters_shuffled:
                if letter not in self.guessed_letters:
                    return letter
            
        return guess_letter

    ##########################################################
    # You'll likely not need to modify any of the code below #
    ##########################################################
    
    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary
                
    def start_game(self, practice=True, verbose=True):
        # reset guessed letters to empty set and current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
                         
        response = self.request("/new_game", {"practice":practice})
        if response.get('status')=="approved":
            game_id = response.get('game_id')
            word = response.get('word')
#             print("word: ",word)
            tries_remains = response.get('tries_remains')
            self.tries_remaining = tries_remains
            
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains>0:
                # get guessed letter from user code
                guess_letter = self.guess(word)
                    
                # append guessed letter to guessed letters field in hangman object
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))
                    
                try:    
                    res = self.request("/guess_letter", {"request":"guess_letter", "game_id":game_id, "letter":guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e
               
                if verbose:
                    print("Sever response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status=="success":
                    self.practice_wins+=1
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                        
                    return True
                elif status=="failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status=="ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status=="success"
        
    def my_status(self):
        return self.request("/my_status", {})
    
    def request(
            self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"

        # Add `access_token` to post_args or args if it has not already been
        # included.
        if self.access_token:
            # If post_args exists, we assume that args either does not exists
            # or it does not need `access_token`.
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        num_retry, time_sleep = 5, 2                                                                                        
        for it in range(num_retry):                                                                                         
            try:                                                                                                            
                response = self.session.request(                                                                            
                    method or "GET",                                                                                        
                    HANGMAN_URL + path,                                                                                     
                    timeout=self.timeout,                                                                                   
                    params=args,                                                                                            
                    data=post_args                                                                                          
                )                                                                                                           
                break                                                                                                       
            except requests.HTTPError as e:                                                                                 
                response = json.loads(e.read())                                                                             
                raise HangmanAPIError(response)                                                                             
            except requests.exceptions.SSLError as e:                                                                       
                if it + 1 == num_retry:                                                                                     
                    raise                                                                                                   
                time.sleep(time_sleep)  

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result
    
class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""

        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result

        Exception.__init__(self, self.message)

In [5]:
api = HangmanAPI(access_token="6a03334157d3edd748e0f6f9876cd6", timeout=2000)

for i in range(10):
    api.start_game(practice=1,verbose=False)
    time.sleep(1)
    print(i+1,api.practice_wins)

1 0
2 1
3 2
4 3
5 3
6 3
7 3
8 4
9 4
10 4


### Finding optimal weights for each n gram

In [7]:
def build_dictionary(dictionary_file_location):
    text_file = open(dictionary_file_location,"r")
    full_dictionary = text_file.read().splitlines()
    text_file.close()
    return full_dictionary

In [12]:
word_dict = {}
input_files = ['words_250000_train.txt']

for filename in input_files:
    full_dict = build_dictionary(filename)

    for word in full_dict:
        if word.isalpha():
            word_dict[word.lower()] = None

# create a list to be used as input later
words = list(np.random.permutation(list(word_dict.keys())))
with open('word_list.txt', 'w') as f:
    for word in words:
        f.write('{}\n'.format(word))

In [13]:
MAX_NUM_INPUTS = max([len(i) for i in words])
BATCH_SIZE = np.array([len(i) for i in words[:train_val_split_idx]]).mean()
print('Max word length: {}, average word length: {:0.1f}'.format(MAX_NUM_INPUTS, BATCH_SIZE))

NameError: name 'train_val_split_idx' is not defined

In [14]:
from collections import Counter
data = Counter([len(word) for word in words])
data.most_common()

[(9, 30906),
 (8, 30452),
 (10, 26953),
 (7, 25948),
 (11, 22786),
 (6, 19541),
 (12, 18178),
 (13, 12956),
 (5, 11274),
 (14, 8710),
 (4, 5287),
 (15, 5211),
 (16, 3143),
 (3, 2201),
 (17, 1775),
 (18, 859),
 (19, 441),
 (2, 264),
 (20, 225),
 (21, 98),
 (22, 44),
 (1, 17),
 (23, 14),
 (24, 9),
 (25, 3),
 (27, 2),
 (29, 2),
 (28, 1)]

In [15]:
len(words)

227300

In [16]:
len([word for word in words if len(word)>=9])

132316

In [17]:
weights = {
9:132316/227300,
8:30452/227300,
7:25948/227300,
6:19541/227300,
5:11274/227300,
4:5287/227300,
3:2201/227300,
2:264/227300,
1:17/227300,
}

In [18]:
weights[9]

0.5821205455345358

## Playing recorded games:
Please finalize your code prior to running the cell below. Once this code executes once successfully your submission will be finalized. Our system will not allow you to rerun any additional games.

Please note that it is expected that after you successfully run this block of code that subsequent runs will result in the error message "Your account has been deactivated".

Once you've run this section of the code your submission is complete. Please send us your source code via email.

In [19]:
for i in range(1000):
    print('Playing ', i, ' th game')
    # Uncomment the following line to execute your final runs. Do not do this until you are satisfied with your submission
    api.start_game(practice=0,verbose=False)
    
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(1)

Playing  0  th game


HangmanAPIError: {'error': 'You have reached 1000 of games', 'status': 'denied'}

## To check your game statistics
1. Simply use "my_status" method.
2. Returns your total number of games, and number of wins.

In [10]:
[total_practice_runs,total_recorded_runs,total_recorded_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
success_rate = total_recorded_successes/total_recorded_runs
print('overall success rate = %.3f' % success_rate)

overall success rate = 0.558
