In [5]:
import random
import string
import collections
import numpy as np
from functools import reduce
import re

In [6]:
class HangmanPlayer(object):
    """Hangman player that guesses letters from per-position letter statistics.

    For every position of the hidden word the player keeps a smoothed letter
    distribution (``prob_matrix``) together with the list of dictionary words
    that distribution was estimated from (``dictionary_i``). As letters are
    revealed, the word list of each still-hidden position is narrowed to words
    matching the revealed letters in a small window around that position, and
    the distribution is re-estimated from the narrowed list.
    """

    def __init__(self, access_token=None, session=None, timeout=None):
        """Load the training and target word lists from the working directory.

        ``access_token``, ``session`` and ``timeout`` are accepted but are not
        used by this class.
        """
        self.guessed_letters = []

        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        # (character, count) pairs over the whole training list, most frequent
        # first; used as the fallback guess order in guess().
        self.full_dictionary_common_letter_sorted = collections.Counter(
            "".join(self.full_dictionary)).most_common()

        target_dictionary_location = "words_final_test.txt"
        self.target_dictionary = self.build_dictionary(target_dictionary_location)

    def reset(self, length):
        """Reset the per-position estimates to the full dictionary.

        Called before a word is started. ``length`` is the number of positions
        in the hidden word.
        """
        self.prob_matrix = []
        self.dictionary_i = []
        for i in range(length):
            # Positional letter probabilities estimated from the full dictionary.
            self.prob_matrix.append(
                self.letter_distribution_at_place(self.full_dictionary, i))
            # Word list backing the estimate for position i (shared reference,
            # replaced - never mutated - when the list is narrowed later).
            self.dictionary_i.append(self.full_dictionary)

    def letter_distribution(self, dictionary):
        """Return add-one smoothed probabilities of the letters a-z.

        ``dictionary`` is an iterable of strings; all of their characters are
        pooled. The result maps every lowercase ASCII letter to
        ``(count + 1) / (total + 26)``, where ``total`` is the number of a-z
        characters seen, so the values always sum to 1 and a letter that never
        occurs is never ranked above one that does. Characters outside a-z are
        ignored. Keys are ordered most-frequent first, then unseen letters
        alphabetically, which determines tie-breaking in guess().
        """
        counts = collections.Counter("".join(dictionary))
        total = sum(counts[letter] for letter in string.ascii_lowercase)
        denominator = total + 26

        distribution = {}
        for letter, count in counts.most_common():
            if letter in string.ascii_lowercase:
                distribution[letter] = (count + 1) / denominator
        for letter in string.ascii_lowercase:
            # Unseen letters get the same smoothing as seen ones (count == 0).
            distribution.setdefault(letter, 1 / denominator)
        return distribution

    def unique_letter_distribution(self, dictionary):
        """Return the letter distribution counting each letter once per word.

        Repeated letters inside a word are collapsed first (``'aaa'`` counts
        as a single ``'a'``).
        """
        deduplicated = ["".join(dict.fromkeys(word)) for word in dictionary]
        return self.letter_distribution(deduplicated)

    def letter_distribution_at_place(self, dictionary, i):
        """Return the letter distribution at index ``i`` over all words.

        Words that are too short to have an index ``i`` are skipped.
        """
        letters_at_i = [word[i] for word in dictionary if len(word) > i]
        return self.letter_distribution(letters_at_i)

    def guess(self, word):
        """Return the next letter to guess for the masked ``word``.

        ``word`` uses ``"_"`` for every hidden position, e.g. ``"_pp_e"``, with
        no separators between positions (this is the form start_game() passes
        in; spaces are not stripped). Requires reset() to have been called for
        a word of this length and ``self.prev_clean_word`` to be set.

        Returns ``'!'`` only if every candidate letter has already been
        guessed.
        """
        clean_word = word.replace("_", ".")
        len_word = len(clean_word)

        self.update_prob_matrix(clean_word)
        self.prev_clean_word = clean_word

        # Summing the per-position distributions gives, for each letter, the
        # expected number of positions it would reveal.
        expected_hits = collections.Counter()
        for position_distribution in self.prob_matrix[:len_word]:
            expected_hits.update(position_distribution)

        for letter, probability in expected_hits.most_common():
            if probability > 0 and letter not in self.guessed_letters:
                return letter

        # Fallback: most common letter of the whole training dictionary.
        for letter, _instance_count in self.full_dictionary_common_letter_sorted:
            if letter not in self.guessed_letters:
                return letter

        return '!'

    def update_prob_matrix(self, clean_word):
        """Refresh ``prob_matrix`` / ``dictionary_i`` for the current pattern.

        ``clean_word`` is the masked word with ``"."`` for hidden positions.
        Revealed positions get probability 1 for their letter. For a hidden
        position, the word list is re-filtered only when the pattern inside its
        context window differs from the one built from ``self.prev_clean_word``,
        and the distribution is re-estimated only when that filter actually
        removed words.
        """

        def regex_to_match(word, i, c):
            """Build the filter regex for position ``i`` with context width ``c``.

            The window spans ``c`` characters on each side of ``i`` (one more
            at the first and last position). Everything before the window is a
            wildcard; everything after it is left unconstrained.
            """
            if (i == 0) or (i == len(word) - 1):
                c = c + 1
            start = max(i - c, 0)
            end = min(i + c + 1, len(word))
            suffix = ".*" if end < len(word) else ""
            return "." * start + word[start:end] + suffix

        singleton_dict = {letter: 0 for letter in string.ascii_lowercase}
        context = 2
        for i, elem in enumerate(clean_word):
            if elem != ".":
                # Already revealed: all probability mass on this letter.
                self.prob_matrix[i] = singleton_dict.copy()
                self.prob_matrix[i][elem] = 1
                continue

            filtered_dict = self.dictionary_i[i]
            regex = regex_to_match(clean_word, i, context)
            prev_regex = regex_to_match(self.prev_clean_word, i, context)
            if regex != prev_regex:
                # Something changed inside the context window since last call.
                # (On the first call of a game prev_clean_word still holds
                # underscores, so this branch always runs once per position.)
                pattern = re.compile(regex)
                filtered_dict = [
                    candidate for candidate in self.dictionary_i[i]
                    if pattern.match(candidate) is not None
                ]
            if len(filtered_dict) < len(self.dictionary_i[i]):
                self.prob_matrix[i] = self.letter_distribution_at_place(filtered_dict, i)
                self.dictionary_i[i] = filtered_dict

    ##########################################################
    # You'll likely not need to modify any of the code below #
    ##########################################################

    def build_dictionary(self, dictionary_file_location):
        """Read a word list file and return its lines as a list of strings."""
        # Context manager so the handle is closed even if reading fails.
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

    def start_game(self, practice=True, verbose=True):
        """Play one local game against a random word from the target list.

        The target is drawn with ``np.random``, so seeding NumPy makes the
        sequence of games reproducible. The player has 6 wrong guesses.
        ``practice`` is accepted but not used. Returns 1 if the word was
        completed and 0 otherwise; the chosen word is stored in
        ``self.TARGET``.
        """
        # Reset guessed letters and the per-position estimates.
        self.guessed_letters = []
        target_word = self.target_dictionary[np.random.randint(len(self.target_dictionary))]
        self.TARGET = target_word
        word = "_" * len(target_word)
        self.reset(len(word))
        self.prev_clean_word = word

        self.incorrect_guesses = []
        self.consecutive_incorrect_guesses = []

        if verbose:
            print("Word is ", target_word)
            print("Successfully start a new game!")

        tries_remains = 6

        while tries_remains > 0:
            guess_letter = self.guess(word)
            self.guessed_letters.append(guess_letter)

            if verbose:
                print("Guessing letter: {0}".format(guess_letter))

            updated_word, correct = self.update_word(target_word, guess_letter, word)

            if not correct:
                tries_remains = tries_remains - 1
                self.incorrect_guesses.append(guess_letter)
                if verbose:
                    print("Incorrect guess")
                    print("Tries remaining ", tries_remains)

            if updated_word == target_word:
                if verbose:
                    print("Game won")
                return 1
            word = updated_word
        if verbose:
            print("Couldnt guess")
        return 0

    def update_word(self, target_word, guess_letter, word):
        """Reveal ``guess_letter`` in the masked ``word``.

        Returns ``(new_word, 1)`` when the letter occurs in ``target_word`` and
        ``(word, 0)`` (unchanged) when it does not.
        """
        if guess_letter in target_word:
            revealed = "".join(
                target_char if target_char == guess_letter else shown_char
                for target_char, shown_char in zip(target_word, word)
            )
            return (revealed, 1)
        return (word, 0)

In [8]:
# Play a fixed number of local games with a seeded RNG and report the win rate.
np.random.seed(29)
player = HangmanPlayer()
total_games = 1
total_won = 0

# Target words split by outcome, for later inspection.
winning_words, losing_words = [], []
for i in range(total_games):
    won = player.start_game(verbose=False)
    total_won += won
    bucket = winning_words if won else losing_words
    bucket.append(player.TARGET)
    # Running win rate after i + 1 games.
    print(total_won / (i + 1))

# Final win rate over all games.
print(total_won / total_games)

0.0
0.0
