In [2]:
import random  
import re 
from collections import defaultdict 
import sys 
import time 

In [3]:
# simulate hangman

def generate_string(correct, guessed_letter, res):
    """Reveal every occurrence of ``guessed_letter`` in the partially solved word.

    Args:
        correct: The secret word.
        guessed_letter: The single character being tried.
        res: Mutable list holding the current board (one entry per position
            of ``correct``); it is updated in place.

    Returns:
        True if the letter occurs at least once in ``correct``, else False.
    """
    # Collect the matching positions first, then write them onto the board.
    hit_positions = [idx for idx, char in enumerate(correct) if char == guessed_letter]
    for idx in hit_positions:
        res[idx] = guessed_letter
    return bool(hit_positions)

def simulate_hangman(correct, guesses):
    """Play an interactive game of hangman on stdin/stdout.

    The player is prompted for one character per turn. A wrong character
    costs one guess; a correct one reveals every occurrence of it. Input that
    is not exactly one character, or a letter that is already revealed, is
    rejected and asked for again without costing a guess.

    Args:
        correct: The secret word.
        guesses: Number of wrong guesses the player may make.

    Returns:
        True if the word was fully revealed before the guesses ran out,
        otherwise False.
    """
    res = ['_'] * len(correct)
    guessed_correct = False
    # "> 0" (rather than plain truthiness) also stops on a negative budget.
    while guesses > 0:
        print('Word to guess: ', res)
        letter = input('Enter guess: ')
        if len(letter) != 1:
            # Empty or multi-character input can never match a position, so
            # it would otherwise silently burn a guess.
            print('Please enter exactly one letter.')
            continue
        if letter in res:
            print('Letter already revealed. Enter a different one.')
            continue
        guessed_correct_letter = generate_string(correct, letter, res)
        if ''.join(res) == correct:
            guessed_correct = True
            break
        if not guessed_correct_letter:
            guesses -= 1
        print('Guesses remaining: ', guesses)
    print('Correct word: ', correct)
    if guessed_correct:
        print('Nice! You guessed correctly!')
    else:
        print('Oops! Better luck next time.')
    return guessed_correct

In [None]:
# Small word list shared by the games and frequency cells in this notebook.
lexicon = [
    "apple",
    "banana",
    "orange",
    "grape",
    "strawberry",
    "pineapple",
    "mango",
    "kiwi",
]

# Draw the secret word at random; the player may miss five times.
correct = random.choice(lexicon)
guesses = 5

simulate_hangman(correct=correct, guesses=guesses)

In [None]:
# Build a map from each lowercase letter to its total number of occurrences
# across the lexicon (a letter repeated inside one word counts every time).
letter_frequency = defaultdict(int)
for word in lexicon:
    for letter in word:
        # Ignore anything that is not a lowercase ASCII letter.
        if not ('a' <= letter <= 'z'):
            continue
        letter_frequency[letter] += 1

In [None]:
print('Letter frequencies:')
# Most frequent letter first; sorting on the negated count is stable, so
# letters with equal counts keep their first-seen order.
for letter, count in sorted(letter_frequency.items(), key=lambda pair: -pair[1]):
    print(f'{letter}: {count}')

In [None]:
# Let's test the hangman game on this newly created lexicon.
# A fresh secret word is drawn and the guess budget is reset to five.
correct = random.choice(lexicon)
guesses = 5

simulate_hangman(correct=correct, guesses=guesses)

Okay, now on to the real stuff. 
First, we'll try the binary search approach. Each turn, we guess the letter that occurs in as close to half of the remaining candidate words as possible. 
Based on the outcome, we eliminate the words that are inconsistent with it and keep only the words that still fit the revealed pattern (checked with a regex). 

In [None]:
lexicon_size = len(lexicon)

# Count each word at most once per letter so that every value really is the
# fraction of words containing that letter and can never exceed 1.0.
# Dividing the raw occurrence counts by the number of words would overshoot:
# 'a' occurs 9 times in 8 words because "banana" alone contributes three.
letter_frequency_fractions = {
    letter: sum(letter in word for word in lexicon) / lexicon_size
    for letter in letter_frequency
}

print('Fraction of words containing a letter: ')
# Highest fraction first; ties keep the order in which letters were first seen.
for k, v in sorted(letter_frequency_fractions.items(), key=lambda x: x[1], reverse=True):
    print(f'{k}: {v:.4f}')

In [None]:
# find letter frequencies after each guess 
def calculate_letter_frequencies(res, incorrect_guesses, lexicon):
    """Return, per letter, the fraction of still-possible words containing it.

    A word from ``lexicon`` is still possible when it has the same length as
    ``res``, agrees with every revealed position, and has at every blank
    position a character that is neither a known-wrong letter nor an already
    revealed letter (a correct guess reveals all occurrences of a letter, so
    a revealed letter cannot hide behind a blank).

    Args:
        res: Current board as a list of characters, with '_' for blanks.
        incorrect_guesses: Letters already known not to be in the word.
        lexicon: Iterable of candidate words.

    Returns:
        Dict mapping each lowercase letter found in the possible words to the
        fraction (0 < f <= 1) of those words that contain it. Each word counts
        at most once per letter. An empty dict is returned when no word in
        the lexicon fits the board.
    """
    # Characters a blank position is not allowed to hold.
    revealed = [char for char in res if char != '_']
    excluded = ''.join(
        re.escape(letter) for letter in dict.fromkeys([*incorrect_guesses, *revealed])
    )
    blank = f'[^{excluded}]' if excluded else '.'

    # One regex token per board position; fullmatch anchors both ends.
    pattern = re.compile(
        ''.join(blank if char == '_' else re.escape(char) for char in res)
    )

    words_containing = defaultdict(int)
    matching_words = 0
    for word in lexicon:
        if not pattern.fullmatch(word):
            continue
        matching_words += 1
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result's key order (used for tie-breaking by callers) is stable.
        for letter in dict.fromkeys(word):
            if 'a' <= letter <= 'z':
                words_containing[letter] += 1

    # No candidate left: report "nothing known" instead of failing.
    if matching_words == 0:
        return {}

    return {letter: count / matching_words for letter, count in words_containing.items()}

def guess(res, guessed_letters, incorrect_guesses, lexicon):
    """Pick the next letter to try using the "closest to half" heuristic.

    Among the letters that occur in the still-possible words and have not been
    guessed yet, the one whose frequency is nearest to 0.5 is chosen (the
    first such letter wins ties). If no such letter exists, the first letter
    of the alphabet that has not been guessed is used, so the same letter is
    not proposed twice while unguessed letters remain.

    Args:
        res: Current board as a list of characters, with '_' for blanks.
        guessed_letters: Letters tried so far; the chosen letter is appended
            to this list in place.
        incorrect_guesses: Letters already known not to be in the word.
        lexicon: Iterable of candidate words.

    Returns:
        The chosen letter.
    """
    # based on res, update letter frequencies
    letter_frequencies = calculate_letter_frequencies(res, incorrect_guesses, lexicon)

    already_guessed = set(guessed_letters)
    open_letters = [letter for letter in letter_frequencies if letter not in already_guessed]

    if open_letters:
        # min() returns the first minimal item, matching a strict "<" scan.
        guessed_letter = min(
            open_letters, key=lambda letter: abs(letter_frequencies[letter] - 0.5)
        )
    else:
        # Nothing informative left: fall back to any letter not tried yet
        # ('a' only as a last resort when the whole alphabet is used up).
        guessed_letter = next(
            (letter for letter in 'abcdefghijklmnopqrstuvwxyz' if letter not in already_guessed),
            'a',
        )

    guessed_letters.append(guessed_letter)

    return guessed_letter

def simulate_hangman_v2(correct, guesses, words=None):
    """Let the automatic solver play one game of hangman and report the result.

    Each turn the solver proposes a letter via ``guess``; a wrong letter costs
    one guess. Progress is printed after every turn.

    Args:
        correct: The secret word.
        guesses: Number of wrong guesses the solver may make.
        words: Candidate word list handed to the solver. Defaults to the
            notebook-level ``lexicon`` so existing two-argument calls behave
            as before.

    Returns:
        True if the word was fully revealed before the guesses ran out,
        otherwise False.
    """
    candidates = lexicon if words is None else words

    res = ['_'] * len(correct)
    guessed_letters = []
    incorrect_guesses = []
    tried = set()
    guessed_correct = False

    print(f'You have {guesses} guesses. You will lose a guess each time you guess incorrectly.')

    # "> 0" (rather than plain truthiness) also stops on a negative budget.
    while guesses > 0:
        print('Word to guess: ', res)
        guessed_letter = guess(res, guessed_letters, incorrect_guesses, candidates)
        is_repeat = guessed_letter in tried
        tried.add(guessed_letter)
        guessed_correct_letter = generate_string(correct, guessed_letter, res)
        if ''.join(res) == correct:
            guessed_correct = True
            break
        if is_repeat:
            # A repeated letter makes no progress; charging a guess for it
            # guarantees the loop terminates even if the solver gets stuck.
            guesses -= 1
        elif not guessed_correct_letter:
            incorrect_guesses.append(guessed_letter)
            guesses -= 1
        print('Guesses remaining: ', guesses)

    print('Correct word: ', correct)

    if guessed_correct:
        print('Nice! You guessed correctly!')
    else:
        print('Oops! Better luck next time.')

    return guessed_correct

In [None]:
# Draw a secret word and let the automatic solver play with five misses allowed.
correct = random.choice(lexicon)
guesses = 5

simulate_hangman_v2(correct=correct, guesses=guesses)