In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Load the two word lists (one word per line, surrounding whitespace removed).
def _load_words(path):
    """Return the stripped lines of the text file at ``path`` as a list."""
    with open(path, "r") as handle:
        return [line.strip() for line in handle]


long_word_list = _load_words("data/allowed_words.txt")
short_word_list = _load_words("data/possible_words.txt")

# Enumerate every possible reply: 5 positions, each coded 0, 1 or 2
# (3**5 = 243 patterns, in lexicographic order).
reply_list = [
    [first, second, third, fourth, fifth]
    for first in range(3)
    for second in range(3)
    for third in range(3)
    for fourth in range(3)
    for fifth in range(3)
]

In [None]:
# Break a word into a list of its individual characters.
def split_word(word):
    """Return the characters of ``word`` as a list, in order."""
    return list(word)


# get the game output
def guess_reply(guessing_word, answer):
    """Score ``guessing_word`` against ``answer``, one code per letter position.

    Codes:
        2 -- the letter is at the same position in ``answer``
        1 -- the letter occurs in ``answer``, but not at this position
        0 -- the letter does not occur in ``answer``

    Every position is scored independently, which is the same simplified rule
    that ``simu_reply`` checks; occurrences of a repeated letter are not
    counted against each other.

    Both words are expected to have the same length; an ``IndexError`` is
    raised if ``answer`` is shorter than ``guessing_word``.

    Returns:
        A list of ints with one entry per letter of ``guessing_word``.
    """
    reply = []
    # Walk the guess by position. Looking the position up with list.index()
    # would always return the FIRST occurrence of a repeated letter and score
    # the wrong slot (e.g. the second 'a' of "abate").
    for position, letter in enumerate(guessing_word):
        if letter == answer[position]:
            reply.append(2)
        elif letter in answer:
            reply.append(1)
        else:
            reply.append(0)
    return reply


# 输入一个词和一个回复，从词库中找出所有可能的词
def simu_reply(guessing_word, judged_word, reply):
    # 1:必须在其他位置有
    # 2:必须在相同位置有
    for i in range(5):
        if reply[i] == 1:
            if (
                guessing_word[i] not in judged_word
                or guessing_word[i] == judged_word[i]
            ):
                return False
        if reply[i] == 2:
            if guessing_word[i] != judged_word[i]:
                return False
        if reply[i] == 0:
            if guessing_word[i] in judged_word:
                return False
    return True


# Filter the full allowed-word list down to the words consistent with a reply.
def find_possible_words(guessing_word, reply):
    """Return the words of ``long_word_list`` that ``simu_reply`` accepts.

    Order follows ``long_word_list``.
    """
    return [
        candidate
        for candidate in long_word_list
        if simu_reply(guessing_word, candidate, reply)
    ]


def find_possible_words_adjusted(guessing_word, reply, possible_words):
    """Return the entries of ``possible_words`` that ``simu_reply`` accepts.

    Same filter as ``find_possible_words``, but applied to the supplied
    candidate collection instead of the global word list. Order is preserved
    and a new list is returned.
    """

    def is_consistent(candidate):
        return simu_reply(guessing_word, candidate, reply)

    return list(filter(is_consistent, possible_words))


# calculate the info of the guessing word
def calc_info(guessing_word, reply):
    """Return the information, in bits, gained by seeing ``reply`` for a guess.

    The probability of the reply is the fraction of ``long_word_list`` that is
    consistent with it (per ``find_possible_words``); the information is
    ``log2(1 / p)``.

    Returns:
        A float. If no word is consistent with the reply the probability is
        zero and the information is infinite, so ``inf`` is returned rather
        than raising ``ZeroDivisionError``.
    """
    num_of_words = len(find_possible_words(guessing_word, reply))
    if num_of_words == 0:
        # Impossible reply (this also covers an empty word list).
        return np.float64(np.inf)
    p_x = num_of_words / len(long_word_list)
    return np.log2(1 / p_x)


# Expected information (entropy, in bits) of a guess over the full word list.
def calc_E_info(guessing_word):
    """Return the entropy of the reply distribution for ``guessing_word``.

    For each pattern in ``reply_list`` the probability is the share of
    ``long_word_list`` that ``find_possible_words`` keeps; patterns with zero
    probability are skipped and the rest contribute ``p * log2(1 / p)``.
    """
    vocabulary_size = len(long_word_list)
    entropy = 0
    for pattern in reply_list:
        matches = find_possible_words(guessing_word, pattern)
        share = len(matches) / vocabulary_size
        if share == 0:
            continue
        entropy += share * np.log2(1 / share)
    return entropy


def calc_E_info_adjusted(guessing_word, possible_words):
    """Return the entropy (in bits) of a guess over the remaining candidates.

    For each pattern in ``reply_list`` the probability is the share of
    ``possible_words`` kept by ``find_possible_words_adjusted``; patterns with
    zero probability are skipped and the rest contribute ``p * log2(1 / p)``.

    Args:
        guessing_word: the word being evaluated as a guess.
        possible_words: the candidate words still considered possible. It is
            not modified.

    Returns:
        The entropy as a number; 0 when ``possible_words`` is empty.
    """
    total_num = len(possible_words)
    if total_num == 0:
        # Nothing left to distinguish; also avoids dividing by zero below.
        return 0
    E_info = 0
    for reply in reply_list:
        # Keep the filtered result in its own name: overwriting
        # ``possible_words`` here would make every later pattern filter an
        # already-filtered list and contribute nothing.
        matching_words = find_possible_words_adjusted(
            guessing_word, reply, possible_words
        )
        p_x = len(matching_words) / total_num
        if p_x != 0:
            E_info += p_x * np.log2(1 / p_x)
    return E_info

In [None]:
# Score every word of the allowed list by its expected information as a guess,
# rank the words, and save the ranking.
# Create the output directory up front so the final to_csv cannot fail on a
# missing folder after the (long) scoring loop has finished.
os.makedirs("result", exist_ok=True)

# Collect plain records and build the DataFrame once, instead of growing it
# row by row with .loc.
E_info_records = []
for word in long_word_list:
    E_info = calc_E_info(word)
    E_info_records.append({"word": word, "E_info": E_info})
    # One compact progress line per word.
    print(word, E_info)

E_info_for_all_words = pd.DataFrame(E_info_records, columns=["word", "E_info"])

# Sort: highest expected information first (original row labels are kept).
E_info_for_all_words = E_info_for_all_words.sort_values(by="E_info", ascending=False)

# Save
E_info_for_all_words.to_csv("result/E_info_for_all_words.csv", index=False)

word         aahed
E_info    4.292292
Name: 0, dtype: object
word         aalii
E_info    3.702683
Name: 1, dtype: object
word         aargh
E_info    3.980467
Name: 2, dtype: object
word         aarti
E_info    4.621203
Name: 3, dtype: object
word        abaca
E_info    2.99165
Name: 4, dtype: object
word         abaci
E_info    3.704593
Name: 5, dtype: object
word         aback
E_info    3.330902
Name: 6, dtype: object
word         abacs
E_info    4.162528
Name: 7, dtype: object
word         abaft
E_info    3.468908
Name: 8, dtype: object
word         abaka
E_info    2.885794
Name: 9, dtype: object
word         abamp
E_info    3.390907
Name: 10, dtype: object
word         aband
E_info    3.821515
Name: 11, dtype: object
word         abase
E_info    4.523136
Name: 12, dtype: object
word         abash
E_info    3.882148
Name: 13, dtype: object
word         abask
E_info    3.807766
Name: 14, dtype: object
word         abate
E_info    4.363298
Name: 15, dtype: object
word         abaya
E

KeyboardInterrupt: 

In [None]:
# # Wordle game
# # roll an answer
# ANSWER = np.random.choice(short_word_list)
# ANSWER = 'state'

# game_reply = {}
# possible_words_list = {}
# num_possible_words = {}
# guessing_word = {}

# # game solution 1: on every turn, guess the word with the highest E_info among the words that still satisfy all replies so far
# def solution1(start_word):
#     if start_word == '':
#         start_word = 'trane'

#     game_reply = {}
#     guessing_word = {}

#     guessing_word[0] = start_word
#     possible_words=long_word_list
#     turn=0

#     while True:
#         game_reply[turn] = guess_reply(guessing_word[turn], ANSWER)
#         print(turn,guessing_word[turn],game_reply[turn])

#         possible_words= find_possible_words_adjusted(guessing_word[turn],
#                                                 game_reply[turn],
#                                                 possible_words)
#         E_info_for_possible_words = pd.DataFrame(columns=['word', 'E_info'])
#         for word in possible_words:
#             E_info_for_possible_words.loc[len(E_info_for_possible_words)] = [word, calc_E_info_adjusted(word,possible_words)]
#         E_info_for_possible_words.sort_values(by='E_info', ascending=False, inplace=True)
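#         guessing_word[turn+1]=E_info_for_possible_words['word'][0]
#         # NOTE: sort_values keeps the original index labels, so ['word'][0] looks up the row labelled 0 (the first word inserted), not the top-ranked row, and raises KeyError: 0 when possible_words is empty. Use ['word'].iloc[0] after checking that the frame is not empty.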

#         if guessing_word[turn+1]==ANSWER:
#             print(ANSWER)
#             break

#         turn+=1

#     return guessing_word

# solution1('')

0 trane [1, 0, 2, 0, 2]
1 abate [1, 0, 0, 2, 2]


KeyError: 0