In [8]:
import pickle
import math
import random
import tqdm

In [3]:
def load_word_set(path: str):
    """Read a word list (one word per line) into a set of upper-cased words.

    Surrounding whitespace is stripped from every line. Blank or
    whitespace-only lines are skipped so the empty string never ends up in
    the result (an empty "word" would fail the equal-length check in
    compare_word later on).

    Args:
        path: location of the text file to read.

    Returns:
        A set containing each distinct non-empty word, upper-cased.
    """
    word_set = set()
    with open(path, "r") as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            word = line.strip().upper()
            if word:
                word_set.add(word)
    return word_set

In [4]:
def compare_word(guess, actual):
    """Score a guess against the actual word, one digit per position.

    Each position of `guess` contributes one character to the result:
      '2' - the letter matches `actual` at the same position,
      '1' - the letter occurs somewhere in `actual`, but not at this position,
      '0' - the letter does not occur in `actual` at all.

    Repeated letters get no special treatment: every occurrence in `guess`
    is scored independently with a plain membership test.

    Raises:
        AssertionError: if the two words differ in length.
    """
    assert len(guess) == len(actual), "ERROR: words provided in method compare_word are of unequal length!"

    digits = []
    for guessed_letter, actual_letter in zip(guess, actual):
        # bool + bool -> 0, 1 or 2 (an exact hit is also a membership hit).
        score = int(guessed_letter == actual_letter) + int(guessed_letter in actual)
        digits.append(str(score))
    return ''.join(digits)

def negative_log_2(x):
    """Return -log2(x), the information content (in bits) of probability x.

    Uses math.log2 rather than math.log(x, 2): the two-argument form divides
    two natural logarithms and can be off by one ulp even for exact powers
    of two.

    Raises:
        ValueError: if x is zero or negative (math domain error).
    """
    return -math.log2(x)

def shannon_entropy(vals):
    """Return the Shannon entropy (in bits) of the distribution given by counts.

    Args:
        vals: iterable of non-negative weights/counts. Any iterable works,
            including a generator, because it is materialised once up front.

    Returns:
        sum(-p * log2(p)) over p = v / sum(vals). Zero counts contribute
        nothing (the usual 0 * log 0 = 0 convention), and an empty or
        all-zero input yields 0.
    """
    # Materialise once: the values are needed both for the total and for the
    # per-term probabilities, and a generator would be exhausted by sum().
    counts = [v for v in vals if v]
    total = sum(counts)
    if not total:
        return 0
    return sum(negative_log_2(c / total) * (c / total) for c in counts)

In [5]:
class ChoiceNode:
    """Tree node holding the words that are still possible at this point.

    Attributes are documented inline in __init__.
    """

    def __init__(self, possible_words):
        """Create a node for the given candidate words.

        Args:
            possible_words: a set or list (or subclass of either) of words.

        Raises:
            AssertionError: if possible_words is not a set or list.
        """
        # isinstance (rather than an exact type match) also admits subclasses.
        assert isinstance(possible_words, (set, list)), "ERROR: must initialize possible_words as a set/list when declaring ChoiceNode!"

        # One WordNode per candidate guess; filled in by generate_children().
        self.children = list()
        # Stored by reference, not copied.
        self.possible_words = possible_words

    def generate_children(self):
        """(Re)build self.children with one WordNode per possible word.

        The list is replaced rather than appended to, so calling this more
        than once (e.g. when a notebook cell is re-run) does not duplicate
        children.
        """
        self.children = [WordNode(self, word) for word in self.possible_words]
    
class WordNode:
    """Tree node representing one candidate guess made from a ChoiceNode."""

    def __init__(self, parent, word):
        """Store the guess and its parent; nothing is computed yet."""
        self.word = word
        self.parent = parent
        # Filled by calculate_possibilities(): feedback pattern -> ChoiceNode.
        self.possibilities = dict()
        self.entropy = None

    def calculate_possibilities(self):
        """Group the parent's words by the feedback this guess would get.

        Every word in self.parent.possible_words is compared against
        self.word; words that produce the same compare_word pattern are
        collected in one ChoiceNode stored under that pattern. The entropy
        of the resulting group sizes is then stored in self.entropy.
        """
        buckets = self.possibilities

        for candidate in self.parent.possible_words:
            pattern = compare_word(self.word, candidate)
            bucket = buckets.get(pattern)
            if bucket is None:
                # First word seen for this pattern: start a new one-word set.
                buckets[pattern] = ChoiceNode({candidate})
            else:
                bucket.possible_words.add(candidate)

        group_sizes = [len(bucket.possible_words) for bucket in buckets.values()]
        self.entropy = shannon_entropy(group_sizes)
        


In [49]:
def vibecheck(starting_node, WORDS, max_guesses=5):
    """Play the pre-built decision tree against every word and count losses.

    For each secret word the walk starts at ``starting_node.parent``. At every
    step the child with the highest stored ``entropy`` is guessed (the first
    one wins ties); if it is not the secret, the walk descends into the
    branch keyed by ``compare_word(guess, secret)``. A game counts as lost
    when ``max_guesses`` guesses are used without hitting the secret.

    Args:
        starting_node: a node whose ``parent`` is the root of a tree in which
            children, possibilities and entropies are already populated.
        WORDS: iterable of secret words to play.
        max_guesses: guesses allowed per game. Defaults to 5, the value that
            used to be hard-coded here.

    Returns:
        The number of games lost (also printed as 'games lost:  N').

    Raises:
        ValueError: if the walk reaches a node that has no children, i.e. the
            tree was not generated below that node.
        KeyError: if the feedback pattern for a secret has no branch under
            the guessed node.
    """
    losses = 0
    root = starting_node.parent

    for secret in tqdm.tqdm(WORDS):
        current = root

        for _ in range(max_guesses):
            # max() keeps the first maximal element, same as a strict '>' scan.
            best = max(current.children, key=lambda child: child.entropy, default=None)
            if best is None:
                raise ValueError("vibecheck reached a node without children; the tree is not fully generated")

            if best.word == secret:
                break

            current = best.possibilities[compare_word(best.word, secret)]
        else:
            # The loop ran out of guesses without a break: the game is lost.
            losses += 1

    print('games lost: ', losses)
    return losses

    
def generate_subnodes(node):
    """Recursively build the decision tree below ``node``.

    * ChoiceNode: creates one WordNode child per possible word and expands
      each of them.
    * WordNode: computes its possibilities (and entropy) and expands every
      resulting ChoiceNode.

    Each WordNode has calculate_possibilities() run exactly once: it is
    invoked in the WordNode branch only, not additionally by the parent
    ChoiceNode before recursing.

    Args:
        node: a ChoiceNode or WordNode. Anything else is reported with a
            printed message and otherwise ignored.
    """
    if isinstance(node, ChoiceNode):
        remaining = len(node.possible_words)

        if remaining == 0:
            print('Something\'s very wrong. Nodes shouldn\'t have dicts of len 0.')
            return

        node.generate_children()

        if remaining == 1:
            # Base case: the only guess is the answer. Its single possibility
            # is again a one-word ChoiceNode, so recursing here would never
            # terminate; just compute the child's possibilities and stop.
            node.children[0].calculate_possibilities()
            return

        for child in node.children:
            generate_subnodes(child)

    elif isinstance(node, WordNode):
        node.calculate_possibilities()
        for branch in node.possibilities.values():
            generate_subnodes(branch)

    else:
        print('FATAL ERROR: illegal node type provided when generating tree!')
        return
    
    
def generate_tree(MAX_GUESSES, word_dict):
    """Build the full decision tree and return the first-guess WordNode.

    A root ChoiceNode is created from ``word_dict`` and every word is scored
    as a first guess. The highest-entropy guess (first one on ties) becomes
    the starting node, and the tree is then generated below each of its
    feedback branches, with a progress bar over those branches.

    Args:
        MAX_GUESSES: accepted but not used by this function.
        word_dict: set or list of candidate words for the root ChoiceNode.

    Returns:
        The chosen first-guess WordNode; the root ChoiceNode is reachable
        through its ``parent`` attribute.
    """
    root = ChoiceNode(word_dict)
    root.generate_children()

    # The agent always opens with the same word for a given dictionary, so
    # only the subtree under that best opening guess is generated.
    for candidate in root.children:
        candidate.calculate_possibilities()

    starting_node = max(root.children, key=lambda candidate: candidate.entropy, default=None)

    branches = list(starting_node.possibilities.values())
    for branch in tqdm.tqdm(branches):
        generate_subnodes(branch)

    return starting_node

In [31]:
# Forward slash works on Windows and POSIX alike and avoids the invalid "\w"
# escape sequence. Sorting gives the word list a fixed order, so entropy ties
# are broken the same way on every kernel (set iteration order is not stable
# between Python processes).
WORDS = sorted(load_word_set("Word_Data/wordle_words.txt"))

In [42]:
# Build the decision tree for the whole word list and keep the first-guess node.
# The first argument (5) is bound to generate_tree's MAX_GUESSES parameter.
# This is the expensive cell: the recorded run below took about 23 minutes.
thousand_w_node = generate_tree(5, WORDS)

100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [23:31<00:00,  9.47s/it]


In [48]:
# Run the decision tree against each word in our dataset; vibecheck prints
# how many games were lost.
vibecheck(thousand_w_node, WORDS)

100%|████████████████████████████████████████████████████████████████████████████| 1367/1367 [00:00<00:00, 7802.31it/s]

games lost:  12





In [51]:
# Persist the generated tree so the expensive build does not have to be redone.
# NOTE(review): the original dumped a name `test` that is not defined anywhere
# in the visible notebook; `thousand_w_node` is assumed to be the intended
# object, judging by the output file name. Confirm before relying on the file.
with open('wordle_words_node.p', 'wb') as tree_file:
    pickle.dump(thousand_w_node, tree_file)