In [2]:
import numpy as np

import pandas as pd

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
from pgmpy.estimators import MaximumLikelihoodEstimator

import math

import json

In [3]:
# Word list as a table with one column per letter position
# ("first" .. "fifth" are the column names used throughout this notebook;
# presumably one row per word -- confirm against the CSV).
words = pd.read_csv("wordle-at-words.csv")

# Network structure: each edge points from a parent letter position to the
# position whose distribution is conditioned on it. "second" depends on
# "first" and "third"; "forth" depends on "third" and "fifth".
model = BayesianNetwork(
    [
        ("first", "second"),
        ("third", "second"),
        ("third", "forth"),
        ("fifth", "forth"),
    ]
)

# Estimate the conditional probability tables from the word list.
model.fit(words, estimator=MaximumLikelihoodEstimator)

In [4]:
# Inference engine built on the fitted network; reused by the cells below.
infer = VariableElimination(model)

# Sample evidence mapping (position name -> observed letter). The visible
# cells below pass their own evidence dictionaries rather than this one.
evidence = dict(third="N", forth="I", fifth="S")

In [5]:
def get_suggestion_word(suggestion, evidence):
    """Merge evidence and a suggestion into a five-element letter list.

    For each position name ("first", "second", "third", "forth", "fifth"),
    in that order, the letter is taken from ``evidence`` when the position
    is a key there, otherwise from ``suggestion``.

    Args:
        suggestion: Mapping of position name to letter for the positions
            that are not fixed by the evidence.
        evidence: Mapping of position name to letter; takes precedence over
            ``suggestion``.

    Returns:
        A list of five letters, ordered from first to fifth position.

    Raises:
        KeyError: If a position appears in neither mapping.
    """
    positions = ("first", "second", "third", "forth", "fifth")
    return [
        evidence[position] if position in evidence else suggestion[position]
        for position in positions
    ]

def word_is_valid(word, words, must_contain=[], must_not_contain=[], must_not_contain_at={}):
    """Check a candidate word against the word list and letter constraints.

    Args:
        word: Sequence of five letters (indexed 0..4 for first..fifth).
        words: Collection of known words; the candidate, joined into a
            single string, must be a member (tested with ``in``).
        must_contain: Letters that all have to occur somewhere in ``word``.
        must_not_contain: Letters that must not occur anywhere in ``word``.
        must_not_contain_at: Mapping of position name ("first", "second",
            "third", "forth", "fifth") to the letters forbidden at that
            position. Positions missing from the mapping are unconstrained.

    Returns:
        True when the word is known and violates none of the constraints,
        False otherwise.
    """
    candidate = "".join(map(str, word))
    if candidate not in words:
        return False

    # Every required letter has to be present somewhere in the word.
    if any(letter not in word for letter in must_contain):
        return False

    # No excluded letter may be present anywhere in the word.
    if any(letter in word for letter in must_not_contain):
        return False

    # Per-position exclusions, checked from the first to the fifth letter.
    for position, slot in enumerate(("first", "second", "third", "forth", "fifth")):
        if slot in must_not_contain_at and word[position] in must_not_contain_at[slot]:
            return False

    return True
    
def get_suggestion(infer, variables, evidence, must_contain=None, must_not_contain=None, must_not_contain_at=None):
    """Return the most probable known word that satisfies all constraints.

    Queries the joint distribution of ``variables`` given ``evidence``, then
    walks the joint assignments with non-zero probability from most to least
    probable. Each assignment is merged with the evidence into a five-letter
    candidate (``get_suggestion_word``); the first candidate accepted by
    ``word_is_valid`` is returned.

    Args:
        infer: Inference object providing ``query(variables, evidence=...)``.
        variables: Position names whose letters are to be predicted.
        evidence: Mapping of position name to the already known letter.
        must_contain: Letters that have to occur in the word (default: none).
        must_not_contain: Letters that must not occur in the word
            (default: none).
        must_not_contain_at: Mapping of position name to letters forbidden at
            that position (default: no positional restrictions).

    Returns:
        The winning candidate as a list of five letters, or an empty list
        when no assignment yields a valid word.

    Side effects:
        Prints the number of non-zero joint assignments and reads
        ``wordle-at-words.json`` from the working directory.
    """
    # None defaults avoid shared mutable default arguments and keep the
    # positional-constraint default a mapping, matching word_is_valid.
    must_contain = [] if must_contain is None else must_contain
    must_not_contain = [] if must_not_contain is None else must_not_contain
    must_not_contain_at = {} if must_not_contain_at is None else must_not_contain_at
    known_letters = {} if evidence is None else evidence

    q = infer.query(variables, evidence=evidence)

    # Flatten once; only assignments with non-zero probability are candidates.
    flat_values = q.values.flatten()
    count_predictions = int(np.count_nonzero(flat_values))
    # Indices of the flattened table, ordered by descending probability.
    max_value_indices = (-flat_values).argsort()[:count_predictions]

    print(count_predictions)

    # Load the word list up front so the file is closed before the search.
    with open("wordle-at-words.json", "r") as f:
        words = json.load(f)

    for max_value_index in max_value_indices:
        # Map the flat index back to one state index per query variable.
        indices = np.unravel_index(max_value_index, q.values.shape)

        # State labels are read from the query result itself instead of a
        # notebook-global model, so the function only depends on `infer`.
        # Relies on the returned factor exposing `state_names`, as pgmpy's
        # DiscreteFactor does.
        suggestion = {
            variable: q.state_names[variable][indices[index]]
            for index, variable in enumerate(q.variables)
        }

        word = get_suggestion_word(suggestion, known_letters)

        if word_is_valid(word, words, must_contain, must_not_contain, must_not_contain_at):
            return word

    return []

In [6]:
first = infer.map_query(["first"])["first"]
second = infer.map_query(["second"], evidence={"first": first})["second"]
third = infer.map_query(["third"], evidence={"first": first, "second": second})["third"]
forth = infer.map_query(["forth"], evidence={"first": first, "second": second, "third": third})["forth"]
fifth = infer.map_query(["fifth"], evidence={"first": first, "second": second, "third": third, "forth": forth})["fifth"]

print(first, second, third, forth, fifth)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

S A B E N


In [7]:
# Rows of the word list whose first three letters are S, A, B.
words.loc[
    words["first"].eq("S") & words["second"].eq("A") & words["third"].eq("B")
]

Unnamed: 0,first,second,third,forth,fifth
3397,S,A,B,O,R


In [12]:
# Predict the first and third letter given the known second, forth and fifth
# letters, excluding letters already ruled out and forbidding "F" at the
# first and third position.
suggestions = get_suggestion(
    infer,
    variables=["first", "third"],
    evidence=dict(second="I", forth="F", fifth="E"),
    must_contain=list("F"),
    must_not_contain=list("SABORTMU"),
    must_not_contain_at=dict(
        first=["F"],
        second=[],
        third=["F"],
        forth=[],
        fifth=[],
    ),
)

suggestions

0it [00:00, ?it/s]

0it [00:00, ?it/s]

1160


['L', 'I', 'E', 'F', 'E']