# Setup
## Some items necessary for importing lists

In [1]:
import math
import statistics
from bs4 import BeautifulSoup
import requests
import pandas as pd
# largest FIELD size (number of candidates) that is still printed in full;
# for a larger FIELD only the candidate count is printed
max_field_size = 17

In [2]:
def rev(word):
    """Return ``word`` with its letters in reverse order.

    Needed because one of the input files was generated with every word
    spelled backwards.
    """
    # join/reversed replaces the index loop that rebuilt the string one
    # character at a time (quadratic in the length of the word)
    return ''.join(reversed(word))

# Word lists
## Past Wordle words

In [None]:
# Download the page that lists past Wordle answers.
# Without a timeout requests.get can wait forever on an unresponsive server.
html_page = requests.get(
    'https://www.rockpapershotgun.com/wordle-past-answers',
    timeout=30,
)
# Stop here on an HTTP error instead of parsing an error page as if it were the word list.
html_page.raise_for_status()
soup = BeautifulSoup(html_page.content, 'html.parser')

In [None]:
# Every <li> inside the article body is treated as one entry of the past-answers page.
article_body = soup.find('div', class_="article_body")
if article_body is None:
    raise RuntimeError('Could not find the article_body <div>; the page layout may have changed')
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias
wordle_soup = article_body.find_all('li')
soup_size = len(wordle_soup)
past_wordle_words = set()
# Items 0-6: the word is the 5 characters that end 9 characters before the end of the
# item's HTML (presumably just before a closing '</a></li>' -- TODO confirm on the live page).
for i in range(7):
    past_wordle_words.add(str(wordle_soup[i])[-14:-9])

# Item 7 and the last three items are skipped; for the rest the word is characters 4-8
# of the item's HTML, i.e. right after an opening '<li>' (assumes the tag has no attributes).
for i in range(8, soup_size - 3):
    past_wordle_words.add(str(wordle_soup[i])[4:9])

len(past_wordle_words)

## All Wordle words

In [None]:
# Read the complete Wordle word list; the with-block closes the file even if read() fails.
with open("all_wordle_words.txt", "r") as my_file:
    data = my_file.read()
data_into_list = data.replace('\n', ' ').split(' ')

# Keep the 5-character tokens that contain none of the digits 0, 1 or 2, stored in upper case.
all_wordle_words = {
    word.upper()
    for word in data_into_list
    if len(word) == 5 and not any(digit in word for digit in '012')
}

len(all_wordle_words)

## AG words from Evernote
### First the original list, which needs editing

In [3]:
# Read the Evernote export; the with-block closes the file even if read() fails.
with open("evernote_words_2022_12_07.txt", "r") as my_file:
    data = my_file.read()
# Newlines count as separators too, so the result is one flat list of space-delimited tokens.
data_into_list = data.replace('\n', ' ').split(' ')

In [4]:
# un-reverse every token read from the Evernote file and drop duplicates
evernote_strings = set(map(rev, data_into_list))

In [5]:
# ag_words is the master set of all the words I have personally entered

ag_words = set()
for entry in evernote_strings:
    # a 10-character entry without a hyphen is read as two 5-letter words run together;
    # each half is kept unless it contains '[', ']' or '_'
    if len(entry) == 10 and '-' not in entry:
        for start in (0, 5):
            half = entry[start:start + 5]
            if not any(mark in half for mark in '[]_'):
                ag_words.add(half)
    # a 5-character entry is kept unless it contains '-', '_', ',', ']' or '*'
    if len(entry) == 5 and not any(mark in entry for mark in '-_,]*'):
        ag_words.add(entry)

len(ag_words)

1610

## Dictionary words

In [None]:
# NOTE: the original author flagged this dictionary file as unreliable.

dic_df = pd.read_csv('dictionary.csv')
# Take every entry of column 'A' rather than a hard-coded row count, which raised
# KeyError on a shorter file and silently ignored any extra rows.
dic_set = set(dic_df['A'])
dic_words = set()
for word in dic_set:
    # pandas reads empty cells as NaN (a float), which has no len(); skip non-strings
    if isinstance(word, str) and len(word) == 5 and '-' not in word and ' ' not in word:
        # NOTE(review): stored in lower case, while all_wordle_words is upper case -- confirm intent
        dic_words.add(word.lower())

'Elbow' in dic_set
# len(dic_df['A'])
# len(dic_set)
# len(dic_words)

### Then additional words, which do not need editing

In [6]:
# Read the additional words; the with-block closes the file even if read() fails.
with open("more_ag_words.txt", "r") as my_file:
    data = my_file.read()
data_into_list = data.replace('\n', ' ').split(" ")

# These words need no clean-up: every 5-character token is added as it is.
ag_words.update(word for word in data_into_list if len(word) == 5)

len(ag_words)

1660

# Core functions

In [7]:
def rep(word, place, letter):
    """Return a copy of ``word`` with the character at index ``place`` replaced by ``letter``.

    Helper for the mechanics of generating a SCORE: overwriting a letter that
    has already been matched ensures that, e.g., for a candidate such as DRILL
    a guess of LADDY generates only one yellow for D and one yellow for L.
    """
    before = word[:place]
    after = word[place + 1:]
    return before + letter + after

In [8]:
def gyb(candidate, guess):
    """Generate the SCORE that ``guess`` receives when ``candidate`` is the answer.

    A SCORE is a 5-character string with 'G' (green) where the guess letter is
    in the right place, 'Y' (yellow) where it occurs elsewhere in the
    candidate, and '_' (blank) otherwise.  Each candidate letter can account
    for at most one green or yellow.

    If either word is not 5 characters long a message is printed and None is
    returned.
    """
    if len(candidate) != 5:
        print('The candidate ' + candidate + ' is not 5 letters long.')
        return
    if len(guess) != 5:
        print('The guess is not 5 letters long.')
        return

    marks = ['_'] * 5
    guess_left = list(guess)
    answer_left = list(candidate)

    # first pass: greens; a matched letter is blanked out on both sides so
    # that it cannot be counted again
    for pos in range(5):
        if guess_left[pos] == answer_left[pos]:
            marks[pos] = 'G'
            guess_left[pos] = '?'
            answer_left[pos] = '_'

    # second pass: yellows; every remaining guess letter may use up one
    # remaining candidate letter
    for pos in range(5):
        for other in range(5):
            if guess_left[pos] == answer_left[other]:
                marks[pos] = 'Y'
                guess_left[pos] = '?'
                answer_left[other] = '_'

    return ''.join(marks)

In [9]:
# this establishes the set of all 3^5 SCOREs of the form GY__Y
# each number below 3^5 is written in base 3 and its five digits are mapped to _, G, Y

all_scores = set()
for code in range(3 ** 5):
    symbols = []
    remaining = code
    for _ in range(5):
        remaining, digit = divmod(remaining, 3)
        symbols.append('_GY'[digit])
    all_scores.add(''.join(symbols))

In [10]:
def avg(item):
    """Return the arithmetic mean of the values in ``item`` (via statistics.mean)."""
    mean_value = statistics.mean(item)
    return mean_value

# Dealing with Wordle records
These functions will explain and establish SCOREBOOKs and GAMESTATEs

In [11]:
def scorebook(field, book):
    """Build the SCOREBOOK for a FIELD of CANDIDATES and a BOOK of GUESSES.

    The SCOREBOOK is a dict with one entry per GUESS in ``book``.  Each entry
    is itself a dict that lists every possible SCORE (all of ``all_scores``)
    and associates it with the set of CANDIDATES from ``field`` that would
    produce that SCORE for this GUESS; the set is empty when there are none.
    """
    table = {}
    for guess in book:
        by_score = {score: set() for score in all_scores}
        for candidate in field:
            by_score[gyb(candidate, guess)].add(candidate)
        table[guess] = by_score
    return table

In [12]:
def gamestate(field, book):
    """Return the GAMESTATE for a FIELD of CANDIDATES and a BOOK of GUESSES.

    The FIELD holds the words that might be the ANSWER, the BOOK the words the
    player can guess; the two may or may not be equal.

    A GAMESTATE is a dict that contains:
      'field'     -- the FIELD
      'book'      -- the BOOK
      'scorebook' -- the SCOREBOOK, which has three levels:
                     A) it lists the GUESSES in the BOOK
                     B) at each GUESS, it lists all SCORES
                     C) at each SCORE, it lists each candidate (if any) that
                        would give that SCORE for this GUESS
    """
    return {
        'field': field,
        'book': book,
        'scorebook': scorebook(field, book),
    }

In [13]:
# GAMESTATE in which the personal ag_words set is both the FIELD and the BOOK
master_gamestate = gamestate(ag_words, ag_words)

In [14]:
# shortcut to the SCOREBOOK stored in the master GAMESTATE
master_scorebook = master_gamestate['scorebook']

In [None]:
# GAMESTATE in which the full all_wordle_words set is both the FIELD and the BOOK
# NOTE(review): scores every word against every other word, so this is presumably slow for a large list
expert_gamestate = gamestate(all_wordle_words, all_wordle_words)

# Optimizing Wordle
These functions will evaluate the strength of guesses and recommend best guesses

In [None]:
def add_score_size_logs(scorebook_guess, score_sizes):
    """Append the log subfield sizes for one GUESS to ``score_sizes`` and return it.

    ``scorebook_guess`` is the SCOREBOOK entry of a single GUESS (SCORE -> set
    of CANDIDATES).  For every SCORE with a non-empty set, the base-2 log of
    the set's size is appended once per candidate in the set.  ``score_sizes``
    may or may not be empty on entry; it is extended in place and returned.
    """
    for score in all_scores:
        bucket_size = len(scorebook_guess[score])
        if bucket_size > 0:
            # one copy of the log for each candidate that lands in this bucket
            score_sizes.extend([math.log(bucket_size, 2)] * bucket_size)
    return score_sizes

In [None]:
def tellscore(gamestate, guess):
    """Return the average log subfield size of ``guess`` in ``gamestate``.

    If ``guess`` is not in the GAMESTATE's BOOK, a message string saying so is
    returned instead of a number.
    """
    if guess in gamestate['book']:
        guess_entry = gamestate['scorebook'][guess]
        return avg(add_score_size_logs(guess_entry, []))
    return 'This guess is not in the book of eligible guesses'

In [None]:
def bestguess(gamestate):
    """Select a GUESS with the lowest average log subfield size.

    Every GUESS in the GAMESTATE's BOOK is rated with ``tellscore``.  On a tie
    a GUESS that belongs to the FIELD of CANDIDATES replaces the current
    choice, so GUESSes that could themselves be the ANSWER are preferred.

    Returns a two-item list ``[guess, average]``; for an empty BOOK this is
    ``['', 1000]``.
    """
    field = gamestate['field']
    best = ''
    # sentinel above any real average: an average of log2 subfield sizes never gets near 1000
    lowest_avg = 1000
    for guess in gamestate['book']:
        guess_avg = tellscore(gamestate, guess)
        if guess_avg < lowest_avg or (guess_avg == lowest_avg and guess in field):
            best = guess
            lowest_avg = guess_avg
    return [best, lowest_avg]

In [None]:
# recommended opening GUESS for the master GAMESTATE, together with its average log subfield size
bestguess(master_gamestate)

In [None]:
def new_gamestate(gamestate, guess, score):
    """Return the GAMESTATE that follows once ``guess`` has received ``score``.

    The new FIELD is the set of CANDIDATES that the old SCOREBOOK lists under
    this GUESS and SCORE; the BOOK is carried over unchanged and a fresh
    SCOREBOOK is computed for the new FIELD.
    """
    remaining = gamestate['scorebook'][guess][score]
    same_book = gamestate['book']
    return {
        'field': remaining,
        'book': same_book,
        'scorebook': scorebook(remaining, same_book),
    }

In [None]:
def nextguess(gs_0, guess_list, score_list):
    """Replay a Wordle game and print the remaining FIELD and the recommended next GUESS.

    gs_0       -- the starting GAMESTATE
    guess_list -- the GUESSes made so far as one string, 5 letters each (spaces are ignored)
    score_list -- the SCOREs received so far as one string, 5 characters each (spaces are ignored)

    There may be fewer GUESSes than SCOREs; for a turn without a GUESS the
    GUESS recommended by ``bestguess`` for the current GAMESTATE is used.

    On invalid input a message string is returned.  Otherwise the result is
    printed and nothing is returned: the FIELD itself when it has at most
    ``max_field_size`` candidates, its size when it has more.
    """
    score_list = score_list.replace(' ', '')
    if len(score_list) % 5 != 0:
        return 'One of the scores is the wrong length'
    score_count = len(score_list) // 5

    guess_list = guess_list.replace(' ', '')
    if len(guess_list) % 5 != 0:
        return 'One of the guesses is the wrong length'
    guess_count = len(guess_list) // 5

    if guess_count > score_count:
        return 'There are too many guesses'

    gs = gs_0

    for turn in range(score_count):
        start = 5 * turn
        guess = guess_list[start:start + 5]
        if guess == '':
            # no GUESS was supplied for this turn, so use the recommended one
            guess = bestguess(gs)[0]
        score = score_list[start:start + 5]
        # report an unknown GUESS as a message, like every other input problem,
        # instead of letting the scorebook lookup raise a bare KeyError
        if guess not in gs['scorebook']:
            return guess + ' is not in the book of eligible guesses'
        if score not in gs['scorebook'][guess]:
            return score + ' is not in the scorebook'
        if len(gs['scorebook'][guess][score]) == 0:
            return 'There are no candidates corresponding to these scores'
        gs = new_gamestate(gs, guess, score)

    field = gs['field']
    if len(field) > max_field_size:
        print('The field has ' + str(len(field)) + ' candidates')
    else:
        print('The field of candidates is')
        print(field)

    print('The recommended next guess is ' + bestguess(gs)[0])

In [None]:
# no GUESS is supplied, so the recommended opener is used for the first turn;
# the SCORE received for it is all blanks
nextguess(expert_gamestate, '', '_____')

# Assessing what's missing

In [None]:
# For the GUESS 'STERN', compare how many candidates each SCORE has in the
# official word list against how many it has in ag_words.
ag_scorebook = scorebook(ag_words, ['STERN'])['STERN']
official_scorebook = scorebook(all_wordle_words, ['STERN'])['STERN']
not_words = set()
score_compare = {
    pattern: len(official_scorebook[pattern]) - len(ag_scorebook[pattern])
    for pattern in all_scores
}
# report only the SCOREs where ag_words is short by more than 5 words
for score, shortfall in score_compare.items():
    if shortfall > 5:
        print(f'{score} is missing {shortfall} words')

In [None]:
# collect the ag_words entries that do not appear in all_wordle_words
not_words.update(word for word in ag_words if word not in all_wordle_words)
not_words

In [None]:
# number of words collected in not_words
len(not_words)

In [None]:
# SCOREs (for the GUESS 'STERN') that a past Wordle answer produces
# but for which the master SCOREBOOK lists no candidate at all
missing_logs = set()
for answer in past_wordle_words:
    pattern = gyb(answer, 'STERN')
    if not master_scorebook['STERN'][pattern]:
        missing_logs.add(pattern)
for pattern in missing_logs:
    print(pattern)

# Dealing with Quordle records
Some tweaks may be needed to deal with Quordle

# Optimizing Quordle

In [None]:
def nextguess_q(gs_0, guess_list, score_list):
    """Replay a Quordle game and print, per board, the remaining FIELD and a recommended GUESS.

    gs_0       -- the starting GAMESTATE, shared by all boards
    guess_list -- the GUESSes made so far as one string, 5 letters each (spaces are ignored)
    score_list -- a non-empty list with one string of SCOREs per board (at most four
                  boards), 5 characters per SCORE (spaces are ignored)

    The number of GUESSes must equal the largest number of SCOREs on any
    board.  A board with fewer SCOREs than that is marked inactive and is not
    reported (presumably it was solved on an earlier turn).

    On invalid input a message string is returned; otherwise the results are
    printed.
    """
    # strip spaces into a new list so that the caller's list is left untouched
    score_list = [scores.replace(' ', '') for scores in score_list]
    score_count = []
    for scores in score_list:
        if len(scores) % 5 != 0:
            return 'One of the scores is the wrong length'
        score_count.append(len(scores) // 5)

    guess_list = guess_list.replace(' ', '')
    if len(guess_list) % 5 != 0:
        return 'One of the guesses is the wrong length'
    guess_count = len(guess_list) // 5

    turns = max(score_count)

    if guess_count != turns:
        return 'The number of guesses needs to match the maximum number of scores'

    gs = [gs_0, gs_0, gs_0, gs_0]
    still_active = [True, True, True, True]

    for i in range(len(score_list)):

        if score_count[i] == turns:

            for turn in range(turns):
                guess = guess_list[(5*turn) : (5*turn) + 5]
                score = score_list[i][(5*turn) : (5*turn) + 5]
                # report an unknown GUESS as a message instead of a bare KeyError
                if guess not in gs[i]['scorebook']:
                    return guess + ' is not in the book of eligible guesses'
                if score not in gs[i]['scorebook'][guess]:
                    return score + ' is not in the scorebook'
                elif len(gs[i]['scorebook'][guess][score]) == 0:
                    return 'There are no candidates corresponding to these scores'
                else:
                    # narrow this board's own state; starting again from gs_0
                    # would throw away what the earlier turns ruled out
                    gs[i] = new_gamestate(gs[i], guess, score)

            print('Board #' + str(i + 1))
            if len(gs[i]['field']) > max_field_size:
                print('The field has ' + str(len(gs[i]['field'])) + ' candidates')
            else:
                print('The field of candidates is')
                print(gs[i]['field'])
            print('The recommended next guess is ' + bestguess(gs[i])[0])

        else:
            still_active[i] = False