# App

In [25]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from joblib import Parallel, delayed

## Load Words

### Wordle Words

In [26]:
# Load the two Wordle word lists; each JSON file keeps its words under the
# 'words' key.
with open('data/wordle-candidates.json', 'r') as file:
    wordle_candidates = json.load(file)

with open('data/wordle-answers.json', 'r') as file:
    wordle_answers = json.load(file)

# One-column frames, flagged by origin: 0 = candidates list, 1 = answers list.
wordle_candidates = pd.DataFrame(wordle_candidates['words'], columns=['word'])
wordle_answers = pd.DataFrame(wordle_answers['words'], columns=['word'])
wordle_candidates['is_answer'] = 0
wordle_answers['is_answer'] = 1

# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# stacks the frames and renumbers the rows from 0 in one step.
wordle = pd.concat([wordle_candidates, wordle_answers], ignore_index=True)

### Popular Words

In [27]:
# Read the space-delimited frequency table. Only three column names are given,
# and the index produced by the read is turned back into a column and renamed
# to 'word'.
words_all = (
    pd.read_table('data/archive/en_words_1_5-5.txt', delimiter=' ', header=None,
                  index_col=None, names=['word_len', 'word_freq', 'n_articles'])
    .reset_index()
    .rename(columns={'index': 'word'})
)

# Keep only words spelled entirely with lowercase a-z
alphabet = list('abcdefghijklmnopqrstuvwxyz')
only_plain_letters = words_all.word.apply(lambda x: all(ch in alphabet for ch in x))
words_all = words_all.loc[only_plain_letters].reset_index(drop=True)

# Keep words whose frequency is at least 100
words_all = words_all.loc[words_all.word_freq >= 100]

In [96]:
# Inspect the word-frequency table after the alphabet and frequency filters.
words_all

Unnamed: 0,word,word_len,word_freq,n_articles
0,which,5,1220752,890394
1,first,5,1033698,751444
2,known,5,742591,654233
3,after,5,694687,537462
4,their,5,655785,443953
...,...,...,...,...
12600,baras,5,100,59
12601,wurtz,5,100,49
12602,jafri,5,100,56
12603,makki,5,100,46


## Game Logic

In [28]:
def get_feedback(input_word, solution):
    """Score a guess against the solution using Wordle's colouring rules.

    Parameters
    ----------
    input_word : str
        The guessed word.
    solution : str
        The word being guessed.

    Returns
    -------
    str
        One character per compared position: 'G' when the letter is in the
        right place, 'Y' when it occurs elsewhere in the solution, 'X'
        otherwise. Positions are paired with ``zip``, so the comparison covers
        the length of the shorter word.

    Notes
    -----
    Repeated letters are handled the way the game does: a letter is marked 'Y'
    at most as many times as the solution has copies of it that are not
    already matched exactly. For example, 'speed' against 'abide' yields
    'XXYXY' - only the first unmatched 'e' is yellow.
    """
    # Solution letters that were not matched in place; only these can still
    # earn a yellow, and each one can be used once.
    unmatched = [sol for guess, sol in zip(input_word, solution) if guess != sol]

    output = ''
    for guess_letter, solution_letter in zip(input_word, solution):
        if guess_letter == solution_letter:
            output += 'G'
        elif guess_letter in unmatched:
            # Consume the copy so a later duplicate cannot claim it again.
            unmatched.remove(guess_letter)
            output += 'Y'
        else:
            output += 'X'
    return output

In [29]:
def filter_wordset(input_word, feedback, wordset):
    """Keep only the words that are consistent with a guess and its feedback.

    Parameters
    ----------
    input_word : str
        The guessed word.
    feedback : str
        One character per letter of ``input_word``: 'G' (right letter, right
        place), 'Y' (letter present, wrong place); any other character is
        treated as grey.
    wordset : pandas.DataFrame
        Candidate words in a ``word`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows of ``wordset`` still compatible with the feedback, with their
        original index labels.

    Notes
    -----
    A grey mark on a letter that is also marked 'G' or 'Y' elsewhere in the
    same guess does not mean the letter is absent: the game greys out surplus
    copies of a repeated letter. In that case the word must not have the
    letter at the grey position and must contain exactly as many copies as
    were marked 'G'/'Y'. Only a letter that is grey everywhere in the guess
    removes every word containing it.
    """
    newset = wordset.copy()

    # How many copies of each guessed letter the feedback confirms as present.
    confirmed = {}
    for letter, mark in zip(input_word, feedback):
        if mark in ('G', 'Y'):
            confirmed[letter] = confirmed.get(letter, 0) + 1

    for i in range(len(input_word)):
        letter = input_word[i]
        mark = feedback[i]
        at_position = newset.word.str[i] == letter

        if mark == 'G':
            newset = newset.loc[at_position]
        elif mark == 'Y':
            # regex=False: match the character literally, never as a pattern.
            present = newset.word.str.contains(letter, regex=False)
            newset = newset.loc[present & ~at_position]
        elif letter in confirmed:
            # Surplus copy of a repeated letter: the count is now known exactly.
            n_copies = newset.word.map(lambda w: w.count(letter))
            newset = newset.loc[~at_position & (n_copies == confirmed[letter])]
        else:
            newset = newset.loc[~newset.word.str.contains(letter, regex=False)]
    return newset

In [30]:
def compute_letter_frequencies(wordset):
    """Add one 0/1 column per letter a-z telling whether each word contains it.

    The input frame is left untouched. The returned frame holds every column
    of ``wordset`` except the first one, followed by the 26 letter columns.
    """
    indicators = {
        ch: wordset.word.str.contains(ch).astype(int)
        for ch in 'abcdefghijklmnopqrstuvwxyz'
    }
    # Positional slice: the leading column is dropped whatever its name is.
    return wordset.assign(**indicators).iloc[:, 1:]

def compute_score(x, freqs):
    """Sum the frequency of each distinct letter of ``x``.

    Repeated letters count once. ``freqs`` maps a letter to its frequency and
    must have an entry for every letter of ``x``.
    """
    return sum(freqs[ch] for ch in set(x))

In [31]:
# Letter frequencies over the full word list: for each letter, the number of
# words that contain it.
global_freqs = compute_letter_frequencies(wordle).sum().to_dict()

# Score every word by the summed frequency of its distinct letters, best first.
global_scores = pd.DataFrame({
    'word': wordle.word,
    'score': wordle.word.apply(compute_score, freqs=global_freqs),
}).sort_values('score', ascending=False)

In [32]:
def compute_pos_letter_distribution(wordset):
    """Count, for each of the five letter positions, how often each letter occurs.

    Returns a dict keyed by position (0-4); each value is a
    ``{letter: count}`` dict computed over ``wordset.word``.
    """
    return {
        position: wordset.word.str[position].value_counts().to_dict()
        for position in range(5)
    }

In [33]:
def compute_pos_score(letters, pos_scores):
    """Sum, over the positions of ``letters``, the count of that letter there.

    ``pos_scores`` maps a position to a ``{letter: count}`` dict; a letter
    missing from its position's dict contributes 0.
    """
    return sum(
        pos_scores[position].get(letter, 0)
        for position, letter in enumerate(letters)
    )

### Compute Letter Frequency by Position

In [34]:
# Per-position letter counts over the full word list.
pos_scores = compute_pos_letter_distribution(wordle)

# Score every word by how common each of its letters is in its own position,
# best first.
global_fl_scores = pd.DataFrame({
    'word': wordle.word,
    'score': wordle.word.apply(compute_pos_score, pos_scores=pos_scores),
}).sort_values('score', ascending=False)

In [35]:
# Attach the popularity columns to both frames. Words missing from words_all
# end up with 0 because every NaN left by the left join is filled with 0.
word_stats = words_all[['word', 'word_freq', 'n_articles']]

wordle = wordle.merge(word_stats, how='left', on='word').fillna(0)

global_fl_scores = global_fl_scores.merge(word_stats, how='left', on='word').fillna(0)

In [36]:
def app_wordle_double(input_word1, input_word2):
    """Interactive Wordle helper that opens with two fixed guesses.

    The first two guesses are ``input_word1`` and ``input_word2``. After each
    guess the user types the feedback the game gave, one character per letter
    (G = green, Y = yellow, X = grey; case-insensitive). From the third guess
    on, the remaining candidates are ranked by the positional letter score
    (``compute_pos_score``) and the top 10 are displayed so the user can pick
    the next word. Typing ``quit`` at either prompt ends the session.

    Parameters
    ----------
    input_word1 : str
        First guess.
    input_word2 : str
        Second guess, used regardless of the first feedback.

    Returns
    -------
    None
        Everything is reported through ``print`` / ``display``.
    """
    word_len = 5  # the position scores are defined for 5-letter words
    wordset = wordle.copy()
    tested_words = []
    n_iter = 1
    input_word = input_word1

    # The loop ends only through the explicit returns below
    # (quit, solved, or no candidates left).
    while True:
        feedback = input('Enter feedback:').strip()
        if feedback.lower() == 'quit':
            return
        if feedback.lower() == 'ggggg':
            print(f'Solved in {n_iter} steps.')
            return

        # Re-prompt on malformed feedback instead of failing while filtering
        # or silently treating a typo as a grey letter.
        if len(feedback) != word_len or not set(feedback.upper()) <= set('GYX'):
            print(f'Feedback must be {word_len} characters from G, Y, X (or "quit").')
            continue

        tested_words.append(input_word)
        print(f'[{n_iter}] Input: {input_word} | Feedback: {feedback}')

        # Filter wordset
        wordset = filter_wordset(input_word, feedback.upper(), wordset)
        wordset = wordset.loc[~wordset.word.isin(tested_words)]

        # Nothing is compatible with the feedback entered so far: stop rather
        # than keep asking for feedback on the same word.
        if wordset.empty:
            print('No candidate words left - check the feedback entered so far.')
            return

        # Set new input word
        if len(tested_words) < 2:
            input_word = input_word2
        else:
            # Rank the remaining words by position-wise letter counts
            pos_scores = compute_pos_letter_distribution(wordset)
            scores = wordset.word.apply(compute_pos_score, pos_scores=pos_scores)
            scores = pd.DataFrame({'word': wordset.word, 'score': scores,
                                   'word_freq': wordset.word_freq,
                                   'n_articles': wordset.n_articles}) \
                .sort_values('score', ascending=False)
            display(scores.head(10))

            # Ask until a word of the right length (or 'quit') is entered.
            while True:
                input_word = input('Choose a word:').strip().lower()
                if input_word == 'quit':
                    return
                if len(input_word) == word_len:
                    break
                print(f'Please enter a {word_len}-letter word (or "quit").')
        n_iter += 1

In [37]:
def app_wordle_global_double(input_word1, input_word2):
    """Interactive Wordle helper with two fixed openers and letter-frequency ranking.

    The first two guesses are ``input_word1`` and ``input_word2``. After each
    guess the user types the feedback the game gave, one character per letter
    (G = green, Y = yellow, X = grey; case-insensitive). From the third guess
    on, the remaining candidates are ranked by the summed frequency of their
    distinct letters (``compute_score``), ties broken by ``word_freq``, and
    the top 10 are displayed so the user can pick the next word. Typing
    ``quit`` at either prompt ends the session.

    Parameters
    ----------
    input_word1 : str
        First guess.
    input_word2 : str
        Second guess, used regardless of the first feedback.

    Returns
    -------
    None
        Everything is reported through ``print`` / ``display``.
    """
    word_len = 5  # feedback is expected for 5-letter words ('ggggg' = solved)
    wordset = wordle.copy()
    tested_words = []
    n_iter = 1
    input_word = input_word1

    # The loop ends only through the explicit returns below
    # (quit, solved, or no candidates left).
    while True:
        feedback = input('Enter feedback:').strip()
        if feedback.lower() == 'quit':
            return
        if feedback.lower() == 'ggggg':
            print(f'Solved in {n_iter} steps.')
            return

        # Re-prompt on malformed feedback instead of failing while filtering
        # or silently treating a typo as a grey letter.
        if len(feedback) != word_len or not set(feedback.upper()) <= set('GYX'):
            print(f'Feedback must be {word_len} characters from G, Y, X (or "quit").')
            continue

        tested_words.append(input_word)
        print(f'[{n_iter}] Input: {input_word} | Feedback: {feedback}')

        # Filter wordset
        wordset = filter_wordset(input_word, feedback.upper(), wordset)
        wordset = wordset.loc[~wordset.word.isin(tested_words)]

        # Nothing is compatible with the feedback entered so far: stop rather
        # than keep asking for feedback on the same word.
        if wordset.empty:
            print('No candidate words left - check the feedback entered so far.')
            return

        # Set new input word
        if len(tested_words) < 2:
            input_word = input_word2
        else:
            # Letter frequencies of the remaining words drive the ranking
            freqs = compute_letter_frequencies(wordset).sum().to_dict()
            scores = wordset.word.apply(compute_score, freqs=freqs)
            scores = pd.DataFrame({'word': wordset.word, 'score': scores,
                                   'word_freq': wordset.word_freq}) \
                .sort_values(['score', 'word_freq'], ascending=False)
            display(scores.head(10))

            # Ask until a word of the right length (or 'quit') is entered.
            while True:
                input_word = input('Choose a word:').strip().lower()
                if input_word == 'quit':
                    return
                if len(input_word) == word_len:
                    break
                print(f'Please enter a {word_len}-letter word (or "quit").')
        n_iter += 1

## Run

In [91]:
# Interactive session: open with 'pares' then 'doilt', ranking by position scores.
app_wordle_double('pares', 'doilt')

Enter feedback: xxgyy


[1] Input: pares | Feedback: xxgyy


Enter feedback: xxxxx


[2] Input: doilt | Feedback: xxxxx


Unnamed: 0,word,score,word_freq,n_articles
4026,herse,40,0.0,0.0
5643,merse,40,0.0,0.0
8115,serge,40,2225.0,1543.0
8122,serre,40,332.0,204.0
11464,verse,40,7694.0,5712.0
10666,serve,39,35195.0,32791.0
1303,burse,35,0.0,0.0
11379,nurse,35,5961.0,4255.0
12372,curse,35,2909.0,1951.0
12666,surge,35,1662.0,1348.0


Choose a word: herse
Enter feedback: xggyg


[3] Input: herse | Feedback: xggyg


Unnamed: 0,word,score,word_freq,n_articles
8115,serge,13,2225.0,1543.0
8122,serre,13,332.0,204.0
10666,serve,13,35195.0,32791.0


Choose a word: quit


In [95]:
# Interactive session: open with 'resin' then 'ploat', ranking by letter frequencies.
app_wordle_global_double('resin', 'ploat')

Enter feedback: yxxxx


[1] Input: resin | Feedback: yxxxx


Enter feedback: xxxyx


[2] Input: ploat | Feedback: xxxyx


Unnamed: 0,word,score
1417,carby,252.0
677,bardy,250.0
1420,cardy,250.0
2094,darcy,250.0
687,barmy,248.0
276,ambry,248.0
5512,mardy,246.0
685,barky,246.0
1152,braky,246.0
1528,chary,245.0


Choose a word: carby
Enter feedback: xggxx


[3] Input: carby | Feedback: xggxx


Unnamed: 0,word,score
3394,garum,23.0
2098,darga,20.0
3387,garda,20.0
5518,marka,20.0
10670,karma,20.0
2,aargh,19.0
5526,marra,18.0


Choose a word: garum
Enter feedback: xggxy


[4] Input: garum | Feedback: xggxy


Unnamed: 0,word,score
5518,marka,11.0
10670,karma,11.0
5526,marra,9.0


KeyboardInterrupt: Interrupted by user