# App

In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from joblib import Parallel, delayed

## Load Words

### Wordle Words

In [2]:
# Load the two Wordle word lists; each JSON file stores its list under the 'words' key.
with open('data/wordle-candidates.json', 'r') as file:
    wordle_candidates = json.load(file)

with open('data/wordle-answers.json', 'r') as file:
    wordle_answers = json.load(file)

# One row per word, flagged by which list it came from (1 = answers list, 0 = candidates list).
wordle_candidates = pd.DataFrame(wordle_candidates['words'], columns=['word'])
wordle_answers = pd.DataFrame(wordle_answers['words'], columns=['word'])
wordle_candidates['is_answer'] = 0
wordle_answers['is_answer'] = 1

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the same way
# (candidates first, then answers) and ignore_index gives a fresh 0..n-1 index.
wordle = pd.concat([wordle_candidates, wordle_answers], ignore_index=True)

### Popular Words

In [3]:
# Read the word-frequency table. Only three column names are supplied, and the word itself
# ends up in the index, so it is recovered with reset_index and renamed to 'word'.
words_all = (
    pd.read_table('data/archive/en_words_1_5-5.txt', delimiter=' ', header=None, index_col=None,
                  names=['word_len', 'word_freq', 'n_articles'])
    .reset_index()
    .rename(columns={'index': 'word'})
)

# Keep only words spelled entirely with lowercase a-z
alphabet = list('abcdefghijklmnopqrstuvwxyz')
words_all = words_all.loc[words_all.word.apply(lambda w: set(w) <= set(alphabet))].reset_index(drop=True)

# Optional minimum-frequency cut-off (currently disabled)
# words_all = words_all.loc[words_all.word_freq.ge(10)]

In [4]:
# Preview the filtered popular-words table (word, word_len, word_freq, n_articles)
words_all

Unnamed: 0,word,word_len,word_freq,n_articles
0,which,5,1220752,890394
1,first,5,1033698,751444
2,known,5,742591,654233
3,after,5,694687,537462
4,their,5,655785,443953
...,...,...,...,...
154840,showi,5,2,2
154841,gceap,5,2,1
154842,neroc,5,2,1
154843,hipep,5,2,1


## Game Logic

In [5]:
def get_feedback(input_word, solution):
    """Return the Wordle feedback string for a 5-letter guess against a 5-letter solution.

    Each position gets one character:
      'G' - the letter is correct and in the right position,
      'Y' - the letter occurs in the solution at another position,
      'X' - the letter is not (or no longer) available in the solution.

    Repeated letters follow the game's rule: a letter is marked 'Y' at most as many
    times as the solution still contains it after the exact ('G') matches have been
    accounted for. For example, 'speed' against 'abide' yields 'XXYXY', not 'XXYYY'.
    """
    marks = ['X'] * 5

    # First pass: mark exact matches and count the solution letters left unmatched.
    unmatched = {}
    for i in range(5):
        if input_word[i] == solution[i]:
            marks[i] = 'G'
        else:
            unmatched[solution[i]] = unmatched.get(solution[i], 0) + 1

    # Second pass: hand out yellows left to right while unmatched copies remain.
    for i in range(5):
        letter = input_word[i]
        if marks[i] != 'G' and unmatched.get(letter, 0) > 0:
            marks[i] = 'Y'
            unmatched[letter] -= 1

    return ''.join(marks)

In [6]:
def filter_wordset(input_word, feedback, wordset):
    """Return the rows of ``wordset`` that are still consistent with a guess and its feedback.

    Parameters
    ----------
    input_word : str
        The 5-letter guess. Its letters are passed to pandas ``.str`` methods as
        patterns, so it is assumed to consist of plain letters.
    feedback : str
        Five characters, one per position: 'G' (right letter, right place),
        'Y' (letter present elsewhere); any other character is treated as 'X' (gray).
    wordset : pandas.DataFrame
        Candidate words in a ``word`` column. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered copy of ``wordset`` (original index labels are kept).

    Notes
    -----
    A gray mark on a letter that is also green/yellow elsewhere in the same guess does
    not mean the letter is absent: it means the solution holds exactly as many copies
    as were marked green/yellow. Such words are kept instead of being discarded.
    """
    newset = wordset.copy()

    # How many times each guessed letter was confirmed (green or yellow).
    confirmed = {}
    for letter, mark in zip(input_word, feedback):
        if mark in ('G', 'Y'):
            confirmed[letter] = confirmed.get(letter, 0) + 1

    for i in range(5):
        letter = input_word[i]
        at_position = newset.word.str[i] == letter
        if feedback[i] == 'G':
            newset = newset.loc[at_position]
        elif feedback[i] == 'Y':
            # Present in the word, but not at this position.
            newset = newset.loc[newset.word.str.contains(letter) & ~at_position]
        elif letter in confirmed:
            # Gray on a repeated letter: not here, and no more copies than were confirmed.
            newset = newset.loc[~at_position & (newset.word.str.count(letter) == confirmed[letter])]
        else:
            # Gray with no confirmed copy: the letter does not occur at all.
            newset = newset.loc[~newset.word.str.contains(letter)]
    return newset

In [7]:
def compute_letter_frequencies(wordset):
    """Add one 0/1 indicator column per letter a-z and return all but the first column.

    For every letter, the new column is 1 where the row's ``word`` contains that letter
    and 0 otherwise. The input frame is left untouched. The result drops only the
    first column by position, so any other pre-existing columns are returned
    alongside the 26 letter columns.
    """
    letter_flags = {
        ch: wordset.word.str.contains(ch).astype(int)
        for ch in 'abcdefghijklmnopqrstuvwxyz'
    }
    # assign() works on a copy and appends the letter columns in a-z order.
    return wordset.assign(**letter_flags).iloc[:, 1:]

def compute_score(x, freqs):
    """Sum ``freqs[letter]`` over the distinct letters of ``x``.

    Each letter counts once no matter how often it repeats in ``x``. A letter
    missing from ``freqs`` raises ``KeyError``.
    """
    return sum(freqs[ch] for ch in set(x))

In [8]:
# Column sums of the letter indicators: for each letter, the number of words containing it
# (the dict also carries the sums of any non-letter columns that were passed through).
global_freqs = compute_letter_frequencies(wordle).sum().to_dict()

# Score every word by its distinct letters and rank from highest to lowest score.
global_scores = pd.DataFrame({
    'word': wordle.word,
    'score': wordle.word.apply(compute_score, freqs=global_freqs),
}).sort_values('score', ascending=False)

In [9]:
def compute_pos_letter_distribution(wordset):
    """Count, for each of the five positions, how often every letter occurs there.

    Returns a dict keyed by position (0-4); each value is a ``{letter: count}`` dict
    built from the ``word`` column of ``wordset``.
    """
    return {
        position: wordset.word.str[position].value_counts().to_dict()
        for position in range(5)
    }

In [10]:
def compute_pos_score(letters, pos_scores):
    """Sum the positional counts of each letter in ``letters``.

    ``pos_scores[i]`` maps a letter to its count at position ``i``; letters with no
    entry at their position contribute 0.
    """
    return sum(pos_scores[idx].get(ch, 0) for idx, ch in enumerate(letters))

### Compute Letter Frequency by Position

In [11]:
# Per-position letter counts over the full word list
pos_scores = compute_pos_letter_distribution(wordle)

# Score every word by how common each of its letters is at its position, highest first.
global_fl_scores = pd.DataFrame({
    'word': wordle.word,
    'score': wordle.word.apply(compute_pos_score, pos_scores=pos_scores),
}).sort_values('score', ascending=False)

In [12]:
# Attach usage statistics (word_freq, n_articles) from the popular-words table.
# Any existing copies of these columns are dropped first so the cell can be re-run safely:
# merging twice would otherwise create word_freq_x / word_freq_y and break later
# lookups of `word_freq`. Words missing from the popular-words table get 0.
wordle = (
    wordle.drop(columns=['word_freq', 'n_articles'], errors='ignore')
    .merge(words_all[['word', 'word_freq', 'n_articles']], how='left', on='word')
    .fillna(0)
)

# Same enrichment for the positional score table
global_fl_scores = (
    global_fl_scores.drop(columns=['word_freq', 'n_articles'], errors='ignore')
    .merge(words_all[['word', 'word_freq', 'n_articles']], how='left', on='word')
    .fillna(0)
)

In [24]:
def app_wordle_double(input_word1, input_word2):
    """Interactive Wordle helper with two fixed opening guesses, ranked by positional letter counts.

    The first two guesses are ``input_word1`` and ``input_word2``. After each guess the
    user types the feedback shown by the game (5 characters of G/Y/X, any case), or
    'quit' / 'q' to stop. From the third guess on, the ten best remaining candidates
    are displayed and the user types the word they actually played.

    Candidates come from the notebook-level ``wordle`` frame (columns ``word``,
    ``word_freq`` and ``n_articles`` are read). Candidates are ranked by
    ``compute_pos_score`` on the remaining set, ties broken by ``word_freq``.

    Returns None. Progress is reported with ``print`` and ``display`` (the latter is
    provided by the IPython/Jupyter session).
    """
    wordset = wordle.copy()
    tested_words = []
    n_iter = 1

    input_word = input_word1

    # The loop is left only through the explicit returns below.
    while True:

        # Check solution
        feedback = input('Enter feedback:').strip()
        if feedback.lower() in ['quit', 'q']:
            return
        elif feedback.lower() == 'ggggg':
            print(f'Solved in {n_iter} steps.')
            return
        if len(feedback) != 5 or set(feedback.upper()) - set('GYX'):
            # Malformed feedback would crash or silently mis-filter; ask again instead.
            print('Feedback must be 5 characters using only G, Y and X (or "quit").')
            continue
        tested_words.append(input_word)
        print(f'[{n_iter}] Input: {input_word} | Feedback: {feedback}')

        # Filter wordset
        wordset = filter_wordset(input_word, feedback.upper(), wordset)
        wordset = wordset.loc[~wordset.word.isin(tested_words)]

        if wordset.empty:
            # Nothing matches the feedback entered so far; continuing could never finish.
            print('No candidate words left - please check the feedback that was entered.')
            return

        # Compute letter distribution of updated wordset
        pos_scores = compute_pos_letter_distribution(wordset)

        # Obtain scores, best first (ties on score broken by word frequency)
        scores = wordset.word.apply(compute_pos_score, pos_scores=pos_scores)
        scores = pd.DataFrame({'word': wordset.word, 'score': scores,
                               'word_freq': wordset.word_freq,
                               'n_articles': wordset.n_articles}) \
            .sort_values(['score', 'word_freq'], ascending=False)

        # Set new input word
        if len(tested_words) < 2:
            # The second guess is fixed in advance.
            input_word = input_word2
        else:
            display(scores.head(10))
            input_word = input('Choose a word:').strip().lower()
            if input_word in ['quit', 'q']:
                return
        n_iter += 1

In [22]:
def app_wordle_global_double(input_word1, input_word2):
    """Interactive Wordle helper with two fixed opening guesses, ranked by overall letter counts.

    The first two guesses are ``input_word1`` and ``input_word2``. After each guess the
    user types the feedback shown by the game (5 characters of G/Y/X, any case), or
    'quit' / 'q' to stop. From the third guess on, the ten best remaining candidates
    are displayed and the user types the word they actually played.

    Candidates come from the notebook-level ``wordle`` frame (columns ``word`` and
    ``word_freq`` are read). Candidates are ranked by ``compute_score`` using the
    letter counts of the remaining set, ties broken by ``word_freq``.

    Returns None. Progress is reported with ``print`` and ``display`` (the latter is
    provided by the IPython/Jupyter session).
    """
    wordset = wordle.copy()
    tested_words = []
    n_iter = 1

    input_word = input_word1

    # The loop is left only through the explicit returns below.
    while True:

        # Check solution
        feedback = input('Enter feedback:').strip()
        if feedback.lower() in ['quit', 'q']:
            return
        elif feedback.lower() == 'ggggg':
            print(f'Solved in {n_iter} steps.')
            return
        if len(feedback) != 5 or set(feedback.upper()) - set('GYX'):
            # Malformed feedback would crash or silently mis-filter; ask again instead.
            print('Feedback must be 5 characters using only G, Y and X (or "quit").')
            continue
        tested_words.append(input_word)
        print(f'[{n_iter}] Input: {input_word} | Feedback: {feedback}')

        # Filter wordset
        wordset = filter_wordset(input_word, feedback.upper(), wordset)
        wordset = wordset.loc[~wordset.word.isin(tested_words)]

        if wordset.empty:
            # Nothing matches the feedback entered so far; continuing could never finish.
            print('No candidate words left - please check the feedback that was entered.')
            return

        # Compute letter distribution of updated wordset
        freqs = compute_letter_frequencies(wordset).sum().to_dict()

        # Obtain scores, best first (ties on score broken by word frequency)
        scores = wordset.word.apply(compute_score, freqs=freqs)
        scores = pd.DataFrame({'word': wordset.word, 'score': scores, 'word_freq': wordset.word_freq}) \
            .sort_values(['score', 'word_freq'], ascending=False)

        # Set new input word
        if len(tested_words) < 2:
            # The second guess is fixed in advance.
            input_word = input_word2
        else:
            display(scores.head(10))
            input_word = input('Choose a word:').strip().lower()
            if input_word in ['quit', 'q']:
                return
        n_iter += 1

## Run

In [None]:
# Start an interactive session that opens with the fixed guesses 'tales' and then 'corni'
app_wordle_double('tales', 'corni')

Enter feedback: yyxyx


[1] Input: tales | Feedback: yyxyx


Enter feedback: xxxxx


[2] Input: corni | Feedback: xxxxx


Unnamed: 0,word,score,word_freq,n_articles
749,beath,51,72.0,33.0
751,beaty,50,192.0,93.0
10678,death,47,151842.0,121176.0
5591,meath,47,3046.0,1490.0
10667,heath,46,8879.0,5218.0
11948,meaty,46,93.0,81.0
6673,peaty,45,124.0,113.0
753,beaut,43,16.0,12.0
830,betta,40,291.0,148.0
10759,agate,38,386.0,201.0


Choose a word: beath
Enter feedback: yyggx


[3] Input: beath | Feedback: yyggx


Unnamed: 0,word,score,word_freq,n_articles
10675,abate,5,317.0,166.0


In [17]:
# Start an interactive session that opens with the fixed guesses 'soare' and then 'rutin'
app_wordle_global_double('soare', 'rutin')

Enter feedback: quit
