# Human App

In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from joblib import Parallel, delayed

## Load Words

### Wordle Words

In [2]:
# Load the two Wordle word lists (candidate guesses and answers); each JSON
# file is expected to hold its words under the 'words' key.
with open('data/wordle-candidates.json', 'r') as file:
    wordle_candidates = json.load(file)

with open('data/wordle-answers.json', 'r') as file:
    wordle_answers = json.load(file)

wordle_candidates = pd.DataFrame(wordle_candidates['words'], columns=['word'])
wordle_answers = pd.DataFrame(wordle_answers['words'], columns=['word'])

# Flag which list every word came from before stacking them.
wordle_candidates['is_answer'] = 0
wordle_answers['is_answer'] = 1

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and yields the same stacked frame (candidates first, then answers).
wordle = pd.concat([wordle_candidates, wordle_answers]).reset_index(drop=True)

### Popular Words

In [3]:
# Read the word-frequency table. After parsing, the word itself sits in the
# index (presumably the file has one more column than `names` -- verify against
# the data file), so it is moved into a regular `word` column.
words_all = (
    pd.read_table('data/archive/en_words_1_5-5.txt', delimiter=' ', header=None, index_col=None,
                  names=['word_len', 'word_freq', 'n_articles'])
    .reset_index()
    .rename(columns={'index': 'word'})
)

# Keep only words spelled entirely with lowercase a-z.
alphabet = list('abcdefghijklmnopqrstuvwxyz')
words_all = words_all.loc[
    words_all.word.apply(lambda x: all(ch in alphabet for ch in x))
].reset_index(drop=True)

# Optional minimum-frequency filter (disabled; note the threshold below is 10):
# words_all = words_all.loc[words_all.word_freq.ge(10)]

In [8]:
def compute_letter_frequencies(wordset):
    """Add one 0/1 indicator column per letter a-z to a copy of ``wordset``.

    Each indicator is 1 when the row's ``word`` contains that letter at least
    once. The input frame is not modified.

    Returns the augmented copy without its first column, so any other leading
    columns of ``wordset`` (beyond the first) are still part of the result.
    """
    indicators = {
        ch: wordset.word.str.contains(ch).astype(int)
        for ch in 'abcdefghijklmnopqrstuvwxyz'
    }
    return wordset.assign(**indicators).iloc[:, 1:]

def compute_score(x, freqs):
    """Score word ``x`` as the sum of ``freqs`` over its distinct letters.

    A repeated letter is counted once. ``freqs`` must have an entry for every
    letter in ``x`` (a missing key raises ``KeyError``).
    """
    return sum(freqs[letter] for letter in set(x))

In [5]:
# Number of words containing each letter, over the full Wordle list. The dict
# also carries the summed non-letter columns returned by
# compute_letter_frequencies (here `is_answer`); only letter keys are looked up.
global_freqs = compute_letter_frequencies(wordle).sum().to_dict()

# Score every word by the frequencies of its distinct letters, best first.
global_scores = (
    wordle[['word']]
    .assign(score=wordle.word.apply(compute_score, freqs=global_freqs))
    .sort_values('score', ascending=False)
)

## Game Logic

In [6]:
def get_feedback(input_word, solution):
    """Return a 5-character feedback string for ``input_word`` against ``solution``.

    Per position: 'G' when the letters match, 'Y' when the guessed letter
    occurs anywhere in ``solution``, otherwise 'X'.

    Note: letter multiplicity is not tracked -- every copy of a guessed letter
    that appears somewhere in ``solution`` is marked 'Y' (or 'G'), no matter
    how many copies ``solution`` actually has.
    """
    marks = []
    for pos in range(5):
        letter = input_word[pos]
        if letter == solution[pos]:
            marks.append('G')
        elif letter in solution:
            marks.append('Y')
        else:
            marks.append('X')
    return ''.join(marks)

In [7]:
def filter_wordset(input_word, feedback, wordset):
    """Keep only the rows of ``wordset`` consistent with one round of feedback.

    Parameters
    ----------
    input_word : str
        The 5-letter guess (plain letters; they are used as patterns in
        pandas string methods).
    feedback : str
        5 characters, one per position: 'G' (right letter, right place),
        'Y' (letter present, wrong place); any other character is treated
        as gray.
    wordset : pandas.DataFrame
        Frame with a ``word`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered copy.

    A gray mark normally removes every word containing the letter. When the
    same letter is also marked 'G' or 'Y' elsewhere in the guess (real Wordle
    does this for surplus copies of a repeated letter), the gray mark instead
    means "not at this position, and no more copies than were confirmed", so
    the true solution is no longer discarded.
    """
    newset = wordset.copy()
    for i in range(5):
        letter = input_word[i]
        words = newset.word
        if feedback[i] == 'G':
            keep = words.str[i] == letter
        elif feedback[i] == 'Y':
            keep = words.str.contains(letter) & (words.str[i] != letter)
        else:
            # How many copies of this letter the feedback confirms elsewhere.
            n_confirmed = sum(
                1 for j in range(5)
                if input_word[j] == letter and feedback[j] in ('G', 'Y')
            )
            if n_confirmed:
                keep = (words.str[i] != letter) & (words.str.count(letter) <= n_confirmed)
            else:
                keep = ~words.str.contains(letter)
        newset = newset.loc[keep]
    return newset

In [8]:
# NOTE: this repeats the compute_letter_frequencies definition from an earlier
# cell of this notebook; keep the two in sync (or delete one).
def compute_letter_frequencies(wordset):
    """Return a copy of ``wordset`` (minus its first column) with a 0/1 column per letter.

    The column for a letter holds 1 where the row's ``word`` contains that
    letter and 0 otherwise. ``wordset`` itself is left untouched.
    """
    flagged = wordset.copy()
    for ch in 'abcdefghijklmnopqrstuvwxyz':
        has_letter = flagged.word.str.contains(ch)
        flagged[ch] = has_letter.astype(int)
    return flagged.iloc[:, 1:]

# NOTE: this repeats the compute_score definition from an earlier cell of this
# notebook; keep the two in sync (or delete one).
def compute_score(x, freqs):
    """Sum ``freqs[letter]`` over the distinct letters of ``x``.

    Duplicate letters contribute once; a letter missing from ``freqs`` raises
    ``KeyError``.
    """
    total = 0
    for letter in set(x):
        total = total + freqs[letter]
    return total

## Full Dataset

In [9]:
# Merge additional data: attach word_freq / n_articles from the popular-words
# table to every Wordle word; words without a match get 0.
# Dropping any previously merged columns first keeps this cell idempotent --
# re-running it would otherwise create word_freq_x / word_freq_y columns and
# break later code that reads `word_freq`.
wordle = (
    wordle
    .drop(columns=['word_freq', 'n_articles'], errors='ignore')
    .merge(words_all[['word', 'word_freq', 'n_articles']], how='left', left_on='word', right_on='word')
    .fillna(0)
)

## App

In [37]:
def run_app(pre_load=None):
    """Interactive Wordle assistant driven by console prompts.

    Each step asks for a guess and the feedback the game returned for it
    (5 characters of G/Y/X, any case), narrows the candidate list with
    ``filter_wordset`` and displays suggestions for the next step. The loop
    ends when at most one candidate remains or the feedback is all green.
    Typing ``quit`` or ``q`` at either prompt exits immediately.

    Parameters
    ----------
    pre_load : str, optional
        If given, used as the guess for step 1 instead of prompting.

    Returns
    -------
    None

    Relies on the notebook-level ``wordle`` and ``global_scores`` frames,
    on ``filter_wordset`` and on IPython's ``display``.
    """
    step = 1
    fb = ''
    # Two-row placeholder so the loop condition (more than one candidate left)
    # holds before the first real score table exists.
    scores = pd.DataFrame([{'a': 1}, {'a': 1}])
    w = wordle.copy()
    w_full = wordle.copy()
    w_full = w_full.merge(global_scores[['word', 'score']], how='left', left_on='word', right_on='word')
    tested_words = []
    all_chars = []

    while scores.shape[0] > 1 and fb.lower() != 'ggggg':
        print(f'[ ---- STEP {step} ----]')
        if not (step == 1 and pre_load):
            guess = input('Input a guess:')
        else:
            guess = pre_load
        # The word lists are lowercase, so normalise what the user typed.
        guess = guess.strip().lower()
        if guess in ['quit', 'q']:
            return
        tested_words.append(guess)
        # Distinct letters tried so far, across all guesses.
        all_chars = list(set(all_chars) | set(guess))

        fb = input('Input feedback:').strip()
        if fb.lower() in ['quit', 'q']:
            return

        w = filter_wordset(guess, fb.upper(), w)
        w = w.loc[~w.word.isin(tested_words)]

        # Candidates
        print(f'Found {w.shape[0]} candidates. Running analysis...')
        # Get scores. `lettermatch` counts occurrences of already-tried letters
        # in a word: low values mean the word probes mostly new letters.
        scores = pd.DataFrame({'word': w.word, 'word_freq': w.word_freq})
        scores['lettermatch'] = scores.word.apply(lambda x: np.sum([x.count(l) for l in all_chars]))
        w_full['lettermatch'] = w_full.word.apply(lambda x: np.sum([x.count(l) for l in all_chars]))
        df_filter = w_full.sort_values(['lettermatch', 'score', 'word_freq'], ascending=[True, False, False])
        df_solve = scores.sort_values(['word_freq', 'lettermatch'], ascending=[False, True])

        print(f'Suggestions for step {step + 1}:')

        if w.shape[0] <= 20:
            print(f'Small number of candidates remaining ({w.shape[0]}). We recommend attempting to solve:')
            display(df_solve.head(5))

            # Check for repeats. This used to sit in an `elif` after the
            # `<= 20` test above and therefore could never run.
            if 3 <= w.shape[0] <= 10:
                w_copy = scores.sort_values('word_freq', ascending=False).copy()
                # Extract letters
                for i in range(5):
                    w_copy[f'p{i}'] = w_copy.word.str[i]

                # Which of the five positions still vary between candidates
                unique_mask = w_copy.iloc[:, -5:].nunique() > 1

                # If only 1, then recommend another word that tests several
                # of the differing letters at once
                if unique_mask.sum() == 1:
                    wc = wordle.copy()
                    total_letters = w_copy.shape[0]
                    wc['scores'] = 0
                    wc['counts'] = 0
                    for i, letter in enumerate(np.squeeze(w_copy[unique_mask.index[unique_mask]].values)):
                        # Letters of more frequent candidates weigh more.
                        wc['scores'] = wc['scores'] + (total_letters - i) * wc.word.str.contains(letter).astype(int)
                        wc['counts'] = wc['counts'] + wc.word.str.contains(letter).astype(int)

                    print(f'\nWords with only one letter differential detected. Consider filtering:')
                    display(wc.loc[wc.counts.le(total_letters // 2 * 3)].sort_values('scores', ascending=False).head(5))
        else:
            print(f'Large number of candidates remaining ({w.shape[0]}). We recommend filtering further:')
            display(df_filter.head(5))
            print(f'\n But, if you want to solve:')
            display(df_solve.head(5))

        step += 1
        print()

In [None]:
# Start an interactive session; 'arise' is used as the step-1 guess without prompting.
run_app(pre_load='arise')

[ ---- STEP 1 ----]


Input feedback: yxxxx


Found 796 candidates. Running analysis...
Suggestions for step 2:
Large number of candidates remaining (796). We recommend filtering further:


Unnamed: 0,word,is_answer,word_freq,n_articles,score,lettermatch
5323,lound,0,61.0,25.0,14546,0
6251,nould,0,0.0,0.0,14546,0
11670,donut,1,300.0,201.0,14465,0
12237,clout,1,180.0,150.0,14414,0
6919,pluto,0,1083.0,650.0,14379,0



 But, if you want to solve:


Unnamed: 0,word,word_freq,lettermatch
11758,local,182670.0,1
11283,coach,130438.0,1
4429,japan,124684.0,2
10957,black,121935.0,1
11230,total,115817.0,1



[ ---- STEP 2 ----]


Input a guess: donut
Input feedback: xxyxx


Found 47 candidates. Running analysis...
Suggestions for step 3:
Large number of candidates remaining (47). We recommend filtering further:


Unnamed: 0,word,is_answer,word_freq,n_articles,score,lettermatch
11341,lymph,1,1076.0,494.0,10606,0
11193,glyph,1,297.0,222.0,10281,0
3489,ghyll,0,41.0,23.0,8396,0
3151,flyby,0,276.0,206.0,7654,0
12789,pygmy,1,1892.0,920.0,7327,0



 But, if you want to solve:


Unnamed: 0,word,word_freq,lettermatch
4429,japan,124684.0,3
10665,naval,38766.0,3
1518,chang,4553.0,2
11243,pagan,2935.0,3
11934,flank,2643.0,2



[ ---- STEP 3 ----]


Input a guess: lymph
Input feedback: yxxxx


Found 10 candidates. Running analysis...
Suggestions for step 4:
Small number of candidates remaining (10). We recommend attempting to solve:


Unnamed: 0,word,word_freq,lettermatch
10665,naval,38766.0,4
11934,flank,2643.0,3
11694,blank,2429.0,3
4793,klang,1091.0,3
6015,nabla,520.0,4



[ ---- STEP 4 ----]


## Sim

In [11]:
def run_sim(input_word, solution):
    """Simulate one game that opens with ``input_word`` against ``solution``.

    After each guess the candidate list is narrowed with ``get_feedback`` and
    ``filter_wordset``. The next guess is normally the remaining candidate
    with the highest ``word_freq``. When 3-8 candidates remain and they differ
    in exactly one position, a word from the full list that covers several of
    the differing letters is guessed instead.

    Parameters
    ----------
    input_word : str
        Opening guess.
    solution : str
        The word to find.

    Returns
    -------
    tuple
        ``(input_word, solution, step, ncands, tested_words)`` where ``step``
        is the number of guesses used, ``ncands`` the candidate count after
        each guess and ``tested_words`` the guesses in order.

    Raises
    ------
    ValueError
        If no candidate is left before the solution was guessed.

    Relies on the notebook-level ``wordle`` frame (with a ``word_freq`` column).
    """
    w = wordle.copy()
    tested_words = []
    ncands = []
    step = 1
    guess = input_word

    while True:
        # Check solution
        fb = get_feedback(guess, solution)
        tested_words.append(guess)

        # Filter wordset (the original referenced an undefined `wordset` here)
        w = filter_wordset(guess, fb, w)
        w = w.loc[~w.word.isin(tested_words)]
        ncands.append(w.shape[0])

        # Solved: stop before choosing another guess -- the candidate list is
        # empty at this point and indexing into it would raise IndexError.
        if fb == 'GGGGG':
            break
        if w.empty:
            raise ValueError(
                f'No candidates left for solution {solution!r} after guesses {tested_words}'
            )

        # Default next guess: the most frequent remaining candidate.
        scores = pd.DataFrame({'word': w.word, 'score': w.word_freq}) \
            .sort_values('score', ascending=False)
        guess = scores.word.iloc[0]

        if 3 <= w.shape[0] <= 8:
            w_copy = scores.copy()
            # Extract letters
            for i in range(5):
                w_copy[f'p{i}'] = w_copy.word.str[i]

            # Which of the five positions still vary between candidates
            unique_mask = w_copy.iloc[:, -5:].nunique() > 1

            # If only 1, then guess a word that tests several of the differing letters
            if unique_mask.sum() == 1:
                # Skip words already played: replaying one yields no new
                # information and would repeat the same state forever.
                wc = wordle.loc[~wordle.word.isin(tested_words)].copy()
                total_letters = w_copy.shape[0]
                wc['scores'] = 0
                wc['counts'] = 0
                for i, letter in enumerate(np.squeeze(w_copy[unique_mask.index[unique_mask]].values)):
                    # Letters of more frequent candidates weigh more.
                    wc['scores'] = wc['scores'] + (total_letters - i) * wc.word.str.contains(letter).astype(int)
                    wc['counts'] = wc['counts'] + wc.word.str.contains(letter).astype(int)

                special_res = wc.loc[wc.counts.le(total_letters // 2 * 3)].sort_values('scores', ascending=False)
                if not special_res.empty:
                    guess = special_res.word.iloc[0]

        step += 1

    return input_word, solution, step, ncands, tested_words

## Run

In [None]:
# Simulate every (opening word, answer) pair, using 5 parallel jobs.
results = Parallel(n_jobs=5, verbose=1)(
    delayed(run_sim)(input_word, solution)
    for input_word in ['soare', 'roate', 'raise']
    for solution in wordle_answers.word
)

Enter feedback: yyxyx


[1] Input: tales | Feedback: yyxyx


Enter feedback: xxxxx


[2] Input: corni | Feedback: xxxxx


Unnamed: 0,word,score,word_freq,n_articles
749,beath,51,72.0,33.0
751,beaty,50,192.0,93.0
10678,death,47,151842.0,121176.0
5591,meath,47,3046.0,1490.0
10667,heath,46,8879.0,5218.0
11948,meaty,46,93.0,81.0
6673,peaty,45,124.0,113.0
753,beaut,43,16.0,12.0
830,betta,40,291.0,148.0
10759,agate,38,386.0,201.0


Choose a word: beath
Enter feedback: yyggx


[3] Input: beath | Feedback: yyggx


Unnamed: 0,word,score,word_freq,n_articles
10675,abate,5,317.0,166.0
