# App

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from joblib import Parallel, delayed

## Load Words

In [2]:
# Word lists read from CSV.
words_12k = pd.read_csv('words_12k.csv')
words_5k = pd.read_csv('words_5k.csv')

# Word-frequency table. Only three column names are supplied, so the code
# relies on the reader placing the file's leading column in the index
# (presumably the word itself, i.e. a four-column file -- TODO confirm).
# reset_index() turns that index back into a column, renamed to 'word'.
words_all = (
    pd.read_table(
        'data/en_words_1_5-5.txt',
        sep=' ',
        header=None,
        index_col=None,
        names=['word_len', 'word_freq', 'n_articles'],
    )
    .reset_index()
    .rename(columns={'index': 'word'})
)

In [3]:
# Filter by english
# `alphabet` stays defined as a module-level name in case other cells use it.
alphabet = list('abcdefghijklmnopqrstuvwxyz')

# Keep only words made entirely of lowercase ASCII letters.
# The vectorised string method replaces a per-row lambda; na=False makes
# non-string entries (e.g. tokens the reader parsed as NaN) evaluate to False
# instead of raising TypeError when iterated. `*` (not `+`) mirrors the
# previous all([]) == True behaviour for empty strings.
is_english = words_all.word.str.fullmatch('[a-z]*', na=False)
words_all = words_all.loc[is_english].reset_index(drop=True)

# At least 100: keep words whose word_freq is >= 100
words_all = words_all.loc[words_all.word_freq.ge(100)]

## Game Logic

In [4]:
def wordle(input_word, solution):
    """Score a five-letter guess against the solution.

    Returns a 5-character string with one mark per guess position:
      'G' - the guess letter equals the solution letter at that position
      'Y' - the guess letter occurs somewhere in the solution
      'X' - the guess letter does not occur in the solution

    Each position is judged independently, so a repeated guess letter is
    marked 'Y' at every non-matching position as long as the solution
    contains that letter at least once.
    """
    return ''.join(
        'G' if input_word[pos] == solution[pos]
        else 'Y' if input_word[pos] in solution
        else 'X'
        for pos in range(5)
    )

In [5]:
def filter_wordset(input_word, feedback, wordset):
    """Return the rows of `wordset` that are consistent with one guess.

    Parameters
    ----------
    input_word : str
        The guessed word (first five characters are used).
    feedback : str
        One mark per guess position: 'G' (right letter, right place),
        'Y' (letter present, wrong place); any other character is treated
        as 'X' (gray).
    wordset : pandas.DataFrame
        Candidate words in a `word` column. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, original index preserved.

    A gray mark normally removes every word containing that letter. When the
    same letter is also marked 'G' or 'Y' elsewhere in this guess (repeated
    letters under real Wordle rules), the letter is known to be in the
    solution, so the gray mark only rules it out at that position.
    Previously such feedback removed every word containing the letter,
    including the actual solution.
    """
    # Letters this guess proved to be in the solution.
    present = {
        letter for letter, mark in zip(input_word, feedback) if mark in ('G', 'Y')
    }

    keep = pd.Series(True, index=wordset.index)
    for i in range(5):
        letter = input_word[i]
        mark = feedback[i]
        at_pos = wordset.word.str[i] == letter
        if mark == 'G':
            keep = keep & at_pos
        elif mark == 'Y':
            # Literal (non-regex) containment; the letter must be elsewhere.
            contains = wordset.word.str.contains(letter, regex=False, na=False)
            keep = keep & contains & ~at_pos
        elif letter in present:
            # Gray on a repeated letter that is known to be present.
            keep = keep & ~at_pos
        else:
            contains = wordset.word.str.contains(letter, regex=False, na=False)
            keep = keep & ~contains
    return wordset.loc[keep]

In [6]:
def compute_pos_letter_distribution(wordset):
    """Count how often each letter occurs at each of the five positions.

    Returns a dict keyed by position (0-4); each value is a dict mapping a
    letter to the number of words in `wordset.word` with that letter at
    that position.
    """
    return {
        pos: wordset.word.str[pos].value_counts().to_dict()
        for pos in range(5)
    }

In [7]:
def compute_pos_score(letters, pos_scores):
    """Sum the positional letter counts for a word.

    Parameters
    ----------
    letters : str
        The word to score; character i is looked up in `pos_scores[i]`.
    pos_scores : dict
        Mapping position -> {letter: count}, as produced by
        compute_pos_letter_distribution.

    Returns
    -------
    int
        Total of the counts. A letter never observed at its position
        contributes 0 rather than raising KeyError, so words outside the
        set the distribution was built from can be scored too.
    """
    return sum(
        pos_scores[i].get(letter, 0)
        for i, letter in enumerate(letters)
    )

## Setup

In [8]:
# Letter-by-position counts over the full 12k list.
pos_scores = compute_pos_letter_distribution(words_12k)

# One row per word with its positional score, best-scoring words first.
global_fl_scores = (
    words_12k[['word']]
    .assign(score=words_12k.word.apply(compute_pos_score, pos_scores=pos_scores))
    .sort_values('score', ascending=False)
)

In [9]:
# Merge additional data
# Frequency columns attached from words_all. Any existing copies are dropped
# first, so re-running this cell yields the same frames instead of
# word_freq_x / word_freq_y duplicates (which would break later accesses
# to .word_freq / .n_articles).
FREQ_COLS = ['word_freq', 'n_articles']
word_stats = words_all[['word'] + FREQ_COLS]

# Left merge keeps every word of the 12k list; fillna(0) replaces every NaN
# in the frame, which includes the frequency fields of unmatched words.
words_12k = (
    words_12k
    .drop(columns=FREQ_COLS, errors='ignore')
    .merge(word_stats, how='left', on='word')
    .fillna(0)
)

# Same enrichment for the global score table (row order is preserved by the
# left merge).
global_fl_scores = (
    global_fl_scores
    .drop(columns=FREQ_COLS, errors='ignore')
    .merge(word_stats, how='left', on='word')
    .fillna(0)
)

In [10]:
def app_wordle_double(input_word1, input_word2):
    """Interactive Wordle assistant that opens with two fixed guesses.

    The user plays `input_word1` and then `input_word2` and types the
    feedback for each guess at the prompt: one character per letter, 'G' or
    'Y', with any other character treated as gray; case-insensitive. From
    the third guess on, the ten best-scoring remaining candidates are
    displayed and the user types the word they played next.

    Typing 'quit' at either prompt ends the session. Feedback 'ggggg'
    prints the number of guesses used. The session also ends with a message
    when no candidate word remains.

    Reads the module-level `words_12k` frame (columns `word`, `word_freq`,
    `n_articles`), prints and displays its output, and returns None.
    """
    wordset = words_12k.copy()
    tested_words = []
    n_iter = 1
    input_word = input_word1

    # The loop is left only through the explicit returns below.
    while True:
        # Check solution
        feedback = input('Enter feedback:').strip()
        if feedback.lower() == 'quit':
            return
        if feedback.lower() == 'ggggg':
            print(f'Solved in {n_iter} steps.')
            return
        if len(feedback) != 5:
            # The filter indexes feedback[0..4]; re-prompt instead of crashing.
            print('Feedback must be exactly 5 characters (G/Y/X).')
            continue

        tested_words.append(input_word)
        print(f'[{n_iter}] Input: {input_word} | Feedback: {feedback}')

        # Filter wordset
        wordset = filter_wordset(input_word, feedback.upper(), wordset)
        wordset = wordset.loc[~wordset.word.isin(tested_words)]

        if wordset.empty:
            # Nothing left to suggest: the feedback was mistyped or the
            # solution is not in the word list. Previously this case
            # re-prompted forever without any explanation.
            print('No candidate words left - check the feedback entered.')
            return

        # Compute letter distribution of updated wordset
        pos_scores = compute_pos_letter_distribution(wordset)

        # Obtain scores, best first; ties broken by n_articles
        scores = pd.DataFrame({
            'word': wordset.word,
            'score': wordset.word.apply(compute_pos_score, pos_scores=pos_scores),
            'word_freq': wordset.word_freq,
            'n_articles': wordset.n_articles,
        }).sort_values(['score', 'n_articles'], ascending=False)

        # Set new input word
        if len(tested_words) < 2:
            # Second opening guess is fixed.
            input_word = input_word2
        else:
            display(scores.head(10))
            while True:
                # Normalised to lowercase to match the displayed candidates.
                input_word = input('Choose a word:').strip().lower()
                if input_word == 'quit':
                    return
                if len(input_word) == 5:
                    break
                print('Please enter a 5-letter word.')
        n_iter += 1

In [None]:
# Start an interactive session with 'pares' and 'doilt' as the two fixed
# opening guesses (ten distinct letters between them).
app_wordle_double('pares', 'doilt')

Enter feedback: xxxxx


[1] Input: pares | Feedback: xxxxx


Enter feedback: gxxxx


[2] Input: doilt | Feedback: gxxxx


Unnamed: 0,word,score,word_freq,n_articles
3033,ducky,27,173.0,112.0
3058,dumky,27,0.0,0.0
3067,dungy,27,0.0,0.0
3070,dunny,27,0.0,0.0
3031,duchy,26,8985.0,5332.0
3059,dummy,26,813.0,543.0
3035,duddy,25,101.0,64.0
3064,dunch,21,0.0,0.0
