# Wordle Step 1

In [1]:
import json
import numpy as np
import pandas as pd
import sqlite3
import wordlebot

from joblib import Parallel, delayed
from tqdm.notebook import tqdm

## Load Data

In [2]:
# Load the two word lists from the local 'data' directory.
wordle_candidates, wordle_answers = wordlebot.load_data('data')

# Build the pool of words to evaluate: candidates with no repeated letters
# (len(word) == number of distinct letters), followed by every answer word.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; ignore_index=True gives the same fresh 0..n-1 index
# that .reset_index(drop=True) produced.
has_unique_letters = wordle_candidates.word.apply(lambda x: len(x) == len(set(x)))
wordle = pd.concat(
    [wordle_candidates.loc[has_unique_letters], wordle_answers],
    ignore_index=True,
)

## Compute Letter Frequencies

In [3]:
from wordlebot.lf import compute_letter_frequencies, compute_lf_score

# Sum the output of compute_letter_frequencies column-wise and keep it as a dict.
# NOTE(review): presumably this maps letter -> total count over the word pool;
# confirm against wordlebot.lf.
lf_freqs = compute_letter_frequencies(wordle).sum().to_dict()

# One row per word: the word itself and its compute_lf_score value, with the
# frequency dict passed through as the `freqs` keyword argument.
lf_scores = pd.DataFrame(
    {
        'word': wordle['word'],
        'letter_freq': wordle['word'].apply(compute_lf_score, freqs=lf_freqs),
    }
)

## Compute Green/Yellow/Grey (GYX) Scores

In [4]:
from wordlebot.gyx import (
    get_gyx_scores_all,
    compute_ncands_all,
    summarise_ncands,
)

# Score the whole word pool against the answer list. The scoring itself lives in
# wordlebot.gyx; later cells read the green_rank, yellow_rank and wt_avg_rank
# columns from this result.
df_scores = get_gyx_scores_all(
    wordle,
    wordle_answers,
)

  0%|          | 0/9071 [00:00<?, ?it/s]

[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  65 tasks      | elapsed:    0.8s
[Parallel(n_jobs=5)]: Done 4325 tasks      | elapsed:    5.0s
[Parallel(n_jobs=5)]: Done 9071 out of 9071 | elapsed:    9.4s finished


In [5]:
# Keep every word that ranks 10 or better on at least one of the three
# GYX rankings (green, yellow, weighted average).
shortlist = df_scores.loc[
    df_scores[['green_rank', 'yellow_rank', 'wt_avg_rank']].le(10).any(axis=1)
]

## Compute No. of Candidates

In [6]:
# compute_ncands_all and summarise_ncands are imported in the GYX cell above.
# NOTE(review): wordle_answers is passed as both the second and third argument;
# confirm against compute_ncands_all's signature that this is intended.
df_ncands = compute_ncands_all(
    shortlist,
    wordle_answers,
    wordle_answers,
)

# Reduce the raw results to one summary row per word; the next cell merges it
# onto the shortlist by 'word'.
df_ncands_sum = summarise_ncands(
    df_ncands,
)

## Combine Results

In [11]:
# Attach the candidate-count summary to each shortlisted word (left join, so
# every shortlist row is kept) and derive the extra rank columns in one chain.
df = (
    shortlist
    .merge(df_ncands_sum, how='left', on='word')
    .assign(
        # Smaller remaining-candidate counts get the lower (better) rank.
        ncands_max_rank=lambda frame: frame.ncands_max.rank(),
        ncands_mean_rank=lambda frame: frame.ncands_mean.rank(),
        # Larger bucket entropy gets the lower (better) rank.
        bucket_entropy_rank=lambda frame: frame.bucket_entropy.rank(ascending=False),
        # Overall score: row-wise mean of three ranks. ncands_mean_rank is
        # computed above but is not part of this average.
        avg_rank=lambda frame: frame[
            ['wt_avg_rank', 'ncands_max_rank', 'bucket_entropy_rank']
        ].mean(axis=1),
    )
)

In [12]:
# Display the shortlist ordered by combined rank, lowest (best) avg_rank first.
df.sort_values(by='avg_rank')

Unnamed: 0,word,green_avg,yellow_avg,weighted_avg,green_rank,yellow_rank,wt_avg_rank,ncands_max,ncands_mean,nbuckets,bucket_entropy,ncands_max_rank,ncands_mean_rank,bucket_entropy_rank,avg_rank
6,soare,0.660043,1.107991,3.088121,2.0,1561.5,1.0,183,62.30108,127,4.079837,1.0,1.0,1.0,1.0
3,saine,0.666091,0.966739,2.965011,1.0,3963.0,4.0,207,72.592225,136,3.995355,3.0,4.0,5.0,4.0
15,slate,0.620734,1.063067,2.92527,5.5,2210.0,5.0,221,71.572786,147,4.058914,5.0,3.0,2.0,4.0
5,slane,0.639309,0.993952,2.911879,4.0,3450.0,9.0,225,73.985745,133,3.999585,6.0,5.0,4.0,6.333333
16,stare,0.572786,1.192657,2.911015,53.0,703.0,10.0,227,71.2946,133,4.0253,7.0,2.0,3.0,6.666667
2,saice,0.653132,0.935637,2.895032,3.0,4624.0,11.0,211,76.517927,124,3.94487,4.0,6.0,6.0,7.0
19,erase,0.536069,1.397408,3.005616,221.0,37.0,2.0,326,114.186177,79,3.513473,13.0,12.0,13.0,9.333333
11,tease,0.558531,1.301512,2.977106,98.0,182.0,3.0,347,119.062203,84,3.530741,18.0,13.0,12.0,11.0
20,lease,0.532181,1.319654,2.916199,243.0,143.0,8.0,344,121.463067,84,3.493649,17.0,15.0,16.0,13.666667
22,cease,0.579698,1.185745,2.924838,39.5,753.5,6.0,370,136.372786,76,3.392527,20.0,20.0,19.0,15.0


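`soare` is the optimal starter word under this scoring: it has the lowest `avg_rank` (1.0) in the table above, ranking first on `wt_avg_rank`, `ncands_max_rank` and `bucket_entropy_rank`.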