In [1]:
import numpy as np
import pandas as pd
import datetime
import pytz
import requests
import wordle_cheater as wrdlc
from IPython import display
import requests
from lxml import html
import datetime
import importlib as imp

___
### Use `wordle_cheater.solveall` to solve Wordle, given a list of initial words and a solution word


In [2]:
# Build the solver object exposed by wordle_cheater
wdl = wrdlc.wordl()
# Opening guesses passed to solveall (three words here)
initial_words = ['alert','noisy','chump']

# Target date, given as a month abbreviation and a day number
month = "Feb"
day = 15
month_day = f"{month} {day} -"

# Keep the history rows whose 'date' value compares <= month_day.
# NOTE(review): month_day is a str, so this is presumably a lexicographic
# string comparison rather than a calendar one — confirm it selects the
# intended rows for the history's date format.
on_or_before = wdl.df_word_history['date'] <= month_day
# Only the first matching row is kept
df_wdlh = wdl.df_word_history[on_or_before].iloc[0:1]

# Lower-cased solution taken from that row
solution = df_wdlh['solution'].iloc[0].lower()
words_used, letter_status_used, list_df = wrdlc.solveall(initial_words, solution)

In [3]:
# Render every frame in list_df, captioned with the word at the same index in words_used
for idx, candidates in enumerate(list_df):
    caption = f"Possible Words after word: {words_used[idx]}"
    display.display(candidates.style.set_caption(caption))

Unnamed: 0,word,probability
1,local,0.249211
2,small,0.1918
3,shall,0.176697
4,daily,0.097421
5,final,0.089883
6,valid,0.052554
7,villa,0.016189
8,usual,0.015689
9,salon,0.009277
10,laugh,0.009041


Unnamed: 0,word,probability
1,salsa,1.0


Unnamed: 0,word,probability
1,salsa,1.0


___
### Basic usage of the ```wordle_cheater``` module

Create an instance of the class ```wordl``` from the ```wordle_cheater.py``` module to help solve for the Wordle solution **whine**.

In [4]:
# Instantiate the wordl class (this rebinds `wdl`, which an earlier cell also assigned)
wdl = wrdlc.wordl()
# Read today's word from the instance's `todays_word` attribute
td_word = wdl.todays_word

In [8]:
td_word

'polka'

In [11]:
# Run solveall with a single opening guess against today's word.
# The result is left as the cell output; an earlier cell unpacks the same call
# as (words_used, letter_status_used, list_df).
wrdlc.solveall(['alert'],td_word)

(['alert', 'local', 'polka', 'polka'],
 [[2, 2, 3, 3, 3], [2, 1, 3, 2, 2], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1]],
 [     word  probability
  1   local     0.249211
  2   small     0.191800
  3   shall     0.176697
  4   daily     0.097421
  5   final     0.089883
  6   valid     0.052554
  7   villa     0.016189
  8   usual     0.015689
  9   salon     0.009277
  10  laugh     0.009041
  11  vocal     0.008360
  12  salad     0.008214
  13  canal     0.007620
  14  naval     0.007448
  15  sally     0.005461
  16  focal     0.005380
  17  badly     0.004869
  18  loyal     0.004141
  19  salsa     0.003897
  20  sadly     0.003595
  21  basil     0.002538
  22  nasal     0.002334
  23  chalk     0.002300
  24  viola     0.002263
  25  psalm     0.002043
  26  basal     0.001677
  27  lilac     0.001600
  28  modal     0.001582
  29  snail     0.001455
  30  palsy     0.001297
  31  scalp     0.001249
  32  quail     0.001177
  33  manly     0.001160
  34  polka     0.001144
  35  bylaw    

___
### Add the word *alert* to the ```wdl``` instance.

   * Each word that you add to the ```wordl``` instance requires an array of 5 integers between 1 and 3 (inclusive), one for each of the 5 letters of the word that you add.  This array is called the ```letter_status```.
   * The ```letter_status``` array tells the ```wordl.try_it()``` method one of 3 things about the respective letter:
     1. The letter code is **1** for that letter if:
       * that letter is *in the solution word*, and 
       * the letter's position in the word is *the same as the letter's position in the solution word*, 
     2. The letter code is **2** for that letter if:
       * the letter is *in the solution word*, 
       * but *NOT in the same position as the solution word*
     3. The letter code is **3** for that letter if:
       * the letter is NOT at all in the solution word

In other words:
  * a status code of **1** is equal to the color **GREEN** in the Wordle Game,
  * a status code of **2** is equal to the color **GOLD** in the Wordle Game,
  * a status code of **3** is equal to the color **GREY** in the Wordle Game.


#### For Example, when the solution word is **whine**:
In the word **alert**:
  1. The letter **a** is not in the word **whine**, so its letter code = 3
  2. The letter **l** is not in the word **whine**, so its letter code = 3
  3. The letter **e** is in the word **whine**, but NOT in the correct position, so its letter code = 2
  4. The letter **r** is not in the word **whine**, so its letter code = 3
  5. The letter **t** is not in the word **whine**, so its letter code = 3

So, for the word **alert**, the letter_status array is:
  * ```[3,3,2,3,3]```

In [5]:
# Record the guess 'alert' together with its per-letter status codes
wdl.add_word('alert', [3, 3, 2, 3, 3])
# Pass the result of try_it() through filter_words to get the table to show
df = wrdlc.filter_words(wdl.try_it())
# Round the probability column to 5 decimal places
df['probability'] = df['probability'].round(5)
caption = "Possible Words After adding 'alert' to the wdl instance"
display.display(df.style.set_caption(caption))

Unnamed: 0,word,count,probability
1,video,365410017,0.1067
2,phone,256643812,0.07494
3,index,242826246,0.07091
4,being,242783091,0.0709
5,women,242520455,0.07082
6,house,231310420,0.06755
7,since,214302926,0.06258
8,guide,213378807,0.06231
9,money,190205072,0.05554
10,movie,158421100,0.04626


___
### Add the word *noisy* to the ```wdl``` instance.

In the word **noisy**:
  1. The letter **n** is in the word **whine**, but NOT in the correct position, so its letter code = 2
  2. The letter **o** is not in the word **whine**, so its letter code = 3
  3. The letter **i** is in the word **whine**, AND it is in the correct position, so its letter code = 1
  4. The letter **s** is not in the word **whine**, so its letter code = 3
  5. The letter **y** is not in the word **whine**, so its letter code = 3

So, for the word **noisy**, the ```letter_status``` array is:
  * ```[2,3,1,3,3]```

In [6]:
# Record the guess 'noisy' together with its per-letter status codes
wdl.add_word('noisy', [2, 3, 1, 3, 3])
# Pass the result of try_it() through filter_words to get the table to show
df = wrdlc.filter_words(wdl.try_it())
# Round the probability column to 5 decimal places
df['probability'] = df['probability'].round(5)
caption = "Possible Words After adding 'noisy' to the wdl instance"
display.display(df.style.set_caption(caption))

Unnamed: 0,word,count,probability
1,being,242783091,0.95152
2,knife,11297715,0.04428
3,whine,832743,0.00326
4,feign,116753,0.00046
5,deign,94806,0.00037
6,eking,26695,0.0001


### Since *being* is the most probable word, add it

In the word **being**:
  1. The letter **b** is not in the word **whine**, so its letter code = 3
  2. The letter **e** is in the word **whine**, but it is NOT in the correct position, so its letter code = 2
  3. The letter **i** is in the word **whine**, AND it is in the correct position, so its letter code = 1
  4. The letter **n** is in the word **whine**, AND it is in the correct position, so its letter code = 1
  5. The letter **g** is not in the word **whine**, so its letter code = 3

So, for the word **being**, the ```letter_status``` array is:
  * ```[3,2,1,1,3]```

In [7]:
# Record the guess 'being' together with its per-letter status codes
wdl.add_word('being', [3, 2, 1, 1, 3])
# Pass the result of try_it() through filter_words to get the table to show
df = wrdlc.filter_words(wdl.try_it())
# Round the probability column to 5 decimal places
df['probability'] = df['probability'].round(5)
caption = "Possible Words After adding 'being' to the wdl instance"
display.display(df.style.set_caption(caption))

Unnamed: 0,word,count,probability
1,whine,832743,1.0


___
### The only word left is *whine*.
___

## Show frequencies of different letter combinations

In [None]:
import string
sss = string.ascii_lowercase
df_ww = wrdlc.df_ww

vowels = ('a', 'e', 'i', 'o', 'u')


def _count_words_containing(letters):
    """Build a letter/num_times frame for ``letters``, sorted by num_times descending.

    For each letter, num_times is ``count().values[0]`` of the rows of
    ``df_ww`` whose ``word`` matches ``str.contains(letter)``.
    """
    rows = []
    for letter in letters:
        containing = df_ww[df_ww.word.str.contains(letter)]
        rows.append([letter, containing.count().values[0]])
    table = pd.DataFrame(rows, columns=['letter', 'num_times'])
    return table.sort_values('num_times', ascending=False)


# Split the alphabet into consonants and vowels, then count each group
df_consonants = _count_words_containing([c for c in sss if c not in vowels])
df_vowels = _count_words_containing([c for c in sss if c in vowels])

#### Show vowel frequencies

In [None]:
df_vowels

#### Show consonant frequencies

In [None]:
df_consonants

#### Show frequencies of first letters

In [None]:
# First character of every word, computed once and reused for each letter
first_chars = df_ww.word.str.slice(0, 1)
first_letter_rows = [
    [letter, df_ww[first_chars == letter].count().values[0]]
    for letter in sss
    if letter in string.ascii_lowercase
]
df_first_letters = pd.DataFrame(
    first_letter_rows, columns=['letter', 'num_times']
).sort_values('num_times', ascending=False)
df_first_letters

#### Show frequencies of last letters

In [None]:
# Character at index 4 of every word, computed once and reused for each letter
last_chars = df_ww.word.str.slice(4, 5)
last_letter_rows = [
    [letter, df_ww[last_chars == letter].count().values[0]]
    for letter in sss
    if letter in string.ascii_lowercase
]
df_last_letters = pd.DataFrame(
    last_letter_rows, columns=['letter', 'num_times']
).sort_values('num_times', ascending=False)
df_last_letters

#### For a given value of ```letter_combo``` below, show the most frequent possible words

In [None]:
letter_combo = 'sate'
def part_of(word, letters=letter_combo):
    """Return True when every character of ``letters`` occurs in ``word``.

    ``letters`` defaults to the value ``letter_combo`` held when this
    function was defined. An empty ``letters`` yields True.
    """
    for needed in letters:
        if needed not in word:
            return False
    return True

# Rows of df_ww whose word passes part_of, joined to df_ugf on 'word'
# and ordered by 'count', largest first
matches_combo = wrdlc.df_ww.word.apply(part_of)
df_wwpo = (
    wrdlc.df_ww[matches_combo]
    .merge(wrdlc.df_ugf, on='word')
    .sort_values('count', ascending=False)
)
# Each row's share of the summed 'count' column
df_wwpo['perc'] = df_wwpo['count'] / df_wwpo['count'].sum()
df_wwpo.head(20)

#### Show the current Wordle solution history

In [None]:
wdl.df_word_history

In [None]:
# Load 'unigram_freq.csv' (a relative path, so it is resolved against the working directory)
df_uf = pd.read_csv('unigram_freq.csv')
# Show the loaded frame as the cell output
df_uf

In [None]:
import string
alllets = string.ascii_lowercase


def difflets(a1, a2):
    """Return the distinct characters of ``a1`` not found in ``a2``, joined in sorted order."""
    excluded = set(a2)
    kept = {ch for ch in a1 if ch not in excluded}
    return ''.join(sorted(kept))


# Letters already ruled out, and what remains of the alphabet without them
badlets = 'alertno'
goodlets = difflets(alllets, badlets)
goodlets

In [None]:
# r1 = difflets(goodlets,'eijqsuy')
# g2 = [
#     '_is.y'.replace('.',v) for v in r1
# ]
# g2

In [None]:
# r1 = difflets(goodlets,'eijqsuy')
# g2 = [
#     'si_.y'.replace('.',v) for v in r1
# ]
# g2

### Get new df_word_history.csv

df_word_history.csv contains a list of wordle solutions for past and future dates

In [2]:
# Build the word-history frame via wordle_cheater.get_combined_word_histories().
# NOTE(review): the output saved with this cell is
# "UnboundLocalError: local variable 'df_word_hist' referenced before assignment".
# That name does not appear in this notebook, so the failure is presumably inside
# wordle_cheater — confirm and fix there before relying on this cell.
df_word_history = wrdlc.get_combined_word_histories()

UnboundLocalError: local variable 'df_word_hist' referenced before assignment

In [45]:
# 'date' values containing 'apr' (case-insensitive); report how many there are
# and how many of them are distinct
apr_dates = df_word_history.loc[
    df_word_history['date'].str.lower().str.contains('apr'), 'date'
]
[len(apr_dates), len(apr_dates.unique())]

[30, 30]

In [None]:
# df_word_history.to_csv('temp_folder/df_word_history.csv',index=False)