In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns; sns.set()
from wordle_wise.api import *
import matplotlib.pyplot as plt
from collections import Counter

In [47]:
file_path = 'data/unigram_freq.csv'
WORD_COLUMN = 'word'
FREQ_COLUMN = 'count'
ignore_characters = ['_', '', ' ']
# Upper-case A-Z in true alphabetical order. The previous hand-typed list had
# 'y' before 'x', which would silently mis-order anything indexed by letter.
alphabet = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# get_wordle_word_bank comes from wordle_wise.api; judging by the output of
# this cell it returns the five-letter rows with upper-cased words
# (NOTE(review): confirm against the library source).
word_bank = get_wordle_word_bank(pd.read_csv(file_path), nletters=5)

word_bank.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[WORD_COLUMN] = df[WORD_COLUMN].str.upper()


Unnamed: 0,word,count
35,ABOUT,1226734006
45,OTHER,978481319
56,WHICH,810514085
57,THEIR,782849411
62,THERE,701170205


In [46]:
# helper functions

def score_word(guessed_word,actual_word):
    """Score a guess against the answer using Wordle colouring rules.

    Args:
        guessed_word: The word that was guessed.
        actual_word: The answer the guess is compared with.

    Returns:
        A string with one character per letter of the guess: 'G' where the
        letter is in the right position, 'Y' where the letter occurs elsewhere
        in the answer and that occurrence has not already been claimed by
        another mark, and '_' otherwise.

    Raises:
        ValueError: If the two words differ in length.
    """
    if len(guessed_word) != len(actual_word):
        raise ValueError('Word lengths do not match')

    marks = ['_'] * len(guessed_word)
    unclaimed_answer_letters = []

    # Pass 1: exact-position matches are green. Every answer letter that is
    # not part of a green stays available for yellow matching.
    for position, (guess_letter, answer_letter) in enumerate(zip(guessed_word, actual_word)):
        if guess_letter == answer_letter:
            marks[position] = 'G'
        else:
            unclaimed_answer_letters.append(answer_letter)

    # Pass 2: left to right, a non-green guess letter turns yellow only while
    # an unclaimed copy of it remains, so repeated letters are not over-counted.
    for position, guess_letter in enumerate(guessed_word):
        if marks[position] == 'G':
            continue
        if guess_letter in unclaimed_answer_letters:
            marks[position] = 'Y'
            unclaimed_answer_letters.remove(guess_letter)

    return "".join(marks)


def count_letter(result, letters=('G','Y')):
    """Count how often each of `letters` occurs in a score string.

    Args:
        result: Score string as produced by score_word (e.g. 'GY__Y').
        letters: Characters to count, in the order the counts are returned.
            Defaults to greens then yellows. An immutable default is used so
            the default can never be mutated between calls.

    Returns:
        A tuple of ints, one count per entry of `letters`.
    """
    # Tally the string once instead of rebuilding the Counter for every letter.
    tally = Counter(result)
    return tuple(tally[letter] for letter in letters)

def calc_expectations(cnt_greens, cnt_yellows, weights, green_value_premium=3):
    """Weight green/yellow counts and combine them into a single value.

    Args:
        cnt_greens: Green counts; matrix-multiplied with `weights`.
        cnt_yellows: Yellow counts; matrix-multiplied with `weights`.
        weights: Weight vector applied to both count arrays.
        green_value_premium: Multiplier applied to the weighted green count
            before the weighted yellow count is added.

    Returns:
        Tuple (expected_greens, expected_yellow, expected_value).
    """
    green_part = cnt_greens @ weights
    yellow_part = cnt_yellows @ weights
    combined = green_value_premium * green_part + yellow_part
    return green_part, yellow_part, combined

# Regression checks for score_word, focused on repeated letters and on the
# fact that scoring is not symmetric in (guess, answer).
# Second T and second O stay '_' because ABOUT holds only one of each.
assert score_word(guessed_word = 'TATOO',actual_word = 'ABOUT') == 'YY_Y_'
# Same pair with the roles swapped gives a different pattern.
assert score_word(guessed_word = 'ABOUT',actual_word = 'TATOO') == 'Y_Y_Y'
# Only the first T is marked yellow; the answer has a single T.
assert score_word(guessed_word = 'TASTE',actual_word = 'ABOUT') == 'YY___'
# Green on the leading T; A and E are present elsewhere in the answer.
assert score_word(guessed_word = 'TABLE',actual_word = 'TREAT') == 'GY__Y'


In [35]:
class WordleGame:
    """Interactive Wordle helper that tracks a weight for every candidate word.

    Each word carries a weight. `make_guess` zeroes the weight of every word
    that would not have produced the observed feedback and renormalises;
    `fit` prints the guesses with the highest expected green/yellow value
    against a sample of the words that are still possible.

    Attributes:
        rounds: Total number of guesses allowed.
        remaining_guesses: Guesses still available.
        possible_guesses: NumPy array holding the vocabulary (used both as
            guesses and as candidate answers).
        weights: Weights over `possible_guesses`, normalised to sum to 1.
        scoreboard: Guesses recorded so far, upper-cased, in order.
        sample_max: Maximum number of candidate answers scored per `fit`.
    """

    def __init__(self, words, weights, sample_max:int=100, rounds:int=6):
        """Store the vocabulary and its normalised prior weights.

        Args:
            words: Sequence of candidate words.
            weights: Non-negative weight per word, same length as `words`.
            sample_max: Cap on the number of candidate answers used by `fit`.
            rounds: Number of guesses allowed.

        Raises:
            ValueError: If `words` and `weights` differ in length, or the
                weights do not have a positive sum.
        """
        if len(words) != len(weights):
            raise ValueError("words and weights must have the same length")

        self.rounds = rounds
        self.remaining_guesses = rounds
        # Arrays are required for the fancy indexing done in fit().
        self.possible_guesses = np.asarray(words)
        self.weights = self.normalize(weights)
        self.scoreboard = []
        self.sample_max = sample_max

    @staticmethod
    def normalize(x):
        """Scale `x` so its entries sum to 1.

        Declared as a static method and called through `self` so the class
        does not depend on a module-level `normalize` existing in the kernel.

        Raises:
            ValueError: If the sum of `x` is zero, negative or NaN.
        """
        x = np.asarray(x, dtype=float)
        total = x.sum()
        if not total > 0:
            raise ValueError("Weights must have a positive sum")
        return x / total

    def fit(self, green_value_premium=3):
        """Print the five guesses with the highest expected value.

        Candidate answers are the words with positive weight; when there are
        more than `sample_max` of them, `sample_max` are drawn without
        replacement with probability proportional to their weight (uses the
        global NumPy random state). Every word in the vocabulary is scored as
        a guess against each sampled answer.

        Args:
            green_value_premium: Multiplier applied to expected greens before
                adding expected yellows.

        Raises:
            ValueError: If no word has positive weight.
        """
        is_candidate = self.weights > 0
        n_candidates = int(is_candidate.sum())
        if n_candidates == 0:
            raise ValueError("No candidate words remain")

        #Sample subset
        if n_candidates > self.sample_max:
            words_index = np.arange(len(self.possible_guesses), dtype=int)
            sample_index = np.random.choice(words_index, size=self.sample_max, replace=False, p=self.weights)
        else:
            sample_index = np.flatnonzero(is_candidate)

        # Use the instance's own words and current weights, not notebook
        # globals, so the sample reflects every guess made so far.
        sample_words = self.possible_guesses[sample_index]
        sample_weights = self.normalize(self.weights[sample_index])

        # Grids: one (greens, yellows) pair per (guess, sampled answer).
        n_guesses, n_samples = len(self.possible_guesses), len(sample_words)
        results_grid = [score_word(guessed_word=gw,actual_word=aw) for gw in self.possible_guesses for aw in sample_words]
        scores_grid = np.array([count_letter(sg) for sg in results_grid]).reshape(n_guesses, n_samples, 2)
        cnt_greens = scores_grid[:, :, 0]
        cnt_yellows = scores_grid[:, :, 1]

        # Expectations
        expected_greens, expected_yellows, expected_value = calc_expectations(
            cnt_greens, cnt_yellows, sample_weights, green_value_premium=green_value_premium)

        # Recommendations, best first
        ranking = np.argsort(expected_value)[::-1]
        for i in ranking[:5]:
            print((self.possible_guesses[i], expected_value[i]))
        print("\n\n")

    def make_guess(self, guess:str, result:str):
        """Record a guess and its feedback, then update the weights.

        Args:
            guess: The word that was played (case-insensitive).
            result: Feedback string in score_word's format, one of 'G', 'Y'
                or '_' per letter (case-insensitive).

        Raises:
            ValueError: If no guesses remain, or if no word with positive
                weight is consistent with `result`. In both cases the game
                state is left unchanged.
        """
        if self.remaining_guesses <= 0:
            raise ValueError("No more remaining guesses")

        # Make sure to use upper case words
        guess = guess.upper()
        result = result.upper()

        # Keep only the words that would have produced this exact feedback.
        result_is_possible = np.array(
            [score_word(guess,aw) == result for aw in self.possible_guesses], dtype=float)
        updated_weights = self.weights * result_is_possible
        if not updated_weights.sum() > 0:
            raise ValueError("No remaining word is consistent with this guess and result")

        # Only spend a round once the update is known to be valid.
        self.weights = self.normalize(updated_weights)
        self.remaining_guesses -= 1
        self.scoreboard.append(guess)

        print(self.weights.shape)
        print(self.weights.sum())
        return None


In [36]:
# Use the first n rows of word_bank as the vocabulary and their counts as the prior.
n = 5000
words = word_bank['word'].values[:n]
weights = word_bank['count'].values[:n]

# sample_max caps how many candidate answers fit() scores each guess against.
wg = WordleGame(words=words, weights=weights, sample_max=100)
wg.fit()
# Record the first guess with its feedback (G / Y / _ per letter, as produced by score_word).
wg.make_guess(guess='THEIR', result='___YY')

('THOSE', 3.729991405651436)
('THIER', 3.724731660676773)
('THREE', 3.605437168348293)
('SHORE', 3.5812671458260663)
('THEIR', 3.5713331274085047)



(5000,)
1.0


In [37]:
# Print recommendations after the first guess, then record the second guess and its feedback.
wg.fit()
wg.make_guess(guess='GIRLS', result='_YY__')

('GIRLS', 6.955558657960447)
('GRILS', 6.487126493610173)
('BIRDS', 6.14751429449349)
('GRIDS', 5.958453346528746)
('GRIPS', 5.763987064394225)



(5000,)
0.9999999999999999


In [38]:
# Print recommendations after the second guess, then record the third guess and its feedback.
wg.fit()
wg.make_guess(guess='TRICE', result='_GGG_')


('BRIAN', 7.677148280950486)
('BRINK', 7.53995259666754)
('DRINK', 7.467586299564044)
('IRINA', 6.974756746057111)
('BRICK', 6.801963990787381)



(5000,)
1.0


In [34]:
# NOTE(review): this cell's execution count (34) is lower than the cells above
# it (35-38) and its recorded output has extra shape lines that the current
# fit() no longer prints, so the output likely predates the current class
# definition - confirm with Restart & Run All.
wg.fit()
wg.make_guess(guess='BRICK', result='_GGGG')
wg.fit()


('BRICK', 14.702415035613228)
('PRICK', 12.29758496438677)
('TRICK', 12.0)
('ERICK', 12.0)
('BRICE', 11.702415035613228)



(5000, 2)
(5000, 2)
(5000,)
(5000,)
(5000,)
(5000,)
1.0


In [25]:
# NOTE(review): execution count 25 and a recorded weight vector of length
# 10000 (the setup cell uses n = 5000) suggest this output came from an
# earlier session - confirm with Restart & Run All.
wg.make_guess(guess='TRICK', result='_GGGG')
wg.fit()


(10000,)
1.0
('BRICK', 14.34787962255799)
('PRICK', 12.258544177950371)
('ERICK', 12.181861295776708)
('FRICK', 12.106637142087234)
('CRICK', 12.105077761627697)



(10000, 5)
(10000, 5)
(10000,)
(10000,)
(10000,)


# Scratch

In [50]:
def update_weights(words, weights, possible_results, guess,result):
    """Return posterior weights after observing `result` for `guess`.

    Args:
        words: Vocabulary; `guess` must be one of its entries.
        weights: Prior weight per candidate answer.
        possible_results: Grid of score strings where row i holds the
            feedback for guessing words[i] against each candidate answer,
            i.e. possible_results[i, j] == score_word(words[i], answer_j).
            The columns must line up one-to-one with `weights`.
        guess: The word that was played.
        result: The feedback that was observed for `guess`.

    Returns:
        A new array of weights that is zero for every answer inconsistent
        with the observation and sums to 1. `weights` is not modified.

    Raises:
        ValueError: If `guess` is not in `words`, if the row length does not
            match `weights`, or if no answer is consistent with `result`.
    """
    words = np.asarray(words)
    weights = np.asarray(weights, dtype=float)
    # Use the grid that was passed in rather than a notebook global.
    possible_results = np.asarray(possible_results)

    guess_rows = np.flatnonzero(words == guess)
    if guess_rows.size == 0:
        raise ValueError(f"Guess {guess!r} is not in the word list")

    # Scoring is not symmetric, so the feedback for `guess` lives in its ROW
    # (guess fixed, answer varying), not in its column.
    is_consistent = (possible_results[guess_rows[0]] == result).reshape(weights.shape)

    # Posterior Update
    posterior = weights * is_consistent
    total = posterior.sum()
    if not total > 0:
        raise ValueError("No candidate answer is consistent with this guess and result")
    return posterior / total

def filter_word_bank(word_bank, guess, result):
    """Keep only the entries of `word_bank` consistent with a guess's feedback.

    Args:
        word_bank: Either a DataFrame with a 'word' column or an array-like
            of words.
        guess: The word that was played.
        result: The feedback observed for `guess`, in score_word's format.

    Returns:
        The rows (DataFrame) or elements (array) whose word would have
        produced `result` when `guess` is scored against it.
    """
    # Score against the bank that was passed in, not a notebook global, so the
    # mask always has the same length as the thing it filters.
    if isinstance(word_bank, pd.DataFrame):
        # Iterating a DataFrame yields column labels, so pull out the words.
        candidates = word_bank['word']
    else:
        word_bank = np.asarray(word_bank)
        candidates = word_bank

    keep = np.array(
        [score_word(guessed_word = guess,actual_word = aw) == result for aw in candidates],
        dtype=bool)
    updated_word_bank = word_bank[keep]
    return updated_word_bank


n = 2500
sample_n = 75
# Value of a green relative to a yellow. This was declared as 1.5 but never
# used (the call below hard-coded 3); it now holds the value that was actually
# in effect and is passed through, so there is a single place to tune it.
green_value_premium = 3
np.random.seed(123)

# Word Bank
words = word_bank['word'].values[:n]

# Weights / Priors
weights = word_bank['count'].values[:n]
weights = weights / weights.sum()

# Sample subset: draw sample_n candidate answers, weighted by the prior.
words_index = np.arange(len(words), dtype=int)
sample_index = np.random.choice(words_index, size=sample_n, replace=False, p=weights)
sample_words = words[sample_index]
sample_weights = weights[sample_index]

# Grids: rows are guesses (all words), columns are the sampled answers.
results_grid = np.array([score_word(gw,aw) for gw in words for aw in sample_words])
scores_grid = np.array([count_letter(sg) for sg in results_grid])

results_matrix = results_grid.reshape(len(words), len(sample_words))
cnt_greens = scores_grid[:,0].reshape(len(words), len(sample_words))
cnt_yellows = scores_grid[:,1].reshape(len(words), len(sample_words))

# Expectations
exptected_greens, exptected_yellows, expected_value = calc_expectations(cnt_greens, cnt_yellows, sample_weights, green_value_premium=green_value_premium)

# Recommendations (plain loop: a list comprehension used only for printing
# leaks a list of None as the cell output)
recommendations = [(words[i],expected_value[i])  for i in np.argsort(expected_value)[::-1]]
for recommendation in recommendations[0:5]:
    print(recommendation)

guess = 'SLATE'
result = '_____'
print(f"Guessing: {guess}, Result: {result}")

# Update weights: zero out every word that would not have produced `result`.
result_is_possible = np.array([(score_word(guess,aw) == result)*1 for aw in words])
weights = weights * result_is_possible
if not weights.sum() > 0:
    raise ValueError("No word is consistent with this guess and result")
weights = weights / weights.sum()

# Most likely remaining answers
recommendations = [(words[i],weights[i])  for i in np.argsort(weights)[::-1]]
for recommendation in recommendations[0:5]:
    print(recommendation)

# Further guesses can be applied by repeating the "Update weights" step above
# with a new (guess, result) pair.


['GRANT' 'PHOTO' 'LOCAL' 'ADULT' 'CHAIR' 'STUDY' 'SHACK' 'DANCE' 'WRITE'
 'STOCK' 'SITES' 'UPPER' 'BELOW' 'THERE' 'SALES' 'PORNO' 'GROUP' 'ORDER'
 'MEANS' 'VOICE' 'TALKS' 'SANTA' 'APPLY' 'COUNT' 'SINCE' 'CLASS' 'THOSE'
 'WHILE' 'ASIAN' 'THESE' 'USERS' 'APRIL' 'MIGHT' 'STAFF' 'POSTS' 'GRAIN'
 'BLOOM' 'SPACE' 'SCORE' 'STATE' 'MEDIA' 'MARCH' 'ICONS' 'BLACK' 'OFFER'
 'VOLTS' 'LEAST' 'ASKED' 'EMAIL' 'RISKS' 'RIVER' 'SOUND' 'PLACE' 'YAHOO'
 'MAYBE' 'LOOSE' 'ADDED' 'SHIPS' 'ALONG' 'SHOWN' 'MULTI' 'CRAFT' 'WOULD'
 'WANTS' 'BASED' 'GAMES' 'CLEAR' 'CLICK' 'SKYPE' 'ALLOW' 'CLAIM' 'ABOUT'
 'LISTS' 'IDEAS' 'STORE']
('SLATE', 0.7378749074715556)
('STOLE', 0.7225146754962011)
('STARE', 0.7133385934410235)
('STORE', 0.7131679976113405)
('THOSE', 0.711565114169505)
Guessing: SLATE, Reusult: _____
('WHICH', 0.2328037067894133)
('GROUP', 0.09244285949622434)
('FORUM', 0.07309368822853615)
('FOUND', 0.0666389802716281)
('GOING', 0.044602197952164524)


[None, None, None, None, None]

In [28]:
class Test:
    """Minimal scratch class that stores a single value."""

    def __init__(self, x):
        # `x` has to be a constructor argument; it was previously undefined.
        self.x = x

(2500, 75)
(75,)
(2500,)


In [34]:
# Display the raw (expected greens, expected yellows, expected value) arrays
# for the scratch grids, with greens weighted 3x.
calc_expectations(cnt_greens, cnt_yellows, sample_weights, green_value_premium=3)


(array([0.1317241 , 0.16228361, 0.09317182, ..., 0.03764318, 0.0478023 ,
        0.03721265]),
 array([0.17563401, 0.29682719, 0.06505301, ..., 0.22240815, 0.09211355,
        0.15217919]),
 array([0.57080632, 0.78367804, 0.34456848, ..., 0.33533769, 0.23552046,
        0.26381715]))

('THEIR', 74479372427)
('THREE', 73048620231)
('TREES', 70929599375)
('THERE', 70769934759)
('THOSE', 69867495361)



('RATES', 165554286761)
('NOTES', 162621926917)
('TREES', 162412449865)
('SHARE', 162081431189)
('STORE', 160513173329)





In [90]:
# Show the words that still carry positive weight.
# NOTE(review): WordleGame as defined in this notebook exposes
# `possible_guesses`, not `possible_answers` - presumably this cell ran
# against a different revision of the class; confirm before re-running.
wg.possible_answers[wg.weights > 0]

array(['CLICK', 'BLACK', 'AGAIN', 'APRIL', 'DAVID', 'DAILY', 'FINAL',
       'LINUX', 'INDIA', 'QUICK', 'BRAND', 'JAPAN', 'ALBUM', 'BUILD',
       'APPLY', 'AWARD', 'BRING', 'VALID', 'GRAND', 'CIVIL', 'FULLY',
       'CLAIM', 'URBAN', 'ADMIN', 'MAGIC', 'GAMMA', 'FUNNY', 'BRAIN',
       'CARRY', 'FRANK', 'RURAL', 'BRIAN', 'MIAMI', 'DRINK', 'ARRAY',
       'PLAIN'], dtype=object)