In [1]:
import collections, random, time
import requests, json, re
from string import ascii_lowercase
from itertools import product
from collections import Counter
import pickle

In [2]:
# Puzzle number of interest.
wordle_num = 249
# Word lists; both start empty and are filled from disk in the loading cell.
answers, guesses = [], []
# Twitter API bearer token; empty until it is read from the token file.
BEARER_TOKEN = ""

In [3]:
#Load in possible answer and guess words (one word per line, surrounding whitespace removed)
with open('answers.txt', 'r') as f:
    answers = [x.strip() for x in f]
with open('guesses.txt', 'r') as f:
    guesses = [x.strip() for x in f]
#Token for twitter API, kept in a separate file rather than in the notebook
with open("bearer_token", 'r') as f:
    BEARER_TOKEN = f.read().strip()

Given the result pattern for a guess, together with the lists of possible guesses and answers, work out which answers have at least one guess that could have produced that pattern; those are the answers that could have been the puzzle's solution. Repeating this with several different guess patterns from the same day narrows down the possible answers.

In [4]:
#Method to score wordle guess. From https://mathspp.com/blog/solving-wordle-with-python
def score(secret, guess):
    """Score `guess` against `secret`, one mark per position.

    Returns a list with, for each paired position:
      2 - the guess letter equals the secret letter at that position,
      1 - the guess letter is still available among the secret letters that
          were not matched exactly (each such letter can be used once),
      0 - otherwise.
    """
    pairs = list(zip(secret, guess))

    # Secret letters that were not matched in place; these are the only
    # letters that can earn a 1, and each is consumed when it does.
    unmatched = collections.Counter()
    for secret_char, guess_char in pairs:
        if secret_char != guess_char:
            unmatched[secret_char] += 1

    marks = []
    for secret_char, guess_char in pairs:
        if secret_char == guess_char:
            mark = 2
        elif unmatched[guess_char] > 0:
            # A positive count already implies the letter occurs in the secret.
            unmatched[guess_char] -= 1
            mark = 1
        else:
            mark = 0
        marks.append(mark)
    return marks
#Check if an answer and a guess will make the pattern result
def check_fit(answer, guess, result):
    """Return True when scoring `guess` against `answer` yields exactly `result`."""
    produced = score(answer, guess)
    return produced == result

In [5]:
#Creates dicts to speed up narrowing down possibilities for a guess pattern
def create_useful_dicts():
    """Build the two lookup tables used to pre-filter guesses.

    Reads the module-level `guesses` and `answers` lists.

    Returns:
        (pos_letter_answer_dict, any_letter_answer_dict) where
        - pos_letter_answer_dict[i][letter] is the set of guesses that have
          `letter` at index i (i in 0..4, letter in a..z);
        - any_letter_answer_dict[answer][i] is the set of guesses whose letter
          at index i occurs in `answer` but is not the answer's own letter at
          index i, i.e. the only guesses that can score 1 at that index.
    """
    pos_letter_answer_dict = {}
    #set up position letter dictionary
    for i in range(5):
        pos_letter_answer_dict[i] = {}
        for j in ascii_lowercase:
            pos_letter_answer_dict[i][j] = set()
    for guess in guesses:
        for i, j in enumerate(guess):
            pos_letter_answer_dict[i][j].add(guess)

    #For an answer and an index i, collect the guesses that could score 1 at i.
    # The positional index above already groups guesses by (index, letter), so
    # each entry is a union of a few of its sets instead of a scan of all guesses.
    any_letter_answer_dict = {}
    for answer in answers:
        answer_letters = set(answer)
        any_letter_answer_dict[answer] = {}
        for i in range(5):
            #Slice rather than index so a short answer cannot raise IndexError
            exact_letter = answer[i:i + 1]
            candidates = set()
            for letter in answer_letters:
                #A guess holding the answer's own letter at i scores 2 there,
                # never 1, even when that letter also occurs elsewhere in the answer.
                if letter != exact_letter:
                    candidates |= pos_letter_answer_dict[i].get(letter, set())
            any_letter_answer_dict[answer][i] = candidates
    return pos_letter_answer_dict, any_letter_answer_dict

In [6]:
#Method to find possible answers for a given result
def result_answers_opt(result, answers, guesses, pos_letter_answer_dict, any_letter_answer_dict):
    """Return the set of answers for which some guess produces `result`.

    Args:
        result: sequence of marks (0, 1 or 2), one per letter position.
        answers: iterable of candidate answer words.
        guesses: iterable of allowed guess words.
        pos_letter_answer_dict: pos_letter_answer_dict[i][letter] -> set of
            guesses with `letter` at index i.
        any_letter_answer_dict: any_letter_answer_dict[answer][i] -> set of
            guesses that may score 1 at index i against `answer`.

    The two dicts are only used to shrink the candidate guesses; every
    surviving guess is still verified with check_fit.
    """
    #Built once: intersection() below returns new sets, so this is never mutated
    all_guesses = set(guesses)
    possible_answers = set()
    for answer in answers:
        #filter guesses based on 2s and 1s in the result
        filtered_guesses = all_guesses
        for i, x in enumerate(result):
            if x == 2:
                filtered_guesses = filtered_guesses.intersection(pos_letter_answer_dict[i][answer[i]])
            if x == 1:
                filtered_guesses = filtered_guesses.intersection(any_letter_answer_dict[answer][i])
            if not filtered_guesses:
                #Nothing left to check for this answer
                break
        #One fitting guess is enough to keep the answer
        if any(check_fit(answer, guess, result) for guess in filtered_guesses):
            possible_answers.add(answer)
    return possible_answers

In [7]:
#Create a dictionary mapping possible patterns given as responses to wordle guesses to possible words that 
# could've been the answer for that pattern
def create_pattern_words_dict():
    """Load the pattern -> possible-answers mapping, rebuilding it if needed.

    The mapping is keyed by 5-tuples of marks (each 0, 1 or 2) and each value
    is the set of answers for which some guess produces that pattern. It is
    cached in the pickle file "saved_word_patterns" in the working directory.

    Returns:
        dict mapping pattern tuples to sets of answer words.
    """
    try:
        #Read from file for words that fit each possible pattern
        #NOTE(review): pickle.load can execute arbitrary code; only use a cache
        # file this notebook wrote itself.
        with open("saved_word_patterns", "rb") as f:
            pattern_words = pickle.load(f)
    except Exception:
        #If file is missing or unreadable, recreate it. Takes ~10 min.
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        pos_letter_answer_dict, any_letter_answer_dict = create_useful_dicts()
        #All sequences of 5 colored blocks
        possible_patterns = product([0,1,2], repeat=5)
        pattern_words = {}
        for pattern in possible_patterns:
            pattern_words[pattern] = result_answers_opt(list(pattern), answers, guesses, pos_letter_answer_dict, any_letter_answer_dict)
        
        #Dump to file to be read if needed later
        with open("saved_word_patterns", "wb") as f:
            pickle.dump(pattern_words, f)
    return pattern_words

In [8]:
def filter_answers(answers, pattern_words, results):
    """Keep only the answers that are possible for every pattern in `results`.

    Args:
        answers: iterable of candidate answer words.
        pattern_words: mapping from a pattern to the answers possible for it.
        results: iterable of observed patterns (keys of `pattern_words`).

    Returns:
        A new set; equal to set(answers) when `results` is empty.
    """
    remaining = set(answers)
    for pattern in results:
        # Narrow the working copy in place; the caller's inputs are untouched.
        remaining.intersection_update(pattern_words[pattern])
    return remaining

#Create all possible patterns for a guess for answer word. Check how well those can narrow it down
def test_filter(answers, pattern_words, word):
    results = set()
    for guess in guesses:
        results.add(tuple(score(word, guess)))
    return filter_answers(answers, pattern_words, results)

In [9]:
#Pattern -> possible answers lookup (read from the cache file when it exists)
pattern_words = create_pattern_words_dict()
#Little test, just for fun. See from 100 words how many wouldn't be able to be found
for answer in random.sample(answers, 100):
    b = list(test_filter(answers, pattern_words, answer))
    #Only report words that are not narrowed down to exactly themselves
    if b != [answer]:
        print(answer, b)

('knack', ['shore', 'knack'])
('maxim', ['share', 'maxim', 'laden'])
('pygmy', ['elate', 'terse', 'saute', 'daily', 'surge', 'parse', 'beady', 'diary', 'tawny', 'pygmy', 'sauce'])
('slyly', ['brain', 'slyly', 'saint', 'pried'])
('skiff', ['stare', 'shard', 'loser', 'spiel', 'spied', 'swath', 'spelt', 'saint', 'skiff', 'sober', 'stall', 'stole'])
('tight', ['posit', 'tight'])
('amaze', ['amaze', 'slate'])


In [10]:
#Twitter search from https://towardsdatascience.com/searching-for-tweets-with-python-f659144b225f

#define search twitter function
def search_twitter(query, tweet_fields, bearer_token = BEARER_TOKEN, count = 10, end_time = -1, timeout = 30):
    """Call the Twitter v2 recent-search endpoint and return the decoded JSON.

    Args:
        query: search query as plain (not URL-encoded) text.
        tweet_fields: extra query-string fragment of the form "name=value"
            (several may be joined with "&"), e.g. "tweet.fields=text".
        bearer_token: API bearer token. The default is bound when this
            function is defined, so the token must be loaded before this cell.
        count: value sent as `max_results`.
        end_time: value sent as `end_time`; -1 means the parameter is omitted.
        timeout: seconds to wait for the server before requests gives up.

    Returns:
        The response body parsed as JSON.

    Raises:
        Exception: (status_code, response_text) when the status is not 200.
    """
    headers = {"Authorization": "Bearer {}".format(bearer_token)}

    endpoint_url = 'https://api.twitter.com/2/tweets/search/recent'

    #Let requests build and encode the query string, so spaces and reserved
    # characters in the query cannot corrupt the URL.
    params = [("query", query), ("max_results", count)]
    if end_time != -1:
        params.append(("end_time", end_time))
    for fragment in tweet_fields.split("&"):
        if fragment:
            name, _, value = fragment.partition("=")
            params.append((name, value))

    response = requests.get(endpoint_url, headers=headers, params=params, timeout=timeout)

    print(response.status_code)

    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def get_twitter_response(wordle_num, num = 50, end_time = -1):
    """Search Twitter for "Wordle <wordle_num>" and return the JSON response.

    Args:
        wordle_num: puzzle number appended to the search term.
        num: forwarded to search_twitter as `count`.
        end_time: forwarded to search_twitter unchanged.
    """
    return search_twitter(
        "Wordle " + str(wordle_num),               #search term
        "tweet.fields=text,author_id,created_at",  #twitter fields to be returned by api call
        BEARER_TOKEN,
        count=num,
        end_time=end_time,
    )

#A line that consists of exactly five Wordle squares. Alternation is used
# instead of a character class because "|" inside [...] is a literal pipe.
_WORDLE_ROW = re.compile(u'^(?:\U0001f7e8|\U0001f7e9|\u2b1b|\u2b1c){5}$')
#Square -> mark, using the same marks as score(): 2 green, 1 yellow, 0 black/white
_SQUARE_TO_MARK = (
    (u'\U0001f7e8', '1'),
    (u'\U0001f7e9', '2'),
    (u'\u2b1b', '0'),
    (u'\u2b1c', '0'),
)

def produce_results(json_response):
    """Extract the 5-square result rows from a Twitter search response.

    Args:
        json_response: dict with an optional 'data' list of tweets, each a
            dict holding the tweet body under 'text'.

    Returns:
        (results, counted_results): the set of distinct patterns found, as
        5-tuples of ints, and a Counter of how many times each pattern was
        seen. Both are empty when the response has no 'data'.
    """
    results = set()
    counted_results = Counter()
    #A response may carry no 'data' key at all; treat that as no tweets
    for response in json_response.get('data') or []:
        for line in response['text'].split('\n'):
            #Surrounding whitespace does not change what the row shows
            line = line.strip()
            if not _WORDLE_ROW.match(line):
                continue
            for square, mark in _SQUARE_TO_MARK:
                line = line.replace(square, mark)
            tup = tuple(int(x) for x in line)
            results.add(tup)
            counted_results.update([tup])
    return results, counted_results

In [11]:
def answer_wordle(wordle_num, time_wait = 2, count = 20):
    """Narrow down the answer of a Wordle puzzle from patterns shared on Twitter.

    Fetches pages of tweets for the puzzle, going back in time, until exactly
    one candidate answer is left, no candidates are left, a page adds no new
    pattern, or Twitter returns no more tweets.

    Args:
        wordle_num: puzzle number to search for.
        time_wait: seconds to sleep between requests.
        count: number of tweets requested per page.

    Returns:
        (filtered_answers, all_responses): the set of remaining candidate
        answers and the list of JSON responses that contained tweets.
    """
    pattern_words = create_pattern_words_dict()
    all_responses = []

    json_resp = get_twitter_response(wordle_num, num = count)
    if not json_resp.get('data'):
        #Nothing to filter with; every answer is still possible
        print("no tweets found, sorry")
        return set(answers), all_responses
    all_responses.append(json_resp)
    earliest_time = json_resp['data'][-1]['created_at']
    results, counted_results = produce_results(json_resp)

    #NOTE(review): this first pass trusts every pattern, while the passes in
    # the loop only use patterns seen more than once; confirm that is intended.
    filtered_answers = filter_answers(answers, pattern_words, results)

    #Loop until exactly one word is left, so the empty case is reported too
    while len(filtered_answers) != 1:
        if len(filtered_answers) == 0:
            print("Oops. Ran out of words....")
            break

        print("getting more tweets. still have {} words left".format(len(filtered_answers)))
        time.sleep(time_wait) #Just so as to not overdo it
        json_resp = get_twitter_response(wordle_num, end_time=earliest_time, num = count)
        if not json_resp.get('data'):
            #A page without tweets has no 'created_at' to continue from
            print("no more tweets to fetch, sorry")
            break
        all_responses.append(json_resp)
        earliest_time = json_resp['data'][-1]['created_at']

        #Update the results and counted results sets/counters
        new_results, new_counted_results = produce_results(json_resp)
        new_results = new_results.union(results)
        new_counted_results.update(counted_results)
        if len(new_results) == len(results): #Shouldn't just run forever. Stop here I guess
            print("didn't find any new patterns, sorry")
            break
        results = new_results
        counted_results = new_counted_results
        #Only trust patterns seen more than once, to ignore stray impossible ones
        filtered_answers = filter_answers(answers, pattern_words, [x for x,i in new_counted_results.most_common() if i > 1])

    return  filtered_answers, all_responses

In [12]:
#Run the solver for puzzle 250 (the number is given literally; wordle_num from the config cell is not used here).
# `words` is the set of remaining candidate answers, `responses` the fetched tweet pages.
words, responses = answer_wordle(250)

200
('getting more tweets. still have', 16, 'words left')
200
('getting more tweets. still have', 57, 'words left')
200
('getting more tweets. still have', 51, 'words left')
200
('getting more tweets. still have', 44, 'words left')
200
('getting more tweets. still have', 36, 'words left')
200
('getting more tweets. still have', 13, 'words left')
200
('getting more tweets. still have', 13, 'words left')
200
('getting more tweets. still have', 13, 'words left')
200
('getting more tweets. still have', 13, 'words left')
200
('getting more tweets. still have', 8, 'words left')
200
('getting more tweets. still have', 8, 'words left')
200
('getting more tweets. still have', 8, 'words left')
200
('getting more tweets. still have', 8, 'words left')
200
('getting more tweets. still have', 6, 'words left')
200
('getting more tweets. still have', 6, 'words left')
200
('getting more tweets. still have', 6, 'words left')
200
('getting more tweets. still have', 6, 'words left')
200
didn't find any ne

In [13]:
#Display the candidate answers that answer_wordle could not rule out
words

{'bloke', 'crate', 'lease', 'saute', 'slate', 'slope'}

#### A few tests to work out why it wasn't finding the right word. It looks like some tweets contain patterns that just aren't possible for the puzzle of the day. This could be a mistake in generating the text that gets shared on social media. It is also possible that the Twitter search for "Wordle xxx" returns shares from other websites that use the same block patterns.

In [14]:
#Known solution, used in the next cell to find tweets whose patterns rule it out
actual_answer = 'bloke'

In [15]:
#List every tweet whose patterns, taken on their own, would exclude the real answer
for res in responses:
    for r in res['data']:
        tweet_patterns = produce_results({'data': [r]})[0]
        if actual_answer not in filter_answers(answers, pattern_words, tweet_patterns):
            #One pre-formatted unicode string prints the same on Python 2 and 3
            print(u"{} {} {}".format(r, r['text'], 'https://twitter.com/a/status/' + str(r['id'])))

{u'text': u'Wordle 250 5/6\n\n\u2b1b\U0001f7e8\u2b1b\U0001f7e8\u2b1b\n\u2b1b\u2b1b\u2b1b\U0001f7e9\U0001f7e8\n\u2b1b\U0001f7e8\U0001f7e8\U0001f7e9\U0001f7e8\n\u2b1b\U0001f7e8\U0001f7e9\U0001f7e9\u2b1b\n\U0001f7e9\U0001f7e9\U0001f7e9\U0001f7e9\U0001f7e9', u'created_at': u'2022-02-25T06:49:44.000Z', u'author_id': u'403138409', u'id': u'1497101488824352770'} Wordle 250 5/6

⬛🟨⬛🟨⬛
⬛⬛⬛🟩🟨
⬛🟨🟨🟩🟨
⬛🟨🟩🟩⬛
🟩🟩🟩🟩🟩 https://twitter.com/a/status/1497101488824352770
{u'text': u'Wordle 250 4/6\n\n\u2b1c\u2b1c\u2b1c\u2b1c\u2b1c\n\u2b1c\U0001f7e8\U0001f7e8\u2b1c\U0001f7e8\n\U0001f7e9\U0001f7e8\U0001f7e9\u2b1c\u2b1c\n\U0001f7e9\U0001f7e9\U0001f7e9\U0001f7e9\U0001f7e9', u'created_at': u'2022-02-25T06:22:04.000Z', u'author_id': u'45521586', u'id': u'1497094528498429952'} Wordle 250 4/6

⬜⬜⬜⬜⬜
⬜🟨🟨⬜🟨
🟩🟨🟩⬜⬜
🟩🟩🟩🟩🟩 https://twitter.com/a/status/1497094528498429952
