In [1]:
import pythainlp
import numpy as np
import pandas as pd

from utils.THWordle import *
from utils.reverse_wordle import *

In [2]:
# Load the raw word list: one vocabulary entry per line of the text file.
# The encoding is given explicitly so the result does not depend on the platform's
# default codec (which is not UTF-8 everywhere, e.g. on Windows).
# NOTE(review): assumes the file is stored as UTF-8 -- confirm against the data file.
with open('./data/Thai words.txt', encoding='utf-8') as f:
    lines = f.readlines()

# Strip only newline characters from each line; any other whitespace is kept untouched.
vocab = [line.strip('\n') for line in lines]

In [3]:
# One row per vocabulary word, plus a 'length' column holding the number of pieces
# that split_word_by_base returns for that word.
wddf = pd.DataFrame({'word': vocab}).assign(
    length=lambda frame: frame['word'].apply(lambda word: len(split_word_by_base(word)))
)

In [4]:
# Target value of the 'length' column; only words with exactly this length stay in play.
length = 5

# Restrict the frame to the target length and expose the surviving words as the vocabulary.
wddf = wddf.loc[wddf['length'] == length]
vocab = wddf['word']

# Display the filtered frame.
wddf

Unnamed: 0,word,length
783,เสมือน,5
784,ผลเสีย,5
785,ลูกชาย,5
786,หนูน้อย,5
787,กระแส,5
...,...,...
1461,สันธาน,5
1462,สมควร,5
1463,ตอนนั้น,5
1464,สวรรค์,5


In [5]:
# Score matrix for the filtered words; the CSV path handed to the helper is derived
# from the chosen word length.
df = get_th_score_matrix(wddf, "./{}TH-matrix.csv".format(length))

# Show the column labels of the matrix.
df.columns

Index(['เสมือน', 'ผลเสีย', 'ลูกชาย', 'หนูน้อย', 'กระแส', 'เลี้ยงดู', 'รวมทั้ง',
       'หนทาง', 'ละลาย', 'เหมาะ',
       ...
       'สุพรรณ', 'เดี๋ยวนี้', 'สัมพันธ์', 'กระบะ', 'คำขวัญ', 'สันธาน', 'สมควร',
       'ตอนนั้น', 'สวรรค์', 'เสบียง'],
      dtype='object', length=683)

In [6]:
# Number of columns in the score matrix.
df.shape[1]

683

# Play Wordle

In [7]:
# When True, the play cells overwrite the game's answer (wd.ans) with fixed_word.
fixed_ans = False

# Answer that is forced onto the game when fixed_ans is True.
fixed_word = "ลักษณะ"

In [8]:
# Strategy 1: random guessing.
# Every turn draws a word uniformly at random from the candidates that are still
# consistent with the feedback received so far.
wd = THWordle(wddf, allow_random=False)

# Override the game's answer when a fixed one was requested.
if fixed_ans:
    wd.ans = fixed_word

print("(hidden) answer:", wd.ans)
print("vocab size:", len(vocab))
print("")

# State before the first guess (V0): nothing has been ruled out yet, so every row index
# of the score matrix and every vocabulary word is a candidate. Seeding possible_words
# with the full vocabulary removes the need for a None sentinel on the first turn.
possible_ind = range(len(df))
possible_words = vocab
correct = False

while not correct:
    # Turn i: draw the guess from V(i-1), the candidates left after the previous turn.
    guess = np.random.choice(possible_words, 1)[0]

    wd_result = wd.guess(guess)
    # The score is iterated as groups of values; flatten it before encoding.
    flatten_score = [s for group in wd_result['score'] for s in group]
    guess_result = encode_score(flatten_score)

    # Build V(i): narrow the candidates using this turn's guess and its encoded score.
    possible_ind, possible_words = filter_possible_words(df, possible_ind, guess, guess_result)

    # The game is solved once every score entry equals 2.
    correct = (wd_result['score'] == 2).all()

    print(f"guess: {wd_result['#guess']} | {wd_result['guessed word']} | {''.join([str(i) for i in wd_result['score']])}")
    print(f"words left: {len(possible_words)}")
    # Only list the remaining candidates when there are few enough to read.
    if len(possible_words) < 10:
        print(f"{possible_words}")
    print("")

(hidden) answer: รัชกาล
vocab size: 683

guess: 1 | ทำขวัญ | [0 2 0 2][0 2 2 2][0 2 2 2][0 2 1 2][0 2 2 2]
words left: 20

guess: 2 | รัฐบาล | [2 2 2 2][0 2 2 2][0 2 2 2][2 2 2 2][2 2 2 2]
words left: 1
['รัชกาล']

guess: 3 | รัชกาล | [2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2]
words left: 1
['รัชกาล']



In [10]:
# Strategy 2: pick each guess with find_best_guess instead of drawing at random.
wd = THWordle(wddf, allow_random=False)

# Override the game's answer when a fixed one was requested.
if fixed_ans:
    wd.ans = fixed_word

print("(hidden) answer:", wd.ans)
print("vocab size:", len(vocab))
print("")

# State before the first guess (V0): every row index of the score matrix is a candidate.
# Because possible_ind already starts as the full range, the first turn needs no
# special case: the same call serves every turn.
possible_ind = range(len(df))
correct = False

while not correct:
    # Turn i: choose the guess from the candidates left after the previous turn.
    guess = find_best_guess(df, possible_ind, use_all_word=True)

    wd_result = wd.guess(guess)
    # The score is iterated as groups of values; flatten it before encoding.
    flatten_score = [s for group in wd_result['score'] for s in group]
    guess_result = encode_score(flatten_score)

    # Build V(i): narrow the candidates using this turn's guess and its encoded score.
    possible_ind, possible_words = filter_possible_words(df, possible_ind, guess, guess_result)

    # The game is solved once every score entry equals 2.
    correct = (wd_result['score'] == 2).all()

    print(f"guess: {wd_result['#guess']} | {wd_result['guessed word']} | {''.join([str(i) for i in wd_result['score']])}")
    print(f"words left: {len(possible_words)}")
    # Only list the remaining candidates when there are few enough to read.
    if len(possible_words) < 10:
        print(f"{possible_words}")
    print("")

(hidden) answer: บริวาร
vocab size: 683

ทรมาน: expected vocab size = 3.9
guess: 1 | ทรมาน | [0 2 2 2][2 2 0 2][0 2 2 2][2 2 2 2][0 2 2 2]
words left: 4
['บริการ', 'บริวาร', 'บริหาร', 'บริจาค']

จนกว่า: expected vocab size = 1.0
guess: 2 | จนกว่า | [0 2 2 2][0 2 0 2][0 2 2 2][1 2 2 0][1 2 2 2]
words left: 1
['บริวาร']

guess: 3 | บริวาร | [2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2]
words left: 1
['บริวาร']

