# Try to use OCR to recognize the letters in the Wordle game

In [None]:
# Load test image
from IPython.display import Image, display

# Render the reference screenshot inline so the OCR results below can be checked by eye
solved_board = Image(filename='images/solved.PNG')
display(solved_board)

In [None]:
from image_processing import get_wordle_grid_boxes, crop_cell_margin, detect_letter, tesseract_inference
import cv2

image_path = 'images/solved.PNG'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None instead.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

rows = get_wordle_grid_boxes(image)

if rows:
    print(f"Extracted {len(rows)} rows from the Wordle board")

    # Crop the cells and put them into a nested list (one inner list per board row)
    wordle_board_imgs = []
    for row in rows:
        wordle_board_row = []
        for cell in row:
            # Entries 1..4 of each cell record are used as the bounding box (x, y, w, h).
            x, y, w, h = cell[1:5]
            cropped_cell = crop_cell_margin(image[y:y+h, x:x+w])
            wordle_board_row.append(cropped_cell)
        wordle_board_imgs.append(wordle_board_row)

    # Run Tesseract OCR on each cell that contains a letter
    for i, row in enumerate(wordle_board_imgs):
        for j, cell in enumerate(row):
            # detect_letter receives a copy so the cell passed to OCR stays untouched.
            if detect_letter(cell.copy()):
                # Show the cropped cell for visual inspection.
                # NOTE: this opens a native OpenCV window and waitKey(0) blocks
                # until a key is pressed, so the cell needs a GUI session.
                cv2.imshow("Croped Image", cell)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                letter = tesseract_inference(cell)
                print(f"Row {i+1}, Column {j+1}: {letter}")
            else:
                print(f"Row {i+1}, Column {j+1}: No letter detected")
else:
    # `rows` is falsy here; guard against None so reporting the failure cannot
    # itself raise a TypeError from len(None).
    found_rows = len(rows) if rows is not None else 0
    print(f"Failed to extract wordle board, found {found_rows} rows instead of 6")

# Also extract the colors from the Wordle board

In [None]:
from image_processing import get_wordle_grid_boxes, crop_cell_margin, detect_letter, extract_color_from_cell
import cv2

image_path = 'images/row 2.PNG'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None instead.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

rows = get_wordle_grid_boxes(image)

if rows:
    print(f"Extracted {len(rows)} rows from the Wordle board")

    # Crop the cells and put them into a nested list (one inner list per board row)
    wordle_board_imgs = []
    for row in rows:
        wordle_board_row = []
        for cell in row:
            # Entries 1..4 of each cell record are used as the bounding box (x, y, w, h).
            x, y, w, h = cell[1:5]
            cropped_cell = crop_cell_margin(image[y:y+h, x:x+w])
            wordle_board_row.append(cropped_cell)
        wordle_board_imgs.append(wordle_board_row)

    # Extract the tile color of each cell that contains a letter
    for i, row in enumerate(wordle_board_imgs):
        for j, cell in enumerate(row):
            # detect_letter receives a copy so the cell used below stays untouched.
            if detect_letter(cell.copy()):
                # Show the cropped cell for visual inspection.
                # NOTE: this opens a native OpenCV window and waitKey(0) blocks
                # until a key is pressed, so the cell needs a GUI session.
                cv2.imshow("Croped Image", cell)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                color = extract_color_from_cell(cell)
                print(f"Row {i+1}, Column {j+1}: {color}")
            else:
                print(f"Row {i+1}, Column {j+1}: No letter detected")
else:
    # `rows` is falsy here; guard against None so reporting the failure cannot
    # itself raise a TypeError from len(None).
    found_rows = len(rows) if rows is not None else 0
    print(f"Failed to extract wordle board, found {found_rows} rows instead of 6")

# Try CNN letter recognition

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class MNISTClassifier(nn.Module):
    """
    Small MNIST-style CNN that classifies a single-channel image into 26 classes.

    v1 Source: https://nextjournal.com/gkoehler/pytorch-mnist
    v2 Source: https://github.com/PyTorch/examples/blob/main/mnist/main.py
    The v2 architecture replaced v1 because a better model was needed
    (original author's note: "not able to make mistake on android app").

    Input:  float tensor of shape (N, 1, 28, 28). The 28x28 size is required
            by ``fc1``: two unpadded 3x3 convolutions (28 -> 26 -> 24) followed
            by 2x2 max-pooling (-> 12) give 64 * 12 * 12 = 9216 features.
    Output: tensor of shape (N, 26) holding log-probabilities (log-softmax).
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 26)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Flatten everything except the batch dimension. The tensor method is
        # used (instead of torch.flatten) so this class only depends on the
        # names imported in its own cell (`nn`, `F`), not on a later
        # `import torch`.
        x = x.flatten(1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [3]:
from image_processing import get_wordle_grid_boxes, crop_cell_margin, detect_letter, tesseract_inference
import cv2
import torch
from PIL import Image
import torchvision

# Select the compute device: CUDA when it is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Read the fine-tuned weights, mapping their storage onto the selected device,
# then rebuild the classifier and restore those weights into it.
state_dict = torch.load(
    "alphabet_classifier/models/finetune_model_17.pth",
    map_location=device,
)
model = MNISTClassifier().to(device)
model.load_state_dict(state_dict)
model.eval()  # switch to evaluation mode (dropout layers become no-ops)
def get_mnist_transform():
    """Build the preprocessing pipeline applied to a PIL image before inference.

    Steps, in order: convert to a single grayscale channel, resize to 28x28,
    convert to a tensor, then normalize with mean 0.1307 and std 0.3081
    (the values used by the PyTorch MNIST example).
    """
    tv_transforms = torchvision.transforms
    steps = [
        tv_transforms.Grayscale(num_output_channels=1),  # collapse RGB input to one channel
        tv_transforms.Resize((28, 28)),                  # MNIST-sized input
        tv_transforms.ToTensor(),
        tv_transforms.Normalize((0.1307,), (0.3081,)),
    ]
    return tv_transforms.Compose(steps)
transform = get_mnist_transform()


image_path = 'images/solved2.PNG'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None instead.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

rows = get_wordle_grid_boxes(image)

if rows:
    print(f"Extracted {len(rows)} rows from the Wordle board")

    # Crop the cells and put them into a nested list (one inner list per board row)
    wordle_board_imgs = []
    for row in rows:
        wordle_board_row = []
        for cell in row:
            # Entries 1..4 of each cell record are used as the bounding box (x, y, w, h).
            x, y, w, h = cell[1:5]
            cropped_cell = crop_cell_margin(image[y:y+h, x:x+w])
            wordle_board_row.append(cropped_cell)
        wordle_board_imgs.append(wordle_board_row)

    # Classify each cell that contains a letter with the CNN
    for i, row in enumerate(wordle_board_imgs):
        for j, cell in enumerate(row):
            # detect_letter receives a copy so the cell used below stays untouched.
            if detect_letter(cell.copy()):
                # Show the cropped cell for visual inspection.
                # NOTE: this opens a native OpenCV window and waitKey(0) blocks
                # until a key is pressed, so the cell needs a GUI session.
                cv2.imshow("Croped Image", cell)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                # 1) Convert the BGR crop to grayscale
                cell_gray = cv2.cvtColor(cell, cv2.COLOR_BGR2GRAY)

                # 2) Binarize, letting Otsu's method pick the threshold.
                #    THRESH_BINARY does NOT invert: pixels brighter than the
                #    threshold become white. This presumably relies on the
                #    letters being lighter than the tile background, which
                #    yields white-on-black glyphs -- verify for other themes.
                _, cell_th = cv2.threshold(
                    cell_gray,
                    0,
                    255,
                    cv2.THRESH_BINARY | cv2.THRESH_OTSU
                )

                # 3) Wrap in a PIL image, as expected by the torchvision transform
                pil_cell = Image.fromarray(cell_th)

                # 4) Apply the MNIST-style transform
                img_t = transform(pil_cell)               # -> 1x28x28, normalized
                img_t = img_t.unsqueeze(0).to(device)     # -> 1x1x28x28 batch tensor

                # Show the transformed (normalized, 28x28) image
                cv2.imshow("Transformed Image", img_t.squeeze().cpu().numpy())
                cv2.waitKey(0)
                cv2.destroyAllWindows()

                # 5) Inference: the model returns log-probabilities over the
                #    26 classes; class index k is mapped to the k-th letter of A..Z.
                with torch.no_grad():
                    log_probs = model(img_t)
                    pred = log_probs.argmax(dim=1).item()
                    letter = chr(ord('A') + pred)
                print(f"Row {i+1}, Column {j+1}: {letter}")
            else:
                print(f"Row {i+1}, Column {j+1}: No letter detected")
else:
    # `rows` is falsy here; guard against None so reporting the failure cannot
    # itself raise a TypeError from len(None).
    found_rows = len(rows) if rows is not None else 0
    print(f"Failed to extract wordle board, found {found_rows} rows instead of 6")

Extracted 6 rows from the Wordle board
Row 1, Column 1: S
Row 1, Column 2: O
Row 1, Column 3: A
Row 1, Column 4: R
Row 1, Column 5: E
Row 2, Column 1: M
Row 2, Column 2: A
Row 2, Column 3: R
Row 2, Column 4: O
Row 2, Column 5: N
Row 3, Column 1: O
Row 3, Column 2: R
Row 3, Column 3: G
Row 3, Column 4: A
Row 3, Column 5: N
Row 4, Column 1: G
Row 4, Column 2: R
Row 4, Column 3: O
Row 4, Column 4: A
Row 4, Column 5: N
Row 5, Column 1: No letter detected
Row 5, Column 2: No letter detected
Row 5, Column 3: No letter detected
Row 5, Column 4: No letter detected
Row 5, Column 5: No letter detected
Row 6, Column 1: No letter detected
Row 6, Column 2: No letter detected
Row 6, Column 3: No letter detected
Row 6, Column 4: No letter detected
Row 6, Column 5: No letter detected
