In [251]:
import os
import pickle
import time
import importlib
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from features.data_processing import load_scrabble_data
from features.quadrant_features import count_tiles_in_quadrants
from game_logic.utils import pretty_print_board

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

In [171]:
cached_features_path = "../data/numerical_features.pkl"

def cache_features_df(df: pd.DataFrame, filepath: str) -> None:
    """
    Saves a filtered version of the DataFrame as a pickle file, excluding complex data types.

    Args:
        df (pd.DataFrame): The original DataFrame.
        filepath (str): The path to save the filtered pickle file.
    """
    exclude_columns = {"board", "board_rep", "cs_h", "cs_v", "8_letter_bingo_lanes_list", "7_letter_bingo_lanes_list"}

    # Filter DataFrame by excluding specified columns
    df_filtered = df.drop(columns=[col for col in exclude_columns if col in df.columns])

    # Save the filtered DataFrame as a pickle
    df_filtered.to_pickle(cached_features_path)
    print(f"Filtered DataFrame cached successfully at: {filepath}")

# Example usage:
cached_features_path = "../data/numerical_features.pkl"
cache_filtered_df(df_raw, cached_features_path)


Filtered DataFrame cached successfully at: ../data/numerical_features.pkl


In [2]:
data_path = "../data/magpie-sims-400k.csv"
cached_path = "../data/cached_raw.pkl"
# Check if cached file exists
if os.path.exists(cached_path):
    print("Loading cached data...")
    with open(cached_path, "rb") as f:
        df_raw = pickle.load(f)
else:
    print("Processing and caching data...")
    df_raw = load_scrabble_data(data_path)
    
    # Save to cache
    with open(cached_path, "wb") as f:
        pickle.dump(df_raw, f)

# Verify DataFrame loaded
print(f"Data loaded with {len(df_raw)} rows")

Loading cached data...
Data loaded with 395715 rows


In [3]:
from game_logic.dawg import DAWG

# Define dictionary path
dict_path = "../data/serialized_dawg_CSW24.bin"

# Initialize DAWG
dawg = DAWG()

# Load and Deserialize
with open(dict_path, "rb") as f:
    dawg_data = f.read()  # Read entire binary file
    dawg.deserialize(dawg_data)  # Deserialize into DAWG structure

print("DAWG loaded successfully!")


DAWG loaded successfully!


In [4]:
words = dawg.get_all_words()

### CrossChecks

In [77]:
import game_logic.crosschecks  # Import the full module
import importlib

importlib.reload(game_logic.crosschecks)

# Now call the function
from game_logic.crosschecks import find_anchors_with_cross_checks

from typing import List
import numpy as np

import game_logic.utils

importlib.reload(game_logic.utils)

from game_logic.types import Board, CrossCheckBoard
from game_logic.utils import transpose, pretty_print_board_with_crosschecks


In [58]:
start_time = time.time()
df_raw["cs_h"] = df_raw["board"].apply(lambda b: find_anchors_with_cross_checks(b, dawg))
end_time = time.time()

elapsed_time = time.time() - start_time
print(f"Data loaded in {elapsed_time:.4f} seconds")

Data loaded in 242.7373 seconds


In [59]:
start_time = time.time()
df_raw["cs_v"] = df_raw["board"].apply(lambda b: transpose(find_anchors_with_cross_checks(transpose(b), dawg)))
end_time = time.time()

elapsed_time = time.time() - start_time
print(f"Data loaded in {elapsed_time:.4f} seconds")

Data loaded in 280.4659 seconds


In [86]:
row = 214751
board = df_raw["board"][row]
cs_v = df_raw["cs_v"][row]
cs_h = df_raw["cs_h"][row]

In [87]:
pretty_print_board(board)

 ★  .  .  ◆  .  .  .  ★  A  .  .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  .  N  ▲  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  .  O  F  .  .  ✦  .  . 
 ◆  .  .  ✦  .  .  .  ◆  A  R  P  A  .  .  ◆ 
 .  .  .  .  ✦  .  .  .  .  I  ✦  .  .  .  . 
 .  ▲  .  .  .  ▲  .  .  .  V  .  .  .  ▲  . 
 .  .  ◆  .  .  .  ◆  .  ◆  O  H  .  ◆  .  . 
 ★  .  .  ◆  .  .  G  A  L  L  O  W  .  .  ★ 
 .  .  ◆  .  .  .  ◆  .  ◆  E  M  .  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  .  D  I  .  .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  .  E  .  .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  .  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [88]:
pretty_print_board_with_crosschecks(board, cs_h)

 ★  .  .  ◆  .  .  .  ○  A  ○  .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  ○  N  0  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  ○  O  F  2  15 ✦  .  . 
 ◆  .  .  ✦  .  .  .  ○  A  R  P  A  ○  .  ◆ 
 .  .  .  .  ✦  .  .  .  1  I  4  16 .  .  . 
 .  ▲  .  .  .  ▲  .  .  ○  V  0  .  .  ▲  . 
 .  .  ◆  .  .  .  2  15 2  O  H  3  ◆  .  . 
 ★  .  .  ◆  .  ○  G  A  L  L  O  W  ○  .  ★ 
 .  .  ◆  .  .  .  3  16 3  E  M  2  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  ○  D  I  ○  .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  0  E  ○  .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  2  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [89]:
pretty_print_board_with_crosschecks(board, cs_v)

 ★  .  .  ◆  .  .  .  15 A  16 .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  5  N  5  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  3  O  F  2  ○  ✦  .  . 
 ◆  .  .  ✦  .  .  .  0  A  R  P  A  1  .  ◆ 
 .  .  .  .  ✦  .  .  .  14 I  6  ○  .  .  . 
 .  ▲  .  .  .  ▲  .  .  0  V  0  .  .  ▲  . 
 .  .  ◆  .  .  .  ○  ○  8  O  H  3  ◆  .  . 
 ★  .  .  ◆  .  0  G  A  L  L  O  W  1  .  ★ 
 .  .  ◆  .  .  .  ○  ○  6  E  M  4  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  0  D  I  11 .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  15 E  13 .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  ○  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


### Bingo Lanes

In [94]:
from typing import List, Tuple
from game_logic.types import Board, CrossCheckBoard

def compute_8_letter_bingo_lanes(
    board: Board, crosscheck_board_h: CrossCheckBoard, crosscheck_board_v: CrossCheckBoard
) -> List[Tuple[int, int, str, int]]:
    """
    Computes 8-letter bingo lanes by checking spaces around existing tiles.

    Args:
        board (Board): The Scrabble board (15x15 grid).
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.

    Returns:
        List[Tuple[int, int, str, int]]: A list of (row, col, direction, lane_size) tuples.
    """
    bingo_lanes = []

    def is_valid_extension(r, c, r_next, c_next, crosscheck_board):
        """Returns True if we can extend a word into this space and the next space."""
        if not (0 <= r < 15 and 0 <= c < 15):
            return False  # Out of bounds
        if board[r][c] is not None:
            return False  # Occupied
        if crosscheck_board[r][c] is not None and len(crosscheck_board[r][c].valid_letters) == 0:
            return False  # Cross-check restriction
        if 0 <= r_next < 15 and 0 <= c_next < 15 and (
            board[r_next][c_next] is not None
        ):
            return False  # Next space blocked
        return True

    def has_adjacent_tile_in_direction(r, c, direction):
        """Checks if a tile has an adjacent tile in the given direction."""
        if direction == "H":
            return (c > 0 and board[r][c - 1] is not None) or (c < 14 and board[r][c + 1] is not None)
        if direction == "V":
            return (r > 0 and board[r - 1][c] is not None) or (r < 14 and board[r + 1][c] is not None)
        return False

    # Scan board for existing tiles
    for row in range(15):
        for col in range(15):
            if board[row][col] is None:
                continue  # Skip empty spaces

            # Horizontal playability: Skip if adjacent horizontal tile exists
            if not has_adjacent_tile_in_direction(row, col, "H"):
                left_spaces = 0
                right_spaces = 0

                for i in range(1, 8):
                    if is_valid_extension(row, col - i, row, col - i - 1, crosscheck_board_h):
                        left_spaces += 1
                    else:
                        break

                for i in range(1, 8):
                    if is_valid_extension(row, col + i, row, col + i + 1, crosscheck_board_h):
                        right_spaces += 1
                    else:
                        break

                lane_size_h = max(0, left_spaces + right_spaces - 6)
                if lane_size_h > 0:
                    bingo_lanes.append((row, col, "H", lane_size_h))

            # Vertical playability: Skip if adjacent vertical tile exists
            if not has_adjacent_tile_in_direction(row, col, "V"):
                up_spaces = 0
                down_spaces = 0

                for i in range(1, 8):
                    if is_valid_extension(row - i, col, row - i - 1, col, crosscheck_board_v):
                        up_spaces += 1
                    else:
                        break

                for i in range(1, 8):
                    if is_valid_extension(row + i, col, row + i + 1, col, crosscheck_board_v):
                        down_spaces += 1
                    else:
                        break

                lane_size_v = max(0, up_spaces + down_spaces - 6)
                if lane_size_v > 0:
                    bingo_lanes.append((row, col, "V", lane_size_v))

    return bingo_lanes


In [95]:
compute_8_letter_bingo_lanes(board, cs_h, cs_v)

[(0, 8, 'H', 7),
 (1, 8, 'H', 1),
 (4, 9, 'H', 6),
 (5, 9, 'H', 1),
 (7, 6, 'V', 8),
 (7, 7, 'V', 4),
 (7, 11, 'V', 3)]

In [101]:
pretty_print_board(board)

 ★  .  .  ◆  .  .  .  ★  A  .  .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  .  N  ▲  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  .  O  F  .  .  ✦  .  . 
 ◆  .  .  ✦  .  .  .  ◆  A  R  P  A  .  .  ◆ 
 .  .  .  .  ✦  .  .  .  .  I  ✦  .  .  .  . 
 .  ▲  .  .  .  ▲  .  .  .  V  .  .  .  ▲  . 
 .  .  ◆  .  .  .  ◆  .  ◆  O  H  .  ◆  .  . 
 ★  .  .  ◆  .  .  G  A  L  L  O  W  .  .  ★ 
 .  .  ◆  .  .  .  ◆  .  ◆  E  M  .  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  .  D  I  .  .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  .  E  .  .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  .  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [97]:
pretty_print_board_with_crosschecks(board, cs_v)

 ★  .  .  ◆  .  .  .  15 A  16 .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  5  N  5  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  3  O  F  2  ○  ✦  .  . 
 ◆  .  .  ✦  .  .  .  0  A  R  P  A  1  .  ◆ 
 .  .  .  .  ✦  .  .  .  14 I  6  ○  .  .  . 
 .  ▲  .  .  .  ▲  .  .  0  V  0  .  .  ▲  . 
 .  .  ◆  .  .  .  ○  ○  8  O  H  3  ◆  .  . 
 ★  .  .  ◆  .  0  G  A  L  L  O  W  1  .  ★ 
 .  .  ◆  .  .  .  ○  ○  6  E  M  4  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  0  D  I  11 .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  15 E  13 .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  ○  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [98]:
from tqdm import tqdm

# Compute bingo lanes for each board in the dataset
df_raw["8_letter_bingo_lanes_list"] = [
    compute_8_letter_bingo_lanes(board, cs_v, cs_h)
    for board, cs_v, cs_h in tqdm(zip(df_raw["board"], df_raw["cs_v"], df_raw["cs_h"]), total=len(df_raw))
]

df_raw["8_letter_bingos"] = df_raw["8_letter_bingo_lanes_list"].apply(
    lambda lanes: sum(lane[3] for lane in lanes) if lanes else 0
)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 395715/395715 [00:45<00:00, 8698.27it/s]


In [107]:
from typing import List, Tuple
from game_logic.types import Board, CrossCheckBoard

def compute_7_letter_bingo_lanes(
    board: Board, crosscheck_board_h: CrossCheckBoard, crosscheck_board_v: CrossCheckBoard
) -> List[Tuple[int, int, str, int]]:
    """
    Computes 7-letter bingo lanes by checking valid cross-check spaces.
    A play can only go through one cross-check constraint before stopping.

    Args:
        board (Board): The Scrabble board (15x15 grid).
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.

    Returns:
        List[Tuple[int, int, str, int]]: A list of (row, col, direction, lane_size) tuples.
    """

    # If board is empty, the only valid lane is (7, 7, "H", 1)
    if board[7][7] is None:
        return [(7, 7, "H", 1)]

    bingo_lanes = []

    def is_valid_bingo_start(r, c, crosscheck_board, direction):
        """Checks if a space is a valid starting point for a 7-letter bingo."""
        if board[r][c] is not None:
            return False  # Cannot play through an existing tile
        if crosscheck_board[r][c] is None or crosscheck_board[r][c].is_open_square:
            return False  # Must be a constrained space
        if len(crosscheck_board[r][c].valid_letters) == 0:
            return False  # No valid letters to play through

        # Ensure the space is not trapped between two tiles
        if direction == "H":
            if (c > 0 and board[r][c - 1] is not None) or (c < 14 and board[r][c + 1] is not None):
                return False  # Tile on both left & right
        elif direction == "V":
            if (r > 0 and board[r - 1][c] is not None) or (r < 14 and board[r + 1][c] is not None):
                return False  # Tile above & below

        return True  # Valid starting point

    def count_empty_spaces(r, c, dr, dc, crosscheck_board):
        """Counts empty spaces until hitting a tile or another cross-check."""
        count = 0

        for i in range(1, 7):  # Maximum 7-letter word placement
            r_next, c_next = r + i * dr, c + i * dc

            # Out of bounds check
            if not (0 <= r_next < 15 and 0 <= c_next < 15):
                break  

            # Stop at the first occupied tile
            if board[r_next][c_next] is not None:
                break  

            # Stop at the first cross-check (we only play through one)
            if crosscheck_board[r_next][c_next] is not None:
                break  

            count += 1  # Valid empty space

        return count

    # Iterate through the board looking for valid cross-check spaces
    for row in range(15):
        for col in range(15):
            # Check horizontal lanes
            if is_valid_bingo_start(row, col, crosscheck_board_h, "H"):
                left_spaces = count_empty_spaces(row, col, 0, -1, crosscheck_board_h)
                right_spaces = count_empty_spaces(row, col, 0, 1, crosscheck_board_h)
                lane_size_h = max(0, left_spaces + right_spaces - 5)  # Need 6 open spaces around a tile
                if lane_size_h > 0:
                    bingo_lanes.append((row, col, "H", lane_size_h))

            # Check vertical lanes
            if is_valid_bingo_start(row, col, crosscheck_board_v, "V"):
                up_spaces = count_empty_spaces(row, col, -1, 0, crosscheck_board_v)
                down_spaces = count_empty_spaces(row, col, 1, 0, crosscheck_board_v)
                lane_size_v = max(0, up_spaces + down_spaces - 5)  # Need 6 open spaces around a tile
                if lane_size_v > 0:
                    bingo_lanes.append((row, col, "V", lane_size_v))

    return bingo_lanes


In [112]:
row = 324751
board = df_raw["board"][row]
cs_v = df_raw["cs_v"][row]
cs_h = df_raw["cs_h"][row]

compute_7_letter_bingo_lanes(board, cs_h, cs_v)

[(4, 1, 'V', 1)]

In [113]:
pretty_print_board(board)

 ★  F  .  B  .  .  .  D  I  S  S  o  L  V  E 
 .  A  K  A  .  ▲  .  .  .  T  .  .  .  ✦  . 
 .  G  A  T  .  .  U  N  B  O  I  L  E  D  . 
 ◆  .  O  H  M  .  G  Y  A  N  .  ✦  .  .  ◆ 
 .  .  N  .  I  .  .  .  .  Y  ✦  .  .  .  . 
 .  ▲  .  .  g  ▲  .  .  .  I  .  .  N  ▲  . 
 .  .  ◆  .  R  .  ◆  A  W  N  .  .  A  .  . 
 ★  .  .  W  A  R  T  H  O  G  .  ◆  I  .  ★ 
 .  .  ◆  .  I  .  ◆  .  P  .  .  .  L  .  . 
 .  ▲  .  .  N  ▲  .  .  .  J  A  P  E  ▲  . 
 .  .  .  D  E  V  I  A  T  O  R  .  R  .  . 
 ◆  .  .  ✦  .  .  .  U  .  E  C  U  S  .  ◆ 
 .  .  ✦  .  .  .  ◆  R  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  E  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  I  .  .  .  ◆  .  .  ★ 


In [114]:

# Compute bingo lanes for each board in the dataset
df_raw["7_letter_bingo_lanes_list"] = [
    compute_7_letter_bingo_lanes(board, cs_v, cs_h)
    for board, cs_v, cs_h in tqdm(zip(df_raw["board"], df_raw["cs_v"], df_raw["cs_h"]), total=len(df_raw))
]

df_raw["7_letter_bingos"] = df_raw["7_letter_bingo_lanes_list"].apply(
    lambda lanes: sum(lane[3] for lane in lanes) if lanes else 0
)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 395715/395715 [00:44<00:00, 8882.81it/s]


#### Misc: Pickle + Data Obs

In [117]:
df_raw.to_pickle("scrabble_data_rnd.pkl")

In [118]:
df_raw.memory_usage(deep=True).sum() / (1024**2)  # Convert bytes to MB

np.float64(501.7918510437012)

In [120]:
df_raw.memory_usage(deep=True).sort_values(ascending=False)[:10] / (1024**2)  # MB per column


board                        72.457581
cs_v                         72.457581
cs_h                         72.457581
board_rep                    61.036774
8_letter_bingo_lanes_list    58.829193
7_letter_bingo_lanes_list    24.166458
unseen_J                      3.019066
unseen_O                      3.019066
unseen_N                      3.019066
unseen_M                      3.019066
dtype: float64

In [122]:
len(df_raw)

395715

### Special Squares

#### Single access

In [123]:
pretty_print_board(board)

 ★  F  .  B  .  .  .  D  I  S  S  o  L  V  E 
 .  A  K  A  .  ▲  .  .  .  T  .  .  .  ✦  . 
 .  G  A  T  .  .  U  N  B  O  I  L  E  D  . 
 ◆  .  O  H  M  .  G  Y  A  N  .  ✦  .  .  ◆ 
 .  .  N  .  I  .  .  .  .  Y  ✦  .  .  .  . 
 .  ▲  .  .  g  ▲  .  .  .  I  .  .  N  ▲  . 
 .  .  ◆  .  R  .  ◆  A  W  N  .  .  A  .  . 
 ★  .  .  W  A  R  T  H  O  G  .  ◆  I  .  ★ 
 .  .  ◆  .  I  .  ◆  .  P  .  .  .  L  .  . 
 .  ▲  .  .  N  ▲  .  .  .  J  A  P  E  ▲  . 
 .  .  .  D  E  V  I  A  T  O  R  .  R  .  . 
 ◆  .  .  ✦  .  .  .  U  .  E  C  U  S  .  ◆ 
 .  .  ✦  .  .  .  ◆  R  ◆  .  .  .  ✦  .  . 
 .  ✦  .  .  .  ▲  .  E  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  I  .  .  .  ◆  .  .  ★ 


In [163]:
from typing import List, Tuple
from game_logic.types import Board, CrossCheckBoard
from game_logic.utils import SPECIAL_TILES_LOCATIONS

def is_accessible_special_tile(
    row: int, col: int, board: Board, crosscheck_board_h: CrossCheckBoard, crosscheck_board_v: CrossCheckBoard
) -> bool:
    """
    Determines if a TWS or DWS tile is accessible to an opponent.

    Args:
        row (int): Row index of the special square.
        col (int): Column index of the special square.
        board (Board): 15x15 Scrabble board.
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.

    Returns:
        bool: True if the special square is accessible, False otherwise.
    """
    # 1️⃣ If a cross-check exists here and has a non-zero valid_letter set → Immediately accessible!
    if ((crosscheck_board_h[row][col] and crosscheck_board_h[row][col].valid_letters) or 
        (crosscheck_board_v[row][col] and crosscheck_board_v[row][col].valid_letters)):
        return True  

    # 2️⃣ Check if the special tile is unoccupied
    if board[row][col] is not None:
        return False  # Tile already occupied

    # 3️⃣ Ensure it's not blocked by **groups of tiles**
    def has_blocking_group(r: int, c: int) -> bool:
        """
        Checks if a tile is blocked by two adjacent tiles in a given direction.
        """
        directions = {
            "left":  [(0, -2), (0, -1)],  # Two spaces left
            "right": [(0, 1), (0, 2)],   # Two spaces right
            "up":    [(-1, 0), (-2, 0)],  # Two spaces above
            "down":  [(1, 0), (2, 0)]    # Two spaces below
        }

        for positions in directions.values():
            if all(0 <= r+dr < 15 and 0 <= c+dc < 15 and board[r+dr][c+dc] is not None for dr, dc in positions):
                return True  # Blocked by at least two tiles in one direction

        return False  # No blocking groups

    def has_immediate_two_adjacent(r: int, c: int, board: Board) -> bool:
        """
        Checks if the special tile is directly next to at least two adjacent tiles, even if they mix directions.
        """
        adjacent_positions = [(0, -1), (0, 1), (-1, 0), (1, 0)]  # Left, Right, Up, Down
    
        adjacent_count = sum(
            1 for dr, dc in adjacent_positions
            if 0 <= r + dr < 15 and 0 <= c + dc < 15 and board[r + dr][c + dc] is not None
        )
    
        return adjacent_count >= 2  # Blocked if at least two adjacent tiles exist

    if has_blocking_group(row, col) or has_immediate_two_adjacent(row, col, board):
        return False

    # 4️⃣ Look up to 7 spaces away for isolated tiles or cross-check access
    if (
        can_reach_within_7(row, col, 0, -1, board, crosscheck_board_h) or  # Left
        can_reach_within_7(row, col, 0, 1, board, crosscheck_board_h) or   # Right
        can_reach_within_7(row, col, -1, 0, board, crosscheck_board_v) or  # Up
        can_reach_within_7(row, col, 1, 0, board, crosscheck_board_v)      # Down
    ):
        return True

    return False  # No valid access route


def can_reach_within_7(r: int, c: int, dr: int, dc: int, board: Board, crosscheck_board: CrossCheckBoard) -> bool:
    """
    Determines if the special square can be reached within 7 spaces.

    Conditions:
    - A **single tile** is fine **if the next space is empty**.
    - A **cross-check** is fine **if no two tiles immediately follow it**.

    Args:
        r (int): Row index of the special square.
        c (int): Column index of the special square.
        dr (int): Row direction (-1 for up, 1 for down, 0 for horizontal).
        dc (int): Column direction (-1 for left, 1 for right, 0 for vertical).
        board (Board): 15x15 Scrabble board.
        crosscheck_board (CrossCheckBoard): Cross-check constraints.

    Returns:
        bool: True if accessible within 7 spaces, False otherwise.
    """
    for i in range(1, 8):  # Max 7 spaces
        r_next, c_next = r + i * dr, c + i * dc

        if not (0 <= r_next < 15 and 0 <= c_next < 15):
            return False  # Out of bounds

        current_tile = board[r_next][c_next]

        # Case 1️⃣: If we hit a tile, check if the next space is empty
        if current_tile is not None:
            r_next_next, c_next_next = r_next + dr, c_next + dc
            if 0 <= r_next_next < 15 and 0 <= c_next_next < 15 and board[r_next_next][c_next_next] is not None:
                return False  # Tile immediately after → Blocked
            return True  # Otherwise, it's accessible

        # Case 2️⃣: If we hit a cross-check, check if the next two spaces are not both occupied
        crosscheck = crosscheck_board[r_next][c_next]
        if crosscheck and crosscheck.is_open_square:
            crosscheck = None

        if(crosscheck and len(crosscheck.valid_letters) == 0):
            return False
            
        if crosscheck and len(crosscheck.valid_letters) > 0:
            r_next_next, c_next_next = r_next + dr, c_next + dc
            r_next_next2, c_next_next2 = r_next_next + dr, c_next_next + dc

            if (0 <= r_next_next < 15 and 0 <= c_next_next < 15 and board[r_next_next][c_next_next] is not None and
                0 <= r_next_next2 < 15 and 0 <= c_next_next2 < 15 and board[r_next_next2][c_next_next2] is not None):
                return False  # Two tiles immediately after cross-check → Blocked
            
            return True  # Otherwise, it's accessible

    return False  # No valid access found


def compute_accessible_special_tiles(board: Board, crosscheck_board_h: CrossCheckBoard, crosscheck_board_v: CrossCheckBoard) -> Tuple[int, int]:
    """
    Computes the number of accessible TWS and DWS tiles on the board.

    Args:
        board (Board): 15x15 Scrabble board.
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.

    Returns:
        (int, int): Tuple containing (accessible TWS count, accessible DWS count)
    """
    tws_count = 0
    dws_count = 0

    for row in range(15):
        for col in range(15):
            tile_type = SPECIAL_TILES_LOCATIONS[row][col]
            if tile_type in {"TWS", "DWS"}:
                if is_accessible_special_tile(row, col, board, crosscheck_board_h, crosscheck_board_v):
                    if tile_type == "TWS":
                        tws_count += 1
                    elif tile_type == "DWS":
                        dws_count += 1

    return tws_count, dws_count


In [164]:
row = 124751
board = df_raw["board"][row]
cs_v = df_raw["cs_v"][row]
cs_h = df_raw["cs_h"][row]


In [165]:
compute_accessible_special_tiles(board, cs_h, cs_v)

(2, 9)

In [157]:
pretty_print_board(board)

 ★  .  .  ◆  .  .  .  ★  .  .  .  T  .  .  ★ 
 .  ✦  .  .  .  ▲  .  .  .  ▲  F  Y  .  ✦  . 
 .  .  ✦  .  .  .  ◆  .  ◆  .  U  N  ✦  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  R  E  D  .  ◆ 
 .  .  .  .  ✦  .  .  .  T  W  O  .  O  .  . 
 .  ▲  .  .  .  ▲  .  .  O  A  R  .  U  ▲  . 
 .  .  ◆  .  .  .  ◆  .  O  U  .  .  c  .  . 
 Q  .  .  G  .  .  D  U  N  K  .  L  I  .  ★ 
 A  .  ◆  Y  .  V  A  G  I  .  T  E  N  .  . 
 T  R  E  N  A  I  L  .  E  ▲  R  E  E  ▲  . 
 .  .  W  A  R  M  T  h  S  .  I  T  .  .  . 
 ◆  .  .  E  .  .  .  ◆  .  P  A  L  .  .  ◆ 
 .  .  ✦  C  .  .  ◆  .  ◆  O  D  E  ✦  .  . 
 .  ✦  .  I  .  ▲  .  .  .  M  I  .  .  ✦  . 
 ★  .  .  A  .  .  .  Z  O  E  C  I  A  .  ★ 


In [135]:
pretty_print_board_with_crosschecks(board, cs_v)

 3  F  4  B  5  .  0  D  I  S  S  o  L  V  E 
 4  A  K  A  1  ▲  ○  ○  5  T  4  ○  ○  ○  ○ 
 0  G  A  T  3  0  U  N  B  O  I  L  E  D  0 
 ◆  0  O  H  M  0  G  Y  A  N  2  ○  ○  ○  ◆ 
 .  5  N  0  I  6  ○  ○  7  Y  4  .  ○  .  . 
 .  ▲  ○  2  g  3  .  ○  14 I  6  5  N  5  . 
 .  .  ◆  4  R  1  9  A  W  N  2  15 A  16 . 
 ★  .  0  W  A  R  T  H  O  G  1  14 I  6  ★ 
 .  .  ◆  14 I  6  ○  2  P  4  ○  2  L  3  . 
 .  ▲  .  5  N  5  ○  ○  0  J  A  P  E  3  . 
 .  .  0  D  E  V  I  A  T  O  R  0  R  1  . 
 ◆  .  .  ○  ○  ○  6  U  0  E  C  U  S  0  ◆ 
 .  .  ✦  .  .  .  4  R  1  ○  ○  ○  ○  .  . 
 .  ✦  .  .  .  ▲  15 E  13 ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  14 I  6  .  .  ◆  .  .  ★ 


In [136]:
pretty_print_board_with_crosschecks(board, cs_h)

 ○  F  0  B  ○  .  ○  D  I  S  S  o  L  V  E 
 ○  A  K  A  ○  ▲  14 1  1  T  5  2  3  1  13
 ○  G  A  T  0  ○  U  N  B  O  I  L  E  D  ○ 
 ◆  1  O  H  M  ○  G  Y  A  N  6  3  13 4  ◆ 
 .  ○  N  2  I  ○  2  3  16 Y  ○  .  0  .  . 
 .  ▲  1  ○  g  ○  .  10 1  I  ○  ○  N  ○  . 
 .  .  ◆  3  R  4  5  A  W  N  ○  ○  A  ○  . 
 ★  .  ○  W  A  R  T  H  O  G  ○  ○  I  ○  ★ 
 .  .  ◆  2  I  1  4  3  P  0  2  2  L  ○  . 
 .  ▲  .  4  N  0  14 0  0  J  A  P  E  ○  . 
 .  .  ○  D  E  V  I  A  T  O  R  2  R  ○  . 
 ◆  .  .  4  1  0  6  U  4  E  C  U  S  ○  ◆ 
 .  .  ✦  .  .  .  ○  R  ○  2  3  8  0  .  . 
 .  ✦  .  .  .  ▲  ○  E  ○  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  ○  I  ○  .  .  ◆  .  .  ★ 


In [166]:
# Compute accessible special tiles for each board in the dataset
df_raw[["accessible_TWS_count", "accessible_DWS_count"]] = [
    compute_accessible_special_tiles(board, cs_h, cs_v)
    for board, cs_h, cs_v in tqdm(zip(df_raw["board"], df_raw["cs_h"], df_raw["cs_v"]), total=len(df_raw))
]



  0%|                                                                                                                                                         | 0/395715 [00:00<?, ?it/s][A
  0%|▎                                                                                                                                            | 785/395715 [00:00<00:50, 7838.61it/s][A
  1%|▋                                                                                                                                          | 1995/395715 [00:00<00:38, 10338.47it/s][A
  1%|█▏                                                                                                                                         | 3302/395715 [00:00<00:33, 11584.66it/s][A
  1%|█▋                                                                                                                                         | 4641/395715 [00:00<00:31, 12295.51it/s][A
  2%|██                                               

In [173]:
cache_features_df(df_raw, cached_features_path)

Filtered DataFrame cached successfully at: ../data/numerical_features.pkl


#### Multiple access

In [239]:
SPECIAL_TILES_CONNECTIONS = {
    ('TWS', 'TWS'): [
        ((0, 0), (0, 7)), ((0, 7), (0, 14)),
        ((14, 0), (14, 7)), ((14, 7), (14, 14)),
        ((0, 0), (7, 0)), ((7, 0), (14, 0)),
        ((0, 14), (7, 14)), ((7, 14), (14, 14))
    ],
    ('DWS', 'TLS'): [
        ((1, 1), (1, 5)), ((1, 9), (1, 13)),
        ((13, 1), (13, 5)), ((13, 9), (13, 13)),
        ((1, 1), (5, 1)), ((9, 1), (13, 1)),
        ((1, 13), (5, 13)), ((9, 13), (13, 13))
    ],
    ('DWS', 'DWS'): [
        ((4, 4), (4, 10)), ((10, 4), (10, 10)),
        ((4, 4), (10, 4)), ((4, 10), (10, 10)),
    ],
    ('DLS', 'TWS'): [
        ((0, 0), (0, 3)), ((0, 3), (0, 7)), ((0, 7), (0, 11)), ((0, 11), (0, 14)),
        ((7, 0), (7, 3)), ((7, 11), (7, 14)), ((14, 0), (14, 3)), ((14, 3), (14, 7)),
        ((14, 7), (14, 11)), ((14, 11), (14, 14)), ((0, 0), (3, 0)), ((3, 0), (7, 0)),
        ((7, 0), (11, 0)), ((11, 0), (14, 0)), ((0, 7), (3, 7)), ((11, 7), (14, 7)),
        ((0, 14), (3, 14)), ((3, 14), (7, 14)), ((7, 14), (11, 14)), ((11, 14), (14, 14))
    ]
}

def is_accessible_connection(
    start: Tuple[int, int],
    end: Tuple[int, int],
    board: Board,
    crosscheck_board_h: CrossCheckBoard,
    crosscheck_board_v: CrossCheckBoard,
    special_type: str
) -> bool:
    """
    Determines if there is an accessible connection between two special tiles.

    Args:
        start (Tuple[int, int]): Coordinates of the first special tile.
        end (Tuple[int, int]): Coordinates of the second special tile.
        board (Board): 15x15 Scrabble board.
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.
        special_type (str): The type of special tile combination (e.g., "TWS/TWS").

    Returns:
        bool: True if the special tile connection is accessible, False otherwise.
    """
    r1, c1 = start
    r2, c2 = end

    # If either special tile is occupied, connection is blocked
    if board[r1][c1] is not None or board[r2][c2] is not None:
        return False

    # Determine the direction of the connection
    if r1 == r2:  # Horizontal connection
        r, c_min, c_max = r1, min(c1, c2), max(c1, c2)
        crosscheck_board = crosscheck_board_h
    else:  # Vertical connection
        c, r_min, r_max = c1, min(r1, r2), max(r1, r2)
        crosscheck_board = crosscheck_board_v

    num_tiles_between = 0
    num_valid_cross_checks = 0

    # Check if either start or end position has a cross-check
    start_crosscheck = crosscheck_board[r1][c1] if crosscheck_board[r1][c1] and not crosscheck_board[r1][c1].is_open_square else None
    end_crosscheck = crosscheck_board[r2][c2] if crosscheck_board[r2][c2] and not crosscheck_board[r2][c2].is_open_square else None

    # If either start or end position has a cross-check with an empty valid letter set, it's blocked
    if (start_crosscheck and len(start_crosscheck.valid_letters) == 0) or \
       (end_crosscheck and len(end_crosscheck.valid_letters) == 0):
        return False

    # Count valid cross-checks at start and end positions
    if start_crosscheck and len(start_crosscheck.valid_letters) > 0:
        num_valid_cross_checks += 1
    if end_crosscheck and len(end_crosscheck.valid_letters) > 0:
        num_valid_cross_checks += 1

    # Iterate through all spaces between the two tiles
    if r1 == r2:  # Horizontal connection
        for c in range(c_min + 1, c_max):
            if board[r][c] is not None:
                num_tiles_between += 1
            elif crosscheck_board[r][c]:
                crosscheck = crosscheck_board[r][c]
                if crosscheck.is_open_square:
                    continue
                if len(crosscheck.valid_letters) == 0:
                    return False  # Blocked because cross-check has no valid letters
                num_valid_cross_checks += 1
    else:  # Vertical connection
        for r in range(r_min + 1, r_max):
            if board[r][c] is not None:
                num_tiles_between += 1
            elif crosscheck_board[r][c]:
                crosscheck = crosscheck_board[r][c]
                if crosscheck.is_open_square:
                    continue
                if len(crosscheck.valid_letters) == 0:
                    return False  # Blocked because cross-check has no valid letters
                num_valid_cross_checks += 1

    # Access rules:
    if num_tiles_between > 1 or num_valid_cross_checks > 1:
        return False
    if num_tiles_between == 0 and num_valid_cross_checks == 0:
        return False
    if special_type == "TWS/TWS":
        if num_tiles_between == 1 and num_valid_cross_checks == 0:
            return True
        else:
            return False
    if num_tiles_between == 1 or num_valid_cross_checks == 1:
        return True

    return False  # No valid connection found


def compute_accessible_special_connections(
    board: Board, crosscheck_board_h: CrossCheckBoard, crosscheck_board_v: CrossCheckBoard
) -> Dict[str, int]:
    """
    Computes the number of accessible special tile connections.

    Args:
        board (Board): 15x15 Scrabble board.
        crosscheck_board_h (CrossCheckBoard): Horizontal cross-check constraints.
        crosscheck_board_v (CrossCheckBoard): Vertical cross-check constraints.

    Returns:
        Dict[str, int]: Dictionary with counts of accessible connections for each combination type.
    """
    accessible_connections = {
        "TWS/TWS": 0,
        "DWS/TLS": 0,
        "DWS/DWS": 0,
        "DLS/TWS": 0
    }

    for special_type, pairs in SPECIAL_TILES_CONNECTIONS.items():
        for start, end in pairs:
            if is_accessible_connection(start, end, board, crosscheck_board_h, crosscheck_board_v, "/".join(special_type)):
                accessible_connections["/".join(special_type)] += 1

    return accessible_connections



 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  .  Q  .  .  ◆ 
 .  .  .  .  ✦  .  .  .  .  .  ✦  I  N  .  . 
 .  ▲  .  .  .  ▲  .  .  .  ▲  .  N  O  ▲  . 
 .  .  ◆  .  .  .  ◆  .  ◆  .  .  .  N  .  . 
 ★  .  .  ◆  .  .  .  G  R  A  P  H  s  .  ★ 
 .  .  ◆  .  .  .  ◆  .  ◆  .  O  I  K  .  . 
 .  ▲  .  .  .  ▲  .  .  .  ▲  .  .  I  D  . 
 .  .  .  .  ✦  .  .  .  .  .  ✦  .  D  I  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  .  ✦  .  T  ◆ 
 .  .  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  A  . 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [242]:
for row in range(100000, 200000):
    board = df_raw["board"][row]
    cs_v = df_raw["cs_v"][row]
    cs_h = df_raw["cs_h"][row]
    conns = compute_accessible_special_connections(board, cs_h, cs_v)
    total_conns = 0
    for _, v in conns.items():
        total_conns += v
    if total_conns > 5:
        print(row)
        break

101133


In [243]:
board = df_raw["board"][row]
cs_v = df_raw["cs_v"][row]
cs_h = df_raw["cs_h"][row]

pretty_print_board(board)

 ★  .  .  ◆  .  .  .  ★  .  .  F  ◆  .  .  ★ 
 .  ✦  .  .  .  A  .  .  .  ▲  E  .  .  ✦  . 
 .  .  ✦  .  .  D  ◆  .  ◆  .  U  .  ✦  .  . 
 ◆  .  .  ✦  .  N  .  ◆  .  .  A  D  .  .  ◆ 
 .  .  .  .  ✦  E  .  .  .  .  R  E  .  .  . 
 .  ▲  .  .  .  X  .  .  .  L  .  A  .  ▲  . 
 .  .  ◆  .  .  A  A  .  F  O  G  S  ◆  .  . 
 ★  .  .  ◆  .  .  V  I  E  W  .  O  .  .  ★ 
 .  .  ◆  .  .  .  O  .  ◆  P  H  I  ◆  .  . 
 .  ▲  .  .  .  ▲  .  .  .  ▲  A  L  .  ▲  . 
 .  .  .  .  ✦  .  .  .  .  .  B  .  .  .  . 
 ◆  .  .  ✦  .  .  .  ◆  .  .  U  ✦  .  .  ◆ 
 .  .  ✦  .  .  .  ◆  .  W  E  S  T  I  N  G 
 .  ✦  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  .  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [244]:
compute_accessible_special_connections(board, cs_h, cs_v)

{'TWS/TWS': 2, 'DWS/TLS': 2, 'DWS/DWS': 0, 'DLS/TWS': 2}

In [232]:
pretty_print_board_with_crosschecks(board, cs_h)

 0  14 15 0  N  E  t  S  U  K  E  ○  .  .  ★ 
 V  I  A  T  O  R  4  4  8  4  13 .  .  ✦  . 
 0  6  16 R  13 9  ◆  .  ◆  .  .  1  4  14 0 
 ◆  .  ○  I  ○  .  .  ◆  1  14 0  B  R  I  M 
 .  .  ○  N  6  0  17 0  D  E  F  O  1  6  E 
 .  ▲  ○  A  u  T  O  C  A  D  E  16 .  ○  L 
 .  .  ○  R  8  I  2  2  0  2  13 .  ◆  ○  A 
 ★  .  ○  Y  ○  P  H  I  Z  0  .  ◆  .  ○  M 
 .  .  ◆  0  .  0  0  F  E  W  ○  .  ◆  ○  E 
 .  ▲  .  .  ○  C  U  S  P  Y  ○  .  .  ○  D 
 .  .  .  0  6  1  0  0  0  L  ○  .  .  .  0 
 ◆  17 3  T  R  I  A  B  L  E  ○  ✦  .  .  ◆ 
 ○  O  W  I  E  0  X  I  1  0  4  .  ✦  .  . 
 .  17 2  12 18 V  E  N  G  E  R  ○  .  ✦  . 
 ★  .  .  ◆  .  0  3  G  O  A  1  ◆  .  .  ★ 


In [250]:
from tqdm import tqdm  # For proper behavior in Jupyter

results = []
for board, cs_h, cs_v in tqdm(zip(df_raw["board"], df_raw["cs_h"], df_raw["cs_v"]), total=len(df_raw)):
    results.append(compute_accessible_special_connections(board, cs_h, cs_v))

# Assign values explicitly
df_raw["available_TWS_TWS"] = [r["TWS/TWS"] for r in results]
df_raw["available_DWS_TLS"] = [r["DWS/TLS"] for r in results]
df_raw["available_DWS_DWS"] = [r["DWS/DWS"] for r in results]
df_raw["available_DLS_TWS"] = [r["DLS/TWS"] for r in results]



  0%|                                                                                                                                                         | 0/395715 [00:00<?, ?it/s][A
  0%|▍                                                                                                                                          | 1175/395715 [00:00<00:33, 11732.64it/s][A
  1%|▉                                                                                                                                          | 2597/395715 [00:00<00:29, 13191.01it/s][A
  1%|█▋                                                                                                                                         | 4968/395715 [00:00<00:21, 17991.03it/s][A
  2%|██▊                                                                                                                                        | 7970/395715 [00:00<00:17, 22736.49it/s][A
  3%|███▋                                             

In [254]:
tt_df = df_raw[df_raw["available_TWS_TWS"] > 1]

Unnamed: 0,board,board_rep,score_diff,total_unseen_tiles,leave_A,leave_B,leave_C,leave_D,leave_E,leave_F,...,8_letter_bingo_lanes_list,8_letter_bingos,7_letter_bingo_lanes_list,7_letter_bingos,accessible_TWS_count,accessible_DWS_count,available_TWS_TWS,available_DWS_TLS,available_DWS_DWS,available_DLS_TWS
430,"[[None, L, None, None, None, None, None, None,...",1L9VEND/1I8TI3/1PO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,-112,28,0,0,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",19,[],0,5,3,2,0,0,3
437,"[[None, I, None, None, None, None, None, None,...",1I9VEND/1L8TI3/1LO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,-118,28,0,0,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",19,[],0,5,3,2,0,0,3
439,"[[None, O, None, None, None, None, None, None,...",1O9VEND/1I8TI3/1LO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,-118,28,0,0,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",19,[],0,5,3,2,0,0,3
448,"[[None, L, None, None, None, None, None, None,...",1L9VEND/1O8TI3/1PO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,-112,28,0,0,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",19,[],0,5,3,2,0,0,3
508,"[[None, F, None, None, None, None, None, None,...",1F9VEND/1I8TI3/1BO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,150,20,0,0,0,0,1,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3
511,"[[None, F, None, None, None, None, None, None,...",1F9VEND/1E8TI3/1WO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,153,20,0,1,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3
513,"[[None, W, None, None, None, None, None, None,...",1W9VEND/1E8TI3/1BO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,150,20,0,0,0,0,0,1,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3
515,"[[None, W, None, None, None, None, None, None,...",1W9VEND/1E8TI3/1MO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,150,20,0,1,0,0,0,1,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3
516,"[[None, F, None, None, None, None, None, None,...",1F9VEND/1E8TI3/1MO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,150,20,0,1,0,0,0,0,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3
518,"[[None, M, None, None, None, None, None, None,...",1M9VEND/1I8TI3/1BO1T4CAGE2/T1DZO4RESH2/o1DOPA3...,148,20,0,0,0,0,1,1,...,"[(0, 1, H, 2), (0, 13, V, 1), (0, 14, V, 1), (...",13,[],0,5,3,2,0,0,3


In [258]:
row = 518
board = df_raw["board"][row]
cs_v = df_raw["cs_v"][row]
cs_h = df_raw["cs_h"][row]

pretty_print_board(board)

 ★  M  .  ◆  .  .  .  ★  .  .  .  V  E  N  D 
 .  I  .  .  .  ▲  .  .  .  ▲  T  I  .  ✦  . 
 .  B  O  .  T  .  ◆  .  ◆  C  A  G  E  .  . 
 T  .  D  Z  O  .  .  ◆  .  R  E  S  H  .  ◆ 
 o  .  D  O  P  A  .  .  .  U  N  .  .  .  . 
 Q  ▲  .  L  O  U  D  .  .  X  I  .  .  ▲  . 
 U  R  B  S  .  L  O  A  F  .  A  .  ◆  .  . 
 E  .  L  ◆  K  A  .  A  Y  E  s  ◆  .  .  ★ 
 T  R  I  O  R  .  ◆  .  ◆  .  .  .  ◆  .  . 
 S  I  P  .  E  W  .  .  .  ▲  .  .  .  ▲  . 
 .  C  .  .  N  O  .  .  .  .  ✦  .  .  .  . 
 ◆  H  I  N  G  E  R  ◆  .  .  .  ✦  .  .  ◆ 
 .  I  ✦  .  .  .  ◆  .  ◆  .  .  .  ✦  .  . 
 .  N  .  .  .  ▲  .  .  .  ▲  .  .  .  ✦  . 
 ★  G  .  ◆  .  .  .  ★  .  .  .  ◆  .  .  ★ 


In [259]:
compute_accessible_special_connections(board, cs_h, cs_v)

{'TWS/TWS': 2, 'DWS/TLS': 0, 'DWS/DWS': 0, 'DLS/TWS': 3}

In [260]:
cache_features_df(df_raw, cached_features_path)

Filtered DataFrame cached successfully at: ../data/numerical_features.pkl


### Quandrant features

In [261]:
from features.quadrant_features import count_tiles_in_quadrants

In [262]:
import time
from tqdm.auto import tqdm

# Track execution time
start_time = time.time()

# Compute tile counts in quadrants for each board in the dataset
quadrant_counts_list = [
    count_tiles_in_quadrants(board) for board in tqdm(df_raw["board"], total=len(df_raw))
]

# Convert list of dictionaries into separate columns in df_raw
df_quadrants = pd.DataFrame(quadrant_counts_list)
df_quadrants.columns = [f"quadrant_counts_{col}" for col in df_quadrants.columns]

# Merge back into df_raw
df_raw = pd.concat([df_raw, df_quadrants], axis=1)

# Print execution time
end_time = time.time()
print(f"Quadrant tile counts computed in {end_time - start_time:.2f} seconds.")


  from .autonotebook import tqdm as notebook_tqdm

  0%|                                                                                                                                                         | 0/395715 [00:00<?, ?it/s][A
  0%|▌                                                                                                                                          | 1669/395715 [00:00<00:23, 16685.32it/s][A
  1%|█▉                                                                                                                                         | 5396/395715 [00:00<00:13, 28790.84it/s][A
  3%|████▍                                                                                                                                     | 12886/395715 [00:00<00:07, 49839.90it/s][A
  5%|███████                                                                                                                                   | 20323/395715 [00:00<00:06, 59316.85it/s][A
  7%

Quadrant tile counts computed in 21.41 seconds.


In [263]:
df_raw.columns

Index(['board', 'board_rep', 'score_diff', 'total_unseen_tiles', 'leave_A',
       'leave_B', 'leave_C', 'leave_D', 'leave_E', 'leave_F', 'leave_G',
       'leave_H', 'leave_I', 'leave_J', 'leave_K', 'leave_L', 'leave_M',
       'leave_N', 'leave_O', 'leave_P', 'leave_Q', 'leave_R', 'leave_S',
       'leave_T', 'leave_U', 'leave_V', 'leave_W', 'leave_X', 'leave_Y',
       'leave_Z', 'leave_?', 'unseen_A', 'unseen_B', 'unseen_C', 'unseen_D',
       'unseen_E', 'unseen_F', 'unseen_G', 'unseen_H', 'unseen_I', 'unseen_J',
       'unseen_K', 'unseen_L', 'unseen_M', 'unseen_N', 'unseen_O', 'unseen_P',
       'unseen_Q', 'unseen_R', 'unseen_S', 'unseen_T', 'unseen_U', 'unseen_V',
       'unseen_W', 'unseen_X', 'unseen_Y', 'unseen_Z', 'unseen_?', 'winProb',
       'expPointDiff', 'cs_h', 'cs_v', '8_letter_bingo_lanes_list',
       '8_letter_bingos', '7_letter_bingo_lanes_list', '7_letter_bingos',
       'accessible_TWS_count', 'accessible_DWS_count', 'available_TWS_TWS',
       'available_DWS_

In [264]:
cache_features_df(df_raw, cached_features_path)

Filtered DataFrame cached successfully at: ../data/numerical_features.pkl
