In [2]:
import numpy as np
import re

In [3]:
# Board geometry: sectors are SECTOR_SIZE x SECTOR_SIZE boxes and the board
# is SECTOR_SIZE sectors wide and tall.
SECTOR_SIZE = 3
BOARD_SIZE = SECTOR_SIZE * SECTOR_SIZE
# Every digit a cell may hold.
VALID_VALUES = '123456789'

### Numpy vectorized functions

In [4]:
def _contains(cell, token):
    """Return True when `token` occurs in `cell`."""
    return token in cell


def _without(cell, token):
    """Return the text of `cell` with every occurrence of `token` removed."""
    return str(cell).replace(token, '')


# Element-wise length of every cell.
get_length = np.vectorize(len)

# Element-wise membership test: does each cell contain the given value?
cell_contains = np.vectorize(_contains)

# Element-wise removal of a substring from every cell.
strip_pattern = np.vectorize(_without)

---

### Helper functions

In [28]:
def remove_value(arr, pattern):
    """Strip every character of `pattern` from the cells of `arr`, in place.

    Cells whose text is exactly `pattern` are left untouched.

    Args:
        arr: NumPy string array (or a view of one); it is modified in place,
            so passing a row, column or sector slice updates the parent array.
        pattern: string whose individual characters are removed from the
            other cells.
    """
    # Each cell is visited once and all characters are stripped in that visit.
    # A single np.nditer shared across characters is exhausted after the first
    # one, which would leave the remaining characters unprocessed.
    for index in np.ndindex(arr.shape):
        original = str(arr[index])
        if original == pattern:
            continue
        stripped = original
        for val in pattern:
            stripped = stripped.replace(val, '')
        if stripped != original:
            arr[index] = stripped

def get_sector_by_number(arr, sector):
    """Return the SECTOR_SIZE x SECTOR_SIZE slice of `arr` for a sector number.

    Sectors are numbered row-major: the quotient of `sector` by SECTOR_SIZE
    picks the band of rows and the remainder picks the stack of columns.
    """
    band, stack = divmod(sector, SECTOR_SIZE)
    top = band * SECTOR_SIZE
    left = stack * SECTOR_SIZE
    return arr[top:top + SECTOR_SIZE, left:left + SECTOR_SIZE]

def get_sector_by_coord(arr, row, col):
    """Return the SECTOR_SIZE x SECTOR_SIZE slice of `arr` containing (row, col)."""
    # Snap each coordinate down to the first row/column of its sector.
    top = row - row % SECTOR_SIZE
    left = col - col % SECTOR_SIZE
    return arr[top:top + SECTOR_SIZE, left:left + SECTOR_SIZE]

def update_neighbors(arr, row, col, val):
    """Remove `val` from the row, the column and the sector of cell (row, col).

    The three groups are processed in that order via `remove_value`, which
    edits `arr` in place.
    """
    sector = get_sector_by_coord(arr, row, col)
    for group in (arr[row, :], arr[:, col], sector):
        remove_value(group, val)

def pretty_flat(board):
    """Print a flattened board as a bordered grid of sectors.

    Args:
        board: 1-D sequence of cell strings in row-major order (for example
            `grid.flatten()`). Each printed row holds BOARD_SIZE cells.

    Every cell is centred in a column two characters wider than the longest
    cell, and a horizontal rule is printed before each band of sectors and
    after the last row.
    """
    width = np.max(get_length(board)) + 2
    segment = '-' * (width * SECTOR_SIZE)
    line = '+' + '+'.join([segment] * SECTOR_SIZE) + '+'
    # Number of cells in one horizontal band of sectors.
    cells_per_band = BOARD_SIZE * SECTOR_SIZE
    for idx, cell in enumerate(board):
        if not idx % cells_per_band:
            print(line)
        if not idx % SECTOR_SIZE:
            print('|', end='')
        print(F'{cell:^{width}s}', end='')
        if not ((idx + 1) % BOARD_SIZE):
            print('|')
    print(line)

# Sweeps the work-in-progress grid in a loop until no singleton is left.
# 'solved' is the final board and 'wip' is the grid of remaining candidates.
def clean_singletons(solved, wip):
    """Move every single-candidate cell from `wip` into `solved`.

    For each cell of `wip` whose string has length 1, the value is written to
    the same position in `solved`, the `wip` cell is emptied, and the value is
    removed from the cell's row, column and sector. The scan repeats until no
    length-1 cell remains.

    Returns:
        The number of sweeps that found at least one singleton.
    """
    sweeps = 0
    while True:
        singles = np.argwhere(get_length(wip) == 1)
        if not singles.size:
            return sweeps
        sweeps += 1
        for r, c in singles:
            digit = wip[r, c]
            # Record the solved value, then clear the cell in the candidates.
            solved[r, c] = digit
            wip[r, c] = ''
            # The digit can no longer appear in any neighbouring cell.
            update_neighbors(wip, r, c, digit)

def clean_disguised_singletons(arr):
    """Collapse cells that are the only place for a value within a group.

    For every value in VALID_VALUES, each row, column and sector of `arr` is
    checked. When exactly one cell of the group contains the value, that cell
    is overwritten with the value alone (in place).

    Args:
        arr: 2-D grid of candidate strings, BOARD_SIZE x BOARD_SIZE.

    Returns:
        The number of distinct cells that were changed. A cell that already
        held exactly the value is not counted, since nothing was disguised.
    """
    altered_coords = set()

    def promote(row, col, val):
        # Skip plain singletons: the assignment would be a no-op and would
        # inflate the reported count.
        if arr[row, col] != val:
            arr[row, col] = val
            altered_coords.add((row, col))

    for val in VALID_VALUES:
        # search every row
        for idx in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(arr[idx, :], val))
            if len(hits) == 1:
                promote(idx, int(hits[0][0]), val)

        # search every column
        for idx in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(arr[:, idx], val))
            if len(hits) == 1:
                promote(int(hits[0][0]), idx, val)

        # search every sector
        for idx in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(get_sector_by_number(arr, idx), val))
            if len(hits) == 1:
                # Translate sector-relative coordinates to board coordinates.
                row = int(hits[0][0]) + SECTOR_SIZE * (idx // SECTOR_SIZE)
                col = int(hits[0][1]) + SECTOR_SIZE * (idx % SECTOR_SIZE)
                promote(row, col, val)
    return len(altered_coords)

---

### Load the puzzle

In [7]:
# `possible` holds the remaining candidates of every cell (all digits to start
# with); `board` holds the digits that have been committed as solved.
possible = np.full((BOARD_SIZE, BOARD_SIZE), VALID_VALUES)
board = np.full((BOARD_SIZE, BOARD_SIZE), '')

# Available puzzles, row-major, with '.' marking an empty cell.
PUZZLES = {
    'hard_1': '2.......6.5..8..1...4...9...7.3.1......82.......7.5.3...9...4...8..1..5.6.......2',
    'easy': '5..98.67.6......31.2.613.4..968.21.7..8..5.9.7.319....962.7..1.1.5...76..7.5..9..',
    'medium': '.29.71..3..8...6..3...5....5.....97......4...4.75.8..1.6.42.3..2..9....6.916...52',
    'hard_2': '..791.5....1.....3..9.4...2.4...83.....3.1....6..5...8.2..9...5...........4.8..7.',
}
# Puzzle to solve; change the key to try another one.
PUZZLE_NAME = 'hard_2'

# Raw string: '\W' in a normal string literal is an invalid escape sequence.
puzzle = re.sub(r'\W', '0', PUZZLES[PUZZLE_NAME])
assert len(puzzle) == BOARD_SIZE ** 2, "puzzle must describe every cell of the board"

# Givens replace the full candidate string of their cell.
for idx, value in enumerate(puzzle):
    if value != '0':
        row, col = divmod(idx, BOARD_SIZE)
        possible[row, col] = value

# Every non-zero character of the puzzle string is a given.
solved = sum(1 for given in puzzle if given != '0')
print(F"Puzzle starts with {solved} solved locations.")

pretty_flat(possible.flatten())

Puzzle starts with 23 solved locations.
+---------------------------------+---------------------------------+---------------------------------+
| 123456789  123456789      7     |     9          1      123456789 |     5      123456789  123456789 |
| 123456789  123456789      1     | 123456789  123456789  123456789 | 123456789  123456789      3     |
| 123456789  123456789      9     | 123456789      4      123456789 | 123456789  123456789      2     |
+---------------------------------+---------------------------------+---------------------------------+
| 123456789      4      123456789 | 123456789  123456789      8     |     3      123456789  123456789 |
| 123456789  123456789  123456789 |     3      123456789      1     | 123456789  123456789  123456789 |
| 123456789      6      123456789 | 123456789      5      123456789 | 123456789  123456789      8     |
+---------------------------------+---------------------------------+---------------------------------+
| 123456789      2      

---

### Step 1: Handle singletons

Look at every cell that has only one **possible** value. This is the only step that modifies the actual board, so it is both the first and the last step of solving any puzzle.

This cell calls a function that runs repeatedly until it cannot simplify the board any more.

In [24]:
# Commit every single-candidate cell to the board, repeating until none remain.
response = clean_singletons(board, possible)
print(F"Performed {response} singleton sweeps.")

# A board cell counts as solved once it holds exactly one character.
locations = np.count_nonzero(get_length(board) == 1)
is_complete = locations == BOARD_SIZE**2
if is_complete:
    print("Board has been solved!!!")
else:
    print(F"Puzzle currently has {locations} solved locations.")

pretty_flat(board.flatten())
if not is_complete:
    # Also show the remaining candidates while the puzzle is unfinished.
    pretty_flat(possible.flatten())


Performed 0 singleton sweeps.
Puzzle currently has 56 solved locations.
+---------+---------+---------+
| 2     7 | 9  1    | 5     4 |
| 4     1 |         |       3 |
| 6     9 |    4    |       2 |
+---------+---------+---------+
| 1  4  2 |       8 | 3  5  9 |
| 5  9  8 | 3  2  1 | 6  4  7 |
| 7  6  3 | 4  5  9 |       8 |
+---------+---------+---------+
| 8  2  6 | 1  9  7 | 4  3  5 |
| 9  7  5 |    3    |       1 |
| 3  1  4 |    8    |    7  6 |
+---------+---------+---------+
+---------------------+---------------------+---------------------+
|         38          |                36   |         68          |
|         58          | 25678   67     256  |  78     689         |
|         358         |  578           35   |  178    18          |
+---------------------+---------------------+---------------------+
|                     |  67     67          |                     |
|                     |                     |                     |
|                     |             

---

### Step 2: Handle disguised singletons

In [26]:
# Collapse values that fit in only one cell of a row, column or sector.
response = clean_disguised_singletons(possible)
print(F"Located {response} disguised singletons.")

# Show the candidates first, then the board.
for grid in (possible, board):
    pretty_flat(grid.flatten())

# Commit any singletons exposed by the step above.
_ = clean_singletons(board,possible)

Located 0 disguised singletons.
+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+
+---------+---------+---------+
| 2  8  7 | 9  1  3 | 5  6  4 |
| 4  5  1 | 2  7  6 | 8  9  3 |
| 6  3  9 | 8  4  5 | 7  1  2 |
+---------+---------+---------+
| 1  4  2 | 7  6  8 | 3  5  9 |
| 5  9  8 | 3  2  1 | 6  4  7 |
| 7  6  3 | 4  5  9 | 1  2  8 |
+---------+---------+---------+
| 8  2  6 | 1  9  7 | 4  3  5 |
| 9  7  5 | 6  3  4 | 2  8  1 |
| 3  1  4 | 5  8  2 | 9  7  6 |
+---------+---------+---------+


---

### Step 3: Handle matching patterns

In [22]:
def get_common_patterns(arr):
    """Return the cell strings that occur as many times as they are long.

    Each distinct cell string in `arr` is counted; a string is returned when
    its number of occurrences equals its length (for example '67' appearing
    in exactly two cells). Results keep the order of first appearance.
    """
    occurrences = {}
    cells = np.nditer(arr, flags=['multi_index', 'refs_ok'], op_flags=['readwrite'])
    for cell in cells:
        key = str(cell)
        if key in occurrences:
            occurrences[key] += 1
        else:
            occurrences[key] = 1
    return [pattern for pattern, count in occurrences.items() if count == len(pattern)]

def remove_pattern_from_group(arr, patterns):
    """Strip the characters of each pattern from the non-matching cells of `arr`.

    For every pattern in turn, each cell whose text differs from the pattern
    loses every character of that pattern. Cells equal to the pattern are
    kept as they are. `arr` is modified in place.
    """
    for patt in patterns:
        # A fresh iterator per pattern: an nditer cannot be walked twice.
        cells = np.nditer(arr, flags=['multi_index', 'refs_ok'], op_flags=['readwrite'])
        for cell in cells:
            if str(cell) == patt:
                continue
            for digit in patt:
                cell[...] = str(cell).replace(digit, '')

# Collect every group as a (label, view) pair: rows, then columns, then
# sectors. The views share memory with `possible`, and the order matters
# because each removal changes what later groups see.
groups = [('row', possible[row, :]) for row in range(BOARD_SIZE)]
groups += [('col', possible[:, col]) for col in range(BOARD_SIZE)]
groups += [
    ('sector', possible[row:row + SECTOR_SIZE, col:col + SECTOR_SIZE])
    for row in range(0, BOARD_SIZE, SECTOR_SIZE)
    for col in range(0, BOARD_SIZE, SECTOR_SIZE)
]

for label, group in groups:
    patterns = get_common_patterns(group)
    if patterns:
        print(F"{label}: {patterns}")
    remove_pattern_from_group(group, patterns)

pretty_flat(possible.flatten())
pretty_flat(board.flatten())
# Commit any singletons exposed by the pattern removal.
_ = clean_singletons(board,possible)

row: ['67']
row: ['12']
col: ['24']
col: ['9']
sector: ['24']
sector: ['67']
sector: ['9', '12']
sector: ['6']
+---------------------+---------------------+---------------------+
|  24     38          |                236  |         468    46   |
|  24     58          | 25678   267   2567  | 46789  4689         |
|         358         |  578           357  |  178    18          |
+---------------------+---------------------+---------------------+
|                     |  67     67          |          5      9   |
|                     |          2          |  46     46     467  |
|                     |                     |  12     12          |
+---------------------+---------------------+---------------------+
|                     |  17            47   |  14                 |
|                     |  126           246  | 1248   1248    14   |
|                     |  256           256  |  29             6   |
+---------------------+---------------------+---------------------+
+----

---

### Step 4: Handle rare values (confined to one row or column of a sector)

In [27]:
def convert_sector_row(sector, row):
    """Convert a row index inside a sector to the matching board row."""
    first_row = (sector // SECTOR_SIZE) * SECTOR_SIZE
    return first_row + row

def convert_sector_col(sector, col):
    """Convert a column index inside a sector to the matching board column."""
    first_col = (sector % SECTOR_SIZE) * SECTOR_SIZE
    return first_col + col

def find_rare(arr):
    """Remove values that a sector confines to a single row or column.

    For each sector and each value in VALID_VALUES, the cells of the sector
    containing the value are located. If they all lie in one row, the value
    is removed from every other cell of that board row; likewise for a single
    column. A message is printed for each such finding and `arr` is modified
    in place.
    """
    for idx in range(BOARD_SIZE):
        for value in VALID_VALUES:
            # Sector-relative coordinates of the cells containing this value.
            hits = np.argwhere(cell_contains(get_sector_by_number(arr, idx), value))
            rows = {hit[0] for hit in hits}
            cols = {hit[1] for hit in hits}

            if len(rows) == 1:
                actual_row = convert_sector_row(idx, next(iter(rows)))
                print(F"Every {value} in sector {idx} is in one row: {actual_row}")
                # Board columns that hold the value inside this sector stay untouched.
                keep_cols = [convert_sector_col(idx, x) for x in cols]
                board_row = arr[actual_row, :]
                for position in range(board_row.shape[0]):
                    if position not in keep_cols:
                        board_row[position] = str(board_row[position]).replace(value, '')

            if len(cols) == 1:
                actual_col = convert_sector_col(idx, next(iter(cols)))
                print(F"Every {value} in sector {idx} is in one column: {actual_col}")
                # Board rows that hold the value inside this sector stay untouched.
                keep_rows = [convert_sector_row(idx, x) for x in rows]
                board_col = arr[:, actual_col]
                for position in range(board_col.shape[0]):
                    if position not in keep_rows:
                        board_col[position] = str(board_col[position]).replace(value, '')

# Prune candidates that a sector pins to one row or column, then show the result.
find_rare(possible)
flat_candidates = possible.flatten()
pretty_flat(flat_candidates)

+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+
|      |      |      |
|      |      |      |
|      |      |      |
+------+------+------+


---

### Test the board

In [None]:
# NOTE(review): a bare name only displays the function object; presumably an unfinished call - confirm.
pretty_flat

In [37]:
# Locate the cells of sector 2 that still contain '7'; coordinates are relative to the sector.
coordinate_list = np.argwhere(cell_contains(get_sector_by_number(possible,2), '7'))

array([[1, 0],
       [2, 0]])

In [33]:
# Display the candidate strings of sector 2 (rows 0-2, columns 6-8).
get_sector_by_number(possible,2)

array([['', '468', '46'],
       ['46789', '4689', ''],
       ['1678', '168', '']], dtype='<U9')