# Sudoku solver using numpy

In [87]:
import numpy as np

In [88]:
# constants
# Number of rows (and of columns) on the board.
BOARD_SIZE = 9
# Side length of one square sector; sector indices are decoded with // and %
# against this value.
SECTOR_SIZE = 3
# Characters accepted as given digits when a puzzle string is loaded; also the
# candidate values iterated by the solving helpers.
VALID_VALUES = '123456789'

---

## Helper functions

### Numpy vectorized functions

In [89]:
"""Returns the length of the contents of each cell."""
# NOTE: ``otypes`` is passed explicitly to every np.vectorize call below.
# Without it numpy has to call the function on the first element to discover
# the output type, which raises ValueError for zero-sized inputs. The ``doc``
# argument attaches real documentation to each vectorized callable (a bare
# string placed before an assignment is only a no-op statement).
get_length = np.vectorize(
    len,
    otypes=[int],
    doc="Returns the length of the contents of each cell.",
)


def _cell_contains(cell, pattern):
    """Return True when ``pattern`` occurs inside ``cell``."""
    return pattern in cell


def _strip_pattern(cell, pattern):
    """Return ``cell`` as a string with every occurrence of ``pattern`` removed."""
    return str(cell).replace(pattern, '')


cell_contains = np.vectorize(
    _cell_contains,
    otypes=[bool],
    doc="Looks for the specified pattern in each cell.",
)

strip_pattern = np.vectorize(
    _strip_pattern,
    otypes=[str],
    doc="Remove a specific string from every location in a numpy array.",
)

### Generic helpers

In [90]:
def get_sector_by_number(arr, sector):
    """Return the SECTOR_SIZE x SECTOR_SIZE slice of ``arr`` for one sector.

    Sectors are numbered row-major from zero: the quotient of ``sector`` by
    SECTOR_SIZE selects the band of rows and the remainder selects the stack
    of columns.
    """
    band, stack = divmod(sector, SECTOR_SIZE)
    top = band * SECTOR_SIZE
    left = stack * SECTOR_SIZE
    return arr[top:top + SECTOR_SIZE, left:left + SECTOR_SIZE]


def convert_sector_row(sector, row):
    """Translate a row index inside ``sector`` into a row index on the full board."""
    first_row_of_sector = SECTOR_SIZE * (sector // SECTOR_SIZE)
    return first_row_of_sector + row


def convert_sector_col(sector, col):
    """Translate a column index within ``sector`` into a column index on the full board."""
    first_col_of_sector = SECTOR_SIZE * (sector % SECTOR_SIZE)
    return first_col_of_sector + col


def show_pretty_board(board):
    """Print a board as a 2D grid with sector borders.

    ``board`` may be a flat (1D) sequence of cell strings in row-major order
    or a 2D array; a 2D array is flattened in row-major order first. Every
    cell is centred in a column as wide as the longest cell plus two spaces
    of padding.

    The layout is derived from BOARD_SIZE and SECTOR_SIZE instead of
    hard-coded 27/9/3 literals, so it stays consistent with those constants.
    """
    cells = np.asarray(board).ravel()
    width = int(np.max(get_length(cells))) + 2

    sectors_per_side = BOARD_SIZE // SECTOR_SIZE
    cells_per_band = BOARD_SIZE * SECTOR_SIZE  # cells in one horizontal band of sectors
    segment = '-' * (width * SECTOR_SIZE)
    line = '+' + '+'.join([segment] * sectors_per_side) + '+'

    for idx, cell in enumerate(cells):
        # a horizontal rule precedes every band of sectors
        if idx % cells_per_band == 0:
            print(line)
        # a vertical bar precedes every sector within a row
        if idx % SECTOR_SIZE == 0:
            print('|', end='')
        print(F'{cell:^{width}s}', end='')
        # close the row after its last cell
        if (idx + 1) % BOARD_SIZE == 0:
            print('|')
    print(line)

### Algorithms

In [91]:
def clean_singletons(solved,wip):
    """Move every single-candidate cell from ``wip`` into ``solved``.

    A sweep collects all cells of ``wip`` whose string has length one. Each
    such value is written to the same position in ``solved`` and then removed
    from every cell in its row, its column and its sector of ``wip``
    (including the cell itself). Sweeps repeat until ``wip`` holds no
    single-candidate cell. Both arrays are modified in place.

    Keyword arguments:
    solved -- numpy array containing only solved elements
    wip -- work in progress numpy array containing possible values for each cell

    Returns the number of sweeps that found at least one singleton.
    """
    passes = 0
    while True:
        pending = np.argwhere(get_length(wip) == 1)
        if not pending.size:
            # nothing left to transfer
            return passes
        passes += 1

        for r, c in pending:
            digit = wip[r, c]
            # record the value on the solved board
            solved[r, c] = digit

            # eliminate the value along the row and the column
            wip[r, :] = strip_pattern(wip[r, :], digit)
            wip[:, c] = strip_pattern(wip[:, c], digit)

            # eliminate the value inside the sector containing (r, c)
            top = r // SECTOR_SIZE * SECTOR_SIZE
            left = c // SECTOR_SIZE * SECTOR_SIZE
            box = (slice(top, top + SECTOR_SIZE), slice(left, left + SECTOR_SIZE))
            wip[box] = strip_pattern(wip[box], digit)

In [92]:
def identify_disguised_singletons(arr):
    """Collapse cells that are the only home for a value within a group.

    For each value, every row, then every column, then every sector is
    examined. When exactly one cell of the group contains the value, that
    cell is overwritten with the value alone. ``arr`` is modified in place.

    Returns the number of distinct coordinates that were assigned this way.
    """
    touched = set()
    for val in '123456789':
        # rows
        for r in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(arr[r, :], val))
            if len(hits) == 1:
                c = hits[0][0]
                arr[r, c] = val
                touched.add((r, c))

        # columns
        for c in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(arr[:, c], val))
            if len(hits) == 1:
                r = hits[0][0]
                arr[r, c] = val
                touched.add((r, c))

        # sectors
        for sector in range(BOARD_SIZE):
            hits = np.argwhere(cell_contains(get_sector_by_number(arr, sector), val))
            if len(hits) == 1:
                local_row, local_col = hits[0]
                # shift the sector-local coordinates onto the full board
                r = local_row + SECTOR_SIZE * (sector // SECTOR_SIZE)
                c = local_col + SECTOR_SIZE * (sector % SECTOR_SIZE)
                arr[r, c] = val
                touched.add((r, c))
    return len(touched)

In [93]:
def handle_rc_owners(arr):
    """Eliminate candidates using values confined to one row or column of a sector.

    For every sector and every value in VALID_VALUES, the cells of the sector
    that still contain the value are located. When they all share one row,
    the value is removed from every cell of that board row whose column is
    not one of the located columns; when they all share one column, the same
    is done along that board column. ``arr`` is modified in place.

    Returns how many times a row or column confinement was found, whether or
    not any candidate was actually removed as a result.
    """
    owner_count = 0
    for sector in range(BOARD_SIZE):
        for value in VALID_VALUES:
            # sector-local coordinates of every cell still holding this value
            hits = np.argwhere(cell_contains(get_sector_by_number(arr, sector), value))
            local_rows = {hit[0] for hit in hits}
            local_cols = {hit[1] for hit in hits}

            # all occurrences sit in one row of the sector
            if len(local_rows) == 1:
                owner_count += 1
                board_row = convert_sector_row(sector, hits[-1][0])
                keep_cols = [convert_sector_col(sector, c) for c in local_cols]
                for board_col in range(arr.shape[1]):
                    if board_col not in keep_cols:
                        arr[board_row, board_col] = str(arr[board_row, board_col]).replace(value, '')

            # all occurrences sit in one column of the sector
            if len(local_cols) == 1:
                owner_count += 1
                board_col = convert_sector_col(sector, hits[-1][1])
                keep_rows = [convert_sector_row(sector, r) for r in local_rows]
                for board_row in range(arr.shape[0]):
                    if board_row not in keep_rows:
                        arr[board_row, board_col] = str(arr[board_row, board_col]).replace(value, '')

    return owner_count

---

## Load the puzzle

| |  |  |  |
|:------:|:------:|:------:|:------:|
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |

When the puzzle is read in, the "possible" cell is overwritten by the given value.

| |  |  |  |
|:------:|:------:|:------:|:------:|
|  | 123456789 | 123456789 | 5 |
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |



In [94]:
# Candidate grid: every cell starts out able to hold any digit.
possible = np.full((9, 9), '123456789')
# Solved grid: every cell starts out empty.
board = np.full((9, 9), '')

# Sample puzzles. Each assignment replaces the previous one, so only the last
# puzzle listed is the one that gets loaded below.
# hard
puzzle = '2.......6.5..8..1...4...9...7.3.1......82.......7.5.3...9...4...8..1..5.6.......2'
# easy
puzzle = '5..98.67.6......31.2.613.4..968.21.7..8..5.9.7.319....962.7..1.1.5...76..7.5..9..'
# medium
puzzle = '.29.71..3..8...6..3...5....5.....97......4...4.75.8..1.6.42.3..2..9....6.916...52'
# hard
puzzle = '..791.5....1.....3..9.4...2.4...83.....3.1....6..5...8.2..9...5...........4.8..7.'
# hardest ever
puzzle = '1....7.9..3..2...8..96..5....53..9...1..8...26....4...3......1..4......7..7...3..'
# hard via sudoku.com
puzzle = '..5........24.1.7.3.4...56...............79848...9..1....2..1...9..7...2.183...4.'
# expert via sudoku.com
puzzle = '.....2..3.4...16.7..1.....4.8.1.....43.....62...7..............6.98.73.....3.47..'


# Overwrite the candidate string with the given digit wherever the puzzle
# supplies one; any other character leaves the cell fully open.
for idx, value in enumerate(puzzle):
    if value not in VALID_VALUES:
        continue
    row, col = divmod(idx, BOARD_SIZE)
    possible[row, col] = value

solved = int(np.count_nonzero(get_length(possible) == 1))
print(F"Puzzle starts with {solved} solved locations.")

show_pretty_board(possible.flatten())

Puzzle starts with 23 solved locations.
+---------------------------------+---------------------------------+---------------------------------+
| 123456789  123456789  123456789 | 123456789  123456789      2     | 123456789  123456789      3     |
| 123456789      4      123456789 | 123456789  123456789      1     |     6      123456789      7     |
| 123456789  123456789      1     | 123456789  123456789  123456789 | 123456789  123456789      4     |
+---------------------------------+---------------------------------+---------------------------------+
| 123456789      8      123456789 |     1      123456789  123456789 | 123456789  123456789  123456789 |
|     4          3      123456789 | 123456789  123456789  123456789 | 123456789      6          2     |
| 123456789  123456789  123456789 |     7      123456789  123456789 | 123456789  123456789  123456789 |
+---------------------------------+---------------------------------+---------------------------------+
| 123456789  123456789  

---

## Solve the puzzle

### Step 1: Handle singletons

Look at every cell that has only one **possible** value. This is the only step that modifies the actual board, so it is both the first and the last step of solving any puzzle.

This cell calls a function that runs repeatedly until it cannot simplify the board any more.

In [130]:
# Transfer every singleton to the solved board and report the sweep count.
response = clean_singletons(board, possible)
print(F"Performed {response} singleton sweeps.")

# A cell of the solved board is filled once it holds exactly one character.
locations = int(np.count_nonzero(get_length(board) == 1))
if locations != BOARD_SIZE**2:
    print(F"Puzzle currently has {locations} solved locations.")
    show_pretty_board(board.flatten())
    # also show the remaining candidates while the puzzle is unfinished
    show_pretty_board(possible.flatten())
else:
    print("Board has been solved!!!")
    show_pretty_board(board.flatten())

Performed 0 singleton sweeps.
Puzzle currently has 53 solved locations.
+---------+---------+---------+
|    6    | 4  7  2 |    1  3 |
| 2  4  3 |       1 | 6     7 |
| 7     1 | 6  3    | 2     4 |
+---------+---------+---------+
|    8  6 | 1  2  3 | 4  7    |
| 4  3  7 |         | 1  6  2 |
|       2 | 7  4  6 |    3  8 |
+---------+---------+---------+
| 3  7  4 | 2       | 8       |
| 6  2  9 | 8     7 | 3  4    |
|         | 3     4 | 7  2    |
+---------+---------+---------+
+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59   589  589 |               |
| 19   159      |               | 59            |
+---------------+---------------+---------------+
|               |      16   59  |      59   16  |
|           

### Step 2: Identify disguised singletons

In [131]:
# Candidates before the pass.
show_pretty_board(possible.flatten())

# Keep alternating between exposing disguised singletons and sweeping them
# onto the solved board until a pass exposes nothing new.
while True:
    disguised = identify_disguised_singletons(possible)
    if not disguised:
        break
    print(F"Located {disguised} disguised singletons.")
    _ = clean_singletons(board, possible)

# Candidates after the pass.
show_pretty_board(possible.flatten())

+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59   589  589 |               |
| 19   159      |               | 59            |
+---------------+---------------+---------------+
|               |      16   59  |      59   16  |
|               |      15       |           15  |
| 18   15   58  |      69       |           69  |
+---------------+---------------+---------------+
+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59   589  589 |               |


### Step 3: Handle row and column owners

In [132]:
# Sweep any singletons still present in the candidate grid before looking for
# row/column owners.
response = clean_singletons(board,possible)
print(F"Performed {response} singleton sweeps.")

# Print the candidates before and after the owner pass so the eliminations
# can be compared side by side.
show_pretty_board(possible.flatten())
response = handle_rc_owners(possible)
show_pretty_board(possible.flatten())
print(F"Identified {response} row and/or column owners.")

Performed 0 singleton sweeps.
+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59   589  589 |               |
| 19   159      |               | 59            |
+---------------+---------------+---------------+
|               |      16   59  |      59   16  |
|               |      15       |           15  |
| 18   15   58  |      69       |           69  |
+---------------+---------------+---------------+
+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59

### Step 4: Handle matching patterns

In [134]:
def get_common_patterns(arr):
    """Return the candidate strings that are shared by as many cells as they have characters.

    Every cell of ``arr`` is read in row-major order and the number of cells
    holding each exact string is tallied. A string qualifies when that tally
    equals its length (for example two cells that both hold '59'). Empty
    strings never qualify because their length is zero.

    The array is only read, never written, so read-only and zero-sized
    arrays are accepted.

    Returns a list of the qualifying strings in order of first appearance.
    """
    from collections import Counter

    # tolist() yields plain Python strings; Counter keeps first-seen order
    tally = Counter(np.asarray(arr).ravel().tolist())
    return [pattern for pattern, count in tally.items() if len(pattern) == count]

def remove_pattern_from_group(arr, patterns):
    """Strip the characters of each pattern from every cell that is not that pattern.

    For each string in ``patterns``, every cell of ``arr`` whose content
    differs from the string has all of the string's characters removed.
    Cells equal to the pattern are left untouched. ``arr`` is modified in
    place, so passing a slice of a larger array updates that array.

    A zero-sized ``arr`` is accepted and left unchanged.
    """
    for patt in patterns:
        # one translation table deletes every character of the pattern at once
        drop_chars = str.maketrans('', '', patt)
        for index in np.ndindex(arr.shape):
            current = str(arr[index])
            if current != patt:
                arr[index] = current.translate(drop_chars)

# Rows: report the matching patterns found in each row, then strip them from
# the other cells of that row.
for row in range(BOARD_SIZE):
    group = possible[row, :]
    patterns = get_common_patterns(group)
    if patterns:
        print(F"row: {patterns}")
    remove_pattern_from_group(group, patterns)

# Columns: same treatment, one column at a time.
for col in range(BOARD_SIZE):
    group = possible[:, col]
    patterns = get_common_patterns(group)
    if patterns:
        print(F"col: {patterns}")
    remove_pattern_from_group(group, patterns)

# Sectors: same treatment, stepping to the top-left corner of each sector.
for row in range(0, BOARD_SIZE, 3):
    for col in range(0, BOARD_SIZE, 3):
        group = possible[row:row + SECTOR_SIZE, col:col + SECTOR_SIZE]
        patterns = get_common_patterns(group)
        if patterns:
            print(F"sector: {patterns}")
        remove_pattern_from_group(group, patterns)

show_pretty_board(possible.flatten())
show_pretty_board(board.flatten())
# Move any singletons exposed by the eliminations onto the solved board.
_ = clean_singletons(board, possible)

row: ['59']
row: ['16', '59']
row: ['15']
row: ['69']
col: ['58']
col: ['59']
col: ['59']
sector: ['59']
+---------------+---------------+---------------+
| 589       58  |               | 59            |
|               | 59   589      |      589      |
|      59       |           589 |      589      |
+---------------+---------------+---------------+
| 59            |               |           59  |
|               | 59   589  589 |               |
| 19   159      |               | 59            |
+---------------+---------------+---------------+
|               |      16   59  |      59   16  |
|               |      15       |           15  |
| 18   15   58  |      69       |           69  |
+---------------+---------------+---------------+
+---------+---------+---------+
|    6    | 4  7  2 |    1  3 |
| 2  4  3 |       1 | 6     7 |
| 7     1 | 6  3    | 2     4 |
+---------+---------+---------+
|    8  6 | 1  2  3 | 4  7    |
| 4  3  7 |         | 1  6  2 |
|       2 | 7  4  6 |


|   |   |   |   |   |   |   |   |   |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 
| `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | `123` <br> `456` <br> `789` | 




---

## Test the board

In [117]:
def board_valid(arr):
    """Check that every value appears exactly once in each row, column and sector.

    ``arr`` is the board to validate. For every value in VALID_VALUES and
    every index below BOARD_SIZE, the row, the column and the sector with
    that index are inspected in that order; a message is printed for each
    group in which the value does not occur exactly once.

    Returns True when no violation was found, False otherwise.
    """
    # NOTE: the checks run on the ``arr`` argument; reading the global board
    # here would ignore whatever array the caller asked to validate.
    flag = True
    for pattern in VALID_VALUES:
        for idx in range(BOARD_SIZE):
            groups = (
                ('row', arr[idx, :]),
                ('column', arr[:, idx]),
                ('sector', get_sector_by_number(arr, idx)),
            )
            for label, cells in groups:
                count = np.count_nonzero(cell_contains(cells, pattern))
                if count != 1:
                    print(F"There are {count} instances of '{pattern}' in zero-based {label} {idx}.")
                    flag = False
    return flag

# Display the solved board, then validate it; any violation messages are
# printed by board_valid before the final verdict.
show_pretty_board(board.flatten())
print("Solved!" if board_valid(board) else "ERROR!")

+---------+---------+---------+
|    6    | 4  7  2 |    1  3 |
| 2  4  3 |       1 | 6     7 |
| 7     1 | 6  3    | 2     4 |
+---------+---------+---------+
|    8  6 | 1  2  3 | 4  7    |
| 4  3  7 |         | 1  6  2 |
|       2 | 7  4  6 |    3  8 |
+---------+---------+---------+
| 3  7  4 | 2       | 8       |
| 6  2  9 | 8     7 | 3  4    |
|         | 3     4 | 7  2    |
+---------+---------+---------+
There are 0 instances of '1' in zero-based column 0.
There are 0 instances of '1' in zero-based column 1.
There are 0 instances of '1' in zero-based sector 3.
There are 0 instances of '1' in zero-based column 4.
There are 0 instances of '1' in zero-based row 5.
There are 0 instances of '1' in zero-based row 6.
There are 0 instances of '1' in zero-based sector 6.
There are 0 instances of '1' in zero-based row 7.
There are 0 instances of '1' in zero-based sector 7.
There are 0 instances of '1' in zero-based row 8.
There are 0 instances of '1' in zero-based column 8.
There are 0 i