# Sudoku solver using NumPy

The most common Sudoku solving algorithm is great at solving easy Sudoku puzzles. Solving more challenging puzzles requires more than one strategy, and therefore more than one algorithm. This notebook implements several algorithms. The benefit of a Jupyter notebook is the ability to execute each algorithm separately and in a different order while immediately viewing the results.

This notebook is separated into three main parts:

1. Helpers and algorithms
1. Solve multiple puzzles
1. Solve a single puzzle

Sudoku is traditionally a 9x9 puzzle. The code in this notebook was written and tested with a 9x9 grid in place, although it *should* support 16x16 and 25x25 by changing the constants. I have not tested it.

In [181]:
import numpy as np
import pandas as pd
from itertools import combinations

In [182]:
# ----- CONSTANTS -----

# traditional Sudoku boards are 9x9
# this constant is the number of rows (or columns) in a board
BOARD_SIZE = 9

# there are BOARD_SIZE number of sectors in a Sudoku board
# this constant is the number of rows (or columns) in a sector
# it is derived from BOARD_SIZE so the two can never disagree
SECTOR_SIZE = round(BOARD_SIZE ** 0.5)     # always the SQRT of BOARD_SIZE
if SECTOR_SIZE * SECTOR_SIZE != BOARD_SIZE:
    raise ValueError(F"BOARD_SIZE must be a perfect square, got {BOARD_SIZE}")

# this constant represents the values allowed on a Sudoku board
# larger boards would require a longer string
VALID_VALUES = '123456789ABCDEFGHIJKLMNOP'[:BOARD_SIZE]
if len(VALID_VALUES) != BOARD_SIZE:
    # the slice silently comes up short when the alphabet above is too small
    raise ValueError(F"not enough symbols defined for a board of size {BOARD_SIZE}")

---

## Helper functions and algorithms

### Numpy vectorized functions

These vectorized functions are used in the helpers and algorithms that follow. The purpose of a vectorized function is to be easier to use, not to be more performant than a common `while` loop.

In [183]:
"""Returns the length of the contents of each cell."""
vect_length = np.vectorize(len)

"""Looks for the specified string pattern in each cell."""
vect_contains = np.vectorize(lambda x, y : y in x)

"""Remove a specific string from every location in a numpy array. 
Used primarily to remove a single character from a string."""
vect_replace = np.vectorize(lambda x, y : x.replace(y,''))

"""Remove each character of a pattern from every location in a
numpy array unless that cell exactly matches the pattern. For example,
when given the argument '14' the function will remove '1' and '4' from
every cell that does not exactly equal '14'."""
vect_conditional_replace = np.vectorize(lambda x, y : x if x == y else ''.join([c for c in x if c not in y]))

### Generic helpers

In [184]:
def clean_neighbors(arr, row, col, value):
    """Strips `value` from every cell that shares a row, a column, or a sector
    with the cell at (row, col). The array is modified in place; because the
    cell at (row, col) belongs to all three regions, it loses the value too."""

    # top-left corner of the sector that contains (row, col)
    sector_top = (row // SECTOR_SIZE) * SECTOR_SIZE
    sector_left = (col // SECTOR_SIZE) * SECTOR_SIZE

    # the three regions in which a Sudoku value may appear only once,
    # expressed as index tuples so one loop can clean them all
    regions = (
        (row, slice(None)),                                   # the whole row
        (slice(None), col),                                   # the whole column
        (slice(sector_top, sector_top + SECTOR_SIZE),
         slice(sector_left, sector_left + SECTOR_SIZE)),      # the sector
    )
    for region in regions:
        arr[region] = vect_replace(arr[region], value)


def get_sector_by_number(arr, sector):
    """Returns the slice of `arr` covering one sector. Sectors are numbered
    from zero, left to right and then top to bottom, so valid numbers run
    from 0 to BOARD_SIZE - 1."""
    # position of the sector in the grid of sectors
    sector_row, sector_col = divmod(sector, SECTOR_SIZE)
    top = sector_row * SECTOR_SIZE
    left = SECTOR_SIZE * sector_col
    return arr[top:top+SECTOR_SIZE, left:left+SECTOR_SIZE]


def convert_sector_row(sector, row):
    """Translates a row number counted inside a sector into the row number
    on the full board."""
    # board row on which the given sector begins
    first_row_of_sector = (sector // SECTOR_SIZE) * SECTOR_SIZE
    return first_row_of_sector + row


def convert_sector_col(sector, col):
    """Translates a column number counted inside a sector into the column
    number on the full board."""
    # board column on which the given sector begins
    first_col_of_sector = (sector % SECTOR_SIZE) * SECTOR_SIZE
    return first_col_of_sector + col


def print_one_board(arr):
    """Print a single Sudoku board with sector borders.

    Keyword arguments:
    arr -- numpy array of strings holding the board; the row and sector
           breaks are placed using BOARD_SIZE and SECTOR_SIZE
    """
    board = arr.flatten()
    # every cell is centered in a field two characters wider than the longest cell
    width = np.max(vect_length(board)) + 2
    # horizontal rule: one dashed segment per sector, each as wide as the
    # SECTOR_SIZE cells it spans (previously hard-coded for three sectors of three)
    line = '+' + ('-'*(width*SECTOR_SIZE) + '+') * SECTOR_SIZE
    for idx, cell in enumerate(board):
        # a rule goes above every band of SECTOR_SIZE board rows
        if not idx % (SECTOR_SIZE * BOARD_SIZE):
            print(line)
        # a vertical border goes in front of every sector
        if not idx % SECTOR_SIZE:
            print('|', end='')
        print(F'{cell:^{width}s}', end='')
        # close the board row after its last cell
        if not ((idx + 1) % BOARD_SIZE):
            print('|')
    print(line)

def print_two_boards(arr1, arr2):
    """Prints two Sudoku boards side by side with sector borders.

    Keyword arguments:
    arr1 -- numpy array of strings printed on the left
    arr2 -- numpy array of strings printed on the right
    """

    # each board is sized for its own widest cell, but never narrower than 3;
    # the minimum is now applied to the second board as well (the original
    # tested width1 a second time, so width2 was never raised to the minimum)
    width1 = max(3, int(np.max(vect_length(arr1))) + 1)
    width2 = max(3, int(np.max(vect_length(arr2))) + 1)

    # horizontal rules: one dashed segment per sector across each board
    line1 = '+' + ('-'*(width1*SECTOR_SIZE) + '+') * SECTOR_SIZE
    line2 = '+' + ('-'*(width2*SECTOR_SIZE) + '+') * SECTOR_SIZE

    for row in range(BOARD_SIZE):
        # a rule goes above every band of SECTOR_SIZE rows
        if not row % SECTOR_SIZE:
            print(line1 + '  ' + line2)

        # left board
        for col in range(BOARD_SIZE):
            if not col % SECTOR_SIZE:
                print('|', end='')
            print(F'{arr1[row,col]:^{width1}s}', end='')

        # right edge of the left board and the gap between the boards
        print('|  ', end='')

        # right board
        for col in range(BOARD_SIZE):
            if not col % SECTOR_SIZE:
                print('|', end='')
            print(F'{arr2[row,col]:^{width2}s}', end='')

        print('|')
    print(line1 + '  ' + line2)
                
    

In [185]:
def check_board(arr, verbose=False):
    """Returns a tuple holding the number of unsolved (empty) cells and the
    number of rule violations in a board. A violation is a value that shows
    up more than once in a row, column, or sector. When `verbose` is true and
    violations exist, a description of each one is printed."""
    # an unsolved cell is one that is still the empty string
    unsolved = np.sum(arr == '')

    errors = []
    for digit in VALID_VALUES:
        for idx in range(BOARD_SIZE):
            # idx doubles as the row, column, and sector number
            count = np.count_nonzero(vect_contains(arr[idx,:],digit))
            if count > 1:
                errors.append(F"There are {count} instances of '{digit}' in zero-based row {idx}.")

            count = np.count_nonzero(vect_contains(arr[:,idx],digit))
            if count > 1:
                errors.append(F"There are {count} instances of '{digit}' in zero-based column {idx}.")

            count = np.count_nonzero(vect_contains(get_sector_by_number(arr,idx),digit))
            if count > 1:
                errors.append(F"There are {count} instances of '{digit}' in zero-based sector {idx}.")

    if verbose and errors:
        print('\n'.join(errors))
    return unsolved,len(errors)

For the purposes of illustration (and to save space), I'll show a 3x3 array to describe the initialization.

If the puzzle looks like this:

| 5 |   |   |
|---|---|---|
|   |   | **3** |
|   | **7** |   |

Then we will create from that **two** NumPy arrays. The first array will be empty because no cells have been solved yet.

| &nbsp; | &nbsp; | &nbsp; |
|---|---|---|
| &nbsp; | &nbsp; | &nbsp;|
| &nbsp; | &nbsp; | &nbsp;|

The second array starts containing every *possible* value as shown here:

| 123456789 | 123456789 | 123456789 |
|:---:|:---:|:---:|
| **123456789** | **123456789** | **123456789** |
| **123456789** | **123456789** | **123456789** |

Then actual puzzle values overwrite the cells where they belong. This array represents all *possible* values for a given cell, and we know that the only *possible* value for the first cell is 5 (because the puzzle said so), so that cell holds just 5.

| 5 | 123456789 | 123456789 |
|:---:|:---:|:---:|
| **123456789** | **123456789** | **3** |
| **123456789** | **7** | **123456789** |

In [186]:
def initialize_puzzle_arrays(puzzle):
    """Builds the two arrays the algorithms work on and returns them as a
    tuple: first an array of empty strings (the solved values), then an array
    in which each cell holds either its puzzle value or, for a cell the puzzle
    leaves open, every valid value.

    Keyword arguments:
    puzzle -- string read left to right, top to bottom, one character per
              cell; any character not in VALID_VALUES marks an open cell
    """
    shape = (BOARD_SIZE,BOARD_SIZE)

    # nothing is solved yet
    solved = np.full(shape,'')
    # until the puzzle says otherwise, every value is possible everywhere
    possible = np.full(shape,VALID_VALUES)

    for position, symbol in enumerate(puzzle):
        # placeholders for open cells keep the full list of candidates
        if symbol not in VALID_VALUES:
            continue
        # turn the position in the string into board coordinates
        row, col = divmod(position, BOARD_SIZE)
        possible[row,col] = symbol

    return (solved,possible)

### Algorithms

This section contains all of the algorithms used to solve puzzles. There are **two** NumPy arrays used in these algorithms.

1. One array contains the **solved** values. It starts off empty. Cells are only populated when we are certain that is the value for that cell. A fully solved Sudoku puzzle means that this array is full.
1. One array contains all **possible** values for each cell. It starts off with every cell populated with every possible value in Sudoku. The algorithms in this notebook work towards eliminating possible values in each cell until a cell only has one value. Then that value can be transferred to the array of solved values and cleared. A fully solved Sudoku puzzle means that this array is empty.

This first algorithm handles cells that have only one *possible* value. If there is only one value left in the cell, then that cell is **solved**. The following actions are then taken.

1. Assign that value to the corresponding cell of the *solved* array.
1. Remove that value from every cell in that row. Sudoku rules say that a value can exist only once in a given row.
1. Remove that value from every cell in that column. Sudoku rules say that a value can exist only once in a given column.
1. Remove that value from every cell in that sector. Sudoku rules say that a value can exist only once in a given sector.

This routine runs repeatedly until there are no changes made to the array containing possible values.

In [187]:
def handle_singletons(solved,wip):
    """Moves every singleton (a cell with exactly one possible value) from the
    WIP array to the solved array and removes that value from the singleton's
    row, column, and sector. Cleaning the neighbors can create new singletons,
    so the scan repeats until the WIP array holds none. Returns the number of
    singletons processed.

    Keyword arguments:
    solved -- numpy array containing only solved elements
    wip -- work in progress numpy array containing possible values for each cell
    """
    processed = 0
    pending = np.argwhere(vect_length(wip)==1)
    while pending.size:
        for row, col in pending:
            value = wip[row,col]
            # record the value as the answer for this cell
            solved[row,col] = value
            # the cell itself is part of its own row, so this also clears it in wip
            clean_neighbors(wip,row,col,value)
        processed += len(pending)

        # cleaning may have reduced other cells to a single candidate
        pending = np.argwhere(vect_length(wip)==1)

    # total number of singleton cells handled across all scans
    return processed

This algorithm has a simple concept. If a cell in any row, column, or sector is the only cell to contain a specific value, then that cell is a disguised singleton and is **solved**. Consider the simple example below. The last cell in this sector is the only cell in this sector to contain a possible value of 3. That means that cell must be a 3.

| 124567 | 12456789 | 12456789 |
|:---:|:---:|:---:|
| **124589** | **12456789** | **156789** |
| **456789** | **124789** | **123456** |

This function runs recursively until no more disguised singletons are detected.

In [188]:
def handle_disguised_singletons(arr1, wip, discovered=0):
    """Solves every disguised singleton: a cell that is the only one in its
    row, column, or sector still listing a given value. Such a cell is set to
    that value in the solved array, emptied in the WIP array, and the value is
    removed from its neighbors. Passes over the board repeat until a complete
    pass finds nothing. Returns the total number of cells solved.

    Keyword arguments:
    arr1 -- numpy array containing only solved elements
    wip -- work in progress numpy array containing possible values for each cell
    discovered -- ignored; kept so existing callers that pass it still work

    The previous version recursed only while the count of one pass differed
    from the count of the pass before it. Two consecutive passes that found
    the same number of cells therefore ended the search early, and a pass that
    found nothing triggered one more pass. The loop below stops exactly when a
    pass makes no progress. Every solved cell empties one WIP cell, so the
    loop always terminates.
    """

    def solve_cell(row, col, val):
        """Records val as the answer for (row, col) and cleans up the WIP array."""
        wip[row,col] = ''
        arr1[row,col] = val
        clean_neighbors(wip,row,col,val)

    total = 0
    while True:
        found = 0
        for val in VALID_VALUES:
            # search every row
            for idx in range(BOARD_SIZE):
                hits = np.argwhere(vect_contains(wip[idx,:],val))
                if len(hits) == 1:
                    solve_cell(idx, hits[0][0], val)
                    found += 1

            # search every column
            for idx in range(BOARD_SIZE):
                hits = np.argwhere(vect_contains(wip[:,idx],val))
                if len(hits) == 1:
                    solve_cell(hits[0][0], idx, val)
                    found += 1

            # search every sector
            for idx in range(BOARD_SIZE):
                hits = np.argwhere(vect_contains(get_sector_by_number(wip,idx),val))
                if len(hits) == 1:
                    # hits are relative to the sector; convert to board coordinates
                    row = convert_sector_row(idx, hits[0][0])
                    col = convert_sector_col(idx, hits[0][1])
                    solve_cell(row, col, val)
                    found += 1

        if not found:
            return total
        total += found

This algorithm examines each sector. If any value in a sector exists only in one row or only one column of that sector, then we know it cannot exist in that row or column in other sectors of the puzzle. Consider the example below. The value '1' exists only in the first column of this sector. We don't yet know which cell will contain the value '1', but it will definitely be one of those two. That means that there cannot be a '1' anywhere else in that column of the puzzle. This algorithm removes '1' from other cells in that column.

| 178 | 2456789 | 2456789 |
|:---:|:---:|:---:|
| **179** | **2456789** | **56789** |
| **456789** | **24789** | **23456** |

In [189]:
def handle_rowcol_owners(arr):
    """Looks in each sector for values confined to a single row or a single
    column of that sector and removes those values from the rest of that row
    or column on the board. Passes repeat until one changes nothing. Returns
    the total number of cell changes counted across all passes."""
    total_changes = 0
    while True:
        # take a snapshot for later comparison to count changes
        before = np.copy(arr)

        for sector in range(BOARD_SIZE):
            for value in VALID_VALUES:
                # sector-relative coordinates of all cells containing this value
                hits = np.argwhere(vect_contains(get_sector_by_number(arr,sector), value))
                rows = {hit[0] for hit in hits}
                cols = {hit[1] for hit in hits}

                # one distinct row: every {value} in this sector sits in that row
                if len(rows) == 1:
                    board_row = convert_sector_row(sector, next(iter(rows)))
                    owner_cols = [convert_sector_col(sector, c) for c in cols]
                    for board_col in range(arr.shape[1]):
                        # the cells that own the value keep it
                        if board_col not in owner_cols:
                            arr[board_row,board_col] = str(arr[board_row,board_col]).replace(value,'')

                # one distinct column: every {value} in this sector sits in that column
                if len(cols) == 1:
                    board_col = convert_sector_col(sector, next(iter(cols)))
                    owner_rows = [convert_sector_row(sector, r) for r in rows]
                    for board_row in range(arr.shape[0]):
                        if board_row not in owner_rows:
                            arr[board_row,board_col] = str(arr[board_row,board_col]).replace(value,'')

        # compare the [maybe] updated array with the snapshot to detect changes
        changed = np.sum(arr != before)
        if not changed:
            return total_changes
        total_changes = changed + total_changes

In [190]:
def handle_naked_groups(arr):
    """Identifies naked groups (twins, triplets, quadruplets, etc.) in a row, column, or sector
    and removes those group values from other cells in that row, column, or sector.
    Runs recursively until a pass changes nothing and returns the total number of
    cell changes counted across all passes.

    Keyword arguments:
    arr -- work in progress numpy array containing possible values for each cell
    """

    def get_special_patterns(unit):
        """Returns valid patterns for existing naked groups. A pattern of length N
        must exist exactly N times in the unit to be considered a naked group."""
        occurrences = dict()
        # reading is all that is needed, so a plain flattened copy replaces
        # the read/write iterator that was used before
        for cell in unit.flatten():
            key = str(cell)
            occurrences[key] = occurrences.get(key,0) + 1
        # an empty cell has length 0 and is seen at least once, so it never matches
        return [pattern for pattern, seen in occurrences.items() if len(pattern) == seen]

    # take a snapshot for later comparison to count changes
    snapshot = np.copy(arr)

    for row in range(BOARD_SIZE):
        for pattern in get_special_patterns(arr[row,:]):
            arr[row,:] = vect_conditional_replace(arr[row,:],pattern)

    for col in range(BOARD_SIZE):
        for pattern in get_special_patterns(arr[:,col]):
            arr[:,col] = vect_conditional_replace(arr[:,col],pattern)

    # step from sector to sector; the stride is SECTOR_SIZE (it was hard-coded
    # as 3, which only matched a 9x9 board)
    for row in range(0,BOARD_SIZE,SECTOR_SIZE):
        for col in range(0,BOARD_SIZE,SECTOR_SIZE):
            rows = slice(row,row+SECTOR_SIZE)
            cols = slice(col,col+SECTOR_SIZE)
            for pattern in get_special_patterns(arr[rows,cols]):
                arr[rows,cols] = vect_conditional_replace(arr[rows,cols],pattern)

    altered_cells = np.sum(arr != snapshot)
    if altered_cells:
        # removing candidates can expose new naked groups
        return altered_cells + handle_naked_groups(arr)
    else:
        return 0


In [191]:
def handle_hidden_groups(arr):
    """Identifies hidden groups (twins, triples, etc.) in a row, column, or sector
    and sets those cells equal to the pattern, effectively unhiding the groups. This
    does not need to run recursively. Returns the number of cells changed."""

    # How a single unit (one row, column, or sector) is tested for one pattern,
    # for example the pattern '56':
    #
    # 1. Build one boolean mask per character of the pattern. Each mask says
    #    which cells of the unit contain that character. If '5' is in the
    #    first and second cells and '6' is in the first and last cells, the
    #    masks are
    #
    #        [[True, True,  False, False, False, False, False, False, False],
    #         [True, False, False, False, False, False, False, False, True ]]
    #
    # 2. The characters must live in exactly the same cells, so every mask
    #    has to equal the first one. The masks above fail this test.
    #
    # 3. The number of cells involved must equal the pattern length. If '56'
    #    shows up in 3 cells we can't act on it; in exactly 2 cells we can.
    #    Since all masks are identical at this point, counting the True
    #    values of the first mask is enough.
    #
    # When both tests pass, every matching cell is set to the pattern. For
    # the pattern '48' and the unit
    #
    #        [ 489, 19, 348, 139, 139 ]
    #
    # both characters appear in exactly two cells, and they are the same
    # cells (0 and 2), so the unit becomes
    #
    #        [ 48, 19, 48, 139, 139 ]
    def unhide(unit, pattern):
        masks = [vect_contains(unit,char) for char in pattern]
        if np.all(masks == masks[0]) and np.sum(masks[0]) == len(pattern):
            # unit is a slice of arr, so this writes straight into the board
            unit[masks[0]] = pattern

    # take a snapshot for later comparison to count changes
    snapshot = np.copy(arr)

    # pattern length of 2 = twins
    # pattern length of 3 = triples
    for pattern_length in [2,3]:

        # for every possible combination of twins or triples...
        for comb in combinations(VALID_VALUES,pattern_length):
            pattern = "".join(comb)

            # ...check every column, then every row, then every sector
            for col in range(BOARD_SIZE):
                unhide(arr[:,col], pattern)
            for row in range(BOARD_SIZE):
                unhide(arr[row,:], pattern)
            for sector in range(BOARD_SIZE):
                unhide(get_sector_by_number(arr,sector), pattern)

    return np.sum(arr != snapshot)

In [356]:
def hidden_groups_new(arr):
    """Unhides hidden twins and triples in every column, row, and sector.
    Same test as handle_hidden_groups, except that for each unit only the
    patterns that can be built from the digits still present in that unit
    are tried. Returns the number of cells changed."""

    def candidate_patterns(unit, pattern_length):
        """Yields every pattern of the given length that can be formed from
        the digits found in the unit at the moment of the call."""
        digits = sorted({digit for cell in unit.flatten() for digit in cell})
        return ("".join(comb) for comb in combinations(digits,pattern_length))

    def unhide(unit, pattern):
        """Sets the cells holding the pattern's digits to the pattern when all
        of its digits occupy exactly the same len(pattern) cells of the unit."""
        masks = [vect_contains(unit,digit) for digit in pattern]
        if np.all(masks == masks[0]) and np.sum(masks[0]) == len(pattern):
            # unit is a slice of arr, so this writes straight into the board
            unit[masks[0]] = pattern

    # take a snapshot for later comparison to count changes
    snapshot = np.copy(arr)

    # pattern length of 2 = twins, 3 = triples
    for pattern_length in [2,3]:

        for col in range(BOARD_SIZE):
            column = arr[:,col]
            for pattern in candidate_patterns(column, pattern_length):
                unhide(column, pattern)

        for row in range(BOARD_SIZE):
            line = arr[row,:]
            for pattern in candidate_patterns(line, pattern_length):
                unhide(line, pattern)

        for sector in range(BOARD_SIZE):
            block = get_sector_by_number(arr,sector)
            for pattern in candidate_patterns(block, pattern_length):
                unhide(block, pattern)

    return np.sum(arr != snapshot)

# print_one_board(possible_arr)
# hidden_groups_new(possible_arr)
# print_one_board(possible_arr)

In [355]:
# number of distinct 3-value patterns that can be drawn from VALID_VALUES
sum(1 for _ in combinations(VALID_VALUES,3))

36

In [368]:
# Scratch cell: count how many twin/triple patterns have to be tried for one
# row when only the digits still present in that row are considered.

# collect the distinct digits found in the cells of row 4 of possible_arr
special = set({})
for cell in possible_arr[4,:].flatten():
    for digit in list(cell):
        special.add(digit)

# build every pattern of length 2 and of length 3 from those digits
retval = []
for length in [2,3]:
    for comb in combinations(sorted(special),length):
        retval.append("".join(comb))

print(len(retval))

35


---

## Solve multiple puzzles

The `for` loop below steps through each of the puzzles in `puzzle_dict` and attempts to solve each one. The success/failure is reported below. You can experiment with changing the order of the algorithms to look for efficiencies. Remember that each algorithm is recursive, so if a puzzle is solved in a single iteration, it could have been solved by a single algorithm that ran 6 times.

In [192]:
# Puzzles keyed by name. Each puzzle is a string read left to right, top to
# bottom, with '.' standing for an empty cell.
puzzle_dict = {
    'easy1': '5..98.67.6......31.2.613.4..968.21.7..8..5.9.7.319....962.7..1.1.5...76..7.5..9..',
    'easy2': '...8.74...58.41...7.......25326.894.48.12937.....5.26..279....4....1.8..86....5.9',
    'easy3': '81.7.6....6.3....14.78......584..3...4.16..58.9..3..7432.6.719..8691..47..9.5...3',
    'easy4': '4.2.....9.96.34......2...1....47..83.6......48...12.9.61894.2.57.43519.8.3....7.1',
    'easy5': '64...5...53.9482.6.27..19.5.9......1...75....753.....2.7.51.82...549...7.62387.1.',
    'medium1': '.29.71..3..8...6..3...5....5.....97......4...4.75.8..1.6.42.3..2..9....6.916...52',
    'medium2': '5.61.2......65..1781.....5....2...9.9..5.7..8.51.39.4...8...4.9765984.....9......',
    'medium3': '.5.....79..3..81..29..645..5.64...13.2.9.56.7.....3.........34......972.4..2....1',
    'medium4': '..59.46...462.7831.2..1......7.9...........5...2.453.6....78...4.3...92.5.....7.3',
    'medium5': '.....49.3..4.196..1.9.862.4.1.94..2..3.6.2.9..6..7....65...1.7..9......8........2',
    'hard1': '..791.5....1.....3..9.4...2.4...83.....3.1....6..5...8.2..9...5...........4.8..7.',
    'hard2': '.6......4.5..61.8..1..9...32...8...7...6.4...9..7...4..9..7.5..3...1...8.........',
    'hard3': '...34..1..5...6...47....56.6.........341...8....5.8..756...3......68.1..1.7254...',
    'hard4': '.7.8..3..9...7.5......69...8..6.4..2.9......6.1.5....4.8.4..1.............27....5',
    'hard5': '82.5.........3.257...67.9..4.61...3...........5..8419.9.2.1......57...2.......561',
    # 'expert1': '.....2..3.4...16.7..1.....4.8.1.....43.....62...7..............6.98.73.....3.47..',
    'expert1': '.2...38.5..56....3......79.8...4....3.....1.8..6.9....13..7.5.......2..7..4......',
    'expert2': '.8....9.....7..1....6..2..475...9...........6..9.48..3.48....3.....1.....3.5..8..',
    'expert3': '..8....4.9.....1......39.2....5..........15.74..7....8.1..6.....4.21....875......',
    'expert4': '438........1.....7..5....21...83.1.........3.5..4...8......26.9.4...5...6...1....',
    'expert5': '4......787..1..4.9..237....1..4.6....6........4..531.....5.....813.....7....2....',
    'evil1': '.86.1..47..15.............2.6....5......4.2..7..9...64.7..5..81......6..9....3...',
    'evil2': '..9.4...663..2.7....8..3.....3......41...2..8....7..5....4..8..26...1..49........',
    'evil3': '.8.37.9.24...5...........7...5.3.7.1.1.....6....7...8..2.9..1.3........6..8..2...',
    'evil4': '.5..8.......3...9.21..9...862.7..1....5.2......3.....6.....47..89..3...1..6......',
    'evil5': '9.......4.3..7.69...28......5......1....4...38..7..45.3....9..........1..9..6.57.',
}

In [348]:
# Results table: one row per puzzle, one column per statistic. The column
# order matches the slots of change_counter used below.
df = pd.DataFrame(
    index=puzzle_dict.keys(),
    columns=['solved','iterations','singletons','rowcol owners','naked groups','hidden groups','disguised singletons']
)

for name, puzzle in puzzle_dict.items():

    solved_arr,possible_arr = initialize_puzzle_arrays(puzzle)

    # current and previous hold the total number of changes after the latest
    # pass and the pass before it; they start out different so the loop is
    # entered, and loop_counter ends up as the number of passes minus one
    loop_counter = -1
    current = -1
    previous = 0

    # slot 0 = solved flag, slot 1 = iterations, slots 2-6 = changes made by
    # each algorithm (same order as the DataFrame columns)
    change_counter = np.zeros(7, dtype=int)

    # keep running all algorithms until a full pass changes nothing
    while previous != current:
        previous = current
        loop_counter += 1
        change_counter[2] += handle_singletons(solved_arr,possible_arr)
        change_counter[3] += handle_rowcol_owners(possible_arr)
        change_counter[4] += handle_naked_groups(possible_arr)
        # change_counter[5] += handle_hidden_groups(possible_arr)
        change_counter[5] += hidden_groups_new(possible_arr)
        change_counter[6] += handle_disguised_singletons(solved_arr,possible_arr)
        # slots 0 and 1 are still zero here, so this is the total change count
        current = np.sum(change_counter)

    # the puzzle counts as solved when no cell of the solved array is empty;
    # the error count returned by check_board is not looked at
    unsolved, _ = check_board(solved_arr)
    change_counter[0] = False if unsolved else True
    change_counter[1] = loop_counter

    df.loc[name] = change_counter

# change the 'solved' column from integer 1 and 0 to boolean for readability
df = df.astype({"solved": bool})
# create row of totals
# (the result is not assigned; as the last expression of the cell it is only displayed)
pd.concat([df,df.sum().to_frame(name='Total').T])
        

Unnamed: 0,solved,iterations,singletons,rowcol owners,naked groups,hidden groups,disguised singletons
easy1,True,1,81,0,0,0,0
easy2,True,1,42,7,45,0,39
easy3,True,1,81,0,0,0,0
easy4,True,1,42,7,45,0,39
easy5,True,1,81,0,0,0,0
medium1,True,1,81,0,0,0,0
medium2,True,1,35,8,8,0,46
medium3,True,1,62,0,20,0,19
medium4,True,1,81,0,0,0,0
medium5,True,1,81,0,0,0,0


In [310]:
# append a row labelled 'Total' that holds the sum of every column
totals_row = df.sum().to_frame(name='Total').transpose()
pd.concat([df, totals_row])

Unnamed: 0,solved,iterations,singletons,rowcol owners,naked groups,hidden groups,disguised singletons
easy1,True,1,81,0,0,0,0
easy2,True,1,42,7,45,0,39
easy3,True,1,81,0,0,0,0
easy4,True,1,42,7,45,0,39
easy5,True,1,81,0,0,0,0
medium1,True,1,81,0,0,0,0
medium2,True,1,35,8,8,0,46
medium3,True,1,62,0,20,0,19
medium4,True,1,81,0,0,0,0
medium5,True,1,81,0,0,0,0


---

## Load the puzzle

Before the puzzle is imported, two 2D numpy arrays are created. Both are BOARD_SIZE x BOARD_SIZE (traditional Sudoku is 9x9). The arrays are:

1. An array representing all "solved" cells, which starts out blank.
2. An array representing the possible values for each cell. Before the puzzle is imported, every value is possible for every cell, so the array looks something like the table below (only the first sector is shown).

| |  |  |  |
|:------:|:------:|:------:|:------:|
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |

When the puzzle is read in, the "possible" cell is overwritten by the value specified in the puzzle. Most cells will remain set to every value as shown below. In the example below, the puzzle specifies only one value for the first sector while the rest of the cells are blank (so every value is possible).

| |  |  |  |
|:------:|:------:|:------:|:------:|
|  | 123456789 | 123456789 | 5 |
|  | 123456789 | 123456789 | 123456789 |
|  | 123456789 | 123456789 | 123456789 |



In [359]:
# solved_arr,possible_arr = initialize_puzzle_arrays(puzzle_dict['expert5'])
solved_arr,possible_arr = initialize_puzzle_arrays(puzzle_dict['evil4'])

# a cell given by the puzzle is one that holds exactly one possible value
solved = np.count_nonzero(vect_length(possible_arr) == 1)
print(F"Puzzle starts with {solved} solved locations.")

print_one_board(possible_arr)

Puzzle starts with 23 solved locations.
+---------------------------------+---------------------------------+---------------------------------+
| 123456789      5      123456789 | 123456789      8      123456789 | 123456789  123456789  123456789 |
| 123456789  123456789  123456789 |     3      123456789  123456789 | 123456789      9      123456789 |
|     2          1      123456789 | 123456789      9      123456789 | 123456789  123456789      8     |
+---------------------------------+---------------------------------+---------------------------------+
|     6          2      123456789 |     7      123456789  123456789 |     1      123456789  123456789 |
| 123456789  123456789      5     | 123456789      2      123456789 | 123456789  123456789  123456789 |
| 123456789  123456789      3     | 123456789  123456789  123456789 | 123456789  123456789      6     |
+---------------------------------+---------------------------------+---------------------------------+
| 123456789  123456789  

---

## Solve the puzzle

### Step 1: Handle singletons

Look at every cell that has only one **possible** value.

The code cell below calls a function that runs repeatedly until it cannot simplify the board any further.

In [360]:
# Step 1: show the boards, run the singleton pass, and report how many
# updates it made. The boards are reprinted only if something changed.
print_two_boards(solved_arr, possible_arr)
updates = handle_singletons(solved_arr, possible_arr)
print(f"Updated {updates} singletons.")
if updates:
    print_two_boards(solved_arr, possible_arr)

+---------+---------+---------+  +------------------------------+------------------------------+------------------------------+
|         |         |         |  |123456789     5     123456789 |123456789     8     123456789 |123456789 123456789 123456789 |
|         |         |         |  |123456789 123456789 123456789 |    3     123456789 123456789 |123456789     9     123456789 |
|         |         |         |  |    2         1     123456789 |123456789     9     123456789 |123456789 123456789     8     |
+---------+---------+---------+  +------------------------------+------------------------------+------------------------------+
|         |         |         |  |    6         2     123456789 |    7     123456789 123456789 |    1     123456789 123456789 |
|         |         |         |  |123456789 123456789     5     |123456789     2     123456789 |123456789 123456789 123456789 |
|         |         |         |  |123456789 123456789     3     |123456789 123456789 123456789 |12345678

### Step 2: Handle row and column owners

In [361]:
# Step 2: show the boards, run the row/column-owner pass on the "possible"
# array, and report how many cells it updated.
print_two_boards(solved_arr, possible_arr)
updates = handle_rowcol_owners(possible_arr)
print(f"Updated {updates} cells based on row and/or column owners.")
if updates:
    # NOTE(review): disabled follow-up singleton pass. It refers to `board`,
    # which is not defined in the cells visible here (the arrays in use are
    # `solved_arr` / `possible_arr`) -- confirm the name before re-enabling.
    # _ = handle_singletons(board, possible_arr)
    print_two_boards(solved_arr, possible_arr)

+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    5    |    8    |         |  | 3479           479  | 1246          1267  | 2346  123467  2347  |
|         | 3       |    9    |  |  47    4678    478  |        14567  12567 | 2456          2457  |
| 2  1    |    9    |       8 |  |                47   |  456           567  | 3456   34567        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
| 6  2    | 7       | 1       |  |                489  |         45    3589  |        3458   3459  |
|       5 |    2    |         |  | 1479    478         | 14689         13689 | 3489   3478   3479  |
|       3 |         |       6 |  | 1479    478         | 14589   145   1589  | 24589  24578        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    3    |       4 | 7       |  |  15            12   |125689   156         |        2568 

### Step 3: Handle hidden groups

In [362]:
# Step 3: show the boards, run the hidden-group pass on the "possible" array,
# and report how many cells it changed.
print_two_boards(solved_arr, possible_arr)
# Alternative implementation, currently disabled:
# changed = handle_hidden_groups(possible_arr)
changed = hidden_groups_new(possible_arr)
print(f"Changed {changed} cells based on hidden groups.")
if changed:
    print_two_boards(solved_arr, possible_arr)

+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    5    |    8    |         |  | 3479           479  |  246           267  |  246   12467   247  |
|         | 3       |    9    |  |  47    4678    478  |        1457   1257  |  245          2457  |
| 2  1    |    9    |       8 |  |                47   |  456           567  | 3456   34567        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
| 6  2    | 7       | 1       |  |                489  |         45    3589  |        3458   3459  |
|       5 |    2    |         |  | 1479    478         | 14689         13689 | 3489   3478   3479  |
|       3 |         |       6 |  | 1479    478         | 14589   145   1589  | 24589  24578        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    3    |       4 | 7       |  |   5            12   | 25689   56          |        2568 

### Step 4: Handle naked groups

In [363]:
# Step 4: show the boards, run the naked-group pass on the "possible" array,
# and report how many cells it updated.
print_two_boards(solved_arr, possible_arr)
updates = handle_naked_groups(possible_arr)
print(f"Updated {updates} cells based on naked groups.")
if updates:
    print_two_boards(solved_arr, possible_arr)

+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    5    |    8    |         |  | 3479           479  |  246           267  |  246   12467   247  |
|         | 3       |    9    |  |  47    4678    478  |        1457   1257  |  245          2457  |
| 2  1    |    9    |       8 |  |                47   |  456           567  | 3456   34567        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
| 6  2    | 7       | 1       |  |                489  |         45    3589  |        3458   3459  |
|       5 |    2    |         |  | 1479    478         | 14689         13689 | 3489   3478   3479  |
|       3 |         |       6 |  | 1479    478         | 14589   145   1589  | 24589  24578        |
+---------+---------+---------+  +---------------------+---------------------+---------------------+
|    3    |       4 | 7       |  |   5            12   | 25689   56          |        2568 

### Step 5: Handle disguised singletons

In [364]:
# Step 5: show the boards, run the disguised-singleton pass, and report how
# many were located. The boards are reprinted only if any were found.
print_two_boards(solved_arr, possible_arr)
disguised = handle_disguised_singletons(solved_arr, possible_arr)
print(f"Located {disguised} disguised singletons.")
if disguised:
    print_two_boards(solved_arr, possible_arr)

+---------+---------+---------+  +---------------+---------------+---------------+
|    5    |    8    |         |  |  3         9  | 246       26  | 46    1    7  |
|         | 3       |    9    |  |  4    6    8  |       7    1  | 25        25  |
| 2  1    |    9    |       8 |  |            7  | 46         5  |  3   46       |
+---------+---------+---------+  +---------------+---------------+---------------+
| 6  2    | 7       | 1       |  |            4  |       5   389 |      38   39  |
|       5 |    2    |         |  | 19   78       |1689      3689 | 89   378  349 |
|       3 |         |       6 |  | 19   78       | 189   4   89  |2589 2578      |
+---------+---------+---------+  +---------------+---------------+---------------+
|    3    |       4 | 7       |  |  5         1  | 289   6       |      28   29  |
| 8  9    |    3    |       1 |  |            2  |  5         7  | 46   46       |
|       6 |         |         |  |  7    4       | 289   1   289 |2589 2358 2359 |
+---

In [365]:
# Summarise the current state of the solved board: `check_board` returns two
# values, reported here as the unsolved count and the error count.
unsolved, errors = check_board(solved_arr)
print(f"Unsolved: {unsolved}")
print(f"Errors:   {errors}")

Unsolved: 32
Errors:   0
