Copyright **`(c)`** 2021 Giovanni Squillero `<squillero@polito.it>`  
`https://github.com/squillero/computational-intelligence`  
Free for personal or classroom use; see 'LICENCE.md' for details.

In [71]:
import logging
from collections import deque
from pprint import pprint
import numpy as np

logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', datefmt='%H:%M:%S', level=logging.INFO)

In [72]:
def _contains_duplicates(X):
    return np.sum(np.unique(X)) != np.sum(X)

def contains_duplicates(sol):
    return any(_contains_duplicates(sol[r,:]) for r in range(9)) or \
           any(_contains_duplicates(sol[:,r]) for r in range(9)) or \
           any(_contains_duplicates(sol[r:r+3:,c:c+3]) for r in range(0,9,3) for c in range(0,9,3))

def valid_solution(sol):
    return not contains_duplicates(sol) and np.sum(sol) == (1+2+3+4+5+6+7+8+9) * 9

# the function write down digits in the cells that have only one possible value
# and returns a matrix where, for each cell, is saved the number of possible digits
def pre_process(sol, n):
    # in each cell is saved the number of possible digits
    actions = np.empty(sol.shape)
    # I use nan instead of zeros to exploit nanargmin function inside dfsolve_revisited
    actions[:] = np.nan
    for i, j in zip(*np.where(sol == 0)):
        digit = 0
        for c in range(1, 10):
            sol[i, j] = c
            if not contains_duplicates(sol):
                digit = c
                if np.isnan(actions[i, j]):
                    actions[i, j] = 1
                else:  
                    actions[i, j] += 1
        if actions[i, j] != 1:
            sol[i, j] = 0
        # if there is only one possible digit, I already write it down
        else:
            sol[i, j] = digit
            actions[i, j] = np.nan
    return actions

def print_sudoku(sudoku):
    print("+-------+-------+-------+")
    for b in range(0, 9, 3):
        for r in range(3):
            print("|", " | ".join(" ".join(str(_) for _ in sudoku[b+r, c:c+3]) for c in range(0, 9, 3)), "|")
        print("+-------+-------+-------+")


In [73]:
def dfsolve(sudoku):
    """Vanilla depth-first solver for sudoku puzzles"""
    frontier = deque([sudoku.copy()])
    num_nodes = 0
    while frontier:
        node = frontier.popleft()
        num_nodes += 1

        if valid_solution(node):
            logging.info(f"Solved after expanding {num_nodes:,} nodes")
            return node

        for i, j in zip(*np.where(node == 0)):
            for c in range(1, 10):
                node[i, j] = c
                if not contains_duplicates(node):
                    frontier.appendleft(node.copy())
    logging.info(f"Giving up after expanding {num_nodes:,} nodes")
    return None

In [74]:
# For variable ordering (which square I try first), I choose the (or one of the) square with the minimum number of possible values.
# to reduce the probability of guessing wrong
# For value ordering (which digit I try first), I consider the digits in numeric order
# Lastly, at each expansion iteration I immediately write down digits in those cells that have only one possible digit
def dfsolve_revisited(sudoku):
    """Revisited depth-first solver for sudoku puzzles"""
    frontier = deque([sudoku.copy()])
    num_nodes = 0
    while frontier:
        num_nodes += 1
        node = frontier.popleft()
        actions = pre_process(node, num_nodes)

        if valid_solution(node):
            logging.info(f"Solved after expanding {num_nodes:,} nodes")
            return node
        
        # if empty cells with at least one possible digit are less than the number of empty cells, backtracking occurs
        not_nan_count = np.count_nonzero(~np.isnan(actions))
        zeros_count = np.count_nonzero(node == 0)
        if not_nan_count >= zeros_count : 
            # i, j are the index of the cell with less valid digits  
            i, j = np.unravel_index(np.nanargmin(actions), actions.shape)
            for c in range(1, 10):
                node[i, j] = c
                if not contains_duplicates(node):
                    frontier.appendleft(node.copy())

    logging.info(f"Giving up after expanding {num_nodes:,} nodes")
    return None


In [75]:
# Before writing dfsolve_revisited I tried solving the sudoku with a breadth first algorithm
# As it was immaginable it behaved very poorly
def bfsolve(sudoku):
    """breadth-first solver for sudoku puzzles"""
    frontier = deque([sudoku.copy()])
    num_nodes = 0
    while len(frontier) > 0:
        node = frontier.popleft()
        num_nodes += 1

        if valid_solution(node):
            logging.info(f"Solved after expanding {num_nodes:,} nodes")
            return node

        for i, j in zip(*np.where(node == 0)):
            for c in range(1, 10):
                node[i, j] = c
                if not contains_duplicates(node):
                    frontier.append(node.copy())
    logging.info(f"Giving up after expanding {num_nodes:,} nodes")
    return None

In [76]:
simple_sudoku = np.array([[6, 0, 4,    0, 7, 0,    0, 0, 1],
                          [0, 5, 0,    0, 0, 0,    0, 7, 0], 
                          [7, 0, 0,    5, 9, 6,    8, 3, 4], 
       
                          [0, 8, 0,    0, 0, 2,    4, 9, 0], 
                          [1, 0, 0,    0, 0, 0,    0, 0, 3], 
                          [0, 6, 9,    7, 0, 0,    0, 5, 0], 
       
                          [9, 1, 8,    3, 6, 7,    0, 0, 5], 
                          [0, 4, 0,    0, 0, 0,    0, 6, 0], 
                          [2, 0, 0,    0, 5, 0,    7, 0, 8]], dtype=np.int8)

In [77]:
def sudoku_generator(sudokus=1, *, kappa=5, random_seed=None):
    if random_seed:
        np.random.seed(random_seed)
    for puzzle in range(sudokus):
        sudoku = np.zeros((9, 9), dtype=np.int8)
        for cell in range(np.random.randint(kappa)):
            for p, val in zip(np.random.randint(0, 8, size=(9, 2)), range(1, 10)):
                tmp = sudoku.copy()
                sudoku[tuple(p)] = val
                if contains_duplicates(sudoku):
                    sudoku = tmp
        yield sudoku.copy()

In [78]:
for sudoku in sudoku_generator(random_seed=44):
    print_sudoku(sudoku)
    print('vanilla dfsolve')
    solution = dfsolve(sudoku)
    if solution is not None:
        print_sudoku(solution)
    print('')
    print('-----------------------------------------------------------------------------------------------')
    print('')
    print('revisited dfsolve')
    solution = dfsolve_revisited(sudoku)
    if solution is not None:
        print_sudoku(solution)

+-------+-------+-------+
| 0 7 0 | 0 0 0 | 0 6 0 |
| 0 4 0 | 8 0 0 | 0 2 0 |
| 6 5 0 | 0 9 0 | 0 0 0 |
+-------+-------+-------+
| 8 1 0 | 0 0 0 | 0 0 0 |
| 0 0 3 | 4 0 1 | 0 0 0 |
| 0 0 0 | 2 0 7 | 0 0 0 |
+-------+-------+-------+
| 0 0 0 | 0 0 0 | 0 0 0 |
| 0 0 5 | 9 6 0 | 7 0 0 |
| 0 0 0 | 0 0 0 | 0 0 0 |
+-------+-------+-------+
vanilla dfsolve


[18:15:53] INFO: Solved after expanding 803 nodes


+-------+-------+-------+
| 9 7 8 | 5 4 2 | 3 6 1 |
| 3 4 1 | 8 7 6 | 9 2 5 |
| 6 5 2 | 1 9 3 | 8 7 4 |
+-------+-------+-------+
| 8 1 7 | 6 5 9 | 4 3 2 |
| 5 2 3 | 4 8 1 | 6 9 7 |
| 4 9 6 | 2 3 7 | 1 5 8 |
+-------+-------+-------+
| 7 6 4 | 3 1 5 | 2 8 9 |
| 2 8 5 | 9 6 4 | 7 1 3 |
| 1 3 9 | 7 2 8 | 5 4 6 |
+-------+-------+-------+

-----------------------------------------------------------------------------------------------

revisited dfsolve


[18:15:56] INFO: Solved after expanding 31 nodes


+-------+-------+-------+
| 2 7 8 | 5 1 4 | 9 6 3 |
| 3 4 9 | 8 7 6 | 5 2 1 |
| 6 5 1 | 3 9 2 | 8 7 4 |
+-------+-------+-------+
| 8 1 2 | 6 5 9 | 3 4 7 |
| 7 9 3 | 4 8 1 | 6 5 2 |
| 5 6 4 | 2 3 7 | 1 8 9 |
+-------+-------+-------+
| 9 8 7 | 1 4 5 | 2 3 6 |
| 4 2 5 | 9 6 3 | 7 1 8 |
| 1 3 6 | 7 2 8 | 4 9 5 |
+-------+-------+-------+
