In [1]:
# Download sudoku dataset from:
# https://www.kaggle.com/datasets/radcliffe/3-million-sudoku-puzzles-with-ratings

Import necessary libraries

In [2]:
import pandas as pd
import numpy as np
import time

### Backtracking algorithm

In [3]:
# function used in AIMA book code
def flatten(seqs):
    return sum(seqs, [])

In [4]:
# to convert input string into grid
def string_to_grid(puzzle_str):
    """
    Converts a string representation of a sudoku puzzle into a 4D array grid.

    Parameters
    ----------
    puzzle_str : str
        A string representing a sudoku puzzle, where each digit represents a cell 
        value (0 or "." for empty cells) and the digits are ordered left-to-right and top-to-bottom.

    Returns
    -------
    grid : list of lists of lists of lists
        A 4D array representing the sudoku puzzle, with dimensions (3, 3, 3, 3).
        The first two dimensions represent the 3x3 blocks of the puzzle (each containing 9 cells).
        The last two dimensions represent the cells within each block (each containing a single value
        between 0 and 9).
    """
    # empty grid to fill in 
    grid = [
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]],
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]],
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]]]

    for rowblock in range(3):
        for row in range(3):
            for triple in range(3):
                for cell in range(3):
                    rown = (rowblock*3)+row
                    coln = (triple*3)+cell
                    celln = rown*9+coln
                    value = puzzle_str[celln]
                    if value == '.':
                        value = 0
                    else:
                        value = int(value)
                    grid[rowblock][row][triple][cell] = value
    return grid

In [5]:
def is_valid(grid, row, col, num):
    """
    Checks if a number fits in a Sudoku grid.

    Parameters
    ----------
    grid : list of lists
        A 9x9 Sudoku grid, represented as a list of lists, where each inner list 
        corresponds to a row of the grid.
    row : int
        The index of the row where the number will be placed.
    col : int
        The index of the column where the number will be placed.
    num : int
        The number to be placed in the Sudoku grid at position (row, col).

    Returns
    -------
    bool
        Returns True if the number can be placed in the specified position 
        without violating the rules of Sudoku. Returns False otherwise.
    """
    # create lists for rows, cols, boxes
    rows = flatten([list(map(flatten, brow)) for brow in grid])
    cols = [list(col) for col in zip(*rows)]
    boxes = flatten([list(map(flatten, zip(*brow))) for brow in grid])
    # create box index
    # (3 x row block number) + column block number
    # block: 3 rows or cols together
    box = (3*(row//3))+(col//3)
    # check if num is already used in the row
    for i in range(9):
        if rows[row][i] == num:
            return False
    # check if num is already used in the column
    for i in range(9):
        if cols[col][i] == num:
            return False
    # check if num is already used in the 3x3 box
    for i in range(9):
        if boxes[box][i] == num:
            return False
    return True

In [6]:
def find_empty_space(grid):
    """
    Find the next empty space in the given Sudoku grid.

    Parameters
    ----------
    grid : list of lists
        The Sudoku grid to search for empty spaces.
    
    Returns
    -------
    tuple or None
        If an empty space is found, the function returns a tuple of the row and column
        indices of the empty space. If no empty spaces are found, the function returns
        None.
    """
    # create list of rows
    rows = flatten([list(map(flatten, brow)) for brow in grid])
    # for each row and col if cell = 0 return cell coordinates
    for row in range(9):
        for col in range(9):
            if rows[row][col] == 0:
                return row, col
    return None

In [7]:
def backtracking(grid, counter=0, backtrack_counter=0):
    """
    Tries to solve a Sudoku puzzle using the backtracking algorithm.

    Parameters
    ----------
    grid : list of lists of lists of lists of int
        The grid to be solved. The first two levels represent the rows and columns of the Sudoku
        grid, respectively. The third level represents the blocks of the grid (3x3 sub-grids), and
        the fourth level represents the cells of each block. A value of 0 means the cell is empty.
    counter : int, optional
        The current count of cell assignments during the backtracking process. Default is 0.
    backtrack_counter : int, optional
        The current count of backtrackings during the backtracking process. Default is 0.

    Returns
    -------
    tuple
        A tuple with three values: the solved grid (as a list of lists of lists of lists of int),
        the number of cell assignments made during the backtracking process, and the number of
        times the algorithm backtracked.

    Notes
    -----
    This function works by finding the first empty cell of the Sudoku grid, and trying to fill it
    with a number from 1 to 9 that doesn't violate the Sudoku constraints (i.e., no repeated
    numbers in the same row, column, or block). If a number is found that fits the empty cell, the
    function recursively calls itself with the updated grid. If the function is unable to find a
    valid number for the current cell, it backtracks to the previous cell and tries a different
    number until a solution is found or all possibilities have been exhausted.
    """
    
    # find first empty cell
    empty_cell = find_empty_space(grid)
    # if there are no empty cells left, the puzzle is solved
    if empty_cell is None:  
        return grid, counter, backtrack_counter
    else:
        # coordinates of empty cell
        row, col = empty_cell
        # find row and col block of empty cell
        row_block = row // 3
        col_block = col // 3
        # for each number in domain (1-9)
        for num in range(1, 10):
            # if the number fits in coordinates
            if is_valid(grid, row, col, num):
                # cell = num
                counter += 1
                grid[row_block][row % 3][col_block][col % 3] = num
                # recursively call solve with the updated grid
                result = backtracking(grid, counter, backtrack_counter)
                if result is not None:
                    return result
                # if not valid (backtrack) set cell to 0
                grid[row_block][row % 3][col_block][col % 3] = 0
                backtrack_counter += 1
    # if no solution is found return none -> goes to above line (set cell to 0)
    return None

### AC-3 algorithm

In [8]:
def create_grid():
    """
    Creates a sudoku grid with coordinates.

    Returns:
    --------
    coord_grid : list of lists of lists of lists of tuples
        A nested list of subgrids, where each subgrid represents a 3x3 block of the sudoku puzzle.
        The values in the subgrid are tuples representing the (row, column) coordinates of each cell in the subgrid.
    """
    
    # create sudoku grid with coordinates
    coords = []
    for r in range(9):
        for c in range(9):
            coords += [(r,c)]

    coord_grid = [
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]],
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]],
        [[[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]],
        [[0,0,0],[0,0,0],[0,0,0]]]]

    for rowblock in range(3):
        for row in range(3):
            for triple in range(3):
                for cell in range(3):
                    rown = (rowblock*3)+row
                    coln = (triple*3)+cell
                    celln = rown*9+coln
                    coord_grid[rowblock][row][triple][cell] = coords[celln]
    return coord_grid

In [9]:
def create_domains(grid):
    """
    Given a Sudoku grid, create a dictionary of domains for all cells.

    Parameters
    ----------
    grid : list of lists of lists of lists
        A sudoku grid

    Returns
    -------
    domains : dict
        A dictionary where the keys are the coordinates of empty cells in the format "(row, col)" 
        The values are sets of possible values for that cell. 
        Any empty cell will have a domain of {1,2,3,4,5,6,7,8,9}
        Any cell that has a value will have a domain of that value.

    Example:
    >>> grid =  [[[[0, 0, 3], [0, 2, 0], [6, 0, 0]],
                  [[9, 0, 0], [3, 0, 5], [0, 0, 1]],
                  [[0, 0, 1], [8, 0, 6], [4, 0, 0]]],
                 [[[0, 0, 8], [1, 0, 2], [9, 0, 0]],
                  [[7, 0, 0], [0, 0, 0], [0, 0, 8]],
                  [[0, 0, 6], [7, 0, 8], [2, 0, 0]]],
                 [[[0, 0, 2], [6, 0, 9], [5, 0, 0]],
                  [[8, 0, 0], [2, 0, 3], [0, 0, 9]],
                  [[0, 0, 5], [0, 1, 0], [3, 0, 0]]]]
    >>> create_domains(grid)
    {'(0, 0)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 1)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 2)': {3},
     '(0, 3)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 4)': {2},
     '(0, 5)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 6)': {6}, ... }
     
     """
    # create domains for all cells
    # any empty cell will have a domain of 1,2,3,4,5,6,7,8,9
    # any cell that has a value will have a domain of that value
    domains = {}
    rows = flatten([list(map(flatten, brow)) for brow in grid])

    for ir, r in enumerate(rows):
        for ic, value in enumerate(r):
            if value == 0:
                coord = f"({ir}, {ic})"
                domains[coord] = {1, 2, 3, 4, 5, 6, 7, 8, 9}
            else:
                coord = f"({ir}, {ic})"
                domains[coord] = {value}
    return domains

In [10]:
def create_constraints():
    """
    Creates a dictionary of constraints for a basic Sudoku game. 
    The constraints are represented as functions that take two arguments `x` and `y` 
    and return `True` if `x` and `y` are valid according to the Sudoku rules, and `False` otherwise.
    
    The rules are:
    - All cells in the same row must be different.
    - All cells in the same column must be different.
    - All cells in the same box must be different.

    Returns:
        A dictionary where each key is a tuple of two coordinates (e.g. `('0,0', '1,1')`)
        representing two cells in the Sudoku grid, and each value is a function representing
        the constraint between the two cells.

    """
    # creates list of basic sudoku constraints
    # all cells in same rows must be different
    # all cells in same columns must be different
    # all cells in same boxes must be different
    
    coord_grid = create_grid()

    # create lists for rows, cols, boxes
    rows_coord = flatten([list(map(flatten, brow)) for brow in coord_grid])
    cols_coord = [list(col) for col in zip(*rows_coord)]
    boxes_coord = flatten([list(map(flatten, zip(*brow))) for brow in coord_grid])

    # create constraints
    
    row_constraints = {}
    for row in rows_coord:
        for i in range(9):
            for j in range(i+1, 9):
                coord1 = f"{row[i]}"
                coord2 = f"{row[j]}"
                row_constraints[(coord1, coord2)] = lambda x, y: x != y
                row_constraints[(coord2, coord1)] = lambda x, y: x != y

    col_constraints = {}
    for col in cols_coord:
        for i in range(9):
            for j in range(i+1, 9):
                coord1 = f"{col[i]}"
                coord2 = f"{col[j]}"
                col_constraints[(coord1, coord2)] = lambda x, y: x != y
                col_constraints[(coord2, coord1)] = lambda x, y: x != y


    box_constraints = {}
    for box in boxes_coord:
        for i in range(9):
            for j in range(i+1, 9):
                coord1 = f"{box[i]}"
                coord2 = f"{box[j]}"
                box_constraints[(coord1, coord2)] = lambda x, y: x != y
                box_constraints[(coord2, coord1)] = lambda x, y: x != y

    constraints = {}
    constraints.update(row_constraints)
    constraints.update(col_constraints)
    constraints.update(box_constraints)
    return constraints

In [11]:
def ac3(domains):
    """
    AC-3 algorithm for constraint satisfaction problems.
    
    1. Turn each constraint into 2 ex: (A!= B) & (B != A)
    2. Add all arcs to agenda (included in constraints)
    3. Repeat below until agenda (constraints) is empty
        - take an arc (xi,xj) off the agenda and check it
        - for EVERY value of xi there must be ANY value of xj that satisfies it
        - remove any inconsistent value from xi
        - if xi has changed add all arcs of the form (xk,xi) to the agenda

    Parameters
    ----------
    constraints : dict
        A dictionary where,
        Keys are pairs of cell coordinates 
        Values are lambda functions that test the consistency of values for those cells.
    domains : dict
        A dictionary where,
        Keys are cell coordinates 
        Values are sets of possible values for that cell.

    Returns
    -------
    d : dict
        A dictionary where the keys are the coordinates of empty cells in the format "(row, col)" 
        The values are sets of possible values for that cell. 
        Any empty cell will have a domain of {1,2,3,4,5,6,7,8,9}
        Any cell that has a value will have a domain of that value.
    counter : int
        A dictionary where the keys are the coordinates of empty cells in the format "(row, col)" 
        The values are sets of possible values for that cell. 
        Any empty cell will have a domain of {1,2,3,4,5,6,7,8,9}
        Any cell that has a value will have a domain of that value.

    Example:
    >>> grid =  [[[[0, 0, 3], [0, 2, 0], [6, 0, 0]],
                  [[9, 0, 0], [3, 0, 5], [0, 0, 1]],
                  [[0, 0, 1], [8, 0, 6], [4, 0, 0]]],
                 [[[0, 0, 8], [1, 0, 2], [9, 0, 0]],
                  [[7, 0, 0], [0, 0, 0], [0, 0, 8]],
                  [[0, 0, 6], [7, 0, 8], [2, 0, 0]]],
                 [[[0, 0, 2], [6, 0, 9], [5, 0, 0]],
                  [[8, 0, 0], [2, 0, 3], [0, 0, 9]],
                  [[0, 0, 5], [0, 1, 0], [3, 0, 0]]]]
    >>> create_domains(grid)
    {'(0, 0)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 1)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 2)': {3},
     '(0, 3)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 4)': {2},
     '(0, 5)': {1, 2, 3, 4, 5, 6, 7, 8, 9},
     '(0, 6)': {6}, ... }
     
     """
    # counter to keep track of the number of changes made to the domain
    counter = 0
    # full list of constraints
    full_constraints = create_constraints()
    # make a copy of constraints and domains that will be changed
    c = full_constraints.copy()
    d = domains.copy()
    while c:
        # (xi,xj) first pair of coordinates from constraints
        (xi,xj) = list(c.keys())[0]
        # pops the constraint function related to those coordinates
        f = c.pop((xi,xj)) 
        # for EVERY value of xi there must be ANY value of xj that satisfies it
        for x in d[xi].copy():
            if not any(f(x, y) for y in d[xj]):
                # if there is no value xj that satisfies any x from xi, remove x from domain of xi
                counter += 1
                d[xi].remove(x)
                # if xi has changed all any constraints of the form (xk,xi)
                # match original constraints where xi is the second element of the coordinates
                add_constraints_keys = list(filter(lambda k: k[1] == xi, full_constraints.keys()))
                # create dict with constraints we need to add back
                add_constraints = {key: value for key, value in full_constraints.items() if key in add_constraints_keys}
                # update c with add_constraints
                c.update(add_constraints)
    return d, counter

In [12]:
def domain_to_grid(domains):
    """
    INPUT:
    domains = {
        '(0, 0)': {4},
        '(0, 1)': {8},
        '(0, 2)': {3},
        '(0, 3)': {9},
        '(0, 4)': {2},
        '(0, 5)': {1},
        '(0, 6)': {6}, ...}
        
    OUTPUT:
    grid = 
    [[[[4, 8, 3], [9, 2, 1], [6, 5, 7]],
      [[9, 6, 7], [3, 4, 5], [8, 2, 1]],
      [[2, 5, 1], [8, 7, 6], [4, 9, 3]]],
     [[[5, 4, 8], [1, 3, 2], [9, 7, 6]],
      [[7, 2, 9], [5, 6, 4], [1, 3, 8]],
      [[1, 3, 6], [7, 9, 8], [2, 4, 5]]],
     [[[3, 7, 2], [6, 8, 9], [5, 1, 4]],
      [[8, 1, 4], [2, 5, 3], [7, 6, 9]],
      [[6, 9, 5], [4, 1, 7], [3, 8, 2]]]]
     
    """
    coord_grid = create_grid()
    for i in range(3):
        for j in range(len(coord_grid[i])):
            for k in range(len(coord_grid[i][j])):
                for z in range(len(coord_grid[i][j][k])):
                    coord = coord_grid[i][j][k][z]
                    value = domains[str(coord)] # domain of coordinate
                    # if domain has more than one value fill cell with 0
                    if len(list(value)) > 1:
                        coord_grid[i][j][k][z] = 0
                    else:
                        coord_grid[i][j][k][z] = list(value)[0] # first item in domain
    return coord_grid

In [13]:
def priority_variable(grid):
    """
    Find an unassigned cell which has the largest number of constraints
    with the assigned cells in the given Sudoku grid.

    Parameters
    ----------
    grid : list of lists
        The Sudoku grid to search for empty spaces.
    
    Returns
    -------
    tuple or None
        If an empty space is found, the function returns a tuple of the row and column
        indices of the empty space. If no empty spaces are found, the function returns
        None.
    """
    
    rows = flatten([list(map(flatten, brow)) for brow in grid])
    cols = [list(col) for col in zip(*rows)]
    boxes = flatten([list(map(flatten, zip(*brow))) for brow in grid])
    unassigned_cells = {}
    for ir, r in enumerate(rows):
        for ic, c in enumerate(r):
            # if cell is not assigned
            if c == 0:
                # number of conflicts in row, col, box
                row_conflicts = len(set(rows[ir]))-1
                col_conflicts = len(set(cols[ic]))-1
                ib = (3*(ir//3))+(ic//3)
                box_conflicts = len(set(boxes[ib]))-1
                conflicts = row_conflicts+col_conflicts+box_conflicts
                # add coordinates and number of conflicts
                coord = (ir, ic)
                unassigned_cells[coord] = conflicts
    sorted_cells = sorted(unassigned_cells.items(), key=lambda x:x[1], reverse=True)
    # if grid was already full, return None
    if sorted_cells == []:
        return None
    else:
        # return first priority cell coordinates (the one with most conflicts)
        return sorted_cells[0][0]

In [14]:
def backtracking_with_variable_ordering(grid, counter=0, backtrack_counter=0):
    """
    Tries to solve a Sudoku puzzle using the backtracking algorithm, with variable ordering.
    First choose the cell which has the largest number of constraints with the assigned cells

    Parameters
    ----------
    grid : list of lists of lists of lists of int
        The grid to be solved. The first two levels represent the rows and columns of the Sudoku
        grid, respectively. The third level represents the blocks of the grid (3x3 sub-grids), and
        the fourth level represents the cells of each block. A value of 0 means the cell is empty.
    counter : int, optional
        The current count of cell assignments during the backtracking process. Default is 0.
    backtrack_counter : int, optional
        The current count of backtrackings during the backtracking process. Default is 0.

    Returns
    -------
    tuple
        A tuple with three values: the solved grid (as a list of lists of lists of lists of int),
        the number of cell assignments made during the backtracking process, and the number of
        times the algorithm backtracked.

    Notes
    -----
    This function works by finding the first empty cell of the Sudoku grid, and trying to fill it
    with a number from 1 to 9 that doesn't violate the Sudoku constraints (i.e., no repeated
    numbers in the same row, column, or block). If a number is found that fits the empty cell, the
    function recursively calls itself with the updated grid. If the function is unable to find a
    valid number for the current cell, it backtracks to the previous cell and tries a different
    number until a solution is found or all possibilities have been exhausted.
    """
    
    # find first empty cell
    empty_cell = priority_variable(grid)
    # if there are no empty cells left, the puzzle is solved
    if empty_cell is None:  
        return grid, counter, backtrack_counter
    else:
        # coordinates of empty cell
        row, col = empty_cell
        # find row and col block of empty cell
        row_block = row // 3
        col_block = col // 3
        # for each number in domain (1-9)
        for num in range(1, 10):
            # if the number fits in coordinates
            if is_valid(grid, row, col, num):
                # cell = num
                counter += 1
                grid[row_block][row % 3][col_block][col % 3] = num
                # recursively call solve with the updated grid
                result = backtracking(grid, counter, backtrack_counter)
                if result is not None:
                    return result
                # if not valid (backtrack) set cell to 0
                grid[row_block][row % 3][col_block][col % 3] = 0
                backtrack_counter += 1
    # if no solution is found return none -> goes to above line (set cell to 0)
    return None

# Sudoku Puzzle

Load sudoku puzzles from dataset

In [15]:
# load sudoku dataset
puzzles_df = pd.read_csv('sudoku-3m.csv')
puzzles_df.head(3)

Unnamed: 0,id,puzzle,solution,clues,difficulty
0,1,1..5.37..6.3..8.9......98...1.......8761.........,1985437266432785915276198439147352688761924352...,27,2.2
1,2,...81.....2........1.9..7...7..25.934.2..........,9348172567286534196159427381764258934523981673...,23,0.0
2,3,..5...74.3..6...19.....1..5...7...2.9....58..7...,2159837463876542194692713855387169249413258677...,25,2.6


Add "level" column, which is the discrete difficulty level

In [16]:
# add column for discrete difficulty level
num_levels = 9
puzzles_df['level'] = pd.cut(puzzles_df['difficulty'], num_levels, precision=1, include_lowest=True, labels=False)
puzzles_df.head(3)

Unnamed: 0,id,puzzle,solution,clues,difficulty,level
0,1,1..5.37..6.3..8.9......98...1.......8761.........,1985437266432785915276198439147352688761924352...,27,2.2,2
1,2,...81.....2........1.9..7...7..25.934.2..........,9348172567286534196159427381764258934523981673...,23,0.0,0
2,3,..5...74.3..6...19.....1..5...7...2.9....58..7...,2159837463876542194692713855387169249413258677...,25,2.6,2


Take a sample of 20 puzzles from each level

In [17]:
sample_df = pd.DataFrame(columns=puzzles_df.columns.tolist())

for i in range(num_levels):
    level_df = puzzles_df[puzzles_df['level']==i].sample(n=20)
    sample_df = pd.concat([sample_df, level_df])

# Backtracking Algorithm

Apply backtracking to unsolved sudoku puzzles

In [19]:
backtracking_results = pd.DataFrame(columns=['puzzle','solved puzzle','counter', 'backtrack counter','time (secs)','level'])
# for each puzzle in sample_df
for index, row in sample_df.iterrows():
    # unsolved puzzle
    puzzle = row['puzzle']
    # difficulty level
    level = row['level']
    # convert puzzle from string to grid
    puzzle_grid = string_to_grid(puzzle)
    start_time = time.time()
    solved_puzzle, counter, backtrack_counter = backtracking(puzzle_grid)
    end_time = time.time()
    time_taken = round(end_time-start_time,1)
    puzzle_grid = string_to_grid(puzzle)
    backtracking_results_i = pd.DataFrame(
        [[puzzle_grid,solved_puzzle,counter,backtrack_counter,time_taken,level]],
        columns=['puzzle','solved puzzle','counter', 'backtrack counter','time (secs)','level'])
    backtracking_results = pd.concat([backtracking_results,backtracking_results_i])

### The grid below is one of the unsolved sudoku puzzles we need to solve:


In [20]:
p = backtracking_results.reset_index()['puzzle'][0]
p

[[[[0, 0, 8], [0, 0, 1], [0, 0, 0]],
  [[0, 6, 0], [3, 0, 0], [4, 0, 0]],
  [[2, 0, 1], [0, 0, 4], [0, 6, 0]]],
 [[[0, 5, 0], [0, 3, 0], [7, 0, 0]],
  [[0, 0, 0], [0, 9, 7], [0, 1, 0]],
  [[7, 0, 3], [0, 1, 0], [0, 9, 8]]],
 [[[0, 0, 0], [8, 0, 2], [0, 0, 9]],
  [[0, 3, 0], [0, 0, 0], [0, 0, 0]],
  [[0, 0, 0], [0, 0, 0], [0, 8, 0]]]]

### The solution produced by backtracking:

In [21]:
s = backtracking_results.reset_index()['solved puzzle'][0]
s

[[[[3, 4, 8], [6, 2, 1], [9, 5, 7]],
  [[5, 6, 7], [3, 8, 9], [4, 2, 1]],
  [[2, 9, 1], [7, 5, 4], [8, 6, 3]]],
 [[[1, 5, 9], [2, 3, 8], [7, 4, 6]],
  [[6, 8, 4], [5, 9, 7], [3, 1, 2]],
  [[7, 2, 3], [4, 1, 6], [5, 9, 8]]],
 [[[4, 1, 5], [8, 7, 2], [6, 3, 9]],
  [[8, 3, 2], [9, 6, 5], [1, 7, 4]],
  [[9, 7, 6], [1, 4, 3], [2, 8, 5]]]]

In [22]:
# checking performance of algorithm on the different levels
backtracking_results['cell assignments'] = backtracking_results['counter'].astype(int)
backtracking_results['number of backtracks'] = backtracking_results['backtrack counter'].astype(int)
counts = backtracking_results.groupby('level').size()
grouped_results = backtracking_results.groupby('level').mean()
grouped_results['puzzles solved'] = counts
bt_total_time = sum(backtracking_results['time (secs)'])
bt_count = sum(grouped_results['puzzles solved'])
grouped_results = grouped_results.astype(int)

### Performance of backtracking by difficulty level

In [23]:
grouped_results

Unnamed: 0_level_0,time (secs),cell assignments,number of backtracks,puzzles solved
level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,5,72,15,20
1,9,72,16,20
2,16,73,16,20
3,4,71,15,20
4,5,72,15,20
5,9,73,16,20
6,6,72,16,20
7,13,74,17,20
8,13,73,16,20


In [24]:
print('It took ',round(bt_total_time/60,2),' minutes to solve all ',bt_count,' puzzles using backtracking')

It took  28.44  minutes to solve all  180  puzzles using backtracking


# Backtracking with Variable Ordering

Apply backtracking with variable ordering to unsolved sudoku puzzles

In [25]:
backtracking_vo_results = pd.DataFrame(columns=['puzzle','solved puzzle','counter', 'backtrack counter','time (secs)','level'])
# for each puzzle in sample_df
for index, row in sample_df.iterrows():
    # unsolved puzzle
    puzzle = row['puzzle']
    # difficulty level
    level = row['level']
    # convert puzzle from string to grid
    puzzle_grid = string_to_grid(puzzle)
    start_time = time.time()
    solved_puzzle, counter, backtrack_counter = backtracking_with_variable_ordering(puzzle_grid)
    end_time = time.time()
    time_taken = round(end_time-start_time,1)
    puzzle_grid = string_to_grid(puzzle)
    backtracking_vo_results_i = pd.DataFrame(
        [[puzzle_grid,solved_puzzle,counter,backtrack_counter,time_taken,level]],
        columns=['puzzle','solved puzzle','counter', 'backtrack counter','time (secs)','level'])
    backtracking_vo_results = pd.concat([backtracking_vo_results,backtracking_vo_results_i])

In [26]:
# checking performance of algorithm on the different levels
backtracking_vo_results['cell assignments'] = backtracking_vo_results['counter'].astype(int)
backtracking_vo_results['number of backtracks'] = backtracking_vo_results['backtrack counter'].astype(int)
counts = backtracking_vo_results.groupby('level').size()
grouped_vo_results = backtracking_vo_results.groupby('level').mean()
grouped_vo_results['puzzles solved'] = counts
bt_vo_total_time = sum(backtracking_vo_results['time (secs)'])
bt_vo_count = sum(grouped_vo_results['puzzles solved'])
grouped_vo_results = grouped_vo_results.astype(int)

### Performance of backtracking with Variable Ordering by difficulty level

In [27]:
grouped_vo_results

Unnamed: 0_level_0,time (secs),cell assignments,number of backtracks,puzzles solved
level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,6,72,15,20
1,9,72,16,20
2,20,73,16,20
3,7,71,15,20
4,6,71,15,20
5,9,73,16,20
6,10,72,16,20
7,8,74,17,20
8,15,73,16,20


In [28]:
print('It took ',round(bt_vo_total_time/60,2),' minutes to solve all ',bt_vo_count,' puzzles using backtracking with variable ordering')

It took  31.44  minutes to solve all  180  puzzles using backtracking with variable ordering


# AC-3 Algorithm

### Create sudoku constraints
#### Cells in the same row/column/box should all be different

In [29]:
constraints = create_constraints()

### Use AC-3 to solve sample puzzles

In [31]:
ac3_results = pd.DataFrame(columns=['puzzle','solved puzzle','counter','time (secs)','level'])
# for each puzzle in sample_df
for index, row in sample_df.iterrows():
    # unsolved puzzle
    puzzle = row['puzzle']
    # difficulty level
    level = row['level']
    # convert puzzle from string to grid
    puzzle_grid = string_to_grid(puzzle)
    puzzle_domains = create_domains(puzzle_grid)
    start_time = time.time()
    solved_puzzle_domains, counter = ac3(puzzle_domains)
    end_time = time.time()
    time_taken = round(end_time-start_time,1)
    puzzle_grid = string_to_grid(puzzle)
    solved_puzzle = domain_to_grid(solved_puzzle_domains)
    ac3_results_i = pd.DataFrame(
        [[puzzle_grid,solved_puzzle,counter,time_taken,level]],
        columns=['puzzle','solved puzzle','counter', 'time (secs)','level'])
    ac3_results = pd.concat([ac3_results,ac3_results_i])

In [32]:
# checking performance of algorithm on the different levels
ac3_results['num domain reductions'] = ac3_results['counter'].astype(int)
# ac3_results['number of backtracks'] = ac3_results['backtrack counter'].astype(int)
counts = ac3_results.groupby('level').size()
ac3_grouped_results = ac3_results.groupby('level').mean()
ac3_grouped_results['puzzles solved'] = counts
ac3_total_time = sum(ac3_results['time (secs)'])
ac3_count = sum(ac3_grouped_results['puzzles solved'])
ac3_grouped_results = ac3_grouped_results.astype(int)

### Performance of AC-3 by difficulty level

In [33]:
ac3_grouped_results

Unnamed: 0_level_0,time (secs),num domain reductions,puzzles solved
level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,323,20
1,0,303,20
2,0,303,20
3,0,300,20
4,0,300,20
5,0,291,20
6,0,292,20
7,0,292,20
8,0,290,20


In [34]:
print('It took ',round(ac3_total_time/60,2),' minutes to solve all ',ac3_count,' puzzles using AC-3')

It took  0.82  minutes to solve all  180  puzzles using AC-3


## Summary

In [38]:
data = [['BackTracking',round(bt_total_time/60,2),bt_count],
        ['BackTracking with with Variable Ordering',round(bt_vo_total_time/60,2),bt_vo_count],
        ['AC-3',round(ac3_total_time/60,2),ac3_count]]
df = pd.DataFrame(data,columns=['Algorithm','Time (mins)','Solved Puzzles'])

In [42]:
df

Unnamed: 0,Algorithm,Time (mins),Solved Puzzles
0,BackTracking,28.44,180
1,BackTracking with with Variable Ordering,31.44,180
2,AC-3,0.82,180
