# Data cleaning

Author: Frankie Inguanez <br />
Date: 13/01/2023<br /><br />

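A series of utility functions to clean and check a Sudoku puzzle: converting an 81-digit string into a 9x9 grid, extracting columns and 3x3 boxes, testing whether a grid is solved, and testing whether a digit can be placed in a given cell.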

In [48]:
# Convert a string to a 2D 9x9 array.
def to2DArray(n):
    """Split the digit string ``n`` into nine rows of integers.

    Characters 0-8 form the first row, 9-17 the second, and so on up to
    index 80. Every character in a row is converted with ``int``. Input
    shorter than 81 characters produces short or empty trailing rows;
    anything past index 80 is ignored.
    """
    grid = []
    for start in range(0, 81, 9):
        chunk = n[start:start + 9]
        grid.append([int(ch) for ch in chunk])
    return grid

In [49]:
# Get column values
def getColValues(puzzle, col):
    """Return the entries at index ``col`` of every row, top to bottom."""
    return [line[col] for line in puzzle]

In [50]:
# Get box values. Boxes are 3x3 sub-grids numbered from the top left in raster order:
# 0, 1, 2
# 3, 4, 5
# 6, 7, 8
def getBoxValues(puzzle, box):
    """Return the nine entries of 3x3 box number ``box``, row by row.

    ``box // 3`` selects the band of three rows and ``box % 3`` the stack of
    three columns; the values are read left to right, top to bottom.
    """
    band, stack = divmod(box, 3)
    top = band * 3
    left = stack * 3

    values = []
    for r in range(top, top + 3):
        for c in range(left, left + 3):
            values.append(puzzle[r][c])
    return values

In [51]:
# Parse the sample 81-digit puzzle string into a 9x9 grid; the bare
# expression on the last line displays the grid as the cell output.
puzzle = to2DArray('207408609100090007000070000370602098600000001580719063800524006005060800906837102')
puzzle

[[2, 0, 7, 4, 0, 8, 6, 0, 9],
 [1, 0, 0, 0, 9, 0, 0, 0, 7],
 [0, 0, 0, 0, 7, 0, 0, 0, 0],
 [3, 7, 0, 6, 0, 2, 0, 9, 8],
 [6, 0, 0, 0, 0, 0, 0, 0, 1],
 [5, 8, 0, 7, 1, 9, 0, 6, 3],
 [8, 0, 0, 5, 2, 4, 0, 0, 6],
 [0, 0, 5, 0, 6, 0, 8, 0, 0],
 [9, 0, 6, 8, 3, 7, 1, 0, 2]]

In [52]:
# Show the second row of the grid (rows are 0-indexed).
puzzle[1]

[1, 0, 0, 0, 9, 0, 0, 0, 7]

In [53]:
# Show the second column of the grid (column index 1).
getColValues(puzzle, 1)

[0, 0, 0, 7, 0, 8, 0, 0, 0]

In [54]:
# Show the bottom-right 3x3 box (box index 8).
getBoxValues(puzzle,8)

[0, 0, 6, 8, 0, 0, 1, 0, 2]

In [55]:
# Check if a list of digits contain all values from 1 to 9
def checkList(lst):
    """Return True when the distinct values of ``lst`` are exactly 1 through 9.

    The comparison is made on the set of values, so ordering and repeated
    entries do not affect the result; any missing digit or extra value
    (such as 0) makes it False.
    """
    required_digits = {1, 2, 3, 4, 5, 6, 7, 8, 9}
    return required_digits == set(lst)

In [56]:
# Check if a puzzle has been solved
def isSolved(puzzle):
    """Return True when every row, column and 3x3 box passes ``checkList``.

    Rows are checked first, then columns 0-8, then boxes 0-8; evaluation
    stops at the first group that fails.
    """
    indices = range(9)
    return (
        all(checkList(line) for line in puzzle)
        and all(checkList(getColValues(puzzle, c)) for c in indices)
        and all(checkList(getBoxValues(puzzle, b)) for b in indices)
    )

In [57]:
# Checks if a number can be added to a specific position
def isValid(puzzle, num, pos):
    """Return True if ``num`` at ``pos`` clashes with nothing in its row, column or box.

    Args:
        puzzle: Grid as a list of rows.
        num: Value to test.
        pos: ``(row, column)`` pair; any two-element sequence (tuple or list).

    Returns:
        False if ``num`` already appears elsewhere in the same row, the same
        column or the same 3x3 box; True otherwise. The cell at ``pos``
        itself is never counted as a conflict.
    """
    row, col = pos[0], pos[1]

    # Check the row, skipping the target cell.
    if any(value == num for j, value in enumerate(puzzle[row]) if j != col):
        return False

    # Check the column: one entry per row of the grid, skipping the target cell.
    if any(line[col] == num for i, line in enumerate(puzzle) if i != row):
        return False

    # Check box
    box_top = (row // 3) * 3
    box_left = (col // 3) * 3

    for i in range(box_top, box_top + 3):
        for j in range(box_left, box_left + 3):
            # Compare against a tuple built from pos: a tuple never equals a
            # list, so `(i, j) != pos` would fail to skip the target cell
            # whenever pos is passed as a list.
            if puzzle[i][j] == num and (i, j) != (row, col):
                return False

    return True

In [58]:
# Rebuild the sample grid from its 81-digit string and display it.
puzzle = to2DArray('207408609100090007000070000370602098600000001580719063800524006005060800906837102')
puzzle

[[2, 0, 7, 4, 0, 8, 6, 0, 9],
 [1, 0, 0, 0, 9, 0, 0, 0, 7],
 [0, 0, 0, 0, 7, 0, 0, 0, 0],
 [3, 7, 0, 6, 0, 2, 0, 9, 8],
 [6, 0, 0, 0, 0, 0, 0, 0, 1],
 [5, 8, 0, 7, 1, 9, 0, 6, 3],
 [8, 0, 0, 5, 2, 4, 0, 0, 6],
 [0, 0, 5, 0, 6, 0, 8, 0, 0],
 [9, 0, 6, 8, 3, 7, 1, 0, 2]]

In [59]:
# The sample grid still contains 0 entries, so it is not reported as solved.
puzzle = to2DArray('207408609100090007000070000370602098600000001580719063800524006005060800906837102')
isSolved(puzzle)

False

In [60]:
# A grid with no 0 entries in which every row, column and box holds 1-9.
puzzle = to2DArray('237458619158296347469173285371642598694385721582719463813524976725961834946837152')
isSolved(puzzle)

True

In [61]:
# Check whether 3 can be placed at row 0, column 1 of the sample grid.
puzzle = to2DArray('207408609100090007000070000370602098600000001580719063800524006005060800906837102')
isValid(puzzle, 3, [0,1])

True