In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import Model, Sequential
from keras.layers import Reshape, Dense, Dropout, Flatten, Activation, Conv2D, BatchNormalization, LayerNormalization
from keras.optimizers import Adam

In [2]:
data = pd.read_csv("/kaggle/input/d/bryanpark/sudoku/sudoku.csv")

In [7]:
X = np.array([np.fromiter(map(int, x), dtype=np.float32) for x in data.puzzles]).reshape(-1, 9, 9, 1)
y = np.array([np.fromiter(map(int, x), dtype=np.float32) for x in data.solutions]).reshape(-1, 9, 9)

In [9]:
X = X / 9
X -= 0.5
y -= 1

In [12]:
idx = 850_000
X_train, X_test = X[:idx], X[idx:]
y_train, y_test = y[:idx], y[idx:]

In [13]:
LEARNING_RATE = 0.001

In [14]:
def create_model(height, width, depth, classes):
    model = Sequential()

    model.add(Conv2D(512, kernel_size=(3,3), activation='relu', padding='same', input_shape=(height, width, depth)))
    model.add(BatchNormalization())
    
    for _ in range(7):
        model.add(Conv2D(512, kernel_size=(3,3), activation='relu', padding='same'))
        model.add(BatchNormalization())
    
    model.add(Conv2D(512, kernel_size=(3,3), activation='relu', padding='same'))
    
    model.add(Flatten())
    model.add(Dense(height*width*classes)) # 9 possiblities for each cell in the 9x9 grid
    model.add(Dropout(0.1))
    model.add(LayerNormalization(axis=-1))
    model.add(Reshape((height, width, classes)))
    model.add(Activation('softmax'))
    
    optimizer = Adam(LEARNING_RATE)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [15]:
model = create_model(9, 9, 1, 9)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 9, 9, 512)         5120      
                                                                 
 batch_normalization (BatchN  (None, 9, 9, 512)        2048      
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 9, 9, 512)         2359808   
                                                                 
 batch_normalization_1 (Batc  (None, 9, 9, 512)        2048      
 hNormalization)                                                 
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 9, 512)         2359808   
                                                                 
 batch_normalization_2 (Batc  (None, 9, 9, 512)        2

In [None]:
BATCH_SIZE = 64
EPOCHS = 5

In [18]:
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7e52e786ce80>

In [19]:
from keras.models import save_model

save_model(model, 'sudoku_solver.h5')

In [1]:
from keras.models import load_model
 
model = load_model('sudoku_solver.h5')

In [4]:
df = pd.read_csv("../data/puzzles.csv")

In [5]:
def convert_string_to_array(string_data):
    string_data = string_data.replace("[", "").replace("]", "")
    array_data = [[int(num) for num in row.split(", ")] for row in string_data.split(",")]
    return np.array(array_data)

In [11]:
import time
start_time = time.monotonic()
correct = total = 0
for puzzle, solution in zip(df["quizzes"], df["solutions"]):
    solution = convert_string_to_array(solution).reshape(-1,9,9)
    puzzle = convert_string_to_array(puzzle).reshape(-1,9,9,1)
    puzzle = puzzle / 9
    puzzle -= 0.5
    pred = model.predict(puzzle, verbose=0)
    pred = np.argmax(pred, axis=-1) + 1
    if np.array_equal(pred, solution):
        correct += 1
    total += 1
end_time = time.monotonic()

print(f"Total number of puzzles: {total}")
print(f"Correct solutions: {correct}")  # Note these are all relatively simple Sudoku's
print(f"Accuracy: {correct/total*100}%")
print("Total time: {:.2f}s".format(end_time-start_time))

Total number of puzzles: 1000
Correct solutions: 1000
Accuracy: 100.0%
Total time: 75.41s


In [12]:
def arrays_equal(arr1, arr2):
    for i in range(len(arr1)):
        for j in range(len(arr1[i])):
            if arr1[i][j] != arr2[i][j]:
                return False

    return True

In [14]:
import sys
sys.path.append('../')
from src.test import solve
import ast

start_time = time.monotonic()
for puzzle, solution in zip(df["quizzes"], df["solutions"]):
    if not solve(ast.literal_eval(puzzle)) == ast.literal_eval(solution):
        assert("Solution is wrong")

end_time = time.monotonic()
print("Total time: {:.2f}s".format(end_time-start_time))  # Backtracking much faster

Total time: 1.09s
