In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
data = pd.read_csv("sudoku.csv")

In [3]:
data.head()

Unnamed: 0,quizzes,solutions
0,0043002090050090010700600430060020871900074000...,8643712593258497619712658434361925871986574322...
1,0401000501070039605200080000000000170009068008...,3461792581875239645296483719658324174729168358...
2,6001203840084590720000060050002640300700800069...,6951273841384596727248369158512647392739815469...
3,4972000001004000050000160986203000403009000000...,4972583161864397252537164986293815473759641828...
4,0059103080094030600275001000300002010008200070...,4659123781894735623275681497386452919548216372...


In [4]:
X_raw = data['quizzes']
y_raw = data['solutions']

In [5]:
print("X shape:",X_raw.shape)
print("Y shape:", y_raw.shape)

X shape: (1000000,)
Y shape: (1000000,)


In [6]:
def str_to_array(str, shape):
    return np.array([int(i) for i in str]).reshape(shape)

In [7]:
X = []
y = []

In [8]:
for i in tqdm(range(len(X_raw))):
    X.append(str_to_array(X_raw[i], shape=(9,9,1)))
    y.append(str_to_array(y_raw[i], shape=(81,1)))
X = np.array(X)/4.5 - 1
y = np.array(y)-1

100%|██████████████████████████████████████████████████████████████████████| 1000000/1000000 [03:07<00:00, 5320.39it/s]


In [9]:
if 'data' in locals():
    del data
if 'X_raw' in locals():
    del X_raw
if 'y_raw' in locals():
    del y_raw

In [10]:
X.shape

(1000000, 9, 9, 1)

In [11]:
y.shape

(1000000, 81, 1)

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.0005, random_state=101)

In [14]:
if 'X' in locals():
    del X
if 'y' in locals():
    del y

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Reshape, BatchNormalization, Flatten, Softmax
from tensorflow.keras.optimizers import Adam

In [16]:
model = Sequential()
model.add(Conv2D(64, kernel_size=(3,3), activation="relu", padding="same", input_shape=(9,9,1)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(Conv2D(128, kernel_size=(1,1), activation="relu", padding="same"))
model.add(Flatten())
model.add(Dense(81*9))
model.add(Reshape((81,9)))
model.add(Softmax())
model.compile(loss="sparse_categorical_crossentropy", optimizer=Adam())
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 9, 9, 64)          640       
_________________________________________________________________
batch_normalization (BatchNo (None, 9, 9, 64)          256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 9, 9, 64)          256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 128)         8320      
_________________________________________________________________
flatten (Flatten)            (None, 10368)             0         
_________________________________________________________________
dense (Dense)                (None, 729)               7

In [17]:
model.fit(X_train, y_train, batch_size=64, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x1e5c3136310>

In [18]:
model.save('final.h5')

In [19]:
if 'X_train' in locals():
    del X_train
if 'y_train' in locals():
    del y_train

In [20]:
# from tensorflow.keras.models import load_model
# model = load_model("final.h5")

In [21]:
import copy
def sudoku_predict(puzzle):
    sudoku = copy.copy(puzzle)
    while True:
        predict = -1
        probability = -1
        index = -1
        output = model.predict(sudoku.reshape(1,9,9,1))
        output = output.squeeze()
        for i in range(81):
            if sudoku[i//9, i%9] == -1:
                proba = np.max(output[i])
                if(proba > probability):
                    probability = proba
                    predict = np.argmax(output[i])
                    index = i
        sudoku[index//9, index%9] = (predict+1)/4.5-1
        remains = (sudoku==-1).sum()
        if remains==0:
            return ((sudoku+1)*4.5).astype('int32')

In [22]:
def check_correction(X, y):
    sudoku = copy.copy(X)
    remains = (sudoku==-1).sum()
    while True:
        predict = -1
        probability = -1
        index = -1
        output = model.predict(sudoku.reshape(1,9,9,1))
        output = output.squeeze()
        for i in range(81):
            if sudoku[i//9, i%9] == -1:
                proba = np.max(output[i])
                if(proba > probability):
                    probability = proba
                    predict = np.argmax(output[i])
                    index = i
        if y[index, 0] != predict:
            return False
        remains -= 1
        if remains==0:
            return True

In [32]:
count = 0
error = []
for i in tqdm(range(len(X_test))):
    if check_correction(X_test[i], y_test[i]):
        count += 1
    else:
        error.append(i)

100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [09:47<00:00,  1.17s/it]


In [37]:
acc = count/len(X_test)*100
print("Tested on %d sudoku puzzles:"%(len(X_test)))
print("Accuracy =",acc,"%")

Tested on 500 sudoku puzzles:
Accuracy = 100.0 %
