In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
data = pd.read_csv("sudoku.csv")

In [3]:
data.head()

Unnamed: 0,quizzes,solutions
0,0043002090050090010700600430060020871900074000...,8643712593258497619712658434361925871986574322...
1,0401000501070039605200080000000000170009068008...,3461792581875239645296483719658324174729168358...
2,6001203840084590720000060050002640300700800069...,6951273841384596727248369158512647392739815469...
3,4972000001004000050000160986203000403009000000...,4972583161864397252537164986293815473759641828...
4,0059103080094030600275001000300002010008200070...,4659123781894735623275681497386452919548216372...


In [4]:
X_raw = data['quizzes']
y_raw = data['solutions']

In [5]:
print("X shape:",X_raw.shape)
print("Y shape:", y_raw.shape)

X shape: (1000000,)
Y shape: (1000000,)


In [6]:
def str_to_array(str, shape):
    return np.array([int(i) for i in str]).reshape(shape)

In [12]:
X = []
y = []

In [13]:
for i in tqdm(range(len(X_raw))):
    X.append(str_to_array(X_raw[i], shape=(9,9,1)))
    y.append(str_to_array(y_raw[i], shape=(81,1)))
X = np.array(X)/4.5 - 1
y = np.array(y)-1

100%|█████████████████████████████████████████████████████████████████████| 1000000/1000000 [01:04<00:00, 15409.33it/s]


In [14]:
if 'data' in locals():
    del data
if 'X_raw' in locals():
    del X_raw
if 'y_raw' in locals():
    del y_raw

In [15]:
X.shape

(1000000, 9, 9, 1)

In [16]:
y.shape

(1000000, 81, 1)

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [19]:
if 'X' in locals():
    del X
if 'y' in locals():
    del y

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Reshape, BatchNormalization, Flatten, Softmax
from tensorflow.keras.optimizers import Adam

In [21]:
model = Sequential()
model.add(Conv2D(64, kernel_size=(3,3), activation="relu", padding="same", input_shape=(9,9,1)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(Conv2D(128, kernel_size=(1,1), activation="relu", padding="same"))
model.add(Flatten())
model.add(Dense(81*9))
model.add(Reshape((81,9)))
model.add(Softmax())
model.compile(loss="sparse_categorical_crossentropy", optimizer=Adam())
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 9, 9, 64)          640       
_________________________________________________________________
batch_normalization (BatchNo (None, 9, 9, 64)          256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 9, 9, 64)          256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 128)         8320      
_________________________________________________________________
flatten (Flatten)            (None, 10368)             0         
_________________________________________________________________
dense (Dense)                (None, 729)               7

In [22]:
model.fit(X_train, y_train, batch_size=64, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x2078f91c2b0>

In [23]:
model.save('final.h5')

In [57]:
if 'X_train' in locals():
    del X_train
if 'y_train' in locals():
    del y_train

In [49]:
predict = model.predict(X_test[2].reshape(1,9,9,1))

In [50]:
predict.shape

(1, 81, 9)

In [51]:
predict = predict.squeeze()

In [52]:
predict.shape

(81, 9)

In [53]:
X_test[2,:,:,0].shape

(9, 9)

In [93]:
print((X_test[2,:,:,0]+1)*4.5)
print(np.argmax(predict, axis=1).reshape(9,9)+1)
print(y_test[2,:,0].reshape(9,9)+1)

[[1. 6. 0. 4. 0. 0. 3. 0. 0.]
 [0. 2. 9. 3. 7. 0. 0. 0. 5.]
 [3. 0. 0. 0. 5. 6. 0. 0. 8.]
 [0. 8. 0. 1. 0. 0. 0. 2. 0.]
 [0. 0. 0. 8. 0. 0. 6. 3. 9.]
 [7. 0. 6. 0. 0. 0. 0. 4. 0.]
 [0. 3. 0. 0. 0. 0. 0. 8. 0.]
 [0. 1. 0. 9. 0. 4. 7. 0. 0.]
 [5. 0. 8. 7. 2. 0. 0. 9. 0.]]
[[1 6 7 4 9 8 3 7 2]
 [8 2 9 3 7 1 4 6 5]
 [3 4 4 2 5 6 2 7 8]
 [9 8 3 1 4 7 5 2 7]
 [2 4 1 8 4 7 6 3 9]
 [7 9 6 2 9 2 8 4 1]
 [9 3 7 5 1 5 2 8 4]
 [2 1 2 9 8 4 7 5 3]
 [5 4 8 7 2 1 4 9 3]]
[[1 6 5 4 9 8 3 7 2]
 [8 2 9 3 7 1 4 6 5]
 [3 7 4 2 5 6 9 1 8]
 [4 8 3 1 6 9 5 2 7]
 [2 5 1 8 4 7 6 3 9]
 [7 9 6 5 3 2 8 4 1]
 [9 3 7 6 1 5 2 8 4]
 [6 1 2 9 8 4 7 5 3]
 [5 4 8 7 2 3 1 9 6]]


In [59]:
sudoku_predict((X_test[2,:,:,0]+1)*4.5)

[[False False  True False  True  True False  True  True]
 [ True False False False False  True  True  True False]
 [False  True  True  True False False  True  True False]
 [ True False  True False  True  True  True False  True]
 [ True  True  True False  True  True False False False]
 [False  True False  True  True  True  True False  True]
 [ True False  True  True  True  True  True False  True]
 [ True False  True False  True False False  True  True]
 [False  True False False False  True  True False  True]]


In [103]:
def sudoku_predict(sudoku):
    while True:
        predict = -1
        probability = -1
        index = -1
        output = model.predict(sudoku.reshape(1,9,9,1))
        output = output.squeeze()
        for i in range(81):
            if sudoku[i//9, i%9] == -1:
                proba = np.max(output[i])
                if(proba > probability):
                    probability = proba
                    predict = np.argmax(output[i])
                    index = i
        sudoku[index//9, index%9] = predict+1
        remains = (sudoku==0).sum()
        print(remains)
        if remains==0:
            return sudoku

In [104]:
print((sudoku_predict(X_test[2,:,:,0])+1)*4.5)

0
[[ 1.   6.   0.   4.   0.   0.   3.   0.   0. ]
 [ 0.   2.   9.   3.   7.   0.   0.   0.   5. ]
 [ 3.   0.   0.   0.   5.   6.   0.   0.   8. ]
 [ 0.   8.   0.   1.   0.   0.   0.   2.   0. ]
 [ 0.   0.   0.   8.   0.   0.   6.   3.   9. ]
 [ 7.   0.   6.   0.   0.   0.   0.   4.   0. ]
 [45.   3.   0.   0.   0.   0.   0.   8.   0. ]
 [ 0.   1.   0.   9.   0.   4.   7.   0.   0. ]
 [ 5.   0.   8.   7.   2.   0.   0.   9.  22.5]]


In [105]:
(y_test[2,:,0]+1).reshape(9,9)

array([[1, 6, 5, 4, 9, 8, 3, 7, 2],
       [8, 2, 9, 3, 7, 1, 4, 6, 5],
       [3, 7, 4, 2, 5, 6, 9, 1, 8],
       [4, 8, 3, 1, 6, 9, 5, 2, 7],
       [2, 5, 1, 8, 4, 7, 6, 3, 9],
       [7, 9, 6, 5, 3, 2, 8, 4, 1],
       [9, 3, 7, 6, 1, 5, 2, 8, 4],
       [6, 1, 2, 9, 8, 4, 7, 5, 3],
       [5, 4, 8, 7, 2, 3, 1, 9, 6]])

In [106]:
import copy
def inference_sudoku(sample):
    feat = copy.copy(sample)
    while True:
        out = model.predict(feat.reshape(1,9,9,1))
        out = out.squeeze()
        pred = np.argmax(out, axis=1).reshape(9,9)+1
        prob = np.around(np.max(out, axis=1).reshape(9,9), 2)
        feat = (feat+1)*4.5
        feat = feat.reshape(9,9)
        mask = feat==0
        if mask.sum() == 0:
            break
        prob_new = prob*mask
        ind = np.argmax(prob_new)
        x, y = (ind//9), (ind%9)
        val = pred[x,y]
        feat[x,y] = val
        feat = feat/4.5 - 1
    return pred

In [109]:
print(inference_sudoku(X_test[2]))

[[1 6 9 4 1 9 3 9 2]
 [7 2 9 3 7 8 4 9 5]
 [7 9 9 2 5 6 1 7 8]
 [6 8 4 1 3 5 9 2 7]
 [2 1 3 2 4 7 6 3 6]
 [2 7 9 6 8 2 5 4 1]
 [9 2 2 5 6 5 7 1 7]
 [8 2 7 9 9 9 2 5 6]
 [5 5 6 7 2 1 3 8 9]]
