<a href="https://colab.research.google.com/github/emrealtinok/sudoku_solvers/blob/main/SudokuDenseNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import datetime, os

In [None]:
# Imports 3 million sudoku puzzles and their solutions.
# Each CSV line is parsed as "<puzzle digits>,<solution digits>"; every
# character is converted with int(), so both fields must be 81 decimal digits.

sudoku_3mil = 'drive/My Drive/sudoku3m.csv'
puzzles = np.zeros((3000000, 81), np.int32)
solutions = np.zeros((3000000, 81), np.int32)

# Stream the file line by line inside a context manager: the handle is always
# closed and the multi-hundred-megabyte CSV is never held as a single string.
with open(sudoku_3mil, 'r') as sudoku_file:
    for i, line in enumerate(sudoku_file):
        puzzle, solution = line.strip().split(',')
        # One row-wide assignment per field instead of 81 scalar writes.
        # A row whose length is not 81 raises ValueError here rather than
        # being silently truncated.
        puzzles[i] = np.fromiter(map(int, puzzle), np.int32)
        solutions[i] = np.fromiter(map(int, solution), np.int32)

# One 9x9 grid per puzzle
puzzles = puzzles.reshape((-1, 9, 9))
solutions = solutions.reshape((-1, 9, 9))

# Trailing axis of size 1 so the grids match the model input (9, 9, 1)
# and the sparse per-cell label layout
X = np.expand_dims(puzzles, axis=-1)
Y = np.expand_dims(solutions, axis=-1)

In [None]:
# Weight initializer, training callbacks and optimizer for the first training run

# Kernel initializer passed to every convolution in the dense blocks
initializer = tf.keras.initializers.HeUniform()

# Stops after 2 epochs without improvement and restores the best weights seen
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=2,
    restore_best_weights=True,
)

# Writes the model to Drive, keeping only the best checkpoint
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'drive/My Drive/sudoku_densenet_model',
    save_best_only=True,
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.003)


# Batch Normalization followed by a ReLU activation, as one reusable step

def batchnorm_relu(x):
  """Applies a new BatchNormalization layer and then a new ReLU layer to `x`.

  Args:
    x: Tensor to normalise and activate.

  Returns:
    The output of the ReLU layer.
  """
  normalized = tf.keras.layers.BatchNormalization()(x)
  return tf.keras.layers.ReLU()(normalized)

# Densely connected convolution block: three bottleneck stages whose outputs
# are successively concatenated onto the running feature stack

def densenet_block(x, d=None):
  """Builds one dense block of three 1x1 -> KxK -> 1x1 convolution stages.

  The stages use 3x3, 1x9 and 9x1 kernels in that order. Each stage's
  10-channel output is concatenated onto the running feature stack, and the
  batch-normalised, ReLU-activated stack feeds the next stage.

  Args:
    x: Tensor fed into the first stage's convolutions.
    d: Optional feature stack to concatenate onto; when None, `x` itself is
      used as the starting stack.

  Returns:
    A tuple (activated, stack): the final concatenated stack after
    batchnorm_relu, and the same stack before it.
  """
  layers = tf.keras.layers
  stack = x if d is None else d
  activated = x

  for kernel_size in ((3, 3), (1, 9), (9, 1)):
    branch = layers.Conv2D(10, (1, 1), padding='same', kernel_initializer=initializer)(activated)
    branch = layers.Conv2D(81, kernel_size, padding='same', kernel_initializer=initializer)(branch)
    branch = layers.Conv2D(10, (1, 1), padding='same', kernel_initializer=initializer)(branch)
    stack = layers.Concatenate()([stack, branch])
    activated = batchnorm_relu(stack)

  return activated, stack

# Builds the network: a 9x9x1 grid goes through 81 chained dense blocks and a
# final 1x1 convolution with a 10-way softmax

# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the notebook.
input = tf.keras.Input(shape=(9, 9, 1))

# The first block starts its own feature stack; the next 80 keep extending it
act_con, dense_con = densenet_block(input)
for _ in range(80):
  act_con, dense_con = densenet_block(act_con, dense_con)

output = tf.keras.layers.Conv2D(10, (1, 1), activation='softmax')(act_con)

# Wraps the graph in a Keras model
model = tf.keras.Model(inputs=input, outputs=output)

# Sparse cross-entropy: labels are integer digits, not one-hot vectors
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['sparse_categorical_accuracy'],
)

# Trains with 1% of the data held out for validation; the callbacks end the
# run early and checkpoint the best model
model.fit(
    X,
    Y,
    validation_split=0.01,
    epochs=100,
    batch_size=64,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/100
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: drive/My Drive/sudoku_densenet_model/assets
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100

In [None]:
# Loads the trained model

# The "_cont" checkpoint is only written by the continued-training cell below,
# so on a fresh run it does not exist yet. Prefer it when present, otherwise
# fall back to the checkpoint saved by the initial training run.
initial_model_dir = 'drive/My Drive/sudoku_densenet_model'
continued_model_dir = 'drive/My Drive/sudoku_densenet_model_cont'

loaded_model = tf.keras.models.load_model(
    continued_model_dir if os.path.exists(continued_model_dir) else initial_model_dir
)

In [None]:
# Continues training the loaded model with SGD

# Fresh optimizer and callbacks for this phase; checkpoints go to a separate
# "_cont" directory

optimizer_cont = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.1)

early_stopping_cont = tf.keras.callbacks.EarlyStopping(
    patience=2,
    restore_best_weights=True,
)
model_checkpoint_cont = tf.keras.callbacks.ModelCheckpoint(
    'drive/My Drive/sudoku_densenet_model_cont',
    save_best_only=True,
)

# Recompiles with the new optimizer; loss and metric match the first run

loaded_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer_cont,
    metrics=['sparse_categorical_accuracy'],
)

# Resumes training on the same data with the same 1% validation split

loaded_model.fit(
    X,
    Y,
    validation_split=0.01,
    epochs=100,
    batch_size=64,
    callbacks=[early_stopping_cont, model_checkpoint_cont],
)

Epoch 1/100
INFO:tensorflow:Assets written to: drive/My Drive/sudoku_densenet_model_cont/assets
Epoch 2/100
 7118/46407 [===>..........................] - ETA: 5:10:16 - loss: 0.1652 - sparse_categorical_accuracy: 0.9325Buffered data was truncated after reaching the output size limit.

In [None]:
# Imports 1000 Sudoku puzzles and their solutions.
# Each CSV line is parsed as "<puzzle digits>,<solution digits>"; every
# character is converted with int(), so both fields must be 81 decimal digits.

sudoku_1000 = 'drive/My Drive/1000Sudokus - Sheet1.csv'
puzzles_test = np.zeros((1000, 81), np.int32)
solutions_test = np.zeros((1000, 81), np.int32)

# The context manager guarantees the file handle is closed after parsing
with open(sudoku_1000, 'r') as sudoku_file:
    for i, line in enumerate(sudoku_file):
        puzzle, solution = line.strip().split(",")
        # One row-wide assignment per field; a row whose length is not 81
        # raises ValueError instead of being silently truncated.
        puzzles_test[i] = np.fromiter(map(int, puzzle), np.int32)
        solutions_test[i] = np.fromiter(map(int, solution), np.int32)

# One 9x9 grid per puzzle
puzzles_test = puzzles_test.reshape((-1, 9, 9))
solutions_test = solutions_test.reshape((-1, 9, 9))

# Trailing axis of size 1, matching the layout of the training arrays
X_test = np.expand_dims(puzzles_test, axis=-1)
Y_test = np.expand_dims(solutions_test, axis=-1)

In [None]:
# Evaluates the loaded model on the 1000-puzzle test set in a single pass.
# The result is [loss, sparse_categorical_accuracy]; the recorded run gave
# an accuracy of 0.9402.

loaded_model.evaluate(X_test, Y_test)



[0.1475314497947693, 0.9401851892471313]

In [None]:
# Solves the test puzzles through step-by-step inference
# (recorded run: 907/1000 solved)

def solve_stepwise(model, puzzle):
  """Fills a puzzle one cell at a time, always committing the most confident guess.

  After each prediction only the single empty cell with the highest digit
  probability is filled in; the updated grid is then fed back to the model.

  Args:
    model: Object whose `predict` maps a (1, 9, 9, 1) grid to per-cell class
      probabilities, presumably of shape (1, 9, 9, 10) -- verify against the
      loaded model.
    puzzle: Array holding the 81 cells of one puzzle, 0 marking an empty cell.

  Returns:
    A new (9, 9) integer array with every cell filled. `puzzle` itself is
    left untouched.
  """
  # np.array copies, so the caller's data (e.g. a row of X_test) is not mutated
  grid = np.array(puzzle).reshape(1, 9, 9, 1)

  while np.count_nonzero(grid) < 81:
    pred = model.predict(grid, verbose=0)

    # Only digits 1-9 are valid fills. Skipping class 0 ensures every
    # iteration fills a cell, so the loop always terminates.
    digit_probs = pred[0, :, :, 1:]
    best_digit = np.argmax(digit_probs, axis=-1) + 1
    confidence = np.amax(digit_probs, axis=-1)

    # Exclude cells that are already filled from the choice
    confidence = np.where(grid[0, :, :, 0] == 0, confidence, -1.0)
    row, col = np.unravel_index(np.argmax(confidence), confidence.shape)
    grid[0, row, col, 0] = best_digit[row, col]

  return grid.reshape(9, 9)


# Each puzzle is scored against its own solution in one pass. Dedicated index
# names keep the puzzle index from being overwritten by inner loops.
count = 0
for puzzle_index in range(len(X_test)):
  solved = solve_stepwise(loaded_model, X_test[puzzle_index])
  if np.array_equal(solved, Y_test[puzzle_index].reshape(9, 9)):
    count += 1
print(count)

1
907
