<a href="https://colab.research.google.com/github/guru3/soduku_solver/blob/master/3.%20One%20last%20time%3A%20Can%20a%20neural%20network%20solve%20the%20soduku%20game%3F.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Let's give it another shot!

In [1]:
! pip install -q kaggle
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"guru333","key":"695a60acdd9541c267fbd36712d5ffcd"}'}

In [2]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download bryanpark/sudoku
!unzip sudoku.zip

Downloading sudoku.zip to /content
 84% 57.0M/68.1M [00:00<00:00, 56.7MB/s]
100% 68.1M/68.1M [00:00<00:00, 126MB/s] 
Archive:  sudoku.zip
  inflating: sudoku.csv              


In [1]:
import csv
import random
import pickle
import numpy as np
from keras.callbacks import Callback
from keras.models import Sequential,Model
from keras.layers import Conv2D,Dense,Flatten,Input,Dropout
import matplotlib.pyplot as plt
from keras.utils import to_categorical

Using TensorFlow backend.


In [0]:
def read_raw_data(total_to_read):
    FILE_PATH = './sudoku.csv';
    soduku_games = csv.reader(open(FILE_PATH,'r'))
    next(soduku_games);

    quizzes = [];
    solutions = [];
    index = 0;
    for game in soduku_games:
        index = index+ 1;
        if( index == total_to_read ):
            break;
        
        quizzes.append( np.reshape([int(d) for d in game[0]], (9, 9)) )
        solutions.append( np.reshape([int(d) for d in game[1]], (9, 9)) )
        
    permutation = np.random.permutation(len(quizzes));
    quizzes = np.array(quizzes)[permutation];
    solutions = np.array(solutions)[permutation];
    return quizzes, solutions;

In [0]:
def change_output(Y):
    return [ Y[:,i,j,:] for i in range(9) for j in range(9) ]

def remove_clues(inputArray, remove_clue=0):
    #very inefficient method - please perform walk of shame
    for inputs in inputArray:
      can_remove = [];
      for row in range(9):
        for col in range(9):
          if( np.sum(inputs[row,col,:]) != 0 ):
            can_remove.append( [row,col] )
      
      can_remove = np.array(can_remove)[ np.random.permutation(len(can_remove)) ];
      for i in range(remove_clue):
        [row,col] = can_remove[i];
        inputs[row,col,:] = 0;
    return inputArray;

def data_to_categorical(): # we will use one hot encoding of input and output  - this breakdown is more intuitive in the given case of discrete numbers
    X, Y = read_raw_data(20000);
    
    X = to_categorical(X, num_classes=10)
    X_also = to_categorical(Y, num_classes=10);
    Y = to_categorical(Y-1, num_classes=9)
    total_ex = X.shape[0];
    training_ex = (int)(total_ex*0.8);
    validation_ex= (int)(total_ex*0.1);
    train_X, train_Y = X_also[:training_ex], change_output(Y[:training_ex]);
    valid_X, valid_Y = X[training_ex:training_ex+validation_ex], change_output(Y[training_ex:training_ex+validation_ex]);
    test_X, test_Y = X[training_ex+validation_ex:], change_output(Y[training_ex+validation_ex:]);
    
    return X_also[:training_ex], train_Y, valid_X, valid_Y, test_X, test_Y;

In [0]:
train_X,train_Y,valid_X,valid_Y,test_X,test_Y = data_to_categorical();

In [8]:
print(train_X.shape)
print(len(train_Y),train_Y[0].shape)

(15999, 9, 9, 10)
81 (15999, 9)


In [0]:
def get_model():
    model = Sequential();
    model.add( Dense(128, input_shape=(9,9,10)) );
    model.add( Dropout(0.5) );
    model.add( Dense(64) );
    model.add( Dense(32) );
    model.add( Flatten() )
    
    input_ = Input(shape=(9,9,10))
    intermediate_output = model(input_)
    
    outputs = [ Dense(9, activation='sigmoid')(intermediate_output) for i in range(81) ] #for each cell in sudoku table
    model = Model(input_, outputs);
    model.compile( optimizer='RMSProp', loss='categorical_crossentropy', metrics= ['accuracy'] )
    return model

In [0]:
model = get_model();

In [11]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 9, 9, 10)     0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 2592)         11744       input_1[0][0]                    
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 9)            23337       sequential_1[1][0]               
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 9)            23337       sequential_1[1][0]               
____________________________________________________________________________________________

In [0]:
class EarlyStoppingByLossVal(Callback):
    def __init__(self, monitor='loss', value=0.01, verbose=0):
        super(Callback, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs={}):
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn("Early stopping requires %s available!" % self.monitor, RuntimeWarning)

        if current < self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping" % epoch)
            self.model.stop_training = True

callbacks = [
    EarlyStoppingByLossVal(monitor='loss', value=1.0, verbose=1),
]

In [13]:
history = model.fit(train_X, train_Y, epochs=5000, batch_size=256, validation_data=(valid_X, valid_Y), callbacks=callbacks);1,

Train on 15999 samples, validate on 1999 samples
Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 00002: early stopping


(1,)

In [0]:
for i in range(1,20):
  print('Training with missing clues ', i*3);
  train_X_round = remove_clues(train_X, remove_clue=3);
  history = model.fit(train_X_round, train_Y, epochs=5000, batch_size=128, validation_data=(valid_X, valid_Y), callbacks=callbacks);