# Hyperparameters

In [0]:
# --- Network architecture (read by the CNN class) ---
CONV_LAYERS = 5   # number of stacked Conv2d + ReLU stages
FILTERS = 256     # output channels of every convolution
KERNEL = 3        # convolution kernel size
PADDING = 1       # padding per convolution; with KERNEL = 3 the 9x9 grid size is preserved
FC_LAYERS = 3     # fully connected layers, the last one being the output layer
NEURONS = 1500    # width of every hidden fully connected layer

# --- Optimisation ---
learning_rate = 1e-5
batch_size = 256
num_epochs = 20

# Not referenced by the cells visible in this notebook; presumably the nine sudoku digits.
num_classes = 9

# Setup

## Imports

In [0]:
# Ignore this cell if you're not using google colab

# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install torch==0.4.0 torchvision
import torch
print("Done!")

In [0]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as fct
from torch.utils import data

import matplotlib.pyplot as plt

from copy import deepcopy
from random import shuffle

In [0]:
from google.colab import auth
from googleapiclient.http import MediaFileUpload
from googleapiclient.discovery import build

## Data Download/Preparation

In [0]:
# Authenticate the Colab user first, then create the Google Drive v3 client
# that every download and upload cell below goes through.
auth.authenticate_user()
drive_service = build(serviceName='drive', version='v3')

In [0]:
# Drive file id of the training set
file_id = '1zF_vQ0PF7TgMS5DyLPSg9tuN5N9BQfKl'

import io
from googleapiclient.http import MediaIoBaseDownload

# Stream the file from Drive into an in-memory buffer, one chunk at a time.
downloaded = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(downloaded, request)
done = False
while not done:
    _, done = downloader.next_chunk()

downloaded.seek(0)

# Every row is "<quiz>,<solution>"; each character of a field becomes one float.
lines = [row.strip().split(",")
         for row in downloaded.read().decode("utf8").strip().split("\n")]
train_q = np.array([[float(ch) for ch in row[0]] for row in lines])
train_s = np.array([[float(ch) for ch in row[1]] for row in lines])

In [0]:
# Drive file id of the validation set
file_id = '1ToD1YNaqwkEEso_SaD0um7zQ0E1cbNef'

import io
from googleapiclient.http import MediaIoBaseDownload

# Stream the file from Drive into an in-memory buffer, one chunk at a time.
downloaded = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(downloaded, request)
done = False
while not done:
    _, done = downloader.next_chunk()

downloaded.seek(0)

# Every row is "<quiz>,<solution>"; each character of a field becomes one float.
lines = [row.strip().split(",")
         for row in downloaded.read().decode("utf8").strip().split("\n")]
val_q = np.array([[float(ch) for ch in row[0]] for row in lines])
val_s = np.array([[float(ch) for ch in row[1]] for row in lines])

In [0]:
# Drive file id of the test set
file_id = '1e_Xksitcc8DS8D3eOx3gINmFvtXmXip8'

# Stream the file from Drive into an in-memory buffer, one chunk at a time.
# (`io` and `MediaIoBaseDownload` come from the earlier download cells.)
downloaded = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(downloaded, request)
done = False
while not done:
    _, done = downloader.next_chunk()

downloaded.seek(0)

# Every row is "<quiz>,<solution>"; each character of a field becomes one float.
# Unlike the train/validation arrays, the test arrays are reshaped to 9x9 grids here.
lines = [row.strip().split(",")
         for row in downloaded.read().decode("utf8").strip().split("\n")]
test_q = np.array([[float(ch) for ch in row[0]] for row in lines]).reshape((-1, 9, 9))
test_s = np.array([[float(ch) for ch in row[1]] for row in lines]).reshape((-1, 9, 9))

In [0]:
# Release the parsed text rows and the Drive download helpers; the data now
# lives in the *_q / *_s arrays.
del lines
del request, downloader, downloaded

In [0]:
class Dataset(data.Dataset):
    """Quiz/solution pairs with a fresh random digit relabelling on every access.

    `xs` and `ys` are torch tensors indexed along their first axis; one sample of
    `xs` must have shape (1, 9, 9) for the one-hot scatter in `__getitem__`.
    """

    def __init__(self, xs, ys):
        """Store the quiz tensor `xs` and the label tensor `ys` (no copies are made)."""
        self.ys = ys
        self.xs = xs

    def __len__(self):
        """Return the number of samples (size of the first axis of `xs`)."""
        return self.xs.shape[0]

    def __getitem__(self, index):
        """Return `(one_hot_X, y)` for sample `index` under a random digit permutation.

        * `one_hot_X` has shape (9, 9, 9): channel d-1 marks the cells holding digit d,
          empty cells (value 0) are all-zero across the channels.
        * `y` holds the stored labels, relabelled with the same permutation
          (labels are zero-based, i.e. digit - 1).

        The stored tensors are never modified: the previous implementation relabelled
        views of `self.xs` / `self.ys` in place, so every read permanently changed the
        dataset.
        """
        # Random permutation of the digits 1..9; 0 (empty cell) always maps to itself.
        perm = list(range(1, 10))
        shuffle(perm)
        lut = torch.tensor([0] + perm, dtype=torch.long)

        # Indexing the lookup table creates new tensors, leaving the stored data intact.
        X = lut[self.xs[index].long()]

        # one-hot-labelling, zero is [1,0,...,0]
        one_hot_X = torch.zeros(10, 9, 9)
        one_hot_X.scatter_(0, X, 1)

        # one-hot-labelling, zero is [0,...,0] and one-hot labels have length of 9
        one_hot_X = one_hot_X[1:, :, :]

        # Labels are zero-based: shift to digits, permute, shift back; keep the stored dtype.
        labels = self.ys[index]
        y = (lut[labels.long() + 1] - 1).type_as(labels)

        return one_hot_X, y

    def predict_quiz(self, quiz, i, j):
        """Move cell (i, j) of `quiz` to the top-left corner and query a model for it.

        NOTE(review): this method cannot work on this class as written. It reads
        `self.ind` and calls `self(...)`, neither of which `Dataset` provides, and the
        evaluation cells invoke `predict_quiz` on the `model` object instead. It also
        builds a 10-channel input and moves it to the GPU unconditionally. The code is
        left unchanged here so nothing that references it disappears.
        """
        q = deepcopy(quiz)
        # Bring the band containing row i to the top, then row i to row 0.
        if i > 5:
            q[0:3,:], q[6:,:] = deepcopy(q[6:,:]), deepcopy(q[0:3,:])
        elif i > 2:
            q[0:3,:], q[3:6,:] = deepcopy(q[3:6,:]), deepcopy(q[0:3,:])
        if i%3 != 0:
            q[0,:], q[i%3,:] = deepcopy(q[i%3,:]), deepcopy(q[0,:])

        # Same for the columns: band first, then column j to column 0.
        if j > 5:
            q[:,0:3], q[:,6:] = deepcopy(q[:,6:]), deepcopy(q[:,0:3])
        elif j > 2:
            q[:,0:3], q[:,3:6] = deepcopy(q[:,3:6]), deepcopy(q[:,0:3])
        if j%3 != 0:
            q[:,0], q[:,j%3] = deepcopy(q[:,j%3]), deepcopy(q[:,0])

        q = torch.from_numpy(q)
        new_quizzes = torch.zeros((1, 10, 9, 9))
        poss = q[self.ind[:,0], self.ind[:,1]].type(torch.LongTensor)
        new_quizzes[0, poss, self.ind[:,0], self.ind[:,1]] = 1
        new_quizzes = new_quizzes.cuda()
        out = self(new_quizzes)
        conf, pred = torch.max(out, 1)

        return pred, conf

In [0]:
# Quizzes become float tensors with an explicit channel axis: (N, 1, 9, 9).
# Solutions become integer tensors shifted down by one (zero-based class labels).
tensor_trainq = torch.from_numpy(train_q).float().reshape(-1, 1, 9, 9)
tensor_trains = torch.from_numpy(train_s).long() - 1
tensor_valq = torch.from_numpy(val_q).float().reshape(-1, 1, 9, 9)
tensor_vals = torch.from_numpy(val_s).long() - 1

In [0]:
# Training batches are reshuffled every epoch; validation batches keep their order.
train_loader = data.DataLoader(
    dataset=Dataset(tensor_trainq, tensor_trains),
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
val_loader = data.DataLoader(
    dataset=Dataset(tensor_valq, tensor_vals),
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

In [0]:
def fill(quiz, sol, free):
    """Return a copy of `quiz` with randomly chosen empty cells filled from `sol`.

    Parameters
    ----------
    quiz : 2-D array whose empty cells are 0. The top-left cell must be empty.
    sol : zero-based solution (digit - 1); reshaped to `quiz.shape`.
    free : threshold for the fill loop, see below.

    Returns
    -------
    A new array; the input `quiz` is left untouched.

    Raises
    ------
    RuntimeError
        If the top-left cell is not empty (this includes a quiz without any empty
        cell). The exception used to be constructed but never raised, so such
        quizzes were silently processed anyway.

    The top-left cell always stays empty. Of the other empty cells, random ones are
    filled until at most `free + 1` of them remain.
    NOTE(review): that leaves up to `free + 2` empty cells in total; confirm this is
    the intended meaning of `free`.
    """
    quiz = deepcopy(quiz)
    sol = sol.reshape(quiz.shape)
    # np.where lists indices in row-major order, so (0, 0) can only appear first.
    ind = list(zip(*np.where(quiz == 0)))
    if not ind or ind[0] != (0, 0):
        # German for "no free cell at the top left".
        raise RuntimeError("Keine freie Stelle oben links")
    ind.pop(0)
    shuffle(ind)
    while len(ind) - 1 > free:
        k = ind.pop()
        quiz[k] = sol[k] + 1  # solution is zero-based, quiz digits are one-based
    return quiz

In [0]:
def fill_batch(q_batch, s_batch, free):
    """Apply `fill` to every quiz of a batch and return the filled copy.

    `q_batch` is copied first, so the caller's batch is not modified; entry `n`
    of the copy is replaced by `fill(copy[n], s_batch[n], free)`.
    """
    filled = deepcopy(q_batch)
    n_quizzes = filled.shape[0]
    n = 0
    while n < n_quizzes:
        filled[n] = fill(filled[n], s_batch[n], free)
        n += 1
    return filled

## Model

In [0]:
class CNN(nn.Module):
    """Convolutional network that predicts the digit of the top-left sudoku cell.

    Input: (N, 9, 9, 9) one-hot grids (channel d-1 marks digit d, empty cells are
    all-zero). Output: (N, 9) log-probabilities (LogSoftmax).

    The architecture is driven by the notebook hyperparameters CONV_LAYERS, FILTERS,
    KERNEL, PADDING, FC_LAYERS and NEURONS. The layer nesting is unchanged, so
    existing checkpoints keep their state-dict keys.
    """

    def __init__(self, p_dropout=0.5):
        """Build the layers.

        `p_dropout` is the drop probability of `self.drop_out`. The constructor used
        to read an undefined global `P_DROPOUT` and failed with a NameError. The
        dropout layer is currently not applied in `forward`, so the value does not
        affect the output.
        """
        super(CNN, self).__init__()

        convs = []
        in_filter = 9
        for _ in range(CONV_LAYERS):
            convs.append(nn.Sequential(
                nn.Conv2d(in_filter, FILTERS, padding=PADDING, kernel_size=KERNEL),
                nn.ReLU(),
            ))
            in_filter = FILTERS

        # Flattened feature size; assumes the convolutions keep the 9x9 grid size.
        in_size = in_filter * 9 * 9

        fcs = []
        for k in range(FC_LAYERS):
            if k == FC_LAYERS - 1:
                # Output layer: one log-probability per digit.
                fcs.append(nn.Sequential(
                    nn.Linear(in_size, 9),
                    nn.LogSoftmax(dim=1),
                ))
            else:
                fcs.append(nn.Sequential(
                    nn.Linear(in_size, NEURONS),
                    nn.ReLU(),
                ))
                in_size = NEURONS

        self.conv = nn.Sequential(*convs)
        self.fc = nn.Sequential(*fcs)
        self.drop_out = nn.Dropout(p_dropout)

    def forward(self, x):
        """Run the convolution stack, flatten, and apply the fully connected stack."""
        out = self.conv(x)
        out = out.reshape(out.size(0), -1)
        #out = self.drop_out(out)
        out = self.fc(out)
        return out

    def predict_quiz(self, quiz, i, j):
        """Predict the digit of cell (i, j) of a single 9x9 quiz.

        The network only predicts the top-left cell, so cell (i, j) is first moved
        to (0, 0) by swapping whole bands of three rows/columns and then single
        rows/columns inside the top/left band. `quiz` itself is not modified.

        Parameters
        ----------
        quiz : 9x9 array-like with digits 1..9 and 0 for empty cells.
        i, j : row and column of the cell to predict.

        Returns
        -------
        (pred, conf) : tensors of shape (1,). `pred` is the zero-based digit
        (digit - 1), `conf` its log-probability.
        """
        grid = np.array(quiz)  # private copy

        top = [0, 1, 2]
        row_band = i - i % 3
        if row_band:
            src = [row_band, row_band + 1, row_band + 2]
            grid[top + src] = grid[src + top]
        if i % 3:
            grid[[0, i % 3]] = grid[[i % 3, 0]]

        col_band = j - j % 3
        if col_band:
            src = [col_band, col_band + 1, col_band + 2]
            grid[:, top + src] = grid[:, src + top]
        if j % 3:
            grid[:, [0, j % 3]] = grid[:, [j % 3, 0]]

        # Same encoding as the training data: scatter into 10 channels, then drop
        # channel 0 so empty cells are all-zero and 9 input channels remain.
        digits = torch.from_numpy(grid).long().reshape(1, 1, 9, 9)
        one_hot = torch.zeros(1, 10, 9, 9)
        one_hot.scatter_(1, digits, 1)
        one_hot = one_hot[:, 1:].contiguous()

        # Run on whatever device the model lives on instead of assuming a GPU.
        device = next(self.parameters()).device
        with torch.no_grad():
            out = self(one_hot.to(device))
        conf, pred = torch.max(out, 1)

        return pred, conf

In [0]:
# Bookkeeping filled by the training loop: per-batch loss/accuracy, plus the
# validation accuracy and the batch index at which it was measured.
loss_list = []
acc_list = []
val_acc = [0]
val_pos = [0]

# The model lives on the GPU; `name` encodes the architecture and is used as the
# checkpoint / plot file name.
model = CNN().cuda()
name = "nn_sudoku_{}_{}_{}_{}_{}".format(CONV_LAYERS, FILTERS, FC_LAYERS, NEURONS, KERNEL)

# NLLLoss pairs with the LogSoftmax output of the network.
criterion = nn.NLLLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1)

# Each scheduler.step() multiplies the learning rate by gamma.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

# Training

In [0]:
# Train the model
# One epoch = a full pass over `train_loader`, followed by a validation pass, an
# optional learning-rate decay, and a checkpoint upload to Google Drive.
model.train()
tr = train_loader
va = val_loader
total_step = len(tr)
for epoch in range(num_epochs):
    acc_ep = []  # per-batch accuracies of the current epoch only
    model.train()
    for i, (quizzes, labels) in enumerate(tr):
        
        quizzes = quizzes.cuda()
        labels = labels.cuda()

        # Run the forward pass
        # Only the first label of every sample is used as the target
        # (presumably the top-left cell of the solution).
        outputs = model(quizzes)
        loss = criterion(outputs, labels[:,0])
        loss_list.append(loss.item())

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels[:,0]).sum().item()
        acc_list.append(correct / total)
        acc_ep.append(correct/total)

        # Report every 100 batches. The sum of the last 100 accuracy fractions equals
        # their mean in percent; the running accuracy averages up to the last 500
        # batches of this epoch.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{:2}/{}], Loss: {:.4f}, Accuracy: {:4.1f}%, running Accuracy {:5.2f}%'
                  .format(epoch + 1, num_epochs, (i + 1)//100, total_step//100, loss.item(),
                          sum(acc_list[-100:]), sum(acc_ep[-500:])/len(acc_ep[-500:])*100))

    print("\n\n\n")
    # Validation pass: no gradients, same first-label target as in training.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in va:
            
            images = images.cuda()
            labels = labels.cuda()
            
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels[:,0]).sum().item()

        print('Validation Accuracy of the model on the {} validation sudokus: {:.2f}%'.format(total, (correct / total) * 100))
        val_acc.append(correct / total)
        val_pos.append(len(acc_list))  # x-position (batch count) of this validation point
        # Decay the learning rate when validation accuracy gained less than one
        # percentage point over the previous measurement.
        if 100*(val_acc[-1] - val_acc[-2]) < 1: #minor improvement
            print("Learning rate decrease")
            scheduler.step()

    # Save the model and plot
    # The checkpoint is written locally under `name` and then uploaded to Drive;
    # this happens once per epoch.
    torch.save(model.state_dict(), name)

    file_metadata = {
      'name': name,
      'mimeType': 'application/octet-stream'
    }
    media = MediaFileUpload(name, 
                            mimetype='application/octet-stream',
                            resumable=True)

    created = drive_service.files().create(body=file_metadata,
                                           media_body=media,
                                           fields='id').execute()
    print('File ID: {}'.format(created.get('id')))
    
    print("\n\n\n")
print("DONE")

In [0]:
# Stand-alone validation pass, using the same first-label target as the training loop.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.cuda(), labels.cuda()

        outputs = model(images)
        _, predicted = outputs.data.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels[:,0]).sum().item()

print('Validation Accuracy of the model on the {} sudokus: {:.2f} %'.format(total, (correct / total) * 100))

## Plotting

In [0]:
# Straight-line least-squares fit through the per-batch training accuracies.
x = list(range(len(acc_list)))

m_acc, b_acc = np.polyfit(x, acc_list, deg=1)
regr_acc = [m_acc * step + b_acc for step in x]

In [0]:
# Straight-line least-squares fit through the per-batch training losses.
x = list(range(len(loss_list)))

m_loss, b_loss = np.polyfit(x, loss_list, deg=1)
regr_loss = [m_loss * step + b_loss for step in x]

In [0]:
# Training curves: accuracy on the left axis (ax1), loss on a twin right axis (ax2),
# both against the number of training batches. The figure is saved as
# "plot_<name>.png".
fig, ax1 = plt.subplots()
fig.set_dpi(100)
plt.rcParams['axes.facecolor'] = 'white'
plt.grid(alpha=0)

# Second y-axis sharing the same x-axis.
ax2 = ax1.twinx()

ax1.set_xlabel("#steps")
ax1.set_ylabel("accuracy")
ax2.set_ylabel("loss")
ax1.set_ylim([0,1])
ax2.set_ylim([0,max(loss_list)+.3])
ax1.set_xlim([0,max(len(loss_list), len(acc_list))])

# Loss curve and its regression line.
ax2.plot(loss_list, "g",label="loss")
ax2.plot(regr_loss, "y",label="loss regression line")

# Accuracy curve, its regression line, and the validation points
# (red dots at the batch counts stored in val_pos).
ax1.plot(acc_list, "b",label="accuracy")
ax1.plot(regr_acc, "c",label="accuracy regression line")
ax1.plot(val_pos, val_acc, "ro", label="validation accuracy")

# Black frame lines at y = 0, x = 0 and the last step.
plt.axhline(0, color='black')
plt.axvline(0,color='black')
plt.axvline(max(len(loss_list), len(acc_list)),color='black')

# Both legends are anchored outside the plot area, to the right.
lgd1 = ax1.legend(bbox_to_anchor=(1.1, 1), loc=2, borderaxespad=0.)
lgd2 = ax2.legend(bbox_to_anchor=(1.1, .8), loc=2, borderaxespad=0.)

fig.tight_layout()

# The legends are passed as extra artists so the tight bounding box includes them.
plt.savefig("plot_"+name+".png" ,bbox_extra_artists=(lgd1,lgd2), bbox_inches='tight')

## Save Data

In [0]:
# Upload the saved training plot to Google Drive and report the id of the new file.
plot_file = "plot_"+name+".png"

media = MediaFileUpload(plot_file, mimetype='image/png', resumable=True)
file_metadata = {'name': plot_file, 'mimeType': 'image/png'}

created = drive_service.files().create(
    body=file_metadata,
    media_body=media,
    fields='id',
).execute()
print('File ID: {}'.format(created.get('id')))

# Testing

In [0]:
# Per-cell accuracy on the first 100 test sudokus: every empty cell is predicted
# independently from the original quiz (no predictions are written back).
#
# The loop used to iterate over `test2_q` / `test2_s`, which no cell defines; the
# test data loaded above is `test_q` / `test_s`.
correct = 0
total = 0
count = 0
model.eval()
with torch.no_grad():
    for quiz,solved in zip(test_q, test_s):

        if count == 100 :
            break

        quiz = deepcopy(quiz)
        inds = list(zip(*np.where(quiz == 0)))

        while len(inds) > 0:
            i, j = inds.pop()
            pred, conf = model.predict_quiz(quiz, i, j)
            # pred is zero-based, the solution holds digits 1..9
            if pred+1 == solved[i, j]:
                correct += 1
            total += 1

        count += 1
        # Progress every 10 sudokus (the old `% 1000` could never fire before the
        # 100-sudoku limit).
        if count % 10 == 0:
            print(count, end=" ")
print()
# max(total, 1) avoids a ZeroDivisionError when no empty cell was evaluated.
print('Test Accuracy of the model on the {} cells: {:.2f}%'.format(total, (correct / max(total, 1)) * 100))

In [0]:
# Greedy evaluation on the first 1000 test sudokus. In every round the empty cell
# with the most confident prediction is scored, and then the TRUE digit is written
# into the quiz, so later predictions always see correct hints.
#
# accs          per-sudoku fraction of correctly predicted cells
# amn_hints[h]  [correct, attempts] for predictions made with h filled cells
# compl_solved  number of sudokus where every prediction was correct
#
# The loop used to iterate over `test2_q` / `test2_s`, which no cell defines; the
# test data loaded above is `test_q` / `test_s`.
accs = []
amn_hints = [[0,0] for _ in range(81)]
compl_solved = 0
count = 0
model.eval()
with torch.no_grad():
    for quiz,solved in zip(test_q, test_s):

        if count == 1000 :
            break

        quiz = deepcopy(quiz)
        inds = list(zip(*np.where(quiz == 0)))
        empty = len(inds)
        accs.append(0)
        correct = True

        while len(inds) > 0:
            # Pick the remaining empty cell with the highest confidence.
            maxis = (-np.inf, 0, 0, 0) # confidence, i, j, pred
            for i, j in inds:
                pred, conf = model.predict_quiz(quiz, i, j)
                if conf > maxis[0]:
                    maxis = (conf, i, j, pred)
            conf, i, j, pred = maxis
            # 81 - len(inds) is the number of cells filled at prediction time.
            if pred+1 == solved[i, j]:
                accs[-1] += 1
                amn_hints[81-len(inds)][0] += 1
            else:
                correct = False
            amn_hints[81-len(inds)][1] += 1
            inds.pop(inds.index((i, j)))
            # Reveal the true digit, not the prediction.
            quiz[i][j] = solved[i][j]
        count += 1
        accs[-1] /= empty
        compl_solved += correct
        if count % 200 == 0:
            print(count, "\n", end="")
        elif count % 10 == 0 :
            print(count, end=" ")

In [0]:
# Drop the leading hint counts that never occurred, then turn the remaining
# [correct, attempts] pairs into rows of [hint count, accuracy].
acc_hints = list(enumerate(amn_hints))
i = 0
while i < 80 and acc_hints[i][1][1] == 0:
    i += 1
acc_hints = np.array([[hints, hit / tries] for hints, (hit, tries) in acc_hints[i:]])

In [0]:
# Summary of the greedy evaluation. The first print was missing its closing
# parenthesis, which made the whole cell a SyntaxError.
print("Completed correct:", compl_solved, compl_solved / count)
print("Average accuracy:", np.mean(accs), np.std(accs))

# Accuracy (in percent) as a function of the number of filled cells at prediction time.
plt.plot(acc_hints[:,0], acc_hints[:,1]*100, color="red")
plt.xlabel("filled cells")
plt.ylabel("accuracy [%]")
plt.show()

In [0]:
# Greedy evaluation of one hard-coded sudoku. Correct predictions are printed
# flush left, wrong ones tab-indented, each as: row, column, prediction, truth.
quiz = np.array([
    [8, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 3, 6, 0, 0, 0, 0, 0],
    [0, 7, 0, 0, 9, 0, 2, 0, 0],
    [0, 5, 0, 0, 0, 7, 0, 0, 0],
    [0, 0, 0, 0, 4, 5, 7, 0, 0],
    [0, 0, 0, 1, 0, 0, 0, 3, 0],
    [0, 0, 1, 0, 0, 0, 0, 6, 8],
    [0, 0, 8, 5, 0, 0, 0, 1, 0],
    [0, 9, 0, 0, 0, 0, 4, 0, 0],
])
solved = np.array([
    [8, 1, 2, 7, 5, 3, 6, 4, 9],
    [9, 4, 3, 6, 8, 2, 1, 7, 5],
    [6, 7, 5, 4, 9, 1, 2, 8, 3],
    [1, 5, 4, 2, 3, 7, 8, 9, 6],
    [3, 6, 9, 8, 4, 5, 7, 2, 1],
    [2, 8, 7, 1, 6, 9, 5, 3, 4],
    [5, 2, 1, 9, 7, 4, 3, 6, 8],
    [4, 3, 8, 5, 2, 6, 9, 1, 7],
    [7, 9, 6, 3, 1, 8, 4, 5, 2],
])
inds = list(zip(*np.where(quiz == 0)))
t = len(inds)  # number of empty cells
c = 0          # number of correct predictions

while inds:
    # Pick the remaining empty cell with the most confident prediction.
    maxis = (-np.inf, 0, 0, 0) # confidence, i, j, pred
    for i, j in inds:
        pred, conf = model.predict_quiz(quiz, i, j)
        if conf > maxis[0]:
            maxis = (conf, i, j, pred)
    conf, i, j, pred = maxis
    if int(pred+1) != solved[i, j]:
        print("\t", i, j, int(pred)+1, solved[i][j])
    else:
        c += 1
        print(i, j, int(pred)+1, solved[i][j])
    inds.remove((i, j))
    # The true digit (not the prediction) is revealed for the next round.
    quiz[i, j] = solved[i, j]
print(c/t, c, t)

# Load Model

In [0]:
# Drive file id of a saved model checkpoint
file_id = '1VZ0p4PoJYq8Adj3Nb9VY74oy0_r7Z_hG'

# Stream the checkpoint from Drive into memory
# (`io` and `MediaIoBaseDownload` come from the data download cells).
downloaded = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(downloaded, request)
done = False
while not done:
    _, done = downloader.next_chunk()

# Write the whole buffer to disk, then release the download helpers.
downloaded.seek(0)
with open("state.ckpt", "wb") as file:
    file.write(downloaded.getvalue())
del request, downloader, downloaded

In [0]:
# Restore the weights downloaded into "state.ckpt" into the existing `model`.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted Drive file.
model.load_state_dict(torch.load("state.ckpt"))