In [98]:
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import itertools
import random
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_notebook

import sys
sys.path.append('/Users/andrew/Desktop/sudoku/src/sudoku')

from board import Board
from grid_string import GridString, read_solutions_file
from shuffler import Shuffler
from shuffled_grid import ShuffledGrid
from solutions import Solutions
import utils

In [10]:
# Seed every random number generator this notebook imports (random, numpy,
# torch) with the same value so that Restart & Run All gives repeatable numbers.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Make float64 the default floating-point dtype for new tensors and module
# parameters. torch.set_default_dtype is the supported replacement for the
# deprecated torch.set_default_tensor_type('torch.DoubleTensor').
torch.set_default_dtype(torch.float64)

In [17]:
# Read the whole puzzle file into memory as a list of text lines.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# on a machine where this exact file exists.
filename = '/Users/andrew/Desktop/sudoku/data/shuffled_puzzles.txt'
with open(filename) as f:
    lines = f.read().splitlines()
# Build a puzzle -> solution lookup. Each line must split on ',' into exactly
# two fields; any other field count makes the unpacking below raise ValueError.
puzzles = {}
for line in lines:
    puzzle, solution = line.split(',')
    # Both fields are wrapped in GridString; the puzzle's GridString is the dict
    # key, so a repeated puzzle keeps only the solution from its last line.
    puzzles[GridString(puzzle)] = GridString(solution)

In [249]:
def encode_input(grid_string: GridString):
    """Turn a grid into a tensor holding its cell values.

    The values come from ``grid_string.traverse_grid()``, in the order that
    method yields them, and are passed to ``torch.tensor`` unchanged.
    """
    cell_values = list(grid_string.traverse_grid())
    return torch.tensor(cell_values)

def encode_output(grid_string: GridString):
    """Turn a grid into a tensor of its cell values, each reduced by one.

    The values come from ``grid_string.traverse_grid()`` in the order that
    method yields them; subtracting 1 shifts every value down by one
    (presumably so digits 1..max_digit become class indices starting at 0 --
    confirm against GridString).
    """
    cell_values = list(grid_string.traverse_grid())
    digits = torch.tensor(cell_values)
    return digits - 1

In [238]:
# Number of puzzles to train on.
train_n = 10
# Take the first `train_n` keys of `puzzles` in dict order (fewer if the dict
# is smaller than that).
train_puzzles = list(puzzles)[:train_n]
# Matching solutions, in the same order as `train_puzzles`.
train_solutions = [puzzles[puzzle_grid] for puzzle_grid in train_puzzles]

In [250]:
# Grid dimensions are read from the first training puzzle.
max_digit = train_puzzles[0].max_digit
# One entry per cell; presumably the grid is max_digit x max_digit -- confirm
# against GridString.
num_cells = max_digit**2
# Same value as the number of embedding rows RRN allocates (max_digit + 1);
# not used by any code shown in this part of the notebook.
cell_vec_dim = max_digit + 1

# One row per puzzle, one column per cell. The row count is inferred (-1)
# instead of being hard-coded to `train_n`, so the cell still works when the
# puzzle file held fewer than `train_n` puzzles and the slice that built
# `train_puzzles` came up short.
train_x = torch.cat([encode_input(p) for p in train_puzzles]).reshape(-1, num_cells)
train_y = torch.cat([encode_output(s) for s in train_solutions]).reshape(-1, num_cells)

# Fail here, with a readable message, if an encoder produced an unexpected
# number of cells, rather than later inside the model or the loss.
assert train_x.shape == (len(train_puzzles), num_cells), train_x.shape
assert train_y.shape == train_x.shape, train_y.shape

In [247]:
class MLP(nn.Module):
    """Feed-forward stack of fully connected layers.

    ``layer_sizes[0]`` is the input width; every following entry is the output
    width of one ``nn.Linear`` layer, so ``len(layer_sizes) - 1`` layers are
    created.

    Args:
        layer_sizes: sequence of ints (must support slicing).
        activation: callable applied to the output of every layer except the
            last one. Defaults to ``F.relu``. Pass ``None`` to get the purely
            linear stack this class used to build.
    """

    def __init__(self, layer_sizes, activation=F.relu):
        super().__init__()
        self.layer_sizes = layer_sizes
        self.activation = activation

        # Pair each width with the next one: (in_features, out_features).
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out)
             for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:])]
        )

    def forward(self, X):
        """Run ``X`` through every layer and return the last layer's raw output."""
        vector = X
        last_index = len(self.layers) - 1
        for index, layer in enumerate(self.layers):
            vector = layer(vector)
            # Without a nonlinearity between them, consecutive Linear layers
            # compose into a single affine map. The final layer is left
            # un-activated so its output can be used as logits.
            if self.activation is not None and index < last_index:
                vector = self.activation(vector)
        return vector

class RRN(nn.Module):
    """Placeholder network: an embedding lookup followed by one linear layer.

    Every integer in the input is embedded and projected on its own; nothing
    in this class mixes information between positions.

    Args:
        max_digit: the embedding table has ``max_digit + 1`` rows and the
            output has ``max_digit`` scores per input position.
        embed_size: width of each embedding vector.
    """

    def __init__(self, max_digit, embed_size=16):
        super().__init__()
        self.max_digit, self.embed_size = max_digit, embed_size
        self.output_size = max_digit

        # Created in this order so parameter names and seeded initialisation
        # stay the same: embedding first, projection second.
        self.embed_layer = nn.Embedding(max_digit + 1, embed_size)
        self.useless_layer = nn.Linear(embed_size, max_digit)

    def forward(self, X):
        """Return scores with a trailing dimension of ``output_size`` appended to ``X``'s shape."""
        return self.useless_layer(self.embed_layer(X))

In [248]:
# Fresh model, trained with Adam at its default hyperparameters.
model = RRN(max_digit)
optimizer = optim.Adam(model.parameters())


def closure():
    """Run one full-batch forward/backward pass and return the loss tensor.

    Passed to ``optimizer.step``, which calls it and hands the loss back.
    """
    optimizer.zero_grad()
    # The model emits (puzzle, cell, class); cross_entropy expects the class
    # dimension second, so swap the last two axes.
    logits = model(train_x).permute(0, 2, 1)
    loss = F.cross_entropy(logits, train_y)
    loss.backward()
    return loss


# 100 optimisation steps over the whole training set, printing each loss.
for i in range(100):
    step_loss = optimizer.step(closure)
    print(step_loss)

tensor(1.5979, grad_fn=<NllLoss2DBackward>)
tensor(1.5876, grad_fn=<NllLoss2DBackward>)
tensor(1.5773, grad_fn=<NllLoss2DBackward>)
tensor(1.5671, grad_fn=<NllLoss2DBackward>)
tensor(1.5569, grad_fn=<NllLoss2DBackward>)
tensor(1.5468, grad_fn=<NllLoss2DBackward>)
tensor(1.5368, grad_fn=<NllLoss2DBackward>)
tensor(1.5268, grad_fn=<NllLoss2DBackward>)
tensor(1.5168, grad_fn=<NllLoss2DBackward>)
tensor(1.5069, grad_fn=<NllLoss2DBackward>)
tensor(1.4971, grad_fn=<NllLoss2DBackward>)
tensor(1.4873, grad_fn=<NllLoss2DBackward>)
tensor(1.4776, grad_fn=<NllLoss2DBackward>)
tensor(1.4679, grad_fn=<NllLoss2DBackward>)
tensor(1.4583, grad_fn=<NllLoss2DBackward>)
tensor(1.4487, grad_fn=<NllLoss2DBackward>)
tensor(1.4392, grad_fn=<NllLoss2DBackward>)
tensor(1.4297, grad_fn=<NllLoss2DBackward>)
tensor(1.4203, grad_fn=<NllLoss2DBackward>)
tensor(1.4110, grad_fn=<NllLoss2DBackward>)
tensor(1.4017, grad_fn=<NllLoss2DBackward>)
tensor(1.3925, grad_fn=<NllLoss2DBackward>)
tensor(1.3833, grad_fn=<NllLoss2