In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import random
import time
import csv
from sudoku import *
from trainer import *
from datagen import * 
from neuralnets import *

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable

In [5]:
# ---- Optimisation -----------------------------------------------------------
# Learning rate handed to the Adam optimizer.
L_RATE = 1e-3
# L2 penalty, handed to the optimizer as weight_decay.
L2REG = 5e-4
# Number of training epochs over the collected examples.
N_EPOCHS = 50
# Batch size for neural net training (a size, despite the name).
N_BATCHES = 128

# ---- Policy iteration -------------------------------------------------------
# Number of times policy iteration is run.
N_ITERS = 200

# ---- Training episodes ------------------------------------------------------
# Episodes run per policy iteration to collect training examples.
N_TRAINEPS = 150
# Number of MCTS runs per step in one training episode.
N_TRAINRUNS = 50
# Exploration hyperparameters for training; cpuct and temp are low (<1) if you
# want more exploitation than exploration.
TRAINCPUCT = 2
TRAINTEMP = 2

# ---- Evaluation episodes ----------------------------------------------------
# Episodes run per policy iteration to score the solver.
N_TESTEPS = 50
# Number of MCTS runs per step in one evaluation episode.
N_TESTRUNS = 50
# Exploration hyperparameters for evaluation (low, i.e. mostly exploitation).
TESTCPUCT = 0.1
TESTTEMP = 0.1

In [6]:
# Probe for GPU support once; the rest of the notebook branches on this flag.
cuda_available = torch.cuda.is_available()

# Single shared network instance, moved to the GPU when one is available.
# nn.Module.cuda() moves the parameters in place and returns the module itself.
model = SudokuNN()
model = model.cuda() if cuda_available else model

    
def NNWrapper(statematrix):
    """Evaluate the global ``model`` on a single game state.

    Args:
        statematrix: indexable object; element 0 is fed to the network as the
            state input and element 1 as the mask input. Each is wrapped in
            two extra leading dimensions before being passed to the model
            (presumably batch and channel -- TODO confirm against SudokuNN).

    Returns:
        (probs, value): the network's first output squeezed into a NumPy
        array, and its second output converted to a Python float.

    Side effects:
        Puts the global ``model`` into eval mode.
    """
    model.eval()

    # Build both float32 inputs with the two extra leading dimensions.
    state_np = np.array([[statematrix[0]]], dtype=np.float32)
    mask_np = np.array([[statematrix[1]]], dtype=np.float32)
    state_var = Variable(torch.from_numpy(state_np))
    mask_var = Variable(torch.from_numpy(mask_np))

    if cuda_available:
        state_var = state_var.cuda()
        mask_var = mask_var.cuda()

    policy_out, value_out = model(state_var, mask_var)

    # Bring results back to the CPU as NumPy data for the caller.
    policy = policy_out.squeeze().cpu().data.numpy()
    value = value_out.data.cpu().numpy()
    return policy, float(value)


In [None]:
"""
Documentation of RunEpisodesSerial

    Run episodes with one CPU core. Each episode picks a random sudoku problem from the dataset.

    Args:
    dataset - list of (question, answer) tuples of 81-length strings
    n_eps - number of independent episodes to run
    n_runs - number of MCTS runs per step/turn in an episode
    cpuct, temp - exploration hyperparameters; values < 1 favor exploitation over exploration
    no_replacements - False if the game allows the player to replace previously filled cells
    scoring_scheme - 1, 2 or 3. see documentation for explanation of the schemes
    maxmoves - maximum moves the player can take before game stops. this is the resignation threshold

    Returns:
    list of training examples, each of the form (game state, probability vector, game final score)
    
"""

In [None]:
# ---------------------------------------------------------------------------
# Policy-iteration training loop.
#
# Each iteration: (1) run episodes with the current network to collect
# (state, probability vector, final score) examples, (2) fit the network on
# those examples for N_EPOCHS epochs, (3) evaluate the solver on the test
# problems and record the mean / standard deviation of the scores.
# ---------------------------------------------------------------------------

problempool = extract("sudoku17.txt")
# The second argument of generate determines the number of extra hints added
# to the problems (sudoku17.txt contains 17-hint sudoku problems). We start
# with 54, so the sudoku solver only has to fill in 10 cells.
# The third argument is presumably the number of problems to generate -- TODO confirm.
#
# For next stages of training, simply copy this cell but lower the number of
# extra hints for harder sudoku problems.
train = generate(problempool, 54, 1000)
test = generate(problempool, 54, 1000)

scores_means = []
scores_stdevs = []

# The loss modules hold no state, so build them once instead of per iteration.
# NOTE(review): nn.KLDivLoss expects its *input* to be log-probabilities --
# confirm that the first output of SudokuNN is a log-probability vector.
criterion1 = nn.KLDivLoss()
criterion2 = nn.MSELoss()

for iteration in range(N_ITERS):
    # A fresh optimizer per iteration (this resets Adam's running moments),
    # exactly as the original cell did.
    optimizer = optim.Adam(model.parameters(), L_RATE, weight_decay=L2REG)

    # Use the training exploration settings from the config cell (the original
    # referenced CPUCT / TEMP, which this notebook never defines).
    examples = RunEpisodesSerial(train, N_TRAINEPS, N_TRAINRUNS,
                                 TRAINCPUCT, TRAINTEMP, NNWrapper, maxmoves=30)
    n_examples = len(examples)

    # NNWrapper switches the model to eval mode; switch back before fitting.
    model.train()
    for epoch in range(N_EPOCHS):
        random.shuffle(examples)
        losses = []
        states, masks, probs, values = [], [], [], []
        for index, (state, probvec, value) in enumerate(examples):
            states.append([state[0]])
            masks.append([state[1]])
            probs.append(probvec)
            values.append(value)

            # Keep accumulating until the batch is full -- but always flush on
            # the last example so the trailing partial batch is trained on too.
            # (The original compared against n_examples, which enumerate never
            # reaches, so the remainder was silently dropped every epoch.)
            # NOTE(review): if SudokuNN uses batch norm, a trailing batch of
            # size 1 may need special handling -- confirm.
            is_last_example = index == n_examples - 1
            if not is_last_example and (index + 1) % N_BATCHES != 0:
                continue

            batch_states = torch.from_numpy(np.array(states, dtype=np.float32))
            batch_masks = torch.from_numpy(np.array(masks, dtype=np.float32))
            batch_probs = torch.from_numpy(np.array(probs, dtype=np.float32))
            batch_values = torch.from_numpy(np.array(values, dtype=np.float32))
            if cuda_available:
                batch_states = batch_states.cuda()
                batch_masks = batch_masks.cuda()
                batch_probs = batch_probs.cuda()
                batch_values = batch_values.cuda()
            batch_states, batch_masks = Variable(batch_states), Variable(batch_masks)
            target_probs, target_values = Variable(batch_probs), Variable(batch_values)

            optimizer.zero_grad()
            output_probs, output_values = model(batch_states, batch_masks)
            loss1 = criterion1(output_probs, target_probs)
            # NOTE(review): target_values is 1-D (batch,). If output_values is
            # (batch, 1), recent PyTorch broadcasts the pair inside MSELoss --
            # confirm the two shapes agree.
            loss2 = criterion2(output_values, target_values)
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()
            # Same tensor-to-float conversion NNWrapper uses; unlike
            # `.data[0]` it also works when the loss is a 0-dim tensor.
            losses.append(float(total_loss.data.cpu().numpy()))

            # Start a new batch.
            states, masks, probs, values = [], [], [], []

        # No examples means no batches; report nan without a NumPy warning.
        mean_loss = np.mean(losses) if losses else float("nan")
        print("Epoch %i. Mean loss = %.4f"%(epoch+1, mean_loss))

    # Evaluate with the test exploration settings from the config cell
    # (same values as the literals 0.1, 0.1 used before).
    scores = EvaluateSolverSerial(test, N_TESTEPS, N_TESTRUNS,
                                  TESTCPUCT, TESTTEMP, NNWrapper, maxmoves=30)
    scores_means.append(np.average(scores))
    scores_stdevs.append(np.std(scores))
    print("Iteration %i. Test accuracy: %.5f\n\n"%(iteration+1, scores_means[-1]))

Running episodes in serial.....................................................................................................Done!
Epoch 1. Mean loss = 0.0534
Epoch 2. Mean loss = 0.0155
Epoch 3. Mean loss = 0.0086
Epoch 4. Mean loss = 0.0058
Epoch 5. Mean loss = 0.0046
Epoch 6. Mean loss = 0.0042
Epoch 7. Mean loss = 0.0040
Epoch 8. Mean loss = 0.0040
Epoch 9. Mean loss = 0.0038
Epoch 10. Mean loss = 0.0037
Epoch 11. Mean loss = 0.0038
Epoch 12. Mean loss = 0.0038
Epoch 13. Mean loss = 0.0036
Epoch 14. Mean loss = 0.0037
Epoch 15. Mean loss = 0.0036
Epoch 16. Mean loss = 0.0037
Epoch 17. Mean loss = 0.0037
Epoch 18. Mean loss = 0.0035
Epoch 19. Mean loss = 0.0036
Epoch 20. Mean loss = 0.0037
Epoch 21. Mean loss = 0.0036
Epoch 22. Mean loss = 0.0035
Epoch 23. Mean loss = 0.0036
Epoch 24. Mean loss = 0.0035
Epoch 25. Mean loss = 0.0035
Epoch 26. Mean loss = 0.0035
Epoch 27. Mean loss = 0.0035
Epoch 28. Mean loss = 0.0034
Epoch 29. Mean loss = 0.0034
Epoch 30. Mean loss = 0.0036
Epoch 

Epoch 46. Mean loss = 0.0033
Epoch 47. Mean loss = 0.0033
Epoch 48. Mean loss = 0.0034
Epoch 49. Mean loss = 0.0033
Epoch 50. Mean loss = 0.0033
Running episodes in serial...................................................Done!
Iteration 5. Test accuracy: 0.11200


Running episodes in serial.....................................................................................................Done!
Epoch 1. Mean loss = 0.0067
Epoch 2. Mean loss = 0.0037
Epoch 3. Mean loss = 0.0034
Epoch 4. Mean loss = 0.0034
Epoch 5. Mean loss = 0.0034
Epoch 6. Mean loss = 0.0033
Epoch 7. Mean loss = 0.0033
Epoch 8. Mean loss = 0.0033
Epoch 9. Mean loss = 0.0033
Epoch 10. Mean loss = 0.0033
Epoch 11. Mean loss = 0.0033
Epoch 12. Mean loss = 0.0032
Epoch 13. Mean loss = 0.0032
Epoch 14. Mean loss = 0.0032
Epoch 15. Mean loss = 0.0033
Epoch 16. Mean loss = 0.0032
Epoch 17. Mean loss = 0.0033
Epoch 18. Mean loss = 0.0032
Epoch 19. Mean loss = 0.0032
Epoch 20. Mean loss = 0.0033
Epoch 21. Mean loss = 0.0032
E

Epoch 37. Mean loss = 0.0033
Epoch 38. Mean loss = 0.0033
Epoch 39. Mean loss = 0.0033
Epoch 40. Mean loss = 0.0032
Epoch 41. Mean loss = 0.0033
Epoch 42. Mean loss = 0.0033
Epoch 43. Mean loss = 0.0033
Epoch 44. Mean loss = 0.0033
Epoch 45. Mean loss = 0.0033
Epoch 46. Mean loss = 0.0032
Epoch 47. Mean loss = 0.0032
Epoch 48. Mean loss = 0.0032
Epoch 49. Mean loss = 0.0033
Epoch 50. Mean loss = 0.0032
Running episodes in serial...................................................Done!
Iteration 10. Test accuracy: 0.11600


Running episodes in serial.....................................................................................................Done!
Epoch 1. Mean loss = 0.0076
Epoch 2. Mean loss = 0.0044
Epoch 3. Mean loss = 0.0038
Epoch 4. Mean loss = 0.0036
Epoch 5. Mean loss = 0.0036
Epoch 6. Mean loss = 0.0036
Epoch 7. Mean loss = 0.0035
Epoch 8. Mean loss = 0.0035
Epoch 9. Mean loss = 0.0035
Epoch 10. Mean loss = 0.0035
Epoch 11. Mean loss = 0.0034
Epoch 12. Mean loss = 0.0034


In [None]:
# Plot the mean test score per policy iteration, with the standard deviation
# of the scores as error bars.
y = np.array(scores_means)
yerr = np.array(scores_stdevs)
# One x position per recorded iteration. Deriving the length from the data
# (instead of a hard-coded 30) keeps x, y and yerr the same length even when
# training was stopped early or N_ITERS was changed.
x = np.arange(len(y))
plt.plot(x, y, "b-")
plt.errorbar(x, y, yerr=yerr, fmt="k.")
plt.xlabel("Iterations")
plt.ylabel("Score")
plt.show()