In [None]:
%matplotlib inline

from rubik import *
from rubikdatagen import * 
from rubiktrainer import *
from rubikNN import *

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.autograd import Variable

In [None]:
# Build the network first, then decide where it lives: it is moved to the GPU
# only when torch reports that CUDA is usable on this machine.
model = RubiksResNet()

cuda_available = torch.cuda.is_available()
if cuda_available:
    model.cuda()

def NNWrapper(statematrix):
    """Evaluate a single state with the global ``model``.

    The state is wrapped into a batch of one, pushed through the network in
    evaluation mode, and both network outputs are brought back to the CPU.

    Args:
        statematrix: array-like state encoding; it is converted with
            ``np.array`` and cast to float32. The expected shape is whatever
            ``model`` accepts for one sample.

    Returns:
        A tuple ``(outprobs, value)``: ``outprobs`` is the first network
        output as a NumPy array with every singleton dimension squeezed out,
        ``value`` is the second network output as a Python float.

    Side effects:
        Leaves the global ``model`` in eval mode; callers that train
        afterwards must call ``model.train()`` again.
    """
    model.eval()
    in_tensor = np.array([statematrix], dtype=np.float32)  # batch of one
    in_tensor = Variable(torch.from_numpy(in_tensor))
    if cuda_available: in_tensor = in_tensor.cuda()
    outprobs, outval = model(in_tensor)
    outprobs = outprobs.squeeze().cpu()  # drop the batch (and any other size-1) dimension
    outprobs = outprobs.data.numpy()
    outval = outval.data.cpu().numpy()
    # ndarray.item() pulls out the single element regardless of the array's
    # rank; calling float() directly on an array with ndim > 0 is deprecated
    # since NumPy 1.25.
    return outprobs, float(outval.item())

In [None]:
# ---- Neural-network optimisation -------------------------------------------
L_RATE = 1e-3         # learning rate handed to the Adam optimizer
L2REG = 5e-4          # weight decay (L2 regularisation) handed to the optimizer
N_EPOCHS = 50         # training epochs over the collected examples per iteration
N_BATCHES = 128       # despite the name, this is the mini-batch SIZE

# ---- Policy iteration ---------------------------------------------------------
N_ITERS = 50          # number of times policy iteration is run
MAXMOVES = 10         # forwarded to RunEpisodes / EvaluateSolver; presumably a per-episode move cap -- TODO confirm

# ---- Training episodes --------------------------------------------------------
N_TRAINEPS = 100      # forwarded to RunEpisodes; presumably the number of training episodes -- TODO confirm
N_TRAINRUNS = 2000    # number of MCTS runs per step in one episode
TRAINCPUCT = 2        # cpuct and temperature below 1 favour exploitation over exploration
TRAINTEMP = 1

# ---- Evaluation episodes ------------------------------------------------------
N_TESTEPS = 200       # forwarded to EvaluateSolver; presumably the number of test episodes -- TODO confirm
N_TESTRUNS = 1000     # MCTS-run budget used during evaluation
TESTCPUCT = 0.1
TESTTEMP = 0.25

In [None]:
# Generate one pool of cubes and split it: the first 700 are used for training
# episodes, the remaining 300 are held out for evaluation.
# NOTE(review): generate_cubes(1000, 5) is presumably (number of cubes, scramble depth) -- confirm in rubikdatagen.
cubes = generate_cubes(1000,5)
test = cubes[700:]
train = cubes[:700]

# For later, harder training stages, re-run this cell with a different generate_cubes configuration.

scores_means = []     # mean evaluation score of each policy iteration
scores_stdevs = []    # matching standard deviation; the plotting cell reads it for its error bars
num_moves = []        # overwritten by every evaluation; holds the latest run's move counts

# The loss modules hold no per-iteration state, so they are created once.
# NOTE(review): nn.KLDivLoss expects LOG-probabilities as its input -- confirm
# that the policy head of RubiksResNet returns log-probabilities.
criterion1 = nn.KLDivLoss()
criterion2 = nn.MSELoss()

for iteration in range(N_ITERS):
    print("Iteration %i"%(iteration+1))
    examples = RunEpisodes(train, N_TRAINEPS, N_TRAINRUNS, TRAINCPUCT, TRAINTEMP, MAXMOVES, NNWrapper)
    # A fresh optimizer per iteration (unchanged behaviour) restarts Adam's moment estimates.
    optimizer = optim.Adam(model.parameters(), L_RATE, weight_decay=L2REG)
    n_examples = len(examples)
    random.shuffle(examples)

    # NNWrapper switches the model to eval mode during the episodes; switch back before training.
    model.train()
    for epoch in range(N_EPOCHS):
        losses = []
        # Walk the examples in N_BATCHES-sized slices. Slicing also yields the
        # final, shorter batch, which the previous index test
        # (`index != n_examples`, always true) silently dropped.
        for start in range(0, n_examples, N_BATCHES):
            batch = examples[start:start + N_BATCHES]
            batch_states, batch_probs, batch_values = zip(*batch)

            states = torch.from_numpy(np.array(batch_states, dtype=np.float32))
            target_probs = torch.from_numpy(np.array(batch_probs, dtype=np.float32))
            target_values = torch.from_numpy(np.array(batch_values, dtype=np.float32))
            if cuda_available:
                states, target_probs, target_values = states.cuda(), target_probs.cuda(), target_values.cuda()
            states, target_probs, target_values = Variable(states), Variable(target_probs), Variable(target_values)

            optimizer.zero_grad()
            output_probs, output_values = model(states)
            loss1 = criterion1(output_probs, target_probs)
            # Flatten both sides so that a (batch, 1) value output cannot be
            # silently broadcast against (batch,) targets.
            loss2 = criterion2(output_values.view(-1), target_values.view(-1))
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()
            # `total_loss.data[0]` fails on 0-dim loss tensors (PyTorch >= 0.4);
            # going through NumPy's .item() works for both 0-dim and 1-element losses.
            losses.append(total_loss.data.cpu().numpy().item())

        print("Epoch %i. Mean loss = %.5f"%(epoch+1, np.mean(losses)))

    scores, num_moves = EvaluateSolver(test, N_TESTEPS, N_TESTRUNS, TESTCPUCT, TESTTEMP, MAXMOVES, NNWrapper)
    print("Finished evaluating. Number of cube states solved = %i, Average moves taken = %.2f\n\n"%(np.sum(scores),np.average(num_moves)))
    scores_means.append(np.average(scores))
    scores_stdevs.append(np.std(scores))

In [None]:
# Plot the mean evaluation score recorded after each policy iteration.
y = np.array(scores_means)
# One x position per recorded score (instead of a fixed N_ITERS), so a run
# that was interrupted early still plots without a shape mismatch.
x = np.arange(len(y))
# Error bars are optional: fall back to plain markers when no per-iteration
# spread (scores_stdevs) was recorded.
try:
    yerr = np.array(scores_stdevs)
except NameError:
    yerr = None
plt.plot(x,y, "b-")
plt.errorbar(x,y,yerr=yerr, fmt="k.")
plt.xlabel("Iterations")
plt.ylabel("Score")
plt.show()