In [1]:
!pip install --upgrade --quiet git+https://github.com/dtonderski/DeepSudoku

[0m

In [2]:
!pip show deepsudoku

Name: deepsudoku
Version: 0.8.4
Summary: Solving Sudokus using a Neural Network assisted Monte-Carlo approach.
Home-page: https://github.com/dtonderski/DeepSudoku
Author: davton
Author-email: dtonderski@gmail.com
License: GNU GPLv3
Location: /usr/local/lib/python3.9/dist-packages
Requires: einops, numpy, py-sudoku, torch
Required-by: 


### Data
Starting from solved sudokus, first split the data into train/val/test sets, then load them into memory.

In [3]:
import deepsudoku as ds
from deepsudoku.utils import data_utils

In [4]:
# load_data() returns three splits; only the first two (train, val) are used
# in this notebook. The third one is bound to an explicit throwaway name
# rather than `_`, because assigning to `_` in IPython shadows the built-in
# "last output" variable for the rest of the session.
train_sudokus_raw, val_sudokus_raw, _test_sudokus_raw = data_utils.load_data()

In [5]:
# Build the fixed validation set from the raw validation split.
# presumably make_moves() turns the raw (solved) sudokus into partially
# played positions, with the number of moves drawn from the given
# distribution - TODO confirm against data_utils.
val_sudokus = data_utils.make_moves(
    val_sudokus_raw,
    n_moves_distribution=data_utils.uniform_possible_moves_distribution,
)

### Network
Load and configure the neural network

In [6]:
from deepsudoku.dsnn import se_resnet, loss
import torch

In [7]:
# Instantiate the SE-ResNet and move it to the GPU.
network = se_resnet.SeResNet(
    blocks=10,
    filters=128,
    se_channels=32,
    dropout=0.2,
)
network = network.cuda()

# Adam with its default hyperparameters over all network parameters.
optimizer = torch.optim.Adam(network.parameters())

# Alias for the project's loss function, called in the training loop.
loss_fn = loss.loss

### Training
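The training loop consists of first evaluating the network on the validation sudokus and generating fresh training data every `generate_and_evaluate_every_n_epochs` epochs, and then training the network for one epoch on the most recently generated data.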

In [8]:
from deepsudoku.dsnn.training import generate_training_data
from deepsudoku.dsnn.evaluation import evaluate, get_averages, print_evaluation, categorical_accuracy, binary_accuracy
from deepsudoku.utils import network_utils
from deepsudoku.montecarlo.simulation import get_n_simulations_function

import os
import random
from datetime import datetime
import pickle as pkl

In [9]:
# Function handed to both evaluate() and generate_training_data() in the
# training loop.
# presumably it maps a sudoku to a simulation count between 1 and 64 -
# TODO confirm against montecarlo.simulation.
n_simulations_function = get_n_simulations_function(
    1, 64, use_builtin_difficulty=False
)

# Evaluation and data generation run once every this many epochs.
generate_and_evaluate_every_n_epochs = 10
# Passed to evaluate() as n_played_sudokus.
sudokus_to_evaluate = 128
# Passed to generate_training_data() as min_data_size.
min_data_size = 16384

In [10]:
# Replay cache: one list of generated training samples per data-generation
# round. The training loop keeps at most `previous_simulations_to_save` rounds.
previous_simulations_to_save = 15
previous_data_path = '../models/initial/previous_data.pkl'

previous_data = []
if os.path.exists(previous_data_path):
    # The pickle is written by this notebook's own training loop.
    with open(previous_data_path, 'rb') as f:
        previous_data = pkl.load(f)
    # Flatten the per-round lists into the pool used for training.
    current_train_sudokus = [
        sample
        for generation in previous_data
        for sample in generation
    ]
print(f"{len(previous_data)=}")

len(previous_data)=12


In [11]:
# Upper bound of the epoch range used by the training loop.
epochs = 1_000
# Number of training samples per optimisation step.
batch_size = 4_096

In [12]:
best_model_path = '../models/initial/best.pth'

# Fresh run defaults: empty metric histories. They are replaced below when a
# checkpoint from a previous run is found.
losses, cat_accs, bin_accs, min_percentages = [], [], [], []

if os.path.exists(best_model_path):
    print("Loading model")
    checkpoint = torch.load(best_model_path)

    # Restore the weights and the optimizer state ...
    network.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # ... together with the metric histories stored next to them.
    losses = checkpoint['losses']
    cat_accs = checkpoint['cat_accs']
    bin_accs = checkpoint['bin_accs']
    min_percentages = checkpoint['min_percentages']

# One loss entry is appended per trained epoch, so the history length is the
# epoch to resume from.
starting_epoch = len(losses)
# Baseline the training loop has to beat before it overwrites the checkpoint.
best_percentage = min_percentages[-1] if len(min_percentages) > 0 else 0
print(f"{starting_epoch=}")

Loading model


In [13]:
import time

In [16]:
# Master switch: set to False to skip training when re-running the notebook.
train = True

if train:
    for epoch in range(starting_epoch, epochs):
        # ------------------------------------------------------------------
        # Every `generate_and_evaluate_every_n_epochs` epochs: evaluate the
        # network, generate new training data with it, refresh the replay
        # cache and checkpoint the network if it improved.
        # ------------------------------------------------------------------
        if epoch % generate_and_evaluate_every_n_epochs == 0:
            network.eval()
            moves_before_failure_dict, percentage_completed_dict = evaluate(
                val_sudokus, network, n_simulations_function,
                n_played_sudokus=sudokus_to_evaluate)

            avg_moves_dict, avg_percentage_dict = get_averages(
                moves_before_failure_dict, percentage_completed_dict)

            print_evaluation(avg_moves_dict, avg_percentage_dict)

            # Model-selection metric: the smallest value in
            # avg_percentage_dict.
            current_min_average_percentage_before_failure = min(avg_percentage_dict.values())
            min_percentages.append(current_min_average_percentage_before_failure)

            train_sudokus = data_utils.make_moves(
                train_sudokus_raw,
                n_moves_distribution=data_utils.difficulty_uniform_combo_distribution)
            generated_train_sudokus = generate_training_data(
                train_sudokus, network, n_simulations_function,
                verbose=1, min_data_size=min_data_size)

            # Keep only the most recent `previous_simulations_to_save`
            # generation rounds and persist them so a restart can resume.
            previous_data.append(generated_train_sudokus)
            if len(previous_data) > previous_simulations_to_save:
                del previous_data[0]

            with open(previous_data_path, 'wb') as f:
                pkl.dump(previous_data, f)

            # Flatten the per-round lists into the pool used for training.
            current_train_sudokus = [sudoku for simulation_sudokus in previous_data for sudoku in simulation_sudokus]
            # Mean of element 2 of every sample, computed in a single pass.
            # presumably element 2 is a 0/1 label - TODO confirm against
            # generate_training_data.
            current_fraction = sum(sample[2] for sample in current_train_sudokus)/len(current_train_sudokus)
            print(f"{len(current_train_sudokus)=}, {current_fraction=}.")

            # NOTE(review): this break happens before the best-model check
            # below, so an improved network is not saved on this path -
            # confirm that this is intended.
            if (current_fraction > 0.5):
                print("========================================================================")
                print("Current fraction exceeded 0.5, stop training!")
                print("========================================================================")
                break

            network.train()

            if current_min_average_percentage_before_failure > best_percentage:
                print(f"Min percentage increased from {best_percentage} to "
                      f"{current_min_average_percentage_before_failure}! Saving network")
                best_percentage = current_min_average_percentage_before_failure
                torch.save({
                    'model_state_dict': network.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'cat_accs': cat_accs,
                    'bin_accs': bin_accs,
                    'losses': losses,
                    'min_percentages': min_percentages
                    }, best_model_path)

        # ------------------------------------------------------------------
        # One training epoch over the current pool of generated samples.
        # ------------------------------------------------------------------
        random.shuffle(current_train_sudokus)
        batch_losses, batch_cat_accs, batch_cat_accs_weights, batch_bin_accs = [], [], [], []

        for i in range(0, len(current_train_sudokus), batch_size):
            batch_sudokus = current_train_sudokus[i:i+batch_size]

            x_np, y_np = data_utils.generate_numpy_batch(batch_sudokus, augment = True)
            x, y = network_utils.numpy_batch_to_pytorch(x_np, y_np, 'cuda')
            y_pred = network(x)

            binary_cross_entropy_weights = loss.get_binary_cross_entropy_weights(y[1])

            # The loss function returns two terms; the optimised loss is
            # their sum.
            batch_p_loss, batch_v_loss = loss_fn(x, y_pred, y, binary_cross_entropy_weights=binary_cross_entropy_weights)
            batch_loss = batch_v_loss + batch_p_loss

            batch_cat_acc = categorical_accuracy(x, y, y_pred)

            # Weight is number of valid sudokus
            batch_cat_accs_weights.append(y[1].sum())

            batch_bin_acc = binary_accuracy(y, y_pred)

            batch_losses.append(batch_loss.item())
            batch_cat_accs.append(batch_cat_acc)
            batch_bin_accs.append(batch_bin_acc)

            optimizer.zero_grad()
            batch_loss.backward()
            # Clip the total gradient norm to 1 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(network.parameters(), 1)
            optimizer.step()

            # end="\r" rewrites the same console line for every batch.
            print(f"Epoch {epoch}, batch {min(i+batch_size, len(current_train_sudokus))}/{len(current_train_sudokus)}," 
                  f"{batch_p_loss.item()=:.4f}, {batch_v_loss.item()=:.4f}, {batch_cat_acc=:.4f}, {batch_bin_acc=:.4f}", end = "\r")

        # ------------------------------------------------------------------
        # Epoch summary.
        # ------------------------------------------------------------------
        losses.append(sum(batch_losses)/len(batch_losses))
        # Normalise the per-batch weights. The total is computed once here
        # instead of once per element inside the comprehension.
        total_cat_acc_weight = sum(batch_cat_accs_weights)
        batch_cat_accs_weights = [weight/total_cat_acc_weight for weight in batch_cat_accs_weights]
        # Weighted average over cat_accs
        cat_accs.append(sum([acc*weight for acc, weight in zip(batch_cat_accs, batch_cat_accs_weights)]))
        bin_accs.append(sum(batch_bin_accs)/len(batch_bin_accs))

        # The trailing spaces blank out what is left of the "\r" progress line.
        print(f'Epoch {epoch}, loss = {losses[-1]:.4f}, cat_acc = {cat_accs[-1]:.4f}, bin_acc = {bin_accs[-1]:.4f}, time = {datetime.now()}.                                       ')

1/128, time = 2022-12-13 06:29:20.146159
17/128, time = 2022-12-13 06:29:28.849171
33/128, time = 2022-12-13 06:29:38.211088
49/128, time = 2022-12-13 06:29:54.252297
65/128, time = 2022-12-13 06:30:04.670692
81/128, time = 2022-12-13 06:30:20.131395
97/128, time = 2022-12-13 06:30:24.223491
113/128, time = 2022-12-13 06:30:34.054361
0 to 9 zeros: average moves before ending: 5.9, avg percentage completed: 100.0
10 to 19 zeros: average moves before ending: 15.0, avg percentage completed: 100.0
20 to 29 zeros: average moves before ending: 24.3, avg percentage completed: 99.8
30 to 39 zeros: average moves before ending: 35.3, avg percentage completed: 100.0
40 to 49 zeros: average moves before ending: 44.2, avg percentage completed: 100.0
50 to 59 zeros: average moves before ending: 45.1, avg percentage completed: 84.4
60 to 69 zeros: average moves before ending: 26.9, avg percentage completed: 43.7
Sampled 0/16384 sudokus, time = 2022-12-13 06:30:43.556594
Sampled 2055/16384 sudokus, ti

KeyboardInterrupt: 