# Setup (Colab only)

In [None]:
# Fetch the project sources; the following cells change into the clone and install its requirements.
!git clone https://github.com/chris838/alpha-zero-general.git

In [None]:
# Work from the repository root so relative paths used later (docker/requirements.txt, ./temp/) resolve inside the clone.
%cd 'alpha-zero-general'

In [None]:
!pip install -r docker/requirements.txt

# Train AlphaZero

In [1]:
import logging
import coloredlogs
from Coach import Coach
from utils import dotdict
from santorini.keras.NNet import NNetWrapper
from santorini.SantoriniGame import SantoriniGame

In [2]:
import Arena
from MCTS import MCTS

from santorini.SantoriniPlayers import (
    RandomPlayer,
    HumanSantoriniPlayer,
    GreedySantoriniPlayer,
)

import numpy as np
from utils import *

In [3]:
# Logger for this notebook, named after the current module (__name__).
log = logging.getLogger(__name__)
# Install the coloredlogs handler; the level passed here controls how verbose the training log is.
coloredlogs.install(level='INFO')  # Change this to DEBUG to see more info.

In [4]:
# Training hyper-parameters handed to Coach further down.
args = dotdict({
    'numIters': 1000,           # Number of training iterations (the training log prints "Starting Iter #N" for each).
    'numEps': 100,              # Number of complete self-play games to simulate during a new iteration.
    'tempThreshold': 15,        # NOTE(review): undocumented; presumably the move count after which self-play stops exploring with temperature 1 — confirm in Coach.
    'updateThreshold': 0.6,     # During arena playoff, new neural net will be accepted if threshold or more of games are won.
    'maxlenOfQueue': 200000,    # Number of game examples to train the neural networks (presumably an upper bound on the example queue — TODO confirm).
    'numMCTSSims': 25,          # Number of MCTS simulations to run (presumably per move — TODO confirm against MCTS).
    'arenaCompare': 40,         # Number of games to play during arena play to determine if new net will be accepted.
    'cpuct': 1,                 # Passed to MCTS together with numMCTSSims; presumably the PUCT exploration constant — TODO confirm.
    'checkpoint': './temp/',    # Folder where checkpoints and the best model are saved.
    'load_model': False,        # Switched to True by the next cell when a pre-trained model is found on disk.
    'numItersForTrainExamplesHistory': 20,  # NOTE(review): presumably how many iterations of self-play examples are retained — confirm in Coach.
})

In [5]:
# Resume from a pre-trained model when one is present on disk; otherwise train from scratch.
import os

# The '.index' file is what gets probed, while the checkpoint name handed to the
# loader is 'best.pth.tar' — presumably the checkpoint is stored as several files
# (TODO confirm against NNetWrapper.save_checkpoint).
pretrained_index = os.path.join('pretrained_models', 'santorini', 'keras', '5x5', 'best.pth.tar.index')

if not os.path.exists(pretrained_index):
    print("Not using best pre-existing model")
else:
    print("Using best pre-existing model")
    args['load_model'] = True
    args['load_folder_file'] = ('pretrained_models/santorini/keras/5x5', 'best.pth.tar')

Not using best pre-existing model


In [7]:
# Game instance shared by the network wrapper and the Coach below.
# NOTE(review): 5 is presumably the board size (the pre-trained model path uses '5x5') — confirm.
game = SantoriniGame(5)

In [8]:
# Wrap the neural network for this game. Constructing it initialises TensorFlow
# (the recorded output shows the Metal GPU device being created).
nnet = NNetWrapper(game)

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-07-16 18:16:38.684521: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-16 18:16:38.684703: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [9]:
# Restore network weights only when an earlier cell flagged a pre-trained model.
if not args.load_model:
    print('Not loading a checkpoint.')
else:
    ckpt_folder = args.load_folder_file[0]
    ckpt_file = args.load_folder_file[1]
    print('Loading checkpoint "{}/{}"...'.format(ckpt_folder, ckpt_file))
    nnet.load_checkpoint(ckpt_folder, ckpt_file)

Not loading a checkpoint.


In [10]:
# Training-schedule overrides. Lower these values to let the notebook run in its entirety quickly.
# NOTE: as written they are identical to the defaults in the `args` cell above, so this is a
# full-length run, not a quick demo. In the recorded output one iteration (100 self-play games,
# 10 training epochs, 40 arena games) took about an hour.

# In reality, training a model for a game like Santorini can take several hours or days.
args['numIters'] = 1000
args['numEps'] = 100
args['arenaCompare'] = 40

In [11]:
# Bundle the game, the network and the hyper-parameters into the object that drives training.
coach = Coach(game, nnet, args)

In [12]:
# Run the training loop; per the log each iteration does self-play, network training (10 epochs)
# and an arena match against the previous version. %time reports the total wall time.
# Expect a long run: each iteration in the recorded output took roughly an hour.
%time coach.learn()

2022-07-16 18:16:42 chris-pro.local Coach[45616] INFO Starting Iter #1 ...
Self Play:   0%|          | 0/100 [00:00<?, ?it/s]2022-07-16 18:16:42.423996: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-16 18:16:42.489868: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
Self Play: 100%|██████████| 100/100 [38:57<00:00, 23.38s/it]


Checkpoint Directory exists! 
Epoch 1/10


2022-07-16 18:55:45.204651: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2022-07-16 18:58:21 chris-pro.local Coach[45616] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1):   0%|          | 0/20 [00:00<?, ?it/s]2022-07-16 18:58:21.185917: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
Arena.playGames (1): 100%|██████████| 20/20 [08:35<00:00, 25.79s/it]
Arena.playGames (2): 100%|██████████| 20/20 [08:36<00:00, 25.81s/it]
2022-07-16 19:15:33 chris-pro.local Coach[45616] INFO NEW/PREV WINS : 17 / 23 ; DRAWS : 0
2022-07-16 19:15:33 chris-pro.local Coach[45616] INFO REJECTING NEW MODEL
2022-07-16 19:15:33 chris-pro.local Coach[45616] INFO Starting Iter #2 ...
Self Play: 100%|██████████| 100/100 [38:39<00:00, 23.20s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2022-07-16 19:59:30 chris-pro.local Coach[45616] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1): 100%|██████████| 20/20 [07:10<00:00, 21.50s/it]
Arena.playGames (2): 100%|██████████| 20/20 [07:41<00:00, 23.09s/it]
2022-07-16 20:14:22 chris-pro.local Coach[45616] INFO NEW/PREV WINS : 27 / 13 ; DRAWS : 0
2022-07-16 20:14:22 chris-pro.local Coach[45616] INFO ACCEPTING NEW MODEL
2022-07-16 20:14:22 chris-pro.local Coach[45616] INFO Starting Iter #3 ...


Checkpoint Directory exists! 
Checkpoint Directory exists! 


Self Play: 100%|██████████| 100/100 [31:56<00:00, 19.16s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2022-07-16 20:53:48 chris-pro.local Coach[45616] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1):   0%|          | 0/20 [00:00<?, ?it/s]2022-07-16 20:54:08 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  10%|█         | 2/20 [00:39<05:44, 19.12s/it]2022-07-16 20:54:53 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  35%|███▌      | 7/20 [02:39<05:16, 24.32s/it]2022-07-16 20:56:39 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  80%|████████  | 16/20 [05:44<01:14, 18.51s/it]2022-07-16 20:59:47 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  90%|█████████ | 18/20 [06:25<00:39, 19.54s/it]2022-07-16 21:00:32 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1): 100%|██████████| 20/20 [07:03<00:00, 21.19s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2022-07-16 21:50:41 chris-pro.local Coach[45616] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1):  25%|██▌       | 5/20 [01:45<05:38, 22.56s/it]2022-07-16 21:52:40 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  65%|██████▌   | 13/20 [04:21<02:29, 21.33s/it]2022-07-16 21:55:15 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1):  95%|█████████▌| 19/20 [06:21<00:20, 20.43s/it]2022-07-16 21:57:23 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (1): 100%|██████████| 20/20 [06:42<00:00, 20.14s/it]
Arena.playGames (2):   5%|▌         | 1/20 [00:23<07:24, 23.40s/it]2022-07-16 21:58:05 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a workaround.
Arena.playGames (2):  15%|█▌        | 3/20 [01:17<07:25, 26.23s/it]2022-07-16 21:59:05 chris-pro.local MCTS[45616] ERROR All valid moves were masked, doing a work

Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

In [None]:
%matplotlib widget

game = SantoriniGame(5)

random_player = RandomPlayer(game).play
greedy_player = GreedySantoriniPlayer(game).play

args = dotdict({'numMCTSSims': 25, 'cpuct': 1.0})
mcts = MCTS(game, nnet, args)
alphago_player = lambda x: np.argmax(mcts.getActionProb(x, temp=0))

arena = Arena.Arena(alphago_player, greedy_player, game, display=game.display_3d)

%time oneWon, twoWon, draws = arena.playGames(5, verbose=False)
print("\AlphaGo won {} games, Greedy Player won {} games".format(oneWon, twoWon))

In [None]:
# Checkpoints and best model (if found) will be saved in this folder
# ('./temp/' is the 'checkpoint' value in the training configuration).
%ls temp