In [1]:
import multiprocessing as mp
import os
import shutil

import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from go_ai import data, metrics, policies
from go_ai.models import value_model

# Hyperparameters

In [2]:
# Side length of the Go board; passed to the environment, the value net and every policy config.
BOARD_SIZE = 9

In [25]:
# Number of passes through the training loop (self-play -> optimize -> evaluate).
ITERATIONS = 256
# Self-play games generated at the start of every iteration.
EPISODES_PER_ITERATION = 128
# Games played per evaluation match-up (vs. random, vs. checkpoint, vs. greedy).
NUM_EVAL_GAMES = 32

In [4]:
# Directory handed to data.make_episodes as `outdir` and read back by data.episodes_from_dir.
EPISODES_DIR = './data/'

In [5]:
# Target mini-batch size: the training loop splits the episode data into
# len(data) // BATCH_SIZE chunks, so chunks hold at least this many samples.
BATCH_SIZE = 32
# Learning rate passed to value_model.optimize_val_net.
LEARNING_RATE = 2e-3

In [6]:
# Directory that holds every weights file (trailing slash is relied on below).
WEIGHTS_DIR = 'model_weights/'
# Accepted ("checkpoint") weights, one file per board size.
CHECKPOINT_PATH = f'{WEIGHTS_DIR}checkpoint_{BOARD_SIZE}x{BOARD_SIZE}.h5'
# Weights of the model currently being trained.
TMP_WEIGHTS_PATH = f'{WEIGHTS_DIR}tmp.h5'
# True: start from the weights stored at CHECKPOINT_PATH.
# False: overwrite CHECKPOINT_PATH with the freshly initialised network.
LOAD_SAVED_MODELS = False

In [7]:
# Worker count passed to data.make_episodes: one per CPU reported by the OS.
NUM_WORKERS = mp.cpu_count()

# Go Environment
Train on a small board so that training is fast and debugging is easy.

In [8]:
# Go environment registered by the gym_go package, sized to BOARD_SIZE.
# Used by the symmetry demo and by metrics.gen_traj_fig in the training loop.
go_env = gym.make('gym_go:go-v0', size=BOARD_SIZE)

# Metrics and TensorBoard

In [9]:
# Image file the training loop overwrites with a sample trajectory whenever a new model is accepted.
DEMO_TRAJECTORY_PATH = 'logs/a_trajectory.png'

Metrics

In [10]:
# Metric objects shared with the optimizer: a running mean per loss,
# plus an accuracy metric stored under 'pred_win_acc'.
tb_metrics = {
    loss_name: tf.keras.metrics.Mean(loss_name, dtype=tf.float32)
    for loss_name in ('val_loss', 'move_loss')
}
tb_metrics['pred_win_acc'] = tf.keras.metrics.Accuracy()

TensorBoard

# Machine Learning Models

In [11]:
# Build the value network for a BOARD_SIZE board (architecture lives in go_ai.models.value_model).
val_net = value_model.make_val_net(BOARD_SIZE)

In [12]:
# Write an architecture diagram to disk; Keras needs pydot and graphviz installed for this.
# The result is bound to `_` so the cell shows no output value.
# NOTE(review): assumes the 'logs/' directory already exists — confirm.
_ = tf.keras.utils.plot_model(val_net, to_file='logs/model.png')

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [13]:
# Print the layer-by-layer summary (output shapes and parameter counts).
val_net.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 9, 9, 128)         7040      
_________________________________________________________________
batch_normalization (BatchNo (None, 9, 9, 128)         512       
_________________________________________________________________
re_lu (ReLU)                 (None, 9, 9, 128)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 128)         147584    
_________________________________________________________________
batch_normalization_1 (Batch (None, 9, 9, 128)         512       
_________________________________________________________________
re_lu_1 (ReLU)               (None, 9, 9, 128)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          7

In [14]:
# Keras' HDF5 writer does not create missing parent directories, so make sure
# the weights directory exists before the first save (e.g. on a fresh checkout).
os.makedirs(WEIGHTS_DIR, exist_ok=True)

if LOAD_SAVED_MODELS:
    # Resume: pull the previously accepted weights into the in-memory network.
    val_net.load_weights(CHECKPOINT_PATH)
    print("Starting from checkpoint")
else:
    # Fresh start: the randomly initialised network becomes the first checkpoint.
    val_net.save_weights(CHECKPOINT_PATH)
    print("Initialized checkpoint and temp")

# Sync temp with checkpoint: in both branches val_net now holds the checkpoint
# weights, so writing them out makes the two weight files identical.
val_net.save_weights(TMP_WEIGHTS_PATH)

Initialized checkpoint and temp


# Policies

In [15]:
# Policy config that points at the weights currently being trained (TMP_WEIGHTS_PATH).
temp_policy_args = dict(
    mode='values',
    board_size=BOARD_SIZE,
    model_path=TMP_WEIGHTS_PATH,
)

In [16]:
# Policy config that points at the last accepted weights (CHECKPOINT_PATH).
checkpoint_policy_args = dict(
    mode='values',
    board_size=BOARD_SIZE,
    model_path=CHECKPOINT_PATH,
)

In [17]:
# Baseline opponent config using the 'random' mode; no model weights involved.
random_policy_args = dict(
    mode='random',
    board_size=BOARD_SIZE,
)

In [18]:
# Baseline opponent config using the 'greedy' mode; no model weights involved.
greedy_policy_args = dict(
    mode='greedy',
    board_size=BOARD_SIZE,
)

# Demo and Time Games

Symmetries

In [19]:
%%time
# Play one fixed move on a fresh board and save a plot of the resulting
# state's symmetries, timing the whole cell.
go_env.reset()
action = (1, 2)
# Only the next state is needed; the other three values returned by step() are discarded.
next_state, _, _, _ = go_env.step(action)
# NOTE(review): assumes the 'logs/' directory already exists — confirm.
metrics.plot_symmetries(next_state, 'logs/symmetries.jpg')

CPU times: user 125 ms, sys: 6.69 ms, total: 132 ms
Wall time: 131 ms


With replay memory

In [21]:
%%time
# Time a single self-play game (temp weights on both sides) on one worker,
# writing the episode to EPISODES_DIR. The cell's value is make_episodes' return,
# which the training loop treats as a win rate.
data.make_episodes(temp_policy_args, temp_policy_args, 1, num_workers=1, 
                   outdir=EPISODES_DIR)

Episode worker: 1it [00:17, 17.57s/it]
values vs. values: 100%|██████████| 1/1 [00:00<00:00, 984.58it/s, 100.0% WIN]

CPU times: user 21.7 s, sys: 2.43 s, total: 24.1 s
Wall time: 17.8 s





1.0

# Train

In [26]:
# Win-rate thresholds (temp weights vs. checkpoint) that decide what happens
# to the freshly trained weights after each iteration.
ACCEPT_WIN_RATE = 0.6          # strictly above: promote temp weights to checkpoint
KEEP_TRAINING_WIN_RATE = 0.5   # at or above (but not accepted): keep training temp weights

for iteration in range(ITERATIONS):
    # ---- Optimization ----

    # Self-play with the temp weights on both sides; episodes are written to EPISODES_DIR
    data.make_episodes(temp_policy_args, temp_policy_args, EPISODES_PER_ITERATION,
                       num_workers=NUM_WORKERS, outdir=EPISODES_DIR)
    # Read in the episode data
    np_data = data.episodes_from_dir(EPISODES_DIR)

    # np.array_split raises ValueError when asked for 0 sections, which is what
    # the floor division yields if fewer than BATCH_SIZE samples were produced.
    num_batches = max(1, len(np_data[0]) // BATCH_SIZE)
    batched_np_data = [np.array_split(datum, num_batches) for datum in np_data]
    # Regroup from "list of batches per field" to "tuple of fields per batch"
    batched_mem = list(zip(*batched_np_data))

    # Optimize
    # NOTE(review): presumably loads and re-saves the weights at TMP_WEIGHTS_PATH
    # (the evaluation below reads them through temp_policy_args) — confirm.
    value_model.optimize_val_net(temp_policy_args, batched_mem, LEARNING_RATE, tb_metrics)
    # Resets the metrics
    metrics.reset_metrics(tb_metrics)

    # ---- Evaluation ----

    # Evaluate against checkpoint model and other baselines
    rand_win_rate = data.make_episodes(temp_policy_args, random_policy_args,
                                       NUM_EVAL_GAMES, num_workers=NUM_WORKERS)
    opp_win_rate = data.make_episodes(temp_policy_args, checkpoint_policy_args,
                                      NUM_EVAL_GAMES, num_workers=NUM_WORKERS)

    stats = f"{100*opp_win_rate:.1f}%O, {100*rand_win_rate:.1f}%R"

    # If it's better than the checkpoint, update
    if opp_win_rate > ACCEPT_WIN_RATE:
        shutil.copy(TMP_WEIGHTS_PATH, CHECKPOINT_PATH)
        print(f"{stats} Accepted new model")

        # Extra benchmark for accepted models only; the result is not used
        # further in this cell (the progress bar reports it).
        greed_win_rate = data.make_episodes(temp_policy_args, greedy_policy_args,
                                            NUM_EVAL_GAMES, num_workers=NUM_WORKERS)

        # Plot samples of states and response heatmaps
        fig = metrics.gen_traj_fig(go_env, temp_policy_args)
        fig.savefig(DEMO_TRAJECTORY_PATH)
        # Close this specific figure rather than whichever one happens to be current
        plt.close(fig)

    elif opp_win_rate >= KEEP_TRAINING_WIN_RATE:
        # Not clearly better, not worse: leave both weight files untouched
        print(f"{stats} Continuing to train current weights")

    else:
        # Worse than the checkpoint: discard the temp weights by restoring the checkpoint
        shutil.copy(CHECKPOINT_PATH, TMP_WEIGHTS_PATH)
        print(f"{stats} Rejected new model")

values vs. values: 100%|██████████| 128/128 [15:14<00:00,  7.14s/it, 46.9% WIN]
Updating: 100%|██████████| 500/500 [00:34<00:00, 14.50it/s, 64.3% ACC, 0.941VL]
values vs. random: 100%|██████████| 32/32 [01:42<00:00,  3.20s/it, 84.4% WIN]
values vs. values: 100%|██████████| 32/32 [04:52<00:00,  9.15s/it, 53.1% WIN]


53.1%O, 84.4%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [20:20<00:00,  9.54s/it, 55.9% WIN] 
Updating: 100%|██████████| 579/579 [00:40<00:00, 14.20it/s, 70.1% ACC, 0.710VL]
values vs. random: 100%|██████████| 32/32 [02:00<00:00,  3.75s/it, 96.9% WIN]
values vs. values: 100%|██████████| 32/32 [04:56<00:00,  9.26s/it, 75.0% WIN]


75.0%O, 96.9%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:28<00:00,  8.39s/it, 12.5% WIN]
values vs. values: 100%|██████████| 128/128 [18:32<00:00,  8.69s/it, 51.2% WIN] 
Updating: 100%|██████████| 458/458 [00:32<00:00, 14.15it/s, 79.5% ACC, 0.454VL]
values vs. random: 100%|██████████| 32/32 [02:04<00:00,  3.89s/it, 96.9% WIN] 
values vs. values: 100%|██████████| 32/32 [04:40<00:00,  8.77s/it, 59.4% WIN]


59.4%O, 96.9%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [13:23<00:00,  6.28s/it, 53.9% WIN] 
Updating: 100%|██████████| 334/334 [00:23<00:00, 14.10it/s, 65.2% ACC, 0.831VL]
values vs. random: 100%|██████████| 32/32 [01:44<00:00,  3.26s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:52<00:00,  7.26s/it, 100.0% WIN]


100.0%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [03:58<00:00,  7.45s/it, 93.8% WIN]
values vs. values: 100%|██████████| 128/128 [22:12<00:00, 10.41s/it, 47.7% WIN]
Updating: 100%|██████████| 517/517 [00:36<00:00, 14.28it/s, 81.0% ACC, 0.493VL]
values vs. random: 100%|██████████| 32/32 [01:39<00:00,  3.11s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:50<00:00,  9.08s/it, 50.0% WIN]


50.0%O, 100.0%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [11:42<00:00,  5.49s/it, 54.3% WIN]
Updating: 100%|██████████| 260/260 [00:18<00:00, 14.24it/s, 69.3% ACC, 0.748VL]
values vs. random: 100%|██████████| 32/32 [01:42<00:00,  3.21s/it, 81.2% WIN]
values vs. values: 100%|██████████| 32/32 [02:55<00:00,  5.50s/it, 14.1% WIN]


14.1%O, 81.2%R Rejected new model


values vs. values: 100%|██████████| 128/128 [21:12<00:00,  9.94s/it, 50.8% WIN]
Updating: 100%|██████████| 493/493 [00:34<00:00, 14.09it/s, 81.9% ACC, 0.464VL]
values vs. random: 100%|██████████| 32/32 [01:47<00:00,  3.35s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [05:48<00:00, 10.89s/it, 90.6% WIN]


90.6%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:16<00:00,  8.00s/it, 96.9% WIN]
values vs. values: 100%|██████████| 128/128 [21:11<00:00,  9.93s/it, 44.5% WIN]
Updating: 100%|██████████| 552/552 [00:38<00:00, 14.36it/s, 81.8% ACC, 0.468VL]
values vs. random: 100%|██████████| 32/32 [01:50<00:00,  3.46s/it, 96.9% WIN] 
values vs. values: 100%|██████████| 32/32 [03:29<00:00,  6.56s/it, 45.3% WIN]


45.3%O, 96.9%R Rejected new model


values vs. values: 100%|██████████| 128/128 [21:07<00:00,  9.90s/it, 48.4% WIN] 
Updating: 100%|██████████| 556/556 [00:39<00:00, 14.04it/s, 82.2% ACC, 0.450VL]
values vs. random: 100%|██████████| 32/32 [01:35<00:00,  2.97s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [05:26<00:00, 10.19s/it, 62.5% WIN]


62.5%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:15<00:00,  7.99s/it, 93.8% WIN] 
values vs. values: 100%|██████████| 128/128 [17:20<00:00,  8.13s/it, 49.2% WIN]
Updating: 100%|██████████| 461/461 [00:32<00:00, 14.18it/s, 75.6% ACC, 0.663VL]
values vs. random: 100%|██████████| 32/32 [01:49<00:00,  3.41s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [01:39<00:00,  3.12s/it, 6.2% WIN]


6.2%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [17:40<00:00,  8.29s/it, 50.0% WIN]
Updating: 100%|██████████| 464/464 [00:33<00:00, 13.94it/s, 75.5% ACC, 0.637VL]
values vs. random: 100%|██████████| 32/32 [01:37<00:00,  3.04s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:20<00:00,  8.13s/it, 37.5% WIN]


37.5%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [16:45<00:00,  7.85s/it, 46.9% WIN]
Updating: 100%|██████████| 442/442 [00:31<00:00, 13.98it/s, 71.7% ACC, 0.667VL]
values vs. random: 100%|██████████| 32/32 [02:28<00:00,  4.63s/it, 18.8% WIN]
values vs. values: 100%|██████████| 32/32 [00:30<00:00,  1.04it/s, 3.1% WIN]


3.1%O, 18.8%R Rejected new model


values vs. values: 100%|██████████| 128/128 [17:07<00:00,  8.03s/it, 43.8% WIN]
Updating: 100%|██████████| 449/449 [00:32<00:00, 13.96it/s, 75.1% ACC, 0.638VL]
values vs. random: 100%|██████████| 32/32 [01:42<00:00,  3.20s/it, 93.8% WIN]
values vs. values: 100%|██████████| 32/32 [03:06<00:00,  5.84s/it, 53.1% WIN]


53.1%O, 93.8%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [05:02<00:00,  2.36s/it, 51.6% WIN]
Updating: 100%|██████████| 91/91 [00:06<00:00, 13.87it/s, 69.6% ACC, 0.817VL]
values vs. random: 100%|██████████| 32/32 [02:02<00:00,  3.83s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:55<00:00,  7.37s/it, 75.0% WIN]


75.0%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:17<00:00,  8.05s/it, 100.0% WIN]
values vs. values: 100%|██████████| 128/128 [14:26<00:00,  6.77s/it, 54.7% WIN] 
Updating: 100%|██████████| 353/353 [00:24<00:00, 14.14it/s, 78.4% ACC, 0.595VL]
values vs. random: 100%|██████████| 32/32 [01:36<00:00,  3.03s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:37<00:00,  6.79s/it, 42.2% WIN]


42.2%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [14:38<00:00,  6.87s/it, 53.1% WIN] 
Updating: 100%|██████████| 368/368 [00:26<00:00, 13.98it/s, 78.0% ACC, 0.583VL]
values vs. random: 100%|██████████| 32/32 [02:26<00:00,  4.57s/it, 12.5% WIN]
values vs. values: 100%|██████████| 32/32 [00:31<00:00,  1.02it/s, 3.1% WIN]


3.1%O, 12.5%R Rejected new model


values vs. values: 100%|██████████| 128/128 [14:37<00:00,  6.86s/it, 41.4% WIN]
Updating: 100%|██████████| 360/360 [00:25<00:00, 14.11it/s, 72.5% ACC, 0.699VL]
values vs. random: 100%|██████████| 32/32 [01:35<00:00,  3.00s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:12<00:00,  7.90s/it, 50.0% WIN]


50.0%O, 100.0%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [17:20<00:00,  8.13s/it, 48.4% WIN]
Updating: 100%|██████████| 473/473 [00:33<00:00, 14.06it/s, 80.1% ACC, 0.484VL]
values vs. random: 100%|██████████| 32/32 [01:52<00:00,  3.52s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:01<00:00,  7.55s/it, 59.4% WIN]


59.4%O, 100.0%R Continuing to train current weights


values vs. values: 100%|██████████| 128/128 [14:10<00:00,  6.64s/it, 39.1% WIN] 
Updating: 100%|██████████| 321/321 [00:22<00:00, 14.18it/s, 69.7% ACC, 0.763VL]
values vs. random: 100%|██████████| 32/32 [01:42<00:00,  3.19s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:59<00:00,  7.48s/it, 62.5% WIN]


62.5%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:36<00:00,  8.63s/it, 100.0% WIN]
values vs. values: 100%|██████████| 128/128 [12:00<00:00,  5.63s/it, 56.2% WIN]
Updating: 100%|██████████| 260/260 [00:18<00:00, 14.00it/s, 75.8% ACC, 0.625VL]
values vs. random: 100%|██████████| 32/32 [02:15<00:00,  4.25s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:22<00:00,  8.19s/it, 43.8% WIN]


43.8%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [11:45<00:00,  5.51s/it, 52.0% WIN]
Updating: 100%|██████████| 253/253 [00:17<00:00, 14.09it/s, 79.1% ACC, 0.559VL]
values vs. random: 100%|██████████| 32/32 [01:46<00:00,  3.33s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:04<00:00,  7.63s/it, 43.8% WIN]


43.8%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [12:35<00:00,  5.91s/it, 53.1% WIN]
Updating: 100%|██████████| 273/273 [00:20<00:00, 13.46it/s, 69.7% ACC, 0.761VL]
values vs. random: 100%|██████████| 32/32 [01:35<00:00,  2.98s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [04:47<00:00,  8.98s/it, 34.4% WIN]


34.4%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [12:03<00:00,  5.65s/it, 59.4% WIN]
Updating: 100%|██████████| 254/254 [00:18<00:00, 13.99it/s, 78.7% ACC, 0.605VL]
values vs. random: 100%|██████████| 32/32 [01:58<00:00,  3.70s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:31<00:00,  6.61s/it, 75.0% WIN]


75.0%O, 100.0%R Accepted new model


values vs. greedy: 100%|██████████| 32/32 [04:15<00:00,  8.00s/it, 100.0% WIN]
values vs. values: 100%|██████████| 128/128 [17:47<00:00,  8.34s/it, 47.7% WIN] 
Updating: 100%|██████████| 475/475 [00:34<00:00, 13.79it/s, 78.4% ACC, 0.551VL]
values vs. random: 100%|██████████| 32/32 [01:36<00:00,  3.01s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [03:23<00:00,  6.37s/it, 6.2% WIN]


6.2%O, 100.0%R Rejected new model


values vs. values: 100%|██████████| 128/128 [18:15<00:00,  8.56s/it, 53.9% WIN]
Updating: 100%|██████████| 482/482 [00:33<00:00, 14.24it/s, 76.6% ACC, 0.600VL]
values vs. random: 100%|██████████| 32/32 [01:43<00:00,  3.24s/it, 100.0% WIN]
values vs. values: 100%|██████████| 32/32 [05:06<00:00,  9.59s/it, 43.8% WIN]


43.8%O, 100.0%R Rejected new model


values vs. values:   0%|          | 0/128 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Evaluate

Play against our AI

In [27]:
# Opponent config using the 'human' mode, for playing against the trained model.
human_policy_args = dict(
    mode='human',
    board_size=BOARD_SIZE,
)

In [29]:
# Load the last accepted (checkpoint) weights into the in-memory network.
val_net.load_weights(CHECKPOINT_PATH)

In [30]:
# Export the full model (not just the weights). The file name is derived from
# BOARD_SIZE so it stays accurate if the board size is changed; for the current
# setting of 9 it is still 'novice_9x9.h5'.
val_net.save(f'novice_{BOARD_SIZE}x{BOARD_SIZE}.h5')

In [28]:
# Play one interactive game, checkpoint policy vs. the 'human' policy, on a single
# worker. No `outdir` is passed here, unlike the self-play calls.
data.make_episodes(checkpoint_policy_args, human_policy_args, 1, num_workers=1)

Episode worker: 0it [00:00, ?it/s]

    0   1   2   3   4   5   6   7   8
  -------------------------------------
0 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
1 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
2 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
3 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
4 | . | B | . | . | . | . | . | . | . |
  -------------------------------------
5 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
6 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
7 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
8 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
	Turn: W, Last Turn Passed: False, Game Over: 0
	Black Area: 81, White Area: 0

Enter coordinates separated by space (`q` to quit)
4 4
    0   1   2   3   4   5   6   7   8
  ---------------------------

Enter coordinates separated by space (`q` to quit)
4 6
    0   1   2   3   4   5   6   7   8
  -------------------------------------
0 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
1 | . | . | B | . | . | . | . | . | . |
  -------------------------------------
2 | . | . | . | . | W | W | W | . | . |
  -------------------------------------
3 | . | . | . | W | B | B | B | W | . |
  -------------------------------------
4 | . | B | . | B | W | . | W | . | . |
  -------------------------------------
5 | . | . | . | B | W | W | . | . | . |
  -------------------------------------
6 | . | . | . | . | B | B | B | . | . |
  -------------------------------------
7 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
8 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
	Turn: W, Last Turn Passed: False, Game Over: 0
	Black Area: 10, White Area: 9

Enter coordinates separated by space (`q` to quit)
4 5
    0   1   

Enter coordinates separated by space (`q` to quit)
7 5
    0   1   2   3   4   5   6   7   8
  -------------------------------------
0 | . | . | . | . | . | . | . | . | . |
  -------------------------------------
1 | . | . | B | . | . | . | . | . | . |
  -------------------------------------
2 | . | . | B | . | W | W | W | . | . |
  -------------------------------------
3 | . | . | . | W | . | . | . | W | . |
  -------------------------------------
4 | . | B | B | B | W | W | W | . | . |
  -------------------------------------
5 | . | B | W | B | W | W | . | . | . |
  -------------------------------------
6 | . | B | W | W | B | B | B | B | . |
  -------------------------------------
7 | . | B | B | W | B | W | . | . | . |
  -------------------------------------
8 | . | . | W | W | . | . | . | . | . |
  -------------------------------------
	Turn: W, Last Turn Passed: False, Game Over: 0
	Black Area: 15, White Area: 20

Enter coordinates separated by space (`q` to quit)
8 4
    0   1  

Enter coordinates separated by space (`q` to quit)
1 4
    0   1   2   3   4   5   6   7   8
  -------------------------------------
0 | . | . | . | B | . | . | . | . | . |
  -------------------------------------
1 | . | . | B | W | W | B | B | B | . |
  -------------------------------------
2 | . | . | B | . | W | W | W | B | . |
  -------------------------------------
3 | . | . | B | W | . | . | . | W | . |
  -------------------------------------
4 | . | B | B | B | W | W | W | . | B |
  -------------------------------------
5 | . | B | W | B | W | W | W | W | W |
  -------------------------------------
6 | . | B | W | W | . | . | . | . | . |
  -------------------------------------
7 | . | B | B | W | . | W | W | . | W |
  -------------------------------------
8 | . | . | W | W | W | . | . | W | . |
  -------------------------------------
	Turn: W, Last Turn Passed: False, Game Over: 0
	Black Area: 17, White Area: 39

Enter coordinates separated by space (`q` to quit)
0 4
    0   1  

Enter coordinates separated by space (`q` to quit)
p
    0   1   2   3   4   5   6   7   8
  -------------------------------------
0 | . | . | B | B | W | B | . | . | . |
  -------------------------------------
1 | . | . | B | W | W | B | B | B | . |
  -------------------------------------
2 | . | . | B | B | W | W | W | B | W |
  -------------------------------------
3 | . | . | B | . | B | . | W | W | W |
  -------------------------------------
4 | . | B | B | B | W | W | W | W | . |
  -------------------------------------
5 | . | B | W | B | W | W | W | W | W |
  -------------------------------------
6 | . | B | W | W | . | . | . | B | . |
  -------------------------------------
7 | . | B | B | W | . | W | W | . | W |
  -------------------------------------
8 | . | W | W | W | W | . | . | W | . |
  -------------------------------------
	Turn: W, Last Turn Passed: False, Game Over: 0
	Black Area: 22, White Area: 35

Enter coordinates separated by space (`q` to quit)
p
    0   1   2  

Episode worker: 1it [04:00, 240.51s/it]

values vs. human:   0%|          | 0/1 [00:00<?, ?it/s][A
values vs. human: 100%|██████████| 1/1 [00:00<00:00, 450.23it/s, 100.0% WIN]


1.0