In [1]:
import importlib
import time

from tqdm import tqdm
from tqdm import trange

from multiprocessing import Pool, Queue

import pentago
from pentago import agent, memory, minimax, learning_agent, neural_model, monte_carlo, evaluate, game

In [2]:
# Parallel self-play settings.
# thread_count: number of worker processes handed to Pool.
# training_chunk_size: target number of games per collection round; each
# worker plays int(training_chunk_size / thread_count) of them.
thread_count, training_chunk_size = 20, 500

# Module-level handle to the progress queue that train_thread() writes to.
q = None

def load_queue(queue):
    """Pool initializer: publish the shared progress queue inside a worker.

    Stores ``queue`` in the module-level name ``q`` so that functions executed
    in the worker process can report progress through it.

    Args:
        queue: the queue object on which progress ticks are posted.
    """
    # Without the ``global`` declaration the assignment only creates a local
    # variable and the worker's module-level ``q`` is left untouched.
    global q
    q = queue
    
def train_thread(controller):
    """Play this worker's share of games and report one tick per game.

    Runs ``int(training_chunk_size / thread_count)`` games on ``controller``,
    putting ``1`` on the module-level queue ``q`` after each game so the parent
    process can advance its progress bar.

    The queue is deliberately NOT closed here. A pool worker can be handed
    more than one task (always the case when the pool is reused for several
    rounds), and ``put`` on a queue this process has already closed raises.
    The queue's feeder thread still flushes pending items on its own.

    Args:
        controller: object exposing ``play_game()`` and a ``view`` attribute.

    Returns:
        ``controller.view`` after the games have been played.
    """
    games_per_worker = int(training_chunk_size / thread_count)
    for _ in range(games_per_worker):
        controller.play_game()
        q.put(1)  # one progress tick per finished game
    return controller.view

In [4]:
# Collect the initial experience set: two depth-2 minimax agents play each
# other in `thread_count` worker processes.
view = memory.MemoryView()
controller = pentago.Controller([minimax.MinimaxSearchAgent(depth=2)] * 2, view)

q = Queue()
pbar = tqdm(total=int(training_chunk_size/thread_count)*thread_count, position=0)
pool = Pool(thread_count, load_queue, (q,))
# Every worker receives its own pickled copy of the same controller.
result = pool.map_async(train_thread, [controller] * thread_count)
pool.close()

try:
    # Drain progress ticks until the queue is empty and all workers are done.
    while True:
        if not q.empty():
            q.get()
            pbar.update()
        elif result.ready():
            break
        else:
            time.sleep(1)
    q.close()
    pool.join()
    q.join_thread()
finally:
    # Always release the bar, even if the run is interrupted.
    pbar.close()

# Workers return their controller's view; combine them into one view.
initial_view = memory.MemoryView().add_experiences(result.get())

100%|██████████| 500/500 [04:59<00:00,  2.20it/s]

In [5]:
import dill as pickle

In [6]:
# Persist the collected experiences; the context manager guarantees the file
# is flushed and closed before any later cell reads it back.
with open('initial_view.pkl', mode='wb') as view_file:
    pickle.dump(initial_view, view_file)

In [7]:
# Restore the experiences saved earlier.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open('initial_view.pkl', mode='rb') as view_file:
    initial_view = pickle.load(view_file)

In [8]:
# Re-import the modules under development (same order as before) so the agent
# below is built from their current source.
for _module in (neural_model, learning_agent, game, monte_carlo):
    importlib.reload(_module)

# Fresh agent with the network hyper-parameters used for this experiment.
neural_agent = learning_agent.NeuralAgent(
    model_params=dict(
        dense_regularization_const=1e-2,
        regularization_const=0,
        num_layers=5,
        kernel_size=3,
        num_filters=64,
    )
)

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Baseline before any fitting: 10 trials against a random agent, then
# 10 trials (verbose) against the default minimax agent.
tally_vs_random = evaluate.tally_wins(
    [agent.RandomAgent(), neural_agent],
    trials=10,
)
print(tally_vs_random)

tally_vs_minimax = evaluate.tally_wins(
    [minimax.MinimaxSearchAgent(), neural_agent],
    trials=10,
    verbose=True,
)
print(tally_vs_minimax)

In [None]:
# Train on the experiences gathered by the worker processes. Those live in
# `initial_view` (built from the workers' results and restorable from
# 'initial_view.pkl'); the parent's `view` was only handed to the controller
# that got pickled to the workers, so no game is ever played on it here.
neural_agent.fit(initial_view.get_experiences())

In [None]:
# Same two match-ups again, now that the agent has been fitted.
post_fit_vs_random = evaluate.tally_wins(
    [agent.RandomAgent(), neural_agent],
    trials=10,
)
print(post_fit_vs_random)

post_fit_vs_minimax = evaluate.tally_wins(
    [minimax.MinimaxSearchAgent(), neural_agent],
    trials=10,
)
print(post_fit_vs_minimax)

In [None]:
# Self-play training: repeat (play a chunk of games -> fit -> evaluate) until
# the stopping criterion against minimax is met. The pool and progress queue
# are created once and reused for every round.
q = Queue()
pool = Pool(thread_count, load_queue, (q,))

while True:
    view = memory.MemoryView()
    controller = pentago.Controller([neural_agent] * 2, view)
    pbar = tqdm(total=int(training_chunk_size/thread_count)*thread_count, position=0)

    # Every worker receives its own pickled copy of the same controller.
    result = pool.map_async(train_thread, [controller] * thread_count)

    try:
        # Drain progress ticks until the queue is empty and the round is done.
        while True:
            if not q.empty():
                q.get()
                pbar.update()
            elif result.ready():
                break
            else:
                time.sleep(1)
    finally:
        # Close this round's bar so bars do not pile up across rounds.
        pbar.close()

    # Workers return their controller's view; combine them into one view.
    view = memory.MemoryView().add_experiences(result.get())

    neural_agent.fit(view.get_experiences())
    wins = evaluate.tally_wins([minimax.MinimaxSearchAgent(), neural_agent], trials=10)
    print(wins)
    # Stop once the last tally entry exceeds 7 of the 10 trials
    # (presumably the neural agent's wins -- confirm against tally_wins).
    if wins[-1] > 7:
        break


In [None]:
# Tear down the worker pool and progress queue created for the training loop.
pool.close()      # no further tasks will be submitted
q.close()         # this process is done with the queue
pool.join()       # wait for the worker processes to exit
q.join_thread()   # wait for the queue's background thread, if any

In [None]:
# Persist the latest self-play experiences; the context manager guarantees
# the file is flushed and closed.
with open('trained_view.pkl', mode='wb') as view_file:
    pickle.dump(view, view_file)

In [None]:
# Restore the experiences saved earlier.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open('trained_view.pkl', mode='rb') as view_file:
    view = pickle.load(view_file)

In [None]:
# Export the agent's parameters via to_params() and show them.
params = neural_agent.to_params()
print(params)