In [1]:
import numpy as np
import copy

from MCTS import MCTS
from Game import Game 
from model import AmoebaZeroModel

Using TensorFlow backend.


In [16]:
# Capacity of the replay buffer, counted in stored states (not games).
HISTORY_SIZE = 100000

# Number of stored states copied into one training batch.
BATCH_SIZE = 256

# Number of batches fitted during a single Pipeline.train() call.
TRAINING_LOOP_NR = 1000

# Number of games generated by a single Pipeline.self_play() call.
SELFPLAY_NR = 100

# Total number of evaluation games; half are started by each model.
EVAL_NR = 40

# Minimum win balance (trained wins minus best wins) needed for promotion.
THRESHOLD = 4

class Pipeline:
    """Self-play / train / evaluate loop for the Amoeba network.

    The pipeline keeps a fixed-size circular replay buffer of positions
    produced by self-play with the current best model, fits a candidate
    model on random batches drawn from that buffer, and promotes the
    candidate when it beats the best model by at least ``THRESHOLD`` games
    in a head-to-head match.
    """

    def __init__(self):
        """Allocate the replay buffer, the batch buffers and both models."""
        # Circular replay buffer: network input, policy target, value target.
        self.history_input = np.zeros((HISTORY_SIZE, 15, 15, 4))
        self.history_p     = np.zeros((HISTORY_SIZE, 225))
        self.history_v     = np.zeros((HISTORY_SIZE))
        # Pre-allocated buffers that are refilled for every training batch.
        self.batch_input   = np.zeros((BATCH_SIZE, 15, 15, 4))
        self.batch_p       = np.zeros((BATCH_SIZE, 225))
        self.batch_v       = np.zeros((BATCH_SIZE))
        # Next slot of the circular buffer that will be written.
        self.next_index    = 0
        # Number of valid slots in the buffer (saturates at HISTORY_SIZE).
        self.history_nr    = 0
        # Number of candidate models promoted (and saved) so far.
        self.version       = 0

        self.model_best    = AmoebaZeroModel()
        # NOTE(review): relies on AmoebaZeroModel being deep-copyable -- confirm.
        self.model_trained = copy.deepcopy(self.model_best)

    def main_loop(self):
        """Run the train / evaluate / promote cycle; never returns.

        One round of self-play fills the buffer before the first training
        pass. Each time the trained model is promoted it is saved under
        ``./models/`` (the directory is expected to exist already) and a
        fresh round of self-play is generated with the new best model.
        """
        nr = 0
        self.self_play()
        while True:
            print("Loop - ", nr)
            self.train()
            if self.evaluate_trained():
                self.version += 1
                self.model_trained.model.save("./models/saved_model_"+str(self.version)+".h5")
                print("New model saved. Version: ", self.version)
                self.model_best = copy.deepcopy(self.model_trained)
                self.self_play()
            nr += 1

    def self_play(self):
        """Play ``SELFPLAY_NR`` games with the best model and store every state.

        For each position the network input and the MCTS policy are written
        to the circular buffer; once the game is over every position of that
        game receives the game's final value as its value target.
        """
        for _ in range(SELFPLAY_NR):
            start_index = self.next_index
            stored = 0
            g = Game()
            mcts = MCTS(g, self.model_best.evaluate)
            while g.winner == 0:
                x, y, n = mcts.select_move()
                # Record the position *before* the selected move is played.
                g.fill_grids_for_nn(self.history_input, self.next_index)
                mcts.fill_p(self.history_p, self.next_index)
                self.next_index = (self.next_index + 1) % HISTORY_SIZE
                stored += 1
                g.move(x, y)
                mcts.reinit(g, n)

            # NOTE(review): every position of the game gets the same value,
            # regardless of the side to move -- confirm this matches the
            # encoding used by Game.get_final_value / fill_grids_for_nn.
            v = g.get_final_value()
            for offset in range(min(stored, HISTORY_SIZE)):
                self.history_v[(start_index + offset) % HISTORY_SIZE] = v

            # Track how much of the buffer holds real data so that train()
            # never samples slots that were not written yet.
            self.history_nr = min(self.history_nr + stored, HISTORY_SIZE)

    def train(self):
        """Fit the trained model on ``TRAINING_LOOP_NR`` random batches.

        Each batch consists of ``BATCH_SIZE`` states drawn uniformly, with
        replacement, from the filled part of the replay buffer.

        Raises:
            RuntimeError: if the replay buffer holds no states yet.
        """
        if self.history_nr <= 0:
            raise RuntimeError("Replay buffer is empty; call self_play() before train().")

        for _ in range(TRAINING_LOOP_NR):
            ind = np.random.randint(self.history_nr, size=BATCH_SIZE)
            # Copy into the pre-allocated buffers instead of allocating anew.
            self.batch_input[:] = self.history_input[ind]
            self.batch_p[:]     = self.history_p[ind]
            self.batch_v[:]     = self.history_v[ind]
            self.model_trained.fit(self.batch_input, (self.batch_p, self.batch_v), BATCH_SIZE)

    def _play_eval_game(self, first_evaluate, second_evaluate):
        """Play one game between two evaluators and return ``g.winner``.

        ``first_evaluate`` drives the moves with even index (0, 2, ...),
        ``second_evaluate`` the odd ones. A new search tree is built for
        every move.
        """
        evaluators = (first_evaluate, second_evaluate)
        g = Game()
        nr = 0
        while g.winner == 0:
            mcts = MCTS(g, evaluators[nr % 2])
            x, y, _ = mcts.select_move()
            g.move(x, y)
            nr += 1
        return g.winner

    def evaluate_trained(self):
        """Play ``EVAL_NR`` games between the models and decide on promotion.

        Half of the games are started by the best model, the other half by
        the trained one. Winner code 1 is treated as a win of the side that
        moved first and 2 as a win of the other side; any other result
        leaves the balance unchanged.

        Returns:
            bool: True when the trained model's win balance reaches
            ``THRESHOLD``.
        """
        balance = 0  # + if trained, - if best won more
        best = self.model_best.evaluate
        trained = self.model_trained.evaluate

        # best begins
        for _ in range(EVAL_NR // 2):
            winner = self._play_eval_game(best, trained)
            if winner == 1:
                balance -= 1
            elif winner == 2:
                balance += 1

        # trained begins
        for _ in range(EVAL_NR // 2):
            winner = self._play_eval_game(trained, best)
            if winner == 1:
                balance += 1
            elif winner == 2:
                balance -= 1

        return balance >= THRESHOLD
           
        
        

In [None]:
# Build the pipeline: allocates the replay/batch buffers and creates both models.
pl = Pipeline()
# Start training. main_loop() loops forever (no exit condition), so this cell
# only stops when the kernel is interrupted.
pl.main_loop()
