In [1]:
from model.game import GameState, Action
from ai.MCTreeSearch import MCTreeSearch, Node, Edge
from ai.utils import ActionEncoder, StateStack, mirror_action, to_label, SampleBuilder, get_action_space
from ai.agent import AlphaZero
from copy import deepcopy
import numpy as np
import ai.config as config


In [2]:
class RejectedAction(Exception):
    """Signals that an action was refused instead of being applied to the game."""

In [3]:
def apply_and_update(game: GameState, playing_agent: AlphaZero, other_agent: AlphaZero, action: Action, action_id: int):
    """Play ``action`` on ``game`` and advance the agents' search roots to match.

    Args:
        game: Game state; mutated in place through ``apply_action``.
        playing_agent: Agent whose root is always advanced via ``update_root``.
        other_agent: Optional second agent to advance as well. Pass ``None``
            when only one agent takes part.
        action: The move to play.
        action_id: Identifier of ``action`` handed to each ``update_root`` call.

    Raises:
        RejectedAction: If ``game.is_legal_action(action)`` is falsy. The check
            runs before anything is modified, so neither the game nor the
            agents are touched when this is raised.
    """
    if not game.is_legal_action(action):
        # Name the offending move so the failure can be diagnosed from the
        # message alone.
        raise RejectedAction(f"illegal action rejected: {action!r} (action_id={action_id})")
    game.apply_action(action)
    playing_agent.update_root(action_id)
    if other_agent is not None:
        other_agent.update_root(action_id)


def build_trees(game: GameState, agent: AlphaZero):
    """Call ``agent.build_mcts`` with a ``StateStack`` wrapping a deep copy of ``game``.

    The copy means the stack handed to the agent does not share the caller's
    ``game`` object.
    """
    snapshot = deepcopy(game)
    root_stack = StateStack(snapshot)
    agent.build_mcts(root_stack)
    
def play_best(current_model, best_model, goes_first=True):
    """Play ``config.EPISODES`` games between the current and the best model.

    Args:
        current_model: Network driving the "Current AlphaZero" agent.
        best_model: Network driving the "Best AlphaZero" agent.
        goes_first: When true the current agent sits at player index 0 and the
            best agent at index 1; otherwise the seats and ``pov`` values are
            swapped.

    NOTE(review): nothing is returned and no per-game outcome is recorded, so
    callers cannot yet learn which model won — confirm the intended result.
    """
    current_Agent = AlphaZero(config.MCTS_SIMS, config.CPUCT, current_model, pov=0, name="Current AlphaZero")
    best_Agent = AlphaZero(config.MCTS_SIMS, config.CPUCT, best_model, pov=1, name="Best AlphaZero")

    # Seat order is indexed by game.get_player_turn().
    if goes_first:
        seats = (current_Agent, best_Agent)
    else:
        seats = (best_Agent, current_Agent)
        best_Agent.pov = 0
        current_Agent.pov = 1

    game = GameState(None, None)
    for _ in range(config.EPISODES):
        game.init()
        build_trees(game, best_Agent)
        build_trees(game, current_Agent)

        ply = 1
        while not game.is_terminal():
            mover: AlphaZero = seats[game.get_player_turn()]
            # tau is 1 for the opening plies and 0 from TURNS_UNTIL_TAU0 onwards.
            if ply < config.TURNS_UNTIL_TAU0:
                tau = 1
            else:
                tau = 0
            action, action_id, _state_stack, _value, _pi = mover.train_act(tau)
            # Both agents' roots are advanced with the move that was played.
            apply_and_update(game, current_Agent, best_Agent, action, action_id)
            ply += 1
            print('*', end='')

def self_play(current_model):
    """Let a single agent play ``config.EPISODES`` games against itself.

    Every move's state stack and ``pi`` are committed to a ``SampleBuilder``;
    at the end of each game the game's value is committed too.

    Args:
        current_model: Network driving the one "Current AlphaZero" agent.

    Returns:
        The ``SampleBuilder`` that received all committed moves and samples.
    """
    current_Agent = AlphaZero(config.MCTS_SIMS, config.CPUCT, current_model, pov=0, name="Current AlphaZero")
    game = GameState(None, None)
    sample_builder = SampleBuilder()

    for _ in range(config.EPISODES):
        game.init()
        build_trees(game, current_Agent)

        ply = 1
        while not game.is_terminal():
            # The same agent plays both sides, so its pov follows the side to move.
            current_Agent.pov = game.get_player_turn()
            if ply < config.TURNS_UNTIL_TAU0:
                tau = 1
            else:
                tau = 0
            action, action_id, state_stack, _search_value, pi = current_Agent.train_act(tau)
            apply_and_update(game, current_Agent, None, action, action_id)
            sample_builder.commit_move(state_stack, pi)
            ply += 1
            print('*', end='')

        # NOTE(review): the second argument is a hard-coded 0 — confirm what it
        # denotes for SampleBuilder.commit_sample.
        sample_builder.commit_sample(game.get_value(), 0)

    return sample_builder
    

In [4]:
from ai.model import AlphaZeroNetwork

# Number of entries returned by get_action_space(10, 10).
action_space = len(get_action_space(10, 10))

# Build both networks from one and the same argument list; the current model is
# constructed first, then the best model.
# NOTE(review): (10, 10, 20) is presumably the network input shape — confirm.
current_model, best_model = (
    AlphaZeroNetwork(config.REG_CONST, config.LEARNING_RATE,
                     (10, 10, 20), action_space, config.HIDDEN_CNN_LAYERS)
    for _ in range(2)
)

# Start the best model from the current model's weights.
best_model.model.set_weights(current_model.model.get_weights())

In [6]:
# Bind the name before the attempt: if a move is rejected, later cells then see
# an explicit None instead of a missing name or a stale SampleBuilder left over
# from an earlier run of this cell.
sample_builder = None
try:
    sample_builder = self_play(current_model)
except RejectedAction as e:
    # Show why self-play stopped; sample_builder stays None in this case.
    print(e)

********************************************************************************

In [17]:
import random
def fit(current_model: AlphaZeroNetwork, sample_builder:SampleBuilder, iterations: int = 10, batch_size: int = 32):
    """Train ``current_model`` on random minibatches drawn from ``sample_builder``.

    Each iteration draws ``min(config.BATCH_SIZE, number of samples)`` rows
    without replacement and calls ``current_model.fit`` for ``config.EPOCHS``
    epochs on them.

    Args:
        current_model: Network wrapper exposing ``fit``.
        sample_builder: Source of training rows via its ``samples`` attribute.
            Each row is indexed by ``'state'``, ``'value'`` and ``'policy'``.
        iterations: Number of minibatches to draw and train on (default 10).
        batch_size: ``batch_size`` forwarded to ``current_model.fit``
            (default 32).

    Returns:
        A list with whatever ``current_model.fit`` returned for each iteration,
        in order.

    Raises:
        ValueError: If ``sample_builder`` holds no samples.
    """
    samples = sample_builder.samples
    if len(samples) == 0:
        # Fail early with a clear message instead of handing empty arrays to
        # the network.
        raise ValueError("cannot fit: sample_builder contains no samples")

    # The draw size does not change between iterations, so compute it once.
    minibatch_size = min(config.BATCH_SIZE, len(samples))

    results = []
    for _ in range(iterations):
        minibatch = random.sample(samples, minibatch_size)
        training_states = np.array([row['state'].get_deep_representation_stack() for row in minibatch])
        training_targets = {'value_head': np.array([row['value'] for row in minibatch]),
                            'policy_head': np.array([row['policy'] for row in minibatch])}

        results.append(
            current_model.fit(training_states, training_targets, epochs=config.EPOCHS,
                              verbose=1, validation_split=0, batch_size=batch_size)
        )
    return results

In [23]:
# Train the current network on the samples gathered by self-play; the trailing
# ';' keeps the cell from echoing a return value.
fit(current_model, sample_builder);

Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples
Train on 80 samples


In [1]:
import seaborn as sns
import pandas as pd
import cufflinks as cf
import numpy as np
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot


In [2]:
# Prepare plotly's offline rendering for this notebook (function imported from
# plotly.offline). NOTE(review): connected=True presumably loads plotly.js
# from the network rather than embedding it — confirm against the plotly docs.
init_notebook_mode(connected=True)

In [3]:
# Put cufflinks into offline mode.
# NOTE(review): presumably makes cufflinks plots render locally — confirm.
cf.go_offline()

In [4]:
# Demo series: x counts 1..50, y holds one uniform random value in [0, 1) per x.
x = np.arange(1, 51)
y = np.random.rand(x.size)

In [5]:
import time
import plotly.graph_objects as go

In [6]:
# Empty live figure: axis titles first, then one filled scatter trace that the
# animation cell updates in place.
fig = go.FigureWidget()
(
    fig
    .update_layout(xaxis_title="Turn", yaxis_title='Expected value')
    .add_scatter(fill='tozeroy')
)
fig

FigureWidget({
    'data': [{'fill': 'tozeroy', 'type': 'scatter', 'uid': '253d9a98-323f-4395-ba5d-2f9b4733213…

In [7]:
# Reveal the series one point at a time. The slice end runs from 1 to len(x)
# inclusive, so the first frame already shows a point and the final frame shows
# the complete series. Ending at len(x) - 1 would never draw the last point.
for i in range(1, len(x) + 1):
    time.sleep(0.3)  # pause so each frame is visible
    # Update x and y together so the widget never shows mismatched lengths.
    with fig.batch_update():
        fig.data[0].x = x[:i]
        fig.data[0].y = y[:i]