In [1]:
import random
import time
from copy import deepcopy
from typing import Optional

import numpy as np
import tensorflow as tf

from model.international_game import InternationalGame
from ai.agent import AlphaZero
from ai.model import NeuralNetwork

import ai.utils as ut
import ai.config as config
import ai.modified_tree_search as mts
import model.game as game

In [2]:
def get_models():
    """Create the "current" and "best" networks, both seeded from the saved best model.

    The weights returned by ``ut.load_best_model()`` are copied into two
    independently constructed ``NeuralNetwork`` instances.

    Returns:
        tuple: ``(current_NN, best_NN)``.
    """
    saved_model = ut.load_best_model()

    networks = []
    for _ in range(2):
        # (10, 10, 25) is passed as the third constructor argument
        # (presumably the input shape - TODO confirm against NeuralNetwork).
        networks.append(NeuralNetwork(config.REG_CONST, config.LEARNING_RATE,
                                      (10, 10, 25), len(ut.get_action_space()),
                                      config.HIDDEN_CNN_LAYERS))

    for network in networks:
        network.model.set_weights(saved_model.get_weights())

    current_NN, best_NN = networks
    return current_NN, best_NN

In [3]:
class RejectedActionError(Exception):
    """Signals that an agent chose an action the game does not accept as legal.

    Carries a snapshot of the context (action, game, tau and both agents) so the
    failure can be inspected after the fact.
    """

    def __init__(self, action, game, tau, agent, other_agent):
        # Keep every piece of context as a plain attribute for post-mortem inspection.
        self.action, self.game, self.tau = action, game, tau
        self.agent, self.other_agent = agent, other_agent

class TreeError(Exception):
    """Signals a failure inside the search tree while an agent was choosing a move.

    Carries the offending action id (may be ``None``) together with the game,
    tau and both agents so the failure can be inspected after the fact.
    """

    def __init__(self, action_id, game, tau, agent, other_agent):
        # Keep every piece of context as a plain attribute for post-mortem inspection.
        self.action_id, self.game, self.tau = action_id, game, tau
        self.agent, self.other_agent = agent, other_agent

In [4]:
# Seed Python's `random` module (used below by random.shuffle / random.sample) for reproducible runs.
random.seed(101)

In [5]:
# Seed NumPy's legacy global RNG with the same value as the other seeds.
np.random.seed(101)

In [6]:
# Seed TensorFlow's global RNG with the same value as the other seeds.
tf.random.set_seed(101)

In [7]:
def play_match(agent: AlphaZero, 
               model: NeuralNetwork, 
               other_agent:Optional[AlphaZero]=None, 
               other_model:Optional[NeuralNetwork]=None, turns_until_tau0=0):
    """Play one full game and collect training samples from it.

    When ``other_agent`` is ``None`` the game is self-play: ``agent`` moves for
    both sides. Otherwise ``agent`` moves when ``current_game.current_turn == 1``
    and ``other_agent`` moves on every other turn.

    Args:
        agent: agent that plays as player 1 (and as both players in self-play).
        model: network handed to ``agent.build_mct``.
        other_agent: optional opponent; enables evaluation mode.
        other_model: network handed to ``other_agent.build_mct``; required
            whenever ``other_agent`` is given.
        turns_until_tau0: number of initial turns played with ``tau=1``; all
            later turns use ``tau=0``.

    Returns:
        tuple: ``(sample_builder, winner)`` where ``winner`` is ``'agent'``,
        ``'other'`` or ``'draw'``.

    Raises:
        ValueError: ``other_agent`` was given without ``other_model``.
        TreeError: the acting agent raised ``KeyError`` while searching, or
            returned a state stack whose ``head`` is ``None``.
        RejectedActionError: the chosen action is not legal in the real game.
    """
    self_play = other_agent is None

    current_game = InternationalGame(1, None, None, None)
    sample_builder = ut.SampleBuilder()

    current_game.init()

    # Every agent gets its own deep copy so tree search never touches the real game.
    agent.build_mct(ut.GameState(deepcopy(current_game)), model)
    if not self_play:
        if other_model is None:
            raise ValueError("illegal argument: other_model can't be None when other_agent isn't")
        other_agent.build_mct(ut.GameState(deepcopy(current_game)), other_model)

    turn = 0
    while not current_game.end():
        if self_play or current_game.current_turn == 1:
            current_agent = agent
        else:
            current_agent = other_agent

        tau = 1 if turn < turns_until_tau0 else 0

        try:
            action, state_stack, _, pi = current_agent.train_act(tau)
        except KeyError as e:
            # A KeyError raised without arguments must not be masked by an IndexError here.
            missing_key = e.args[0] if e.args else None
            raise TreeError(missing_key, current_game, tau, agent, other_agent) from e

        if not current_game.is_legal_action(action):
            raise RejectedActionError(action, current_game, tau, agent, other_agent)

        current_game.apply_action(action)
        if state_stack.head is None:
            raise TreeError(None, current_game, tau, agent, other_agent)
        sample_builder.add_move(state_stack, pi)

        # Notify each participating agent exactly once. The previous code updated
        # `current_agent` and then `other_agent`, so on the opponent's turns
        # `other_agent` was updated twice and `agent` not at all.
        agent.on_update(action)
        if not self_play:
            other_agent.on_update(action)

        turn += 1

        # Progress indicator: one star per move, line break every 20 moves.
        print('*', end='' if turn % 20 != 0 else '\n')

    print()

    # Any winner code other than 1 or 2 is reported as a draw.
    winner = {1: 'agent', 2: 'other'}.get(current_game.get_winner(), 'draw')

    value = ut.evaluate(current_game)

    sample_builder.commit_sample(value, game.MAXIMIZER)

    return sample_builder, winner

In [8]:
def train_batches(minibatch, batch_size):
    """Yield shuffled training batches built from ``minibatch``.

    Each row of ``minibatch`` must provide ``row['state']`` (an object with
    ``get_deep_representation_stack()``), ``row['value']`` and ``row['policy']``.
    Every sample appears in exactly one yielded batch; the last batch may be
    shorter than ``batch_size``.

    Args:
        minibatch: sequence of sample rows as described above.
        batch_size: maximum number of samples per yielded batch (must be > 0).

    Yields:
        tuple: ``(states, targets)`` where ``targets`` is a dict with the keys
        ``'value_head'`` and ``'policy_head'``, aligned with ``states``.

    Raises:
        ValueError: ``batch_size`` is not positive (raised on first iteration).
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    training_states = np.array([row['state'].get_deep_representation_stack() for row in minibatch])
    value_targets = np.array([row['value'] for row in minibatch])
    policy_targets = np.array([row['policy'] for row in minibatch])

    # Shuffle sample positions, then cut the shuffled order into consecutive
    # chunks. The previous code looped over an undefined name and multiplied
    # sample indices by batch_size, producing out-of-range (empty) slices.
    order = list(range(len(training_states)))
    random.shuffle(order)

    for start in range(0, len(order), batch_size):
        batch = order[start:start + batch_size]
        yield training_states[batch], {'value_head': value_targets[batch],
                                       'policy_head': policy_targets[batch]}

In [9]:
def fit(model, samples, batch_size=32):
    """Train ``model`` on random minibatches drawn from ``samples``.

    Runs ``config.TRAINING_LOOPS`` rounds; each round draws up to
    ``config.BATCH_SIZE`` samples without replacement and feeds them to
    ``model.train_on_batch`` in chunks produced by ``train_batches``.

    Args:
        model: object exposing ``train_on_batch(x=..., y=..., return_dict=True)``
            whose result has the keys ``'loss'``, ``'value_head_loss'`` and
            ``'policy_head_loss'``.
        samples: sequence of sample rows accepted by ``train_batches``.
        batch_size: size of each training chunk (default 32, the value that
            was previously hard-coded).

    Returns:
        tuple: ``(overall_loss, value_loss, policy_loss)``, three lists with
        one entry per trained chunk.
    """
    overall_loss = []
    value_loss = []
    policy_loss = []
    for _ in range(config.TRAINING_LOOPS):
        # Never request more samples than exist; random.sample would raise otherwise.
        minibatch = random.sample(samples, min(config.BATCH_SIZE, len(samples)))
        for x_train, y_train in train_batches(minibatch, batch_size):
            res = model.train_on_batch(x=x_train, y=y_train, return_dict=True)
            overall_loss.append(res['loss'])
            value_loss.append(res['value_head_loss'])
            policy_loss.append(res['policy_head_loss'])
    return overall_loss, value_loss, policy_loss

In [10]:
def train_manger(best_version, max_iterations: Optional[int] = None):
    """Run the self-play -> train -> evaluate loop.

    Every iteration reloads both networks through ``get_models()``, plays
    ``config.EPISODES`` self-play games and, if that iteration gathered at
    least ``config.DATA_LEN`` samples, trains and then plays
    ``config.EVAL_EPISODES`` evaluation games against the best network.

    Args:
        best_version: version number of the current best network; incremented
            each time the challenger is promoted.
        max_iterations: optional cap on the number of iterations. ``None``
            (the default) loops until an exception escapes, as before.

    Returns:
        The latest ``best_version`` (reachable only when ``max_iterations`` is set).

    Raises:
        RejectedActionError, TreeError: propagated from ``play_match``.
    """
    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_NN, best_NN = get_models()

        current_agent = AlphaZero(config.MCTS_SIMS)
        best_agent = AlphaZero(config.MCTS_SIMS)

        dataset = ut.SampleBuilder()

        for episode in range(config.EPISODES):
            print(f'Episode {episode} started')
            start_time = time.monotonic()
            sb, _ = play_match(current_agent, current_NN, turns_until_tau0=config.TURNS_UNTIL_TAU0)
            dataset.samples.extend(sb.samples)
            print(f'Episode {episode} ended in {(time.monotonic() - start_time)/60} minutes')

        size = len(dataset.samples)
        # `iteration % 2` is truthy for odd values only, so samples are saved every second iteration.
        if iteration % 2:
            dataset.save(iteration)
            print(f'Gathered {size} sample')

        if size >= config.DATA_LEN:
            # NOTE(review): this trains `current_agent.model`, while promotion below
            # copies weights from `current_NN.model` - confirm both refer to the same network.
            fit(current_agent.model, dataset.samples)

            score = {'agent': 0, 'draw': 0, 'other': 0}

            # Use the evaluation loop's own counter; the messages previously printed
            # the stale index left over from the self-play loop.
            for eval_episode in range(config.EVAL_EPISODES):
                print(f'Evaluation episode {eval_episode} started')
                start_time = time.monotonic()
                _, winner = play_match(current_agent, current_NN, best_agent, best_NN)
                score[winner] += 1
                print(f'Evaluation episode {eval_episode} ended in {(time.monotonic() - start_time)/60} minutes')

            # Integer percentage of evaluation games won by the challenger.
            ratio = score['agent'] * 100 // config.EVAL_EPISODES

            print(f'current version win ration: {ratio}')

            if ratio >= config.SCORING_THRESHOLD:
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_version = best_version + 1
                best_NN.save('best alphazero', best_version)
                print('Saving a new version')
            else:
                current_NN.save('alphazero' + str(best_version), iteration)
                print('Saving version progress')

        iteration += 1

    return best_version

In [11]:
# Slots for the exception caught by the training cell, kept for inspection in later cells.
tree_error = None
reject_error = None

In [12]:
# Run the training loop starting from best version 1 and time it.
# A RejectedActionError / TreeError is stored in the matching global instead of
# propagating, so the debugging cells that follow can inspect it.
import time
start_time = time.monotonic()
try:
    train_manger(1)
except RejectedActionError as reject_e:
    print('rejected')
    reject_error = reject_e
except TreeError as tree_e:
    print('search error')
    tree_error = tree_e
# Elapsed wall-clock time in seconds (time.monotonic difference).
print(f'slept for {time.monotonic() - start_time}')

loading version 1
Episode 0 started
********************
********************
********************
********************
********************
*****************
Episode 0 ended in 453.8119999999999
Episode 1 started
********************
********************
********************
********************
***********
Episode 1 ended in 355.32800000000134
Episode 2 started
********************
********************
**************rejected
slept for 1049.0470000000023


In [13]:
# Board of the real game at the moment the action was rejected.
print(reject_error.game.grid)
print('---------------------------')

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

---------------------------


In [20]:
# The action the agent proposed and the game refused.
print(reject_error.action)

(7,4)------->>>(9,2)


In [19]:
# Every action the real game considers possible in this position;
# the rejected action is not among the recorded results.
actions = reject_error.game.get_all_possible_actions()
for action in actions:
    print(action)

(6,3)------->>>(5,2)
(8,7)------->>>(7,6)
(8,7)------->>>(7,8)
(5,8)------->>>(4,7)
(5,8)------->>>(4,9)
(9,6)------->>>(8,5)
(9,8)------->>>(8,9)
(9,10)------->>>(8,9)
(10,3)------->>>(9,2)
(10,3)------->>>(9,4)


In [18]:
# Which player was to move in the real game when the action was rejected.
print(reject_error.game.current_turn)

1


In [17]:
# The tau value passed to train_act for the rejected move.
print(reject_error.tau)

0


In [22]:
# Compare the board held by the agent's search-tree root with the real game's board.
# In the recorded output they differ: the tree root still shows a W at (7,4).
print(reject_error.agent.mct.root.game_state.get_game().grid)
print('---------------------------')
print(reject_error.game.grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

---------------------------
     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 



In [21]:
# Turn and possible actions as seen from the search-tree root's own game copy;
# the recorded output lists only the action that the real game rejected.
print(reject_error.agent.mct.root.game_state.turn)
actions = reject_error.agent.mct.root.game_state.get_game().get_all_possible_actions()
for action in actions:
    print(action)

1
(7,4)------->>>(9,2)


In [24]:
# Piece type stored at grid[5][4] in the search-tree root's game copy (compare with the real game).
reject_error.agent.mct.root.game_state.get_game().grid[5][4].piece.type

'KING'

In [25]:
# Piece type stored at grid[5][4] in the real game (compare with the search-tree root's copy).
reject_error.game.grid[5][4].piece.type

'PAWN'

In [63]:
# Work on a deep copy so the undo/replay experiments below leave the captured game untouched.
copygame = deepcopy(reject_error.game)

In [64]:
# Rewind the copied game move by move, remembering each undone action,
# and stop at the first point where the tracked piece is no longer a king.
print(copygame.grid)
piece = copygame.grid[7][2].piece  # looked up once; the same object is re-checked after every undo
actions_stack = []
# NOTE(review): undo() presumably shrinks copygame.actions while it is being
# iterated in reverse - confirm this iteration is safe.
for action in reversed(copygame.actions):
    actions_stack.append(action)
    copygame.undo()
    if piece.type != 'KING':
        print(copygame.grid)
        break

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  B  .  B  .  B  .  B 
 2|  B  .  .  .  B  .  B  .  B  . 
 3|  .  .  .  B  .  B  .  .  .  W 
 4|  .  .  .  .  B  .  .  .  .  . 
 5|  .  .  .  .  .  .  .  .  .  B 
 6|  .  .  .  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  W  .  B  .  .  .  W  .  W  . 
 9|  .  W  .  W  .  W  .  W  .  W 
10|  .  .  W  .  W  .  W  .  W  . 



In [88]:
# Number of undone actions still waiting to be replayed.
# NOTE(review): the execution counts of these debugging cells are out of order,
# so the recorded value reflects interactive state, not a top-to-bottom run.
print(len(actions_stack))

0


In [51]:
# Step the copied game back by one more move and show the resulting board.
copygame.undo()
print(copygame.grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  B  .  B  .  B  .  B 
 2|  .  .  .  .  B  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  .  .  .  .  .  .  B 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  W  . 
 9|  .  .  .  W  .  W  .  W  .  W 
10|  .  .  W  .  W  .  W  .  W  . 



In [87]:
# Replay the most recently undone action on the copy, printing the board before and after.
# In the recorded output, applying (3,8)->(8,3) also removes the W at (7,4).
print(copygame.grid)
action = actions_stack.pop()
print(action)
copygame.apply_action(action)
print(copygame.grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

(3,8)------->>>(8,3)
     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 



In [25]:
# Show the copied game next to the captured game to compare their boards.
print(copygame.grid)
print(reject_error.game.grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  .  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 



In [89]:
# Board held by the search-tree root; the recorded output still shows a W at (7,4)
# together with the black piece on (8,3).
root_game = reject_error.agent.mct.root.game_state.get_game()
print(root_game.grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  B  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 



In [92]:
# Board of entry 4 in the tree's state stack (used below as the parent position of the faulty move).
print(reject_error.agent.mct.state_stack.dq[4].get_game().grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 



In [120]:
# From the parent position, list every possible action together with the board
# that GameState generates for it, to find the successor state that is wrong.
parent_game_state = reject_error.agent.mct.state_stack.dq[4]
print(parent_game_state.get_game().grid)
print('--------------------------------')
parent_actions, parent_states = parent_game_state.get_all_possible_states()
for action, state in zip(parent_actions, parent_states):
    print(action)
    print(state.get_game().grid)

     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  B  .  .  .  W  .  . 
 6|  .  .  W  .  .  .  .  .  .  . 
 7|  .  .  .  W  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

--------------------------------
(5,4)------->>>(7,2)
     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  B  .  W 
 4|  .  .  B  .  B  .  .  .  .  . 
 5|  .  .  .  .  .  .  .  W  .  . 
 6|  .  .  .  .  .  .  .  .  .  . 
 7|  .  B  .  W  .  .  .  .  .  . 
 8|  .  .  .  .  .  .  W  .  .  . 
 9|  .  .  .  .  .  W  .  W  .  W 
10|  .  .  W  .  .  .  W  .  W  . 

(3,8)------->>>(8,3)
     1  2  3  4  5  6  7  8  9  10
 1|  .  .  .  .  .  B  .  B  .  B 
 2|  .  .  B  .  .  .  B  .  B  . 
 3|  .  B  .  .  .  B  .  .  .  W 
 4|  .  .  B 

In [106]:
# Second action generated from the parent position; the recorded output is (3,8)->(8,3).
buged_action = parent_actions[1]
print(buged_action)

(3,8)------->>>(8,3)


In [116]:
# Game object of the parent position and the piece stored at grid[2][7];
# the recorded output identifies it as a black king.
buged_game = parent_game_state.get_game()
print(buged_game.grid[2][7].piece)

(2,7,BLACK KING)


In [118]:
import pickle
# Save the parent position so the faulty move can be reproduced outside this kernel.
# The context manager closes the file, which guarantees the dump is flushed to disk.
with open('firstbug_game.pk', 'wb') as dump_file:
    pickle.dump(buged_game, dump_file)

In [122]:
# Save the game in which the action was rejected; the context manager closes
# the file, which guarantees the dump is flushed to disk.
with open('secondbug_game.pk', 'wb') as dump_file:
    pickle.dump(reject_error.game, dump_file)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import time
import random

from IPython.display import display
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

import cufflinks as cf
import plotly.graph_objects as go

In [None]:
# Enable plotly's notebook mode, switch cufflinks to offline mode and reseed NumPy.
# NOTE(review): this replaces the NumPy seed of 101 set earlier in the notebook.
init_notebook_mode(connected=True)
cf.go_offline()
np.random.seed(5)

In [None]:
# NOTE(review): `task` is not defined anywhere in the visible notebook; this cell fails on a fresh kernel.
await task

In [None]:
# NOTE(review): `mct` is not defined anywhere in the visible notebook; this cell fails on a fresh kernel.
print(len(mct.root.edges))

In [None]:
# Stand-alone agent for manual experiments, configured with config.MCTS_SIMS.
currentAgent = AlphaZero(config.MCTS_SIMS)


In [None]:
# NOTE(review): `current_game` and `current_model` are not defined at notebook level
# anywhere in the visible notebook; this cell fails on a fresh kernel.
currentAgent.build_mct(ut.GameState(current_game), current_model)

In [None]:
import tensorflow as tf
# Display the installed TensorFlow version.
tf.__version__

In [None]:


class PlayGround:
    """Holds a current/best network pair plus a live plot of expected value per turn.

    Attributes are set in ``__init__``: ``current_model`` and ``best_model``
    (both ``NeuralNetwork``), ``exp`` and ``turn`` (the plotted y/x series) and
    ``fig`` (a plotly ``FigureWidget`` with one scatter trace).
    """

    def __init__(self):
        # Use the module-qualified, argument-free call form that get_models() uses;
        # the bare name `get_action_space` is never imported in this notebook.
        # NOTE(review): this replaces the earlier `get_action_space(10, 10)` call -
        # confirm the defaults describe the same 10x10 board.
        self.action_space_shape = len(ut.get_action_space())
        self.current_model = NeuralNetwork(config.REG_CONST, config.LEARNING_RATE,
                                           (10, 10, 25), self.action_space_shape,
                                           config.HIDDEN_CNN_LAYERS)

        # best_model has to exist before its weights are set below; its construction
        # was commented out, which made the next statement raise AttributeError.
        self.best_model = NeuralNetwork(config.REG_CONST, config.LEARNING_RATE,
                                        (10, 10, 25), self.action_space_shape,
                                        config.HIDDEN_CNN_LAYERS)

        # Start both networks from identical weights.
        self.best_model.model.set_weights(self.current_model.model.get_weights())
        self.exp = []
        self.turn = []

        self.fig = go.FigureWidget()
        self.fig.update_layout(xaxis_title="Turn", yaxis_title='Expected value')
        self.fig.add_scatter()

    def plot_figure(self):
        """Display the figure widget in the notebook."""
        display(self.fig)

    def update_plot(self):
        """Push the current ``turn``/``exp`` series into the first trace in one batched update."""
        with self.fig.batch_update():
            self.fig.data[0].x = self.turn
            self.fig.data[0].y = self.exp

In [None]:
#calc_new_rate(1613, [1609, 1477, 1388, 1586, 1720], [2, 0, 1, 1, 2])
#1601

In [None]:
import numpy as np
import random


In [None]:
import ai.figures as figs

In [None]:
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objects as go
import cufflinks as cf
import ai.figures as figs
init_notebook_mode()
cf.go_offline()

In [None]:
# Load the users table and label its index as the player id.
import pandas as pd
df = pd.read_csv('data/users v3.csv')
df.index.name = 'player_id'

In [None]:
# Render the figure returned by figs.registration_intensity_histogram for the users table.
iplot(figs.registration_intensity_histogram(df))

In [None]:
# Render the figures returned by figs.categories_bars and figs.categories_ratio_pie for the users table.
iplot(figs.categories_bars(df))
iplot(figs.categories_ratio_pie(df))

In [None]:
# Preview the first rows of the users table.
df.head()

In [None]:
# Parse the signup timestamps, then derive year/month/day columns with the
# vectorized .dt accessor instead of a Python-level lambda per row.
# NOTE(review): this cell mutates `df` in place; re-running it is harmless
# because every assignment is recomputed from `signup_date`.
df['signup_date'] = pd.to_datetime(df['signup_date'])
df['year'] = df['signup_date'].dt.year
df['month'] = df['signup_date'].dt.month
df['day'] = df['signup_date'].dt.day
df.head()

In [None]:
# Count rows per signup month and plot the count of the `rate` column as a line chart.
bymonth = df.groupby('month').count()

# y uses the per-month count of non-null `rate` values as the number of users.
trace1 = go.Scatter(x = bymonth.index,y=bymonth.rate,
                          marker = dict(color = 'rgba(31, 119, 180, 0.8)',
                                        line=dict(color='rgb(0,0,0)',
                                                  width=1.5)))
layout = dict(barmode = 'group',title = 'Registration Intensity',
                 xaxis= dict(title= 'Month',ticklen= 5,zeroline= False, ticks="inside"), 
                 yaxis = dict(title = 'Users', ticklen = 5, zeroline = False, ticks="outside"))
data = [trace1]

# Plain dict figure specification handed to plotly's offline iplot.
fig = {'data': data, 'layout': layout}
    
iplot(fig)