In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

from env import *
from states import *
from agents import *
from models import *
from util import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


#### Evaluations

Compare performance of different agents.

In [None]:
## Restore the Q-model checkpoint and build the agents that will be compared
q_model = QModel(save_dir='model/')
q_model.restore('dqn-v3/750000')

a_base = Agent()
a_rl = RlAgent(q_model)
# environment instance handed to the MCTS agent (it shares the Q-model with a_rl)
e_sim = Env(Game(), DefaultPlayer(), state_generator=StateExtend)
a_mcts1 = MCTSAgent(q_model, e_sim, N_search=1600)

# switch infer_flag on for every agent
for a in (a_base, a_rl, a_mcts1):
    setattr(a, 'infer_flag', True)

# Line-up used by the evaluation cells (one entry per seat).
# Other line-ups that can be swapped in:
#   [a_base,  a_base,  a_base,  a_base]
#   [a_rl,    a_base,  a_rl,    a_base]
#   [a_base,  a_rl,    a_base,  a_rl]
#   [a_base,  a_mcts1, a_base,  a_mcts1]
#   [a_mcts1, a_rl,    a_mcts1, a_rl]
#   [a_rl,    a_mcts1, a_rl,    a_mcts1]
agents = [a_mcts1, a_base, a_mcts1, a_base]

# Number of games to run; a larger alternative is N_iter = 10000.
N_iter = 10

In [None]:
## Run N_iter games with the selected line-up, print timing and mean scores,
## then show a histogram of the eval scores
tt = time.time()
curr_scores, eval_scores = run_games(
    N_iter=N_iter,
    agents=agents,
    state_generator=StateExtend,
)
# prints: elapsed seconds, mean of curr_scores, mean of eval_scores
print(time.time() - tt, np.mean(curr_scores), np.mean(eval_scores))

plt.figure()
# bin edges every 10 points from -200 up to and including 200
plt.hist(eval_scores, bins=np.arange(-200, 210, 10))
plt.grid()
plt.show()

In [None]:
## Debug mode: run a single displayed game to check how each agent plays.
# Uses the names created in the setup cell: the MCTS agent is `a_mcts1` and the
# line-up is `agents`. The names `a_mcts_1` / `a_mcts_2` are not defined anywhere
# in this notebook, so referring to them raises NameError on a fresh kernel.
# NOTE(review): if a second, differently configured MCTS agent was intended for
# seats 0 and 2, construct it in the setup cell and pass an explicit list here.
a_mcts1.debug_flag = True
# if_random_game=False together with fname presumably replays a stored game
# from 'dataset/valid_set_q.p' -- confirm against run_games.
run_games(N_iter=1, agents=agents, if_random_game=False, if_display=True,
          fname='dataset/valid_set_q.p', state_generator=StateExtend)

#### Human-AI interaction

In [2]:
# Enable IPython's asyncio event-loop integration; the play cell later schedules
# the game with asyncio.ensure_future, which presumably relies on this loop.
%gui asyncio

In [3]:
## Restore the MCTS model checkpoint and wrap it in the AI opponent

mcts_model = MCTSModel(save_dir='model/')
mcts_model.restore('mcts-v7/5000')

e_sim = Env(Game(), DefaultPlayer(), state_generator=StateExtend)
a_ai = MCTSAgent(mcts_model, e_sim, N_search=1600, c_puct=400)
# Alternative opponents:
#   a_ai = RlAgent(q_model)          # needs q_model from the evaluation section
#   a_ai = MCTSAgent(mcts_model, e_sim, N_search=16000)

# turn on both inference and debug flags for the AI opponent
a_ai.infer_flag = a_ai.debug_flag = True

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from model/mcts-v7/5000/model


In [4]:
## Start an interactive game: a_ai in seats 0 and 2, IpyAgent in seats 1 and 3

a_ipy = IpyAgent()
# all-IpyAgent alternative: agents = [a_ipy, a_ipy, a_ipy, a_ipy]
agents = [a_ai, a_ipy, a_ai, a_ipy]
env = Env(Game(), DefaultPlayer(), state_generator=StateExtend)
# deal a random game and display the initial hands
state, _ = generate_random_game(env, if_display=True)
# schedule the game coroutine on the event loop; `task` keeps a handle to it
task = asyncio.ensure_future(
    run_single_game_ipy(agents, state, env, if_display=True)
)

init:player:0 -> [♦2,♦2,♣2,♣2,♣5,♣10,♣10,♣K,♥5,♥10,♥J,♥A,♠4,♠5,♠6,♠7,♠8,♠J,♠Q,♠K,♠A,♣9,♠9,♠9,☆],25.
init:player:1 -> [♦4,♦5,♦7,♦7,♦J,♦Q,♦A,♣6,♣7,♣8,♣A,♥2,♥3,♥7,♥7,♥10,♥Q,♥K,♥A,♠5,♠10,♠J,♠Q,♠K,☆☆],25.
init:player:2 -> [♦4,♦5,♦6,♦10,♦10,♦Q,♦K,♦K,♣3,♣3,♣7,♣A,♥4,♥5,♥8,♥J,♥K,♠2,♠3,♠6,♠7,♠A,♦9,♣9,♥9],25.
init:player:3 -> [♦3,♦6,♦8,♦A,♣4,♣4,♣5,♣6,♣8,♣J,♣Q,♣Q,♣K,♥6,♥8,♥Q,♠2,♠3,♠4,♠8,♠10,♦9,♥9,☆,☆☆],25.
 ♦2,♦2,1465.00,113.95,4.65,0.04; ♣2,♣2,14.00,3.78,4.47,0.09; ♣5,10.00,-11.17,5.10,0.08; ♣K,10.00,-14.89,4.51,0.07; ♠4,9.00,-2.75,4.04,0.07; ♠A,8.00,-2.87,4.77,0.06; ♠5,7.00,-4.20,4.37,0.05; ♥A,7.00,3.49,3.76,0.05; ♠K,6.00,-8.61,5.07,0.04; ♠9,♠9,6.00,5.44,4.55,0.05; ♥5,6.00,1.73,5.57,0.04; ♥J,6.00,3.06,4.42,0.04; ♠8,6.00,0.65,4.67,0.03; ♣10,♣10,6.00,5.91,6.03,0.04; ☆,6.00,-2.56,4.40,0.04; ♠J,5.00,0.10,4.76,0.04; ♥10,5.00,-0.08,4.99,0.04; ♠7,5.00,0.13,4.58,0.03; ♠6,5.00,-0.25,3.91,0.04; ♠Q,4.00,-1.11,4.55,0.04; ♣9,4.00,-0.83,4.49,0.04
round:0,player:0 -> ♦2,♦2, curr best ♦2,♦2. player remain: [♣2,

Dropdown(options=(('Please select from below:', ([], None, False)), ('♦7,♦7', ((5, 5), [(2, 5, (5, 5))], True)…