# Random search for good model parameters

In [1]:
from players.atm import ATM
from players.ai_player import AIPlayer
from players.random_player import RandomPlayer
from training_env import TrainingEnv
from agents import build_dqn_agent, fit_agent, train_loop, load_agent_weights
from models import simple_model, complex_model, test_model
from util import visualize_history, use_jupyter
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Project helper imported from util; presumably switches the project's helpers
# into notebook-friendly mode - TODO confirm in util.use_jupyter.
use_jupyter()

In [3]:
# How many players in table (passed to TrainingEnv.build_environment)
NUMBER_OF_SEATS = 2
# Max betsize in simulation environment (shouldn't really matter with discrete relative to pot sizing)
MAX_BET = 100000
# 'norm' (normalized) or 'one-hot': how to encode the player's hand ranking.
# NOTE(review): the original comment said the ranking has 7642 unique values;
# standard poker hand evaluators distinguish 7462 classes, so this looks like
# a digit transposition - confirm against the evaluator in use.
RANK_ENCODING = 'norm'

# Window length: given to MODEL when the network is built and passed as
# window_length to build_dqn_agent.
WINDOW = 1
# Model factory (from the models module) that is called to build the network.
MODEL = test_model

# Step budgets for the training runs. FIRST_RUN_STEPS is the step count handed
# to the first fit_agent call; the remaining values are presumably consumed by
# later training cells - verify where they are used.
FIRST_RUN_STEPS = 1000
SECOND_RUN_STEPS = 1000
THIRD_RUN_STEPS = 1000
THIRD_RUN_ITERATIONS = 50

In [4]:
# Build the training environment with an ATM instance as the first argument
# (presumably the opponent the agent plays against - confirm in TrainingEnv)
# and NUMBER_OF_SEATS seats; debug output is switched off.
env = TrainingEnv.build_environment(ATM(), NUMBER_OF_SEATS, debug=False)

In [5]:
# Build the network from the configured factory, handing it the window length
# plus the environment's observation-dimension and action counts.
model = MODEL(WINDOW, env.n_observation_dimensions, env.n_actions)
# Keras' summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model.summary()

# Agent hyperparameters forwarded to build_dqn_agent:
# window_length - how many timesteps to look into past (will multiply observation space by this, be careful)
# target_model_update - NOTE(review): presumably follows the keras-rl convention, where a value in the
#                       0...1 range is the soft update weight of the target network and a value >= 1 is
#                       the number of steps between hard updates - confirm in agents.build_dqn_agent
# enable_double_dqn - Double DQN, https://arxiv.org/pdf/1509.06461.pdf
# enable_dueling_network - dueling architecture (separate state-value and advantage streams),
#                          https://arxiv.org/abs/1511.06581
# dueling_type - how the two dueling streams are combined; 'avg' presumably subtracts the mean
#                advantage - confirm against the agent implementation
# train_interval - every how many steps to run a train cycle
# n_warmup_steps - how many steps to run without training
# batch_size - number of (s, a, G) triplets to train on in one training cycle (as a batch)
# gamma - future reward discount essentially
# memory_interval - how often to add last step to memory buffer (discarding every other)

agent = build_dqn_agent(model, env.n_actions, window_length=WINDOW, target_model_update=0.001,
                        enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                        train_interval=100, n_warmup_steps=50, batch_size=32, gamma=.99, memory_interval=1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 396)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                25408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                528       
_________________________________________________________________
dropout_3 (Dropout)          (None, 16)                0         
__________

In [6]:
# Let's play for FIRST_RUN_STEPS steps (decisions made by AI).
# fit_agent hands back the agent plus a second value kept in `hist`
# (presumably the training history - confirm in agents.fit_agent).
agent, hist = fit_agent(agent, env, FIRST_RUN_STEPS, debug=False)

Training for 1000 steps ...
Interval 1 (0 steps performed)
66 episodes - episode_reward: 1.021 [-55.600, 42.800] - loss: 160.329 - mean_absolute_error: 1.938 - mean_q: 3.178 - money_won: 2.645

Interval 2 (200 steps performed)
71 episodes - episode_reward: 1.743 [-58.200, 49.000] - loss: 83.360 - mean_absolute_error: 1.815 - mean_q: 2.952 - money_won: 9.290

Interval 3 (400 steps performed)
71 episodes - episode_reward: 0.766 [-63.000, 56.400] - loss: 89.912 - mean_absolute_error: 1.751 - mean_q: 2.715 - money_won: 0.550

Interval 4 (600 steps performed)
71 episodes - episode_reward: 1.291 [-66.800, 42.600] - loss: 34.182 - mean_absolute_error: 1.624 - mean_q: 2.819 - money_won: 5.285

Interval 5 (800 steps performed)
done, took 22.917 seconds
