In [1]:
import tensorflow as tf
# Report the TensorFlow build in use and the GPU (empty string when none is found).
tf_version = tf.__version__
print('TensorFlow Version: {}'.format(tf_version))
gpu_name = tf.test.gpu_device_name()
print('Default GPU Device: {}'.format(gpu_name))

TensorFlow Version: 1.7.1
Default GPU Device: 


##### >**Note:** Make sure you have OpenAI Gym cloned. Then run this command `pip install -e gym/[all]`.

In [2]:
import numpy as np
import gym
# Only CartPole-v1 is used: the former CartPole-v0 instance was created and
# immediately overwritten. Episode returns are later normalized by 500, which
# matches v1's episode length limit.
env = gym.make('CartPole-v1')

In [3]:
def model_input(state_size):
    """Create the placeholders that feed a batch of transitions into the graph.

    Args:
        state_size: number of features in one state vector.

    Returns:
        Tuple (states, actions, next_states, rewards, dones, rates) of
        placeholders. The leading dimension of each is left as None; states
        and next_states are [None, state_size], the rest are [None].
    """
    per_state_shape = [None, state_size]
    per_sample_shape = [None]

    states = tf.placeholder(dtype=tf.float32, shape=per_state_shape, name='states')
    actions = tf.placeholder(dtype=tf.int32, shape=per_sample_shape, name='actions')
    next_states = tf.placeholder(dtype=tf.float32, shape=per_state_shape, name='next_states')
    rewards = tf.placeholder(dtype=tf.float32, shape=per_sample_shape, name='rewards')
    dones = tf.placeholder(dtype=tf.float32, shape=per_sample_shape, name='dones')
    # Success rate of the episode each transition belongs to
    rates = tf.placeholder(dtype=tf.float32, shape=per_sample_shape, name='rates')
    return states, actions, next_states, rewards, dones, rates

In [4]:
def Act(states, action_size, hidden_size, reuse=False, alpha=0.1, training=False):
    """Actor network: maps states to unnormalized action logits.

    Args:
        states: input tensor of states.
        action_size: number of output logits.
        hidden_size: number of units in each of the two hidden layers.
        reuse: passed to tf.variable_scope to share the 'Act' variables.
        alpha: slope of the leaky-ReLU for negative inputs.
        training: forwarded to the batch-normalization layers.

    Returns:
        Tensor of action logits with action_size units.
    """
    with tf.variable_scope('Act', reuse=reuse):
        hidden = states
        # Two identical blocks: dense -> batch norm -> leaky ReLU
        for _ in range(2):
            pre_activation = tf.layers.dense(inputs=hidden, units=hidden_size)
            normalized = tf.layers.batch_normalization(pre_activation, training=training)
            hidden = tf.maximum(alpha * normalized, normalized)

        # Linear output layer
        return tf.layers.dense(inputs=hidden, units=action_size)

In [5]:
def Env(states, actions, state_size, action_size, hidden_size, reuse=False, alpha=0.1, training=False):
    """Environment model: predicts next-state logits and a Q logit from (state, action).

    Args:
        states: input tensor of states.
        actions: action tensor concatenated (axis 1) with the first layer's output.
        state_size: number of units of the next-state head.
        action_size: number of units of the first dense layer.
        hidden_size: number of units of the second dense layer.
        reuse: passed to tf.variable_scope to share the 'Env' variables.
        alpha: slope of the leaky-ReLU for negative inputs.
        training: forwarded to the batch-normalization layers.

    Returns:
        Tuple (states_logits, Qlogits): the state_size-unit head and the 1-unit head.
    """
    with tf.variable_scope('Env', reuse=reuse):
        def leaky_bn(pre_activation):
            # Batch norm followed by a leaky ReLU with slope alpha
            normalized = tf.layers.batch_normalization(pre_activation, training=training)
            return tf.maximum(alpha * normalized, normalized)

        # State branch.
        # NOTE(review): this layer is action_size wide, not hidden_size — confirm intended.
        state_features = leaky_bn(tf.layers.dense(inputs=states, units=action_size))

        # Fuse the state features with the action and pass through the hidden layer
        fused = tf.concat([state_features, actions], axis=1)
        hidden = leaky_bn(tf.layers.dense(inputs=fused, units=hidden_size))

        # Output heads.
        # NOTE(review): both heads are built with trainable=False, so they are not
        # part of tf.trainable_variables() — confirm intended.
        states_logits = tf.layers.dense(inputs=hidden, units=state_size, trainable=False)
        Qlogits = tf.layers.dense(inputs=hidden, units=1, trainable=False)
        return states_logits, Qlogits

In [6]:
def model_loss(state_size, action_size, hidden_size, gamma,
               states, actions, next_states, rewards, dones, rates):
    """Build the Act and Env networks and the three losses that train them.

    Args:
        state_size: width of Env's next-state head.
        action_size: number of discrete actions (depth of the one-hot labels).
        hidden_size: hidden-layer width forwarded to Act and Env.
        gamma: discount applied to the bootstrapped next-step value.
        states, actions, next_states, rewards, dones, rates: batch tensors
            (the placeholders created by model_input()).

    Returns:
        Tuple (actions_logits, aloss, eloss, aloss2):
            actions_logits: Act's logits for `states`.
            aloss: cross-entropy between Act's logits and the taken actions.
            eloss: Env's loss (next-state prediction, Q-vs-rate and Q-vs-zero
                sigmoid cross-entropies, squared error against targetQs).
            aloss2: Act's loss measured through Env (next-state prediction,
                Q-vs-one sigmoid cross-entropies, negative mean Q).

    NOTE(review): Act and Env are always called here with their default
    training=False, so the batch-norm layers are built in inference mode —
    confirm this is intended.
    """
    ################################################ a = act(s)
    # Supervised term: make Act reproduce the actions stored in the batch
    actions_logits = Act(states=states, hidden_size=hidden_size, action_size=action_size)
    actions_labels = tf.one_hot(indices=actions, depth=action_size, dtype=actions_logits.dtype)
    aloss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=actions_logits, 
                                                                      labels=actions_labels))
    ################################################ s', r = env(s, a)
    ################################################ s', Q = env(s, a)
    ################################################ ~s', ~Q = env(s, ~a)
    # Env is evaluated twice with shared weights: once with the one-hot taken
    # actions (e_*) and once with Act's raw logits as the action input (a_*)
    e_next_states_logits, eQs = Env(actions=actions_labels, states=states, hidden_size=hidden_size, 
                                    action_size=action_size, state_size=state_size)
    a_next_states_logits, aQs = Env(actions=actions_logits, states=states, hidden_size=hidden_size, 
                                    action_size=action_size, state_size=state_size, reuse=True)
    # Squash the real next states into (0, 1) so they can serve as sigmoid cross-entropy labels
    next_states_labels = tf.nn.sigmoid(next_states)
    eloss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=e_next_states_logits, 
                                                                   labels=next_states_labels))
    aloss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=a_next_states_logits, 
                                                                    labels=next_states_labels))
    # Flatten the [batch, 1] Q heads to [batch] to line up with rates/rewards
    eQs_logits = tf.reshape(eQs, shape=[-1])
    aQs_logits = tf.reshape(aQs, shape=[-1])
    # Adversarial terms: Env pushes Q(s, a_taken) toward the episode rate and
    # Q(s, Act(s)) toward 0, while Act pushes Q(s, Act(s)) toward 1
    eloss += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=eQs_logits, # GAN
                                                                    labels=rates)) # 0-1
    eloss += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=aQs_logits, # GAN
                                                                    labels=tf.zeros_like(rates))) # min
    aloss2 += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=aQs_logits, # GAN
                                                                     labels=tf.ones_like(rates))) # max
    #################################################### s'', Q' = ~env(s', ~a')
    # Same adversarial pair, evaluated one step ahead on (next_states, Act(next_states))
    next_actions_logits = Act(states=next_states, hidden_size=hidden_size, action_size=action_size, reuse=True)
    _, aQs2 = Env(actions=next_actions_logits, states=next_states, hidden_size=hidden_size, 
                  action_size=action_size, state_size=state_size, reuse=True)
    # Multiplying by (1-dones) zeroes the next-step value where dones == 1
    aQs2_logits = tf.reshape(aQs2, shape=[-1]) * (1-dones)
    eloss += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=aQs2_logits, # GAN
                                                                    labels=tf.zeros_like(rates))) # min
    aloss2 += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=aQs2_logits, # GAN
                                                                     labels=tf.ones_like(rates))) # max
    ###################################################### Q(s,a)= r + Q'(s',a')
    ###################################################### ~Q(s,~a)= r + Q'(s',a')
    # NOTE(review): targetQs is not wrapped in tf.stop_gradient, so the squared
    # error below also back-propagates through aQs2_logits — confirm intended
    targetQs = rewards + (gamma * aQs2_logits)
    eloss += tf.reduce_mean(tf.square(eQs_logits - targetQs)) # Q-learning
    #aloss2 += tf.reduce_mean(tf.square(aQs_logits - targetQs))
    # Act additionally maximizes the raw Q logits for its own actions
    aloss2 += -tf.reduce_mean(aQs_logits) # max
    aloss2 += -tf.reduce_mean(aQs2_logits) # max
    return actions_logits, aloss, eloss, aloss2

In [7]:
def model_opt(a_loss, e_loss, a_loss2, a_learning_rate, e_learning_rate):
    """Create the Adam training ops for the Act and Env networks.

    Args:
        a_loss: first Act loss, minimized over the 'Act' variables.
        e_loss: Env loss, minimized over the 'Env' variables.
        a_loss2: second Act loss, minimized over the 'Act' variables.
        a_learning_rate: learning rate of both Act optimizers.
        e_learning_rate: learning rate of the Env optimizer.

    Returns:
        Tuple (a_opt, e_opt, a_opt2) of training ops.
    """
    def variables_under(prefix):
        # Trainable variables whose name starts with the given scope prefix
        return [v for v in tf.trainable_variables() if v.name.startswith(prefix)]

    act_variables = variables_under('Act')
    env_variables = variables_under('Env')

    # Run the collected update ops (batch-norm statistics) before each step
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        a_opt = tf.train.AdamOptimizer(a_learning_rate).minimize(a_loss, var_list=act_variables)
        e_opt = tf.train.AdamOptimizer(e_learning_rate).minimize(e_loss, var_list=env_variables)
        a_opt2 = tf.train.AdamOptimizer(a_learning_rate).minimize(a_loss2, var_list=act_variables)
    return a_opt, e_opt, a_opt2

In [8]:
class Model:
    """Bundles the placeholders, losses and training ops of the Act/Env graph."""

    def __init__(self, state_size, action_size, hidden_size, a_learning_rate, e_learning_rate, gamma):
        # Inputs: placeholders through which batches enter the graph
        placeholders = model_input(state_size=state_size)
        (self.states, self.actions, self.next_states,
         self.rewards, self.dones, self.rates) = placeholders

        # Forward pass: build the networks and compute the losses
        forward = model_loss(
            state_size=state_size, action_size=action_size,
            hidden_size=hidden_size, gamma=gamma,
            states=self.states, actions=self.actions, next_states=self.next_states,
            rewards=self.rewards, dones=self.dones, rates=self.rates)
        self.actions_logits, self.a_loss, self.e_loss, self.a_loss2 = forward

        # Backward pass: one training op per loss
        training_ops = model_opt(
            a_loss=self.a_loss, e_loss=self.e_loss, a_loss2=self.a_loss2,
            a_learning_rate=a_learning_rate, e_learning_rate=e_learning_rate)
        self.a_opt, self.e_opt, self.a_opt2 = training_ops

In [9]:
from collections import deque
class Memory():
    """Fixed-capacity replay buffer backed by a deque."""

    def __init__(self, max_size = 1000):
        # Once max_size entries are stored, appending drops the oldest one
        self.buffer = deque(maxlen=max_size)

    def sample(self, batch_size):
        """Return batch_size distinct buffer entries chosen uniformly at random."""
        population = len(self.buffer)
        picks = np.random.choice(population, size=batch_size, replace=False)
        return [self.buffer[position] for position in picks]

## Hyperparameters

One of the more difficult aspects of reinforcement learning is the large number of hyperparameters. Not only are we tuning the network, but we're also tuning the simulation.

In [10]:
# Show the environment's observation and action spaces
env.observation_space, env.action_space

(Box(4,), Discrete(2))

In [11]:
# Exploration parameters
explore_start = 1.0            # exploration probability at start
explore_stop = 0.01           # minimum exploration probability 
decay_rate = 0.0001            # exponential decay rate for exploration prob

# Network parameters
state_size = 4                # features per state; matches Box(4,) shown above
action_size = 2               # discrete actions; matches Discrete(2) shown above
hidden_size = 4*2             # hidden-layer width passed to the Act and Env networks
a_learning_rate = 1e-4         # Adam learning rate for the Act network
e_learning_rate = 1e-4         # Adam learning rate for the Env network

# Memory parameters
memory_size = int(1e5)            # memory capacity
batch_size = int(1e2)             # experience mini-batch size
gamma=0.99                        # discount applied to the next-step Q value

In [12]:
# Reset/init the graph/session.
# tf.reset_default_graph() returns None, so the fresh default graph has to be
# fetched explicitly; otherwise `graph` would be None and tf.Session(graph=graph)
# would only work by silently falling back to the default graph.
tf.reset_default_graph()
graph = tf.get_default_graph()

# Init the model (built inside the default graph obtained above)
model = Model(action_size=action_size, state_size=state_size, hidden_size=hidden_size, gamma=gamma,
              a_learning_rate=a_learning_rate, e_learning_rate=e_learning_rate)

# Init the memory
memory = Memory(max_size=memory_size)

In [13]:
# Pre-fill the replay memory with memory_size transitions from a random policy.
# Every transition is stored with rate == -1 ("not rated yet"); once its episode
# ends, the whole episode is re-labelled with total_reward/500.
state = env.reset()
total_reward = 0
num_step = 0
for _ in range(memory_size):
    action = env.action_space.sample()
    next_state, reward, done, _ = env.step(action)
    rate = -1 # placeholder until the episode is finished
    memory.buffer.append([state, action, next_state, reward, float(done), rate])
    num_step += 1 # memory incremented
    total_reward += reward
    state = next_state
    # Truthiness test instead of `done is True`: the identity check would
    # silently miss episode ends if the flag were e.g. a numpy.bool_
    if done:
        rate = total_reward/500
        for idx in range(num_step): # episode length
            if memory.buffer[-1-idx][-1] == -1:
                memory.buffer[-1-idx][-1] = rate
        state = env.reset()
        total_reward = 0 # reset
        num_step = 0 # reset

## Training the model

Below we'll train our agent. If you want to watch it train, add an `env.render()` call inside the step loop (right after `env.step(action)`). This is slow because rendering the frames takes longer than the training updates, but it's cool to watch the agent get better at the game.

In [None]:
# Save/load the model and save for plotting
saver = tf.train.Saver()
episode_rewards_list, rewards_list = [], []
aloss_list, eloss_list, aloss2_list = [], [], []

# Make sure the checkpoint directory exists up front, so the final
# saver.save() cannot fail on a missing directory after the whole training run.
tf.gfile.MakeDirs('checkpoints')

# TF session for training
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    #saver.restore(sess, 'checkpoints/model.ckpt')    
    #saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    total_step = 0 # Explore or exploit parameter
    episode_reward = deque(maxlen=100) # running window over the last 100 episode returns

    # Training episodes/epochs
    for ep in range(11111):
        aloss_batch, eloss_batch, aloss2_batch = [], [], []
        total_reward = 0 
        state = env.reset()
        num_step = 0
        rate = -1 # transitions of the running episode are stored as "not rated yet"

        # Training steps/batches
        while True:
            # Explore (env) or Exploit (model)
            total_step += 1
            explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * total_step) 
            if explore_p > np.random.rand():
                action = env.action_space.sample()
            else:
                action_logits = sess.run(model.actions_logits, feed_dict={model.states: state.reshape([1, -1])})
                action = np.argmax(action_logits)
            next_state, reward, done, _ = env.step(action)
            memory.buffer.append([state, action, next_state, reward, float(done), rate])
            num_step += 1 # memory added
            total_reward += reward
            state = next_state
            
            # Training: redraw until every sampled transition has been rated
            # (rate > 0), i.e. none belongs to a still-unfinished episode
            while True:
                batch = memory.sample(batch_size=batch_size)
                rates = np.array([each[5] for each in batch])
                if np.all(rates > 0):
                    break
            states = np.array([each[0] for each in batch])
            actions = np.array([each[1] for each in batch])
            next_states = np.array([each[2] for each in batch])
            rewards = np.array([each[3] for each in batch])
            dones = np.array([each[4] for each in batch])

            # The same batch feeds all three updates; they stay separate
            # sess.run calls so each optimizer sees the previous one's update
            feed_dict = {model.states: states, 
                         model.actions: actions,
                         model.next_states: next_states,
                         model.rewards: rewards,
                         model.dones: dones,
                         model.rates: rates}
            aloss, _ = sess.run([model.a_loss, model.a_opt], feed_dict=feed_dict)
            eloss, _ = sess.run([model.e_loss, model.e_opt], feed_dict=feed_dict)
            aloss2, _ = sess.run([model.a_loss2, model.a_opt2], feed_dict=feed_dict)
            #print(len(dones), np.count_nonzero(dones), np.max(rates))
            aloss_batch.append(aloss)
            eloss_batch.append(eloss)
            aloss2_batch.append(aloss2)
            # Truthiness test instead of `done is True`, which would never
            # fire for a non-bool flag such as numpy.bool_
            if done:
                break
                
        # Rating the latest played episode
        rate = total_reward/500 # update rate at the end/ when episode is done
        for idx in range(num_step): # episode length
            if memory.buffer[-1-idx][-1] == -1: # only touch entries still marked as unrated
                memory.buffer[-1-idx][-1] = rate # rate the trajectory/data

        # Print out
        episode_reward.append(total_reward)
        print('Episode:{}'.format(ep),
              'meanR:{:.4f}'.format(np.mean(episode_reward)),
              'R:{:.4f}'.format(total_reward),
              'rate:{:.4f}'.format(rate),
              'aloss:{:.4f}'.format(np.mean(aloss_batch)),
              'eloss:{:.4f}'.format(np.mean(eloss_batch)),
              'aloss2:{:.4f}'.format(np.mean(aloss2_batch)),
              'exploreP:{:.4f}'.format(explore_p))

        # Plotting out
        episode_rewards_list.append([ep, np.mean(episode_reward)])
        rewards_list.append([ep, total_reward])
        aloss_list.append([ep, np.mean(aloss_batch)])
        eloss_list.append([ep, np.mean(eloss_batch)])
        aloss2_list.append([ep, np.mean(aloss2_batch)])
        
        # Break episode/epoch loop:
        # stop once the running mean over the last (up to) 100 episodes reaches 500
        if np.mean(episode_reward) >= 500:
            break
            
    # At the end of all training episodes/epochs
    saver.save(sess, 'checkpoints/model.ckpt')

Episode:0 meanR:15.0000 R:15.0000 rate:0.0300 aloss:0.7034 eloss:3.8038 aloss2:1.9327 exploreP:0.9985
Episode:1 meanR:18.0000 R:21.0000 rate:0.0420 aloss:0.7134 eloss:3.7880 aloss2:1.9536 exploreP:0.9964
Episode:2 meanR:17.0000 R:15.0000 rate:0.0300 aloss:0.7011 eloss:3.7725 aloss2:1.9586 exploreP:0.9950
Episode:3 meanR:16.2500 R:14.0000 rate:0.0280 aloss:0.7064 eloss:3.7593 aloss2:1.9546 exploreP:0.9936
Episode:4 meanR:18.2000 R:26.0000 rate:0.0520 aloss:0.7094 eloss:3.7415 aloss2:1.9808 exploreP:0.9910
Episode:5 meanR:19.3333 R:25.0000 rate:0.0500 aloss:0.7067 eloss:3.7208 aloss2:1.9980 exploreP:0.9886
Episode:6 meanR:20.7143 R:29.0000 rate:0.0580 aloss:0.7032 eloss:3.7109 aloss2:2.0020 exploreP:0.9857
Episode:7 meanR:19.8750 R:14.0000 rate:0.0280 aloss:0.7073 eloss:3.7085 aloss2:2.0140 exploreP:0.9844
Episode:8 meanR:20.3333 R:24.0000 rate:0.0480 aloss:0.7058 eloss:3.6806 aloss2:2.0301 exploreP:0.9820
Episode:9 meanR:21.8000 R:35.0000 rate:0.0700 aloss:0.7062 eloss:3.6680 aloss2:2.0

Episode:80 meanR:20.3086 R:18.0000 rate:0.0360 aloss:0.7139 eloss:2.5910 aloss2:3.6880 exploreP:0.8498
Episode:81 meanR:20.1829 R:10.0000 rate:0.0200 aloss:0.7214 eloss:2.5780 aloss2:3.7182 exploreP:0.8490
Episode:82 meanR:20.1205 R:15.0000 rate:0.0300 aloss:0.7228 eloss:2.5673 aloss2:3.7469 exploreP:0.8477
Episode:83 meanR:20.0357 R:13.0000 rate:0.0260 aloss:0.7192 eloss:2.5542 aloss2:3.7876 exploreP:0.8466
Episode:84 meanR:19.9529 R:13.0000 rate:0.0260 aloss:0.7133 eloss:2.5384 aloss2:3.8091 exploreP:0.8456
Episode:85 meanR:19.8953 R:15.0000 rate:0.0300 aloss:0.7084 eloss:2.5252 aloss2:3.8461 exploreP:0.8443
Episode:86 meanR:20.0460 R:33.0000 rate:0.0660 aloss:0.7151 eloss:2.5148 aloss2:3.8889 exploreP:0.8416
Episode:87 meanR:20.0568 R:21.0000 rate:0.0420 aloss:0.7157 eloss:2.4873 aloss2:3.9522 exploreP:0.8398
Episode:88 meanR:20.0112 R:16.0000 rate:0.0320 aloss:0.7214 eloss:2.4860 aloss2:3.9711 exploreP:0.8385
Episode:89 meanR:19.9444 R:14.0000 rate:0.0280 aloss:0.7164 eloss:2.4625 

Episode:159 meanR:20.7900 R:12.0000 rate:0.0240 aloss:0.6934 eloss:4.2344 aloss2:3.2963 exploreP:0.7177
Episode:160 meanR:20.7200 R:12.0000 rate:0.0240 aloss:0.6924 eloss:4.2127 aloss2:3.3352 exploreP:0.7168
Episode:161 meanR:20.7200 R:10.0000 rate:0.0200 aloss:0.6952 eloss:4.2992 aloss2:3.3030 exploreP:0.7161
Episode:162 meanR:20.6500 R:13.0000 rate:0.0260 aloss:0.6965 eloss:4.0922 aloss2:3.4186 exploreP:0.7152
Episode:163 meanR:21.0600 R:53.0000 rate:0.1060 aloss:0.6950 eloss:4.1382 aloss2:3.4052 exploreP:0.7115
Episode:164 meanR:20.9800 R:11.0000 rate:0.0220 aloss:0.6942 eloss:4.0991 aloss2:3.4585 exploreP:0.7107
Episode:165 meanR:21.0200 R:15.0000 rate:0.0300 aloss:0.6945 eloss:4.0863 aloss2:3.4754 exploreP:0.7097
Episode:166 meanR:20.9600 R:19.0000 rate:0.0380 aloss:0.6934 eloss:3.9819 aloss2:3.5615 exploreP:0.7083
Episode:167 meanR:21.0900 R:25.0000 rate:0.0500 aloss:0.6947 eloss:3.8750 aloss2:3.6410 exploreP:0.7066
Episode:168 meanR:21.0900 R:31.0000 rate:0.0620 aloss:0.6944 elo

Episode:239 meanR:19.9500 R:14.0000 rate:0.0280 aloss:0.7004 eloss:2.2097 aloss2:7.1673 exploreP:0.6144
Episode:240 meanR:19.8800 R:12.0000 rate:0.0240 aloss:0.7034 eloss:2.2647 aloss2:7.0623 exploreP:0.6137
Episode:241 meanR:19.8100 R:13.0000 rate:0.0260 aloss:0.6911 eloss:2.2219 aloss2:7.1227 exploreP:0.6129
Episode:242 meanR:19.8300 R:18.0000 rate:0.0360 aloss:0.7025 eloss:2.2944 aloss2:7.1281 exploreP:0.6118
Episode:243 meanR:19.9800 R:25.0000 rate:0.0500 aloss:0.6981 eloss:2.3550 aloss2:7.0232 exploreP:0.6103
Episode:244 meanR:19.7300 R:12.0000 rate:0.0240 aloss:0.6991 eloss:2.3285 aloss2:7.0169 exploreP:0.6096
Episode:245 meanR:19.9000 R:27.0000 rate:0.0540 aloss:0.6968 eloss:2.3733 aloss2:6.9961 exploreP:0.6079
Episode:246 meanR:20.0000 R:23.0000 rate:0.0460 aloss:0.6968 eloss:2.4493 aloss2:6.9045 exploreP:0.6066
Episode:247 meanR:20.0400 R:28.0000 rate:0.0560 aloss:0.7001 eloss:2.5191 aloss2:6.7306 exploreP:0.6049
Episode:248 meanR:20.0400 R:10.0000 rate:0.0200 aloss:0.7072 elo

Episode:319 meanR:17.4500 R:14.0000 rate:0.0280 aloss:0.6982 eloss:2.0495 aloss2:8.0039 exploreP:0.5364
Episode:320 meanR:17.4500 R:12.0000 rate:0.0240 aloss:0.7042 eloss:2.0894 aloss2:8.0005 exploreP:0.5357
Episode:321 meanR:17.2800 R:10.0000 rate:0.0200 aloss:0.7034 eloss:2.0639 aloss2:7.9945 exploreP:0.5352
Episode:322 meanR:17.1500 R:9.0000 rate:0.0180 aloss:0.7079 eloss:2.0432 aloss2:8.0264 exploreP:0.5347
Episode:323 meanR:17.2200 R:18.0000 rate:0.0360 aloss:0.7189 eloss:2.1384 aloss2:7.9862 exploreP:0.5338
Episode:324 meanR:17.0700 R:8.0000 rate:0.0160 aloss:0.7036 eloss:2.1274 aloss2:7.9856 exploreP:0.5334
Episode:325 meanR:17.1800 R:22.0000 rate:0.0440 aloss:0.7085 eloss:2.0677 aloss2:8.0142 exploreP:0.5322
Episode:326 meanR:17.3300 R:23.0000 rate:0.0460 aloss:0.7087 eloss:2.0825 aloss2:8.0318 exploreP:0.5310
Episode:327 meanR:17.1700 R:15.0000 rate:0.0300 aloss:0.7105 eloss:2.0986 aloss2:8.0414 exploreP:0.5302
Episode:328 meanR:16.8100 R:10.0000 rate:0.0200 aloss:0.7195 eloss

Episode:398 meanR:15.5100 R:14.0000 rate:0.0280 aloss:0.7249 eloss:2.0234 aloss2:8.6605 exploreP:0.4848
Episode:399 meanR:15.4500 R:16.0000 rate:0.0320 aloss:0.7061 eloss:2.0499 aloss2:8.6360 exploreP:0.4840
Episode:400 meanR:15.2300 R:11.0000 rate:0.0220 aloss:0.7278 eloss:2.0186 aloss2:8.6418 exploreP:0.4835
Episode:401 meanR:15.0900 R:11.0000 rate:0.0220 aloss:0.7148 eloss:1.9492 aloss2:8.6868 exploreP:0.4830
Episode:402 meanR:14.8700 R:15.0000 rate:0.0300 aloss:0.7204 eloss:2.0453 aloss2:8.6373 exploreP:0.4822
Episode:403 meanR:14.5000 R:12.0000 rate:0.0240 aloss:0.7117 eloss:2.0421 aloss2:8.6507 exploreP:0.4817
Episode:404 meanR:14.3800 R:10.0000 rate:0.0200 aloss:0.7127 eloss:2.0410 aloss2:8.6329 exploreP:0.4812
Episode:405 meanR:14.3600 R:37.0000 rate:0.0740 aloss:0.7193 eloss:1.9707 aloss2:8.6704 exploreP:0.4795
Episode:406 meanR:14.1900 R:10.0000 rate:0.0200 aloss:0.7160 eloss:1.9860 aloss2:8.6583 exploreP:0.4790
Episode:407 meanR:14.0500 R:9.0000 rate:0.0180 aloss:0.7184 elos

Episode:478 meanR:12.9800 R:21.0000 rate:0.0420 aloss:0.7366 eloss:2.0512 aloss2:9.3537 exploreP:0.4376
Episode:479 meanR:12.9600 R:9.0000 rate:0.0180 aloss:0.7265 eloss:2.0216 aloss2:9.3955 exploreP:0.4372
Episode:480 meanR:13.0300 R:16.0000 rate:0.0320 aloss:0.7356 eloss:2.0846 aloss2:9.3749 exploreP:0.4365
Episode:481 meanR:13.0500 R:11.0000 rate:0.0220 aloss:0.7468 eloss:2.0652 aloss2:9.4030 exploreP:0.4360
Episode:482 meanR:12.9900 R:12.0000 rate:0.0240 aloss:0.7456 eloss:2.0403 aloss2:9.4350 exploreP:0.4355
Episode:483 meanR:12.9900 R:11.0000 rate:0.0220 aloss:0.7403 eloss:2.0883 aloss2:9.4317 exploreP:0.4350
Episode:484 meanR:12.9800 R:10.0000 rate:0.0200 aloss:0.7463 eloss:1.9915 aloss2:9.5139 exploreP:0.4346
Episode:485 meanR:12.9700 R:12.0000 rate:0.0240 aloss:0.7498 eloss:2.0756 aloss2:9.4731 exploreP:0.4341
Episode:486 meanR:12.9400 R:9.0000 rate:0.0180 aloss:0.7541 eloss:1.9781 aloss2:9.5476 exploreP:0.4337
Episode:487 meanR:12.9400 R:11.0000 rate:0.0220 aloss:0.7310 eloss

Episode:558 meanR:12.8900 R:15.0000 rate:0.0300 aloss:0.7406 eloss:2.0361 aloss2:9.4081 exploreP:0.3965
Episode:559 meanR:12.9000 R:13.0000 rate:0.0260 aloss:0.7477 eloss:2.0380 aloss2:9.4369 exploreP:0.3959
Episode:560 meanR:12.9000 R:10.0000 rate:0.0200 aloss:0.7427 eloss:2.0339 aloss2:9.4108 exploreP:0.3956
Episode:561 meanR:12.9900 R:21.0000 rate:0.0420 aloss:0.7337 eloss:2.0422 aloss2:9.3394 exploreP:0.3948
Episode:562 meanR:12.8500 R:14.0000 rate:0.0280 aloss:0.7439 eloss:2.0305 aloss2:9.3300 exploreP:0.3942
Episode:563 meanR:12.8700 R:14.0000 rate:0.0280 aloss:0.7270 eloss:2.0330 aloss2:9.3078 exploreP:0.3937
Episode:564 meanR:12.8500 R:9.0000 rate:0.0180 aloss:0.7318 eloss:2.0095 aloss2:9.3436 exploreP:0.3933
Episode:565 meanR:12.8700 R:12.0000 rate:0.0240 aloss:0.7398 eloss:2.0267 aloss2:9.3065 exploreP:0.3929
Episode:566 meanR:12.8600 R:14.0000 rate:0.0280 aloss:0.7470 eloss:2.0569 aloss2:9.2615 exploreP:0.3923
Episode:567 meanR:12.8000 R:10.0000 rate:0.0200 aloss:0.7318 elos

Episode:637 meanR:11.6000 R:20.0000 rate:0.0400 aloss:0.7268 eloss:2.0273 aloss2:8.8149 exploreP:0.3630
Episode:638 meanR:11.5600 R:10.0000 rate:0.0200 aloss:0.7466 eloss:1.9970 aloss2:8.7825 exploreP:0.3627
Episode:639 meanR:11.5800 R:12.0000 rate:0.0240 aloss:0.7421 eloss:2.0446 aloss2:8.7984 exploreP:0.3622
Episode:640 meanR:11.5700 R:9.0000 rate:0.0180 aloss:0.7461 eloss:2.0625 aloss2:8.7136 exploreP:0.3619
Episode:641 meanR:11.5500 R:10.0000 rate:0.0200 aloss:0.7201 eloss:2.0431 aloss2:8.7709 exploreP:0.3616
Episode:642 meanR:11.4500 R:9.0000 rate:0.0180 aloss:0.7336 eloss:1.9667 aloss2:8.8120 exploreP:0.3613
Episode:643 meanR:11.4600 R:10.0000 rate:0.0200 aloss:0.7250 eloss:2.0149 aloss2:8.7332 exploreP:0.3609
Episode:644 meanR:11.4500 R:11.0000 rate:0.0220 aloss:0.7316 eloss:1.9835 aloss2:8.7464 exploreP:0.3605
Episode:645 meanR:11.4400 R:8.0000 rate:0.0160 aloss:0.7293 eloss:1.9075 aloss2:8.8118 exploreP:0.3602
Episode:646 meanR:11.4300 R:8.0000 rate:0.0160 aloss:0.7275 eloss:2

Episode:717 meanR:11.3800 R:8.0000 rate:0.0160 aloss:0.7035 eloss:2.0238 aloss2:8.4302 exploreP:0.3326
Episode:718 meanR:11.4100 R:12.0000 rate:0.0240 aloss:0.7109 eloss:2.0759 aloss2:8.3924 exploreP:0.3322
Episode:719 meanR:11.4400 R:12.0000 rate:0.0240 aloss:0.6973 eloss:2.0412 aloss2:8.4207 exploreP:0.3319
Episode:720 meanR:11.4400 R:12.0000 rate:0.0240 aloss:0.6954 eloss:2.0651 aloss2:8.4134 exploreP:0.3315
Episode:721 meanR:11.4100 R:8.0000 rate:0.0160 aloss:0.7089 eloss:2.1009 aloss2:8.3749 exploreP:0.3312
Episode:722 meanR:11.3900 R:9.0000 rate:0.0180 aloss:0.7084 eloss:2.1131 aloss2:8.3659 exploreP:0.3309
Episode:723 meanR:11.3000 R:9.0000 rate:0.0180 aloss:0.7011 eloss:2.1203 aloss2:8.3717 exploreP:0.3306
Episode:724 meanR:11.2800 R:9.0000 rate:0.0180 aloss:0.6888 eloss:2.1367 aloss2:8.3667 exploreP:0.3303
Episode:725 meanR:11.2600 R:10.0000 rate:0.0200 aloss:0.7039 eloss:2.1039 aloss2:8.3797 exploreP:0.3300
Episode:726 meanR:11.4200 R:26.0000 rate:0.0520 aloss:0.6997 eloss:2.

Episode:796 meanR:13.5500 R:13.0000 rate:0.0260 aloss:0.7238 eloss:2.2370 aloss2:8.4187 exploreP:0.2985
Episode:797 meanR:13.5200 R:8.0000 rate:0.0160 aloss:0.7319 eloss:2.1721 aloss2:8.4850 exploreP:0.2982
Episode:798 meanR:13.5000 R:12.0000 rate:0.0240 aloss:0.7192 eloss:2.2439 aloss2:8.4545 exploreP:0.2979
Episode:799 meanR:13.5400 R:14.0000 rate:0.0280 aloss:0.7197 eloss:2.1481 aloss2:8.5026 exploreP:0.2975
Episode:800 meanR:13.5400 R:11.0000 rate:0.0220 aloss:0.7159 eloss:2.2228 aloss2:8.4765 exploreP:0.2972
Episode:801 meanR:13.5700 R:14.0000 rate:0.0280 aloss:0.7119 eloss:2.1650 aloss2:8.5354 exploreP:0.2968
Episode:802 meanR:13.5700 R:13.0000 rate:0.0260 aloss:0.7177 eloss:2.2070 aloss2:8.5125 exploreP:0.2964
Episode:803 meanR:13.6100 R:13.0000 rate:0.0260 aloss:0.7197 eloss:2.1471 aloss2:8.5690 exploreP:0.2960
Episode:804 meanR:13.6100 R:10.0000 rate:0.0200 aloss:0.7152 eloss:2.2721 aloss2:8.5039 exploreP:0.2957
Episode:805 meanR:13.6300 R:15.0000 rate:0.0300 aloss:0.7306 elos

Episode:877 meanR:11.2200 R:9.0000 rate:0.0180 aloss:0.7151 eloss:2.0689 aloss2:8.5150 exploreP:0.2737
Episode:878 meanR:11.1900 R:10.0000 rate:0.0200 aloss:0.7174 eloss:2.1800 aloss2:8.4749 exploreP:0.2734
Episode:879 meanR:11.1900 R:10.0000 rate:0.0200 aloss:0.7091 eloss:2.1166 aloss2:8.4924 exploreP:0.2731
Episode:880 meanR:11.2100 R:12.0000 rate:0.0240 aloss:0.7152 eloss:2.0944 aloss2:8.5023 exploreP:0.2728
Episode:881 meanR:11.1800 R:9.0000 rate:0.0180 aloss:0.7012 eloss:2.1434 aloss2:8.5122 exploreP:0.2726
Episode:882 meanR:11.1700 R:13.0000 rate:0.0260 aloss:0.7201 eloss:2.1283 aloss2:8.5197 exploreP:0.2723
Episode:883 meanR:11.1400 R:9.0000 rate:0.0180 aloss:0.7265 eloss:2.1276 aloss2:8.5054 exploreP:0.2720
Episode:884 meanR:11.0900 R:16.0000 rate:0.0320 aloss:0.7171 eloss:2.1551 aloss2:8.4859 exploreP:0.2716
Episode:885 meanR:11.1000 R:10.0000 rate:0.0200 aloss:0.7089 eloss:2.0506 aloss2:8.5652 exploreP:0.2713
Episode:886 meanR:11.0500 R:8.0000 rate:0.0160 aloss:0.6903 eloss:2

Episode:957 meanR:10.7100 R:15.0000 rate:0.0300 aloss:0.7631 eloss:2.1948 aloss2:9.4485 exploreP:0.2522
Episode:958 meanR:10.6200 R:9.0000 rate:0.0180 aloss:0.7529 eloss:2.1963 aloss2:9.4670 exploreP:0.2520
Episode:959 meanR:10.6000 R:8.0000 rate:0.0160 aloss:0.7547 eloss:2.2505 aloss2:9.4521 exploreP:0.2518
Episode:960 meanR:10.6100 R:11.0000 rate:0.0220 aloss:0.7504 eloss:2.2265 aloss2:9.4839 exploreP:0.2515
Episode:961 meanR:10.6300 R:11.0000 rate:0.0220 aloss:0.7639 eloss:2.2470 aloss2:9.4653 exploreP:0.2513
Episode:962 meanR:10.6700 R:17.0000 rate:0.0340 aloss:0.7379 eloss:2.2168 aloss2:9.5148 exploreP:0.2509
Episode:963 meanR:10.6500 R:10.0000 rate:0.0200 aloss:0.7425 eloss:2.1753 aloss2:9.5271 exploreP:0.2506
Episode:964 meanR:10.6700 R:11.0000 rate:0.0220 aloss:0.7682 eloss:2.2127 aloss2:9.5043 exploreP:0.2504
Episode:965 meanR:10.6500 R:10.0000 rate:0.0200 aloss:0.7449 eloss:2.2579 aloss2:9.5122 exploreP:0.2501
Episode:966 meanR:10.6400 R:11.0000 rate:0.0220 aloss:0.7461 eloss

Episode:1036 meanR:10.8800 R:11.0000 rate:0.0220 aloss:0.7220 eloss:2.3384 aloss2:8.0623 exploreP:0.2323
Episode:1037 meanR:10.9100 R:12.0000 rate:0.0240 aloss:0.7480 eloss:2.4203 aloss2:8.0036 exploreP:0.2321
Episode:1038 meanR:10.9400 R:11.0000 rate:0.0220 aloss:0.7232 eloss:2.3803 aloss2:8.0048 exploreP:0.2318
Episode:1039 meanR:10.9800 R:13.0000 rate:0.0260 aloss:0.7259 eloss:2.4052 aloss2:7.9947 exploreP:0.2315
Episode:1040 meanR:10.9400 R:9.0000 rate:0.0180 aloss:0.7220 eloss:2.3386 aloss2:8.0148 exploreP:0.2313
Episode:1041 meanR:10.9200 R:10.0000 rate:0.0200 aloss:0.7291 eloss:2.2939 aloss2:8.0246 exploreP:0.2311
Episode:1042 meanR:10.9400 R:11.0000 rate:0.0220 aloss:0.7197 eloss:2.3712 aloss2:7.9860 exploreP:0.2309
Episode:1043 meanR:10.9300 R:9.0000 rate:0.0180 aloss:0.7387 eloss:2.2967 aloss2:8.0177 exploreP:0.2307
Episode:1044 meanR:10.9200 R:12.0000 rate:0.0240 aloss:0.7423 eloss:2.3062 aloss2:8.0240 exploreP:0.2304
Episode:1045 meanR:10.9100 R:9.0000 rate:0.0180 aloss:0.7

Episode:1116 meanR:10.5600 R:10.0000 rate:0.0200 aloss:0.7170 eloss:2.1352 aloss2:8.3905 exploreP:0.2142
Episode:1117 meanR:10.5400 R:8.0000 rate:0.0160 aloss:0.7175 eloss:2.0982 aloss2:8.4146 exploreP:0.2141
Episode:1118 meanR:10.5200 R:9.0000 rate:0.0180 aloss:0.7273 eloss:2.0601 aloss2:8.4549 exploreP:0.2139
Episode:1119 meanR:10.5100 R:10.0000 rate:0.0200 aloss:0.7139 eloss:2.1270 aloss2:8.3939 exploreP:0.2137
Episode:1120 meanR:10.5700 R:16.0000 rate:0.0320 aloss:0.7231 eloss:2.1679 aloss2:8.3774 exploreP:0.2133
Episode:1121 meanR:10.5800 R:11.0000 rate:0.0220 aloss:0.7299 eloss:2.0914 aloss2:8.4215 exploreP:0.2131
Episode:1122 meanR:10.5800 R:10.0000 rate:0.0200 aloss:0.7259 eloss:2.2338 aloss2:8.3230 exploreP:0.2129
Episode:1123 meanR:10.6400 R:15.0000 rate:0.0300 aloss:0.7373 eloss:2.1131 aloss2:8.3899 exploreP:0.2126
Episode:1124 meanR:10.7400 R:20.0000 rate:0.0400 aloss:0.7162 eloss:2.1417 aloss2:8.3943 exploreP:0.2122
Episode:1125 meanR:10.7300 R:10.0000 rate:0.0200 aloss:0.

Episode:1195 meanR:10.7000 R:9.0000 rate:0.0180 aloss:0.7583 eloss:2.2452 aloss2:8.5336 exploreP:0.1977
Episode:1196 meanR:10.6900 R:9.0000 rate:0.0180 aloss:0.7676 eloss:2.0372 aloss2:8.6016 exploreP:0.1975
Episode:1197 meanR:10.7500 R:14.0000 rate:0.0280 aloss:0.7559 eloss:2.2295 aloss2:8.5323 exploreP:0.1972
Episode:1198 meanR:10.7500 R:11.0000 rate:0.0220 aloss:0.7699 eloss:2.1658 aloss2:8.5602 exploreP:0.1970
Episode:1199 meanR:10.7800 R:13.0000 rate:0.0260 aloss:0.7494 eloss:2.1566 aloss2:8.5934 exploreP:0.1968
Episode:1200 meanR:10.7900 R:11.0000 rate:0.0220 aloss:0.7451 eloss:2.1058 aloss2:8.5997 exploreP:0.1966
Episode:1201 meanR:10.8200 R:12.0000 rate:0.0240 aloss:0.7472 eloss:2.1682 aloss2:8.5805 exploreP:0.1964
Episode:1202 meanR:10.8400 R:13.0000 rate:0.0260 aloss:0.7483 eloss:2.1259 aloss2:8.6002 exploreP:0.1961
Episode:1203 meanR:10.7900 R:9.0000 rate:0.0180 aloss:0.7592 eloss:2.1182 aloss2:8.6403 exploreP:0.1960
Episode:1204 meanR:10.8200 R:12.0000 rate:0.0240 aloss:0.7

Episode:1274 meanR:10.2400 R:10.0000 rate:0.0200 aloss:0.7602 eloss:2.0852 aloss2:8.8425 exploreP:0.1831
Episode:1275 meanR:10.2600 R:12.0000 rate:0.0240 aloss:0.7458 eloss:2.1287 aloss2:8.7933 exploreP:0.1829
Episode:1276 meanR:10.2000 R:10.0000 rate:0.0200 aloss:0.7729 eloss:2.0970 aloss2:8.8220 exploreP:0.1827
Episode:1277 meanR:10.1400 R:9.0000 rate:0.0180 aloss:0.7498 eloss:2.1207 aloss2:8.8434 exploreP:0.1826
Episode:1278 meanR:10.1600 R:10.0000 rate:0.0200 aloss:0.7531 eloss:2.1780 aloss2:8.8224 exploreP:0.1824
Episode:1279 meanR:10.1800 R:13.0000 rate:0.0260 aloss:0.7695 eloss:2.0642 aloss2:8.8614 exploreP:0.1822
Episode:1280 meanR:10.1600 R:8.0000 rate:0.0160 aloss:0.7755 eloss:2.0661 aloss2:8.8739 exploreP:0.1820
Episode:1281 meanR:10.1800 R:11.0000 rate:0.0220 aloss:0.7627 eloss:2.1852 aloss2:8.8448 exploreP:0.1818
Episode:1282 meanR:10.1700 R:8.0000 rate:0.0160 aloss:0.7508 eloss:2.1302 aloss2:8.8794 exploreP:0.1817
Episode:1283 meanR:10.2000 R:11.0000 rate:0.0220 aloss:0.7

Episode:1353 meanR:10.3300 R:9.0000 rate:0.0180 aloss:0.7515 eloss:2.1787 aloss2:8.9531 exploreP:0.1694
Episode:1354 meanR:10.3200 R:9.0000 rate:0.0180 aloss:0.7589 eloss:2.1235 aloss2:8.9900 exploreP:0.1692
Episode:1355 meanR:10.3800 R:15.0000 rate:0.0300 aloss:0.7768 eloss:2.1147 aloss2:8.9921 exploreP:0.1690
Episode:1356 meanR:10.3800 R:9.0000 rate:0.0180 aloss:0.7715 eloss:2.1420 aloss2:8.9598 exploreP:0.1688
Episode:1357 meanR:10.3800 R:11.0000 rate:0.0220 aloss:0.7870 eloss:2.1846 aloss2:8.9642 exploreP:0.1687
Episode:1358 meanR:10.3900 R:10.0000 rate:0.0200 aloss:0.7747 eloss:2.1070 aloss2:8.9594 exploreP:0.1685
Episode:1359 meanR:10.3800 R:9.0000 rate:0.0180 aloss:0.7659 eloss:2.1072 aloss2:9.0169 exploreP:0.1683
Episode:1360 meanR:10.3900 R:10.0000 rate:0.0200 aloss:0.7552 eloss:2.0541 aloss2:9.0913 exploreP:0.1682
Episode:1361 meanR:10.3800 R:9.0000 rate:0.0180 aloss:0.7847 eloss:2.1445 aloss2:9.0186 exploreP:0.1680
Episode:1362 meanR:10.3600 R:8.0000 rate:0.0160 aloss:0.7637

Episode:1434 meanR:10.3500 R:9.0000 rate:0.0180 aloss:0.7700 eloss:2.0731 aloss2:9.1373 exploreP:0.1566
Episode:1435 meanR:10.3100 R:10.0000 rate:0.0200 aloss:0.7528 eloss:2.1442 aloss2:9.0952 exploreP:0.1564
Episode:1436 meanR:10.2700 R:9.0000 rate:0.0180 aloss:0.7548 eloss:2.1700 aloss2:9.0918 exploreP:0.1563
Episode:1437 meanR:10.2800 R:9.0000 rate:0.0180 aloss:0.7655 eloss:2.1590 aloss2:9.0600 exploreP:0.1562
Episode:1438 meanR:10.2600 R:8.0000 rate:0.0160 aloss:0.7713 eloss:2.1858 aloss2:9.0730 exploreP:0.1560
Episode:1439 meanR:10.2900 R:13.0000 rate:0.0260 aloss:0.7747 eloss:2.1195 aloss2:9.1114 exploreP:0.1559
Episode:1440 meanR:10.2800 R:13.0000 rate:0.0260 aloss:0.7738 eloss:2.1176 aloss2:9.0643 exploreP:0.1557
Episode:1441 meanR:10.2500 R:8.0000 rate:0.0160 aloss:0.7513 eloss:2.0859 aloss2:9.1323 exploreP:0.1555
Episode:1442 meanR:10.2300 R:10.0000 rate:0.0200 aloss:0.7545 eloss:2.1577 aloss2:9.1292 exploreP:0.1554
Episode:1443 meanR:10.2200 R:11.0000 rate:0.0220 aloss:0.769

Episode:1514 meanR:10.1500 R:9.0000 rate:0.0180 aloss:0.7643 eloss:2.0643 aloss2:9.1669 exploreP:0.1452
Episode:1515 meanR:10.1500 R:9.0000 rate:0.0180 aloss:0.7742 eloss:2.1978 aloss2:9.0651 exploreP:0.1451
Episode:1516 meanR:10.1100 R:10.0000 rate:0.0200 aloss:0.7800 eloss:2.2572 aloss2:9.0885 exploreP:0.1449
Episode:1517 meanR:10.1100 R:9.0000 rate:0.0180 aloss:0.7955 eloss:2.1464 aloss2:9.1153 exploreP:0.1448
Episode:1518 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.7748 eloss:2.1512 aloss2:9.1509 exploreP:0.1447
Episode:1519 meanR:10.0700 R:12.0000 rate:0.0240 aloss:0.7852 eloss:2.0254 aloss2:9.2132 exploreP:0.1445
Episode:1520 meanR:10.0600 R:10.0000 rate:0.0200 aloss:0.7706 eloss:2.1477 aloss2:9.1895 exploreP:0.1444
Episode:1521 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.7714 eloss:2.2300 aloss2:9.0962 exploreP:0.1442
Episode:1522 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.7752 eloss:2.1449 aloss2:9.1308 exploreP:0.1441
Episode:1523 meanR:10.0800 R:11.0000 rate:0.0220 aloss:0.8

Episode:1595 meanR:10.0900 R:9.0000 rate:0.0180 aloss:0.7593 eloss:1.9762 aloss2:9.3682 exploreP:0.1345
Episode:1596 meanR:10.0700 R:8.0000 rate:0.0160 aloss:0.7565 eloss:2.0902 aloss2:9.3517 exploreP:0.1344
Episode:1597 meanR:10.0600 R:9.0000 rate:0.0180 aloss:0.7684 eloss:2.1699 aloss2:9.2707 exploreP:0.1343
Episode:1598 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.7428 eloss:2.1521 aloss2:9.3171 exploreP:0.1342
Episode:1599 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7602 eloss:2.1642 aloss2:9.2715 exploreP:0.1341
Episode:1600 meanR:10.0600 R:10.0000 rate:0.0200 aloss:0.7729 eloss:1.9900 aloss2:9.3685 exploreP:0.1340
Episode:1601 meanR:10.0800 R:13.0000 rate:0.0260 aloss:0.7517 eloss:2.1305 aloss2:9.2967 exploreP:0.1338
Episode:1602 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7802 eloss:2.0705 aloss2:9.3164 exploreP:0.1337
Episode:1603 meanR:10.0500 R:10.0000 rate:0.0200 aloss:0.7679 eloss:2.1591 aloss2:9.3300 exploreP:0.1336
Episode:1604 meanR:10.0400 R:9.0000 rate:0.0180 aloss:0.7660

Episode:1674 meanR:10.0900 R:9.0000 rate:0.0180 aloss:0.7618 eloss:2.0946 aloss2:9.4458 exploreP:0.1250
Episode:1675 meanR:10.1000 R:10.0000 rate:0.0200 aloss:0.7836 eloss:2.1092 aloss2:9.3840 exploreP:0.1249
Episode:1676 meanR:10.1000 R:9.0000 rate:0.0180 aloss:0.7733 eloss:2.0872 aloss2:9.4490 exploreP:0.1248
Episode:1677 meanR:10.1000 R:10.0000 rate:0.0200 aloss:0.7808 eloss:2.1124 aloss2:9.4720 exploreP:0.1247
Episode:1678 meanR:10.1000 R:11.0000 rate:0.0220 aloss:0.7516 eloss:2.1219 aloss2:9.4433 exploreP:0.1245
Episode:1679 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7615 eloss:2.0383 aloss2:9.5161 exploreP:0.1244
Episode:1680 meanR:10.0600 R:9.0000 rate:0.0180 aloss:0.7943 eloss:2.1789 aloss2:9.3742 exploreP:0.1243
Episode:1681 meanR:10.0700 R:12.0000 rate:0.0240 aloss:0.7647 eloss:2.1794 aloss2:9.4206 exploreP:0.1242
Episode:1682 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7789 eloss:2.2337 aloss2:9.3968 exploreP:0.1241
Episode:1683 meanR:10.0900 R:10.0000 rate:0.0200 aloss:0.768

Episode:1753 meanR:10.1400 R:10.0000 rate:0.0200 aloss:0.7856 eloss:2.0531 aloss2:9.5380 exploreP:0.1161
Episode:1754 meanR:10.1400 R:9.0000 rate:0.0180 aloss:0.7652 eloss:2.0654 aloss2:9.5526 exploreP:0.1160
Episode:1755 meanR:10.1100 R:8.0000 rate:0.0160 aloss:0.8024 eloss:2.0113 aloss2:9.5258 exploreP:0.1159
Episode:1756 meanR:10.1000 R:9.0000 rate:0.0180 aloss:0.7779 eloss:2.0681 aloss2:9.5272 exploreP:0.1158
Episode:1757 meanR:10.1000 R:9.0000 rate:0.0180 aloss:0.7898 eloss:2.1695 aloss2:9.4748 exploreP:0.1157
Episode:1758 meanR:10.1100 R:10.0000 rate:0.0200 aloss:0.7833 eloss:2.0522 aloss2:9.5720 exploreP:0.1156
Episode:1759 meanR:10.1100 R:9.0000 rate:0.0180 aloss:0.7794 eloss:2.1014 aloss2:9.5321 exploreP:0.1155
Episode:1760 meanR:10.0900 R:8.0000 rate:0.0160 aloss:0.7808 eloss:2.0883 aloss2:9.5863 exploreP:0.1155
Episode:1761 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7666 eloss:2.0452 aloss2:9.5984 exploreP:0.1154
Episode:1762 meanR:10.0900 R:11.0000 rate:0.0220 aloss:0.7675 

Episode:1832 meanR:9.9200 R:9.0000 rate:0.0180 aloss:0.7582 eloss:2.1488 aloss2:9.5350 exploreP:0.1081
Episode:1833 meanR:9.9100 R:9.0000 rate:0.0180 aloss:0.7695 eloss:2.0918 aloss2:9.5816 exploreP:0.1080
Episode:1834 meanR:9.9200 R:10.0000 rate:0.0200 aloss:0.7902 eloss:2.1412 aloss2:9.5567 exploreP:0.1079
Episode:1835 meanR:9.9100 R:9.0000 rate:0.0180 aloss:0.7513 eloss:2.1366 aloss2:9.5514 exploreP:0.1078
Episode:1836 meanR:9.9000 R:8.0000 rate:0.0160 aloss:0.7679 eloss:2.1347 aloss2:9.6104 exploreP:0.1077
Episode:1837 meanR:9.9000 R:10.0000 rate:0.0200 aloss:0.7666 eloss:2.0996 aloss2:9.5614 exploreP:0.1077
Episode:1838 meanR:9.9100 R:11.0000 rate:0.0220 aloss:0.7674 eloss:2.0580 aloss2:9.6561 exploreP:0.1075
Episode:1839 meanR:9.9300 R:12.0000 rate:0.0240 aloss:0.7596 eloss:2.1160 aloss2:9.5916 exploreP:0.1074
Episode:1840 meanR:9.9400 R:12.0000 rate:0.0240 aloss:0.7708 eloss:2.1358 aloss2:9.5501 exploreP:0.1073
Episode:1841 meanR:9.9600 R:12.0000 rate:0.0240 aloss:0.7908 eloss:2

Episode:1911 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.7822 eloss:2.1868 aloss2:9.5178 exploreP:0.1006
Episode:1912 meanR:10.0500 R:9.0000 rate:0.0180 aloss:0.7896 eloss:2.0250 aloss2:9.6218 exploreP:0.1005
Episode:1913 meanR:10.0900 R:12.0000 rate:0.0240 aloss:0.7486 eloss:2.1574 aloss2:9.6222 exploreP:0.1004
Episode:1914 meanR:10.0700 R:9.0000 rate:0.0180 aloss:0.7948 eloss:2.0517 aloss2:9.6254 exploreP:0.1003
Episode:1915 meanR:10.0800 R:10.0000 rate:0.0200 aloss:0.7425 eloss:2.2613 aloss2:9.5557 exploreP:0.1003
Episode:1916 meanR:10.0500 R:9.0000 rate:0.0180 aloss:0.7619 eloss:2.1247 aloss2:9.5614 exploreP:0.1002
Episode:1917 meanR:10.0500 R:9.0000 rate:0.0180 aloss:0.7461 eloss:2.1881 aloss2:9.5933 exploreP:0.1001
Episode:1918 meanR:10.0300 R:8.0000 rate:0.0160 aloss:0.7556 eloss:2.1732 aloss2:9.5953 exploreP:0.1000
Episode:1919 meanR:10.0500 R:12.0000 rate:0.0240 aloss:0.7468 eloss:2.2095 aloss2:9.5910 exploreP:0.0999
Episode:1920 meanR:10.0400 R:8.0000 rate:0.0160 aloss:0.8157

Episode:1990 meanR:9.9900 R:9.0000 rate:0.0180 aloss:0.7465 eloss:2.1946 aloss2:9.7150 exploreP:0.0938
Episode:1991 meanR:9.9900 R:11.0000 rate:0.0220 aloss:0.7688 eloss:2.1519 aloss2:9.6746 exploreP:0.0937
Episode:1992 meanR:10.0100 R:12.0000 rate:0.0240 aloss:0.7393 eloss:2.1690 aloss2:9.6981 exploreP:0.0936
Episode:1993 meanR:10.0200 R:10.0000 rate:0.0200 aloss:0.7722 eloss:2.1764 aloss2:9.7115 exploreP:0.0936
Episode:1994 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.7647 eloss:2.0714 aloss2:9.7349 exploreP:0.0935
Episode:1995 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.7677 eloss:2.1336 aloss2:9.6780 exploreP:0.0934
Episode:1996 meanR:9.9700 R:8.0000 rate:0.0160 aloss:0.7465 eloss:2.1078 aloss2:9.7479 exploreP:0.0933
Episode:1997 meanR:9.9600 R:9.0000 rate:0.0180 aloss:0.7788 eloss:2.1288 aloss2:9.6828 exploreP:0.0932
Episode:1998 meanR:9.9600 R:10.0000 rate:0.0200 aloss:0.7878 eloss:2.1096 aloss2:9.6769 exploreP:0.0932
Episode:1999 meanR:9.9600 R:9.0000 rate:0.0180 aloss:0.7839 elo

Episode:2070 meanR:9.7800 R:11.0000 rate:0.0220 aloss:0.7934 eloss:2.2150 aloss2:9.4625 exploreP:0.0875
Episode:2071 meanR:9.7600 R:8.0000 rate:0.0160 aloss:0.7481 eloss:2.3529 aloss2:9.4782 exploreP:0.0875
Episode:2072 meanR:9.7600 R:10.0000 rate:0.0200 aloss:0.7652 eloss:2.2540 aloss2:9.5301 exploreP:0.0874
Episode:2073 meanR:9.7400 R:8.0000 rate:0.0160 aloss:0.7818 eloss:2.2052 aloss2:9.4789 exploreP:0.0873
Episode:2074 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.7818 eloss:2.1309 aloss2:9.5813 exploreP:0.0873
Episode:2075 meanR:9.6900 R:10.0000 rate:0.0200 aloss:0.7559 eloss:2.2774 aloss2:9.5217 exploreP:0.0872
Episode:2076 meanR:9.7000 R:10.0000 rate:0.0200 aloss:0.7650 eloss:2.2305 aloss2:9.5388 exploreP:0.0871
Episode:2077 meanR:9.7000 R:9.0000 rate:0.0180 aloss:0.8109 eloss:2.2080 aloss2:9.5307 exploreP:0.0870
Episode:2078 meanR:9.7100 R:9.0000 rate:0.0180 aloss:0.7604 eloss:2.1472 aloss2:9.5749 exploreP:0.0870
Episode:2079 meanR:9.6900 R:10.0000 rate:0.0200 aloss:0.8167 eloss:2

Episode:2150 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.7603 eloss:2.2010 aloss2:9.6580 exploreP:0.0818
Episode:2151 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.7563 eloss:2.1080 aloss2:9.6529 exploreP:0.0817
Episode:2152 meanR:9.7000 R:9.0000 rate:0.0180 aloss:0.7491 eloss:2.2201 aloss2:9.6264 exploreP:0.0816
Episode:2153 meanR:9.7200 R:11.0000 rate:0.0220 aloss:0.7557 eloss:2.0886 aloss2:9.6669 exploreP:0.0815
Episode:2154 meanR:9.7200 R:10.0000 rate:0.0200 aloss:0.7684 eloss:2.1684 aloss2:9.6052 exploreP:0.0815
Episode:2155 meanR:9.7400 R:11.0000 rate:0.0220 aloss:0.7694 eloss:2.0972 aloss2:9.6569 exploreP:0.0814
Episode:2156 meanR:9.7300 R:10.0000 rate:0.0200 aloss:0.7471 eloss:2.1928 aloss2:9.6359 exploreP:0.0813
Episode:2157 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.7495 eloss:2.1349 aloss2:9.6723 exploreP:0.0812
Episode:2158 meanR:9.7400 R:9.0000 rate:0.0180 aloss:0.7427 eloss:2.2175 aloss2:9.6434 exploreP:0.0812
Episode:2159 meanR:9.7600 R:10.0000 rate:0.0200 aloss:0.7360 eloss

Episode:2230 meanR:9.8100 R:11.0000 rate:0.0220 aloss:0.7751 eloss:2.1435 aloss2:9.4870 exploreP:0.0763
Episode:2231 meanR:9.8000 R:8.0000 rate:0.0160 aloss:0.7483 eloss:2.2974 aloss2:9.4368 exploreP:0.0763
Episode:2232 meanR:9.8000 R:10.0000 rate:0.0200 aloss:0.7937 eloss:2.0791 aloss2:9.4795 exploreP:0.0762
Episode:2233 meanR:9.8000 R:9.0000 rate:0.0180 aloss:0.8223 eloss:2.1339 aloss2:9.4185 exploreP:0.0762
Episode:2234 meanR:9.8100 R:10.0000 rate:0.0200 aloss:0.7838 eloss:2.2488 aloss2:9.4535 exploreP:0.0761
Episode:2235 meanR:9.8200 R:9.0000 rate:0.0180 aloss:0.7932 eloss:2.1422 aloss2:9.5628 exploreP:0.0760
Episode:2236 meanR:9.8100 R:10.0000 rate:0.0200 aloss:0.7832 eloss:2.3116 aloss2:9.3942 exploreP:0.0760
Episode:2237 meanR:9.8200 R:10.0000 rate:0.0200 aloss:0.7923 eloss:2.1710 aloss2:9.4706 exploreP:0.0759
Episode:2238 meanR:9.8000 R:10.0000 rate:0.0200 aloss:0.7766 eloss:2.1136 aloss2:9.5547 exploreP:0.0758
Episode:2239 meanR:9.8100 R:10.0000 rate:0.0200 aloss:0.7788 eloss:

Episode:2311 meanR:9.8900 R:11.0000 rate:0.0220 aloss:0.7649 eloss:2.0622 aloss2:9.7794 exploreP:0.0712
Episode:2312 meanR:9.8800 R:9.0000 rate:0.0180 aloss:0.7400 eloss:2.2453 aloss2:9.6435 exploreP:0.0712
Episode:2313 meanR:9.8800 R:10.0000 rate:0.0200 aloss:0.7353 eloss:2.0834 aloss2:9.8139 exploreP:0.0711
Episode:2314 meanR:9.8800 R:10.0000 rate:0.0200 aloss:0.7310 eloss:2.2255 aloss2:9.6842 exploreP:0.0710
Episode:2315 meanR:9.8800 R:10.0000 rate:0.0200 aloss:0.7189 eloss:2.2470 aloss2:9.6325 exploreP:0.0710
Episode:2316 meanR:9.8700 R:10.0000 rate:0.0200 aloss:0.7330 eloss:2.0884 aloss2:9.7496 exploreP:0.0709
Episode:2317 meanR:9.8800 R:9.0000 rate:0.0180 aloss:0.7376 eloss:2.0980 aloss2:9.7439 exploreP:0.0709
Episode:2318 meanR:9.8700 R:8.0000 rate:0.0160 aloss:0.7420 eloss:2.1711 aloss2:9.6955 exploreP:0.0708
Episode:2319 meanR:9.8600 R:9.0000 rate:0.0180 aloss:0.7571 eloss:2.2334 aloss2:9.6565 exploreP:0.0708
Episode:2320 meanR:9.8400 R:8.0000 rate:0.0160 aloss:0.7474 eloss:2.

Episode:2391 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.7748 eloss:2.2211 aloss2:9.5807 exploreP:0.0667
Episode:2392 meanR:9.7600 R:11.0000 rate:0.0220 aloss:0.7911 eloss:2.1519 aloss2:9.6331 exploreP:0.0666
Episode:2393 meanR:9.7700 R:10.0000 rate:0.0200 aloss:0.7565 eloss:2.1443 aloss2:9.7245 exploreP:0.0665
Episode:2394 meanR:9.8000 R:12.0000 rate:0.0240 aloss:0.7832 eloss:2.2084 aloss2:9.6626 exploreP:0.0665
Episode:2395 meanR:9.8200 R:12.0000 rate:0.0240 aloss:0.7544 eloss:2.1917 aloss2:9.7320 exploreP:0.0664
Episode:2396 meanR:9.8200 R:9.0000 rate:0.0180 aloss:0.7708 eloss:2.1939 aloss2:9.7196 exploreP:0.0663
Episode:2397 meanR:9.8200 R:9.0000 rate:0.0180 aloss:0.7776 eloss:2.2698 aloss2:9.6962 exploreP:0.0663
Episode:2398 meanR:9.8100 R:10.0000 rate:0.0200 aloss:0.7730 eloss:2.0985 aloss2:9.7413 exploreP:0.0662
Episode:2399 meanR:9.8100 R:9.0000 rate:0.0180 aloss:0.7948 eloss:2.2355 aloss2:9.7586 exploreP:0.0662
Episode:2400 meanR:9.8200 R:11.0000 rate:0.0220 aloss:0.7801 eloss:

Episode:2472 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.7020 eloss:2.1419 aloss2:9.9463 exploreP:0.0624
Episode:2473 meanR:9.6600 R:8.0000 rate:0.0160 aloss:0.7120 eloss:2.0712 aloss2:9.8479 exploreP:0.0624
Episode:2474 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.7198 eloss:2.0742 aloss2:9.9322 exploreP:0.0623
Episode:2475 meanR:9.6800 R:9.0000 rate:0.0180 aloss:0.7053 eloss:2.1544 aloss2:9.8013 exploreP:0.0623
Episode:2476 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.7179 eloss:2.1322 aloss2:9.7706 exploreP:0.0622
Episode:2477 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.7064 eloss:2.0999 aloss2:9.8313 exploreP:0.0622
Episode:2478 meanR:9.6400 R:8.0000 rate:0.0160 aloss:0.7091 eloss:2.1639 aloss2:9.7674 exploreP:0.0621
Episode:2479 meanR:9.6100 R:9.0000 rate:0.0180 aloss:0.7020 eloss:2.1020 aloss2:9.7684 exploreP:0.0621
Episode:2480 meanR:9.5900 R:9.0000 rate:0.0180 aloss:0.7004 eloss:2.2694 aloss2:9.6426 exploreP:0.0620
Episode:2481 meanR:9.5900 R:10.0000 rate:0.0200 aloss:0.7029 eloss:2.

Episode:2552 meanR:9.5300 R:10.0000 rate:0.0200 aloss:0.7450 eloss:5.1617 aloss2:7.7907 exploreP:0.0586
Episode:2553 meanR:9.5700 R:13.0000 rate:0.0260 aloss:0.7718 eloss:5.5562 aloss2:7.7180 exploreP:0.0585
Episode:2554 meanR:9.6100 R:13.0000 rate:0.0260 aloss:0.7464 eloss:5.1186 aloss2:7.8969 exploreP:0.0584
Episode:2555 meanR:9.6300 R:11.0000 rate:0.0220 aloss:0.7686 eloss:4.8341 aloss2:7.9372 exploreP:0.0584
Episode:2556 meanR:9.6100 R:8.0000 rate:0.0160 aloss:0.7781 eloss:4.4949 aloss2:8.1561 exploreP:0.0583
Episode:2557 meanR:9.6100 R:10.0000 rate:0.0200 aloss:0.8111 eloss:5.1213 aloss2:7.8402 exploreP:0.0583
Episode:2558 meanR:9.6100 R:10.0000 rate:0.0200 aloss:0.7791 eloss:5.5275 aloss2:7.6901 exploreP:0.0582
Episode:2559 meanR:9.6200 R:11.0000 rate:0.0220 aloss:0.7659 eloss:5.1408 aloss2:7.9292 exploreP:0.0582
Episode:2560 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.7701 eloss:5.2463 aloss2:7.8309 exploreP:0.0581
Episode:2561 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.7677 eloss

Episode:2633 meanR:9.7500 R:9.0000 rate:0.0180 aloss:0.7467 eloss:3.5098 aloss2:8.6832 exploreP:0.0549
Episode:2634 meanR:9.7600 R:10.0000 rate:0.0200 aloss:0.7795 eloss:3.7395 aloss2:8.6052 exploreP:0.0548
Episode:2635 meanR:9.7600 R:9.0000 rate:0.0180 aloss:0.8154 eloss:3.6138 aloss2:8.5608 exploreP:0.0548
Episode:2636 meanR:9.7600 R:10.0000 rate:0.0200 aloss:0.8079 eloss:3.6463 aloss2:8.6195 exploreP:0.0547
Episode:2637 meanR:9.7500 R:10.0000 rate:0.0200 aloss:0.7542 eloss:3.7610 aloss2:8.6023 exploreP:0.0547
Episode:2638 meanR:9.7400 R:9.0000 rate:0.0180 aloss:0.7781 eloss:3.5682 aloss2:8.5064 exploreP:0.0546
Episode:2639 meanR:9.7100 R:9.0000 rate:0.0180 aloss:0.7370 eloss:3.7055 aloss2:8.5749 exploreP:0.0546
Episode:2640 meanR:9.7200 R:10.0000 rate:0.0200 aloss:0.7689 eloss:3.5334 aloss2:8.5480 exploreP:0.0546
Episode:2641 meanR:9.7200 R:10.0000 rate:0.0200 aloss:0.7901 eloss:3.3998 aloss2:8.6979 exploreP:0.0545
Episode:2642 meanR:9.7200 R:9.0000 rate:0.0180 aloss:0.7346 eloss:3.

Episode:2714 meanR:9.5100 R:10.0000 rate:0.0200 aloss:0.7572 eloss:2.7683 aloss2:8.3383 exploreP:0.0515
Episode:2715 meanR:9.5100 R:10.0000 rate:0.0200 aloss:0.7062 eloss:2.6448 aloss2:8.3325 exploreP:0.0515
Episode:2716 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.7202 eloss:2.8081 aloss2:8.2352 exploreP:0.0515
Episode:2717 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.7245 eloss:2.7103 aloss2:8.3036 exploreP:0.0514
Episode:2718 meanR:9.5000 R:9.0000 rate:0.0180 aloss:0.7173 eloss:2.7618 aloss2:8.3310 exploreP:0.0514
Episode:2719 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.7277 eloss:2.8690 aloss2:8.2766 exploreP:0.0513
Episode:2720 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.7125 eloss:2.8521 aloss2:8.3134 exploreP:0.0513
Episode:2721 meanR:9.5100 R:12.0000 rate:0.0240 aloss:0.7156 eloss:2.8195 aloss2:8.2827 exploreP:0.0512
Episode:2722 meanR:9.5200 R:10.0000 rate:0.0200 aloss:0.7168 eloss:2.6886 aloss2:8.2687 exploreP:0.0512
Episode:2723 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.7427 eloss

Episode:2796 meanR:9.5700 R:11.0000 rate:0.0220 aloss:0.6794 eloss:2.6589 aloss2:7.5717 exploreP:0.0484
Episode:2797 meanR:9.5900 R:10.0000 rate:0.0200 aloss:0.6879 eloss:2.8122 aloss2:7.4786 exploreP:0.0484
Episode:2798 meanR:9.5800 R:10.0000 rate:0.0200 aloss:0.7262 eloss:2.6677 aloss2:7.4743 exploreP:0.0483
Episode:2799 meanR:9.5900 R:10.0000 rate:0.0200 aloss:0.7092 eloss:2.7288 aloss2:7.4212 exploreP:0.0483
Episode:2800 meanR:9.5700 R:9.0000 rate:0.0180 aloss:0.6801 eloss:2.8018 aloss2:7.3381 exploreP:0.0483
Episode:2801 meanR:9.5500 R:9.0000 rate:0.0180 aloss:0.7329 eloss:2.6838 aloss2:7.4410 exploreP:0.0482
Episode:2802 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.7167 eloss:2.6506 aloss2:7.4333 exploreP:0.0482
Episode:2803 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6950 eloss:2.6313 aloss2:7.3777 exploreP:0.0481
Episode:2804 meanR:9.5500 R:9.0000 rate:0.0180 aloss:0.7109 eloss:2.6668 aloss2:7.3904 exploreP:0.0481
Episode:2805 meanR:9.5400 R:8.0000 rate:0.0160 aloss:0.7334 eloss:2

Episode:2876 meanR:9.6600 R:9.0000 rate:0.0180 aloss:0.6957 eloss:2.7471 aloss2:7.2943 exploreP:0.0455
Episode:2877 meanR:9.6500 R:9.0000 rate:0.0180 aloss:0.6845 eloss:2.8057 aloss2:7.2690 exploreP:0.0455
Episode:2878 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.7090 eloss:2.7775 aloss2:7.2939 exploreP:0.0455
Episode:2879 meanR:9.6600 R:9.0000 rate:0.0180 aloss:0.6662 eloss:2.7929 aloss2:7.2857 exploreP:0.0454
Episode:2880 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.6916 eloss:2.6352 aloss2:7.3629 exploreP:0.0454
Episode:2881 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.7163 eloss:2.7347 aloss2:7.2461 exploreP:0.0454
Episode:2882 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.6642 eloss:2.7695 aloss2:7.2519 exploreP:0.0453
Episode:2883 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.6898 eloss:2.6617 aloss2:7.3319 exploreP:0.0453
Episode:2884 meanR:9.6800 R:11.0000 rate:0.0220 aloss:0.7166 eloss:2.7119 aloss2:7.2188 exploreP:0.0453
Episode:2885 meanR:9.6900 R:9.0000 rate:0.0180 aloss:0.7010 eloss:2

Episode:2956 meanR:9.6800 R:11.0000 rate:0.0220 aloss:0.7072 eloss:2.8485 aloss2:7.0564 exploreP:0.0429
Episode:2957 meanR:9.6700 R:8.0000 rate:0.0160 aloss:0.6987 eloss:2.8120 aloss2:7.0748 exploreP:0.0429
Episode:2958 meanR:9.6700 R:9.0000 rate:0.0180 aloss:0.7564 eloss:2.7478 aloss2:7.0602 exploreP:0.0428
Episode:2959 meanR:9.6500 R:9.0000 rate:0.0180 aloss:0.7039 eloss:2.8548 aloss2:7.0551 exploreP:0.0428
Episode:2960 meanR:9.6600 R:11.0000 rate:0.0220 aloss:0.7150 eloss:2.8171 aloss2:7.0665 exploreP:0.0428
Episode:2961 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.6852 eloss:2.8289 aloss2:7.0751 exploreP:0.0427
Episode:2962 meanR:9.6700 R:9.0000 rate:0.0180 aloss:0.7318 eloss:2.7754 aloss2:7.1090 exploreP:0.0427
Episode:2963 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.7041 eloss:2.7844 aloss2:7.1580 exploreP:0.0427
Episode:2964 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.7224 eloss:2.8796 aloss2:7.0833 exploreP:0.0426
Episode:2965 meanR:9.6700 R:9.0000 rate:0.0180 aloss:0.6842 eloss:2.

Episode:3037 meanR:9.5700 R:10.0000 rate:0.0200 aloss:0.7328 eloss:2.7205 aloss2:7.3976 exploreP:0.0405
Episode:3038 meanR:9.6000 R:11.0000 rate:0.0220 aloss:0.7266 eloss:2.6859 aloss2:7.4294 exploreP:0.0404
Episode:3039 meanR:9.5800 R:8.0000 rate:0.0160 aloss:0.7275 eloss:2.7487 aloss2:7.4077 exploreP:0.0404
Episode:3040 meanR:9.5800 R:10.0000 rate:0.0200 aloss:0.7188 eloss:2.7626 aloss2:7.4466 exploreP:0.0404
Episode:3041 meanR:9.6000 R:10.0000 rate:0.0200 aloss:0.7352 eloss:2.7526 aloss2:7.4548 exploreP:0.0403
Episode:3042 meanR:9.5900 R:9.0000 rate:0.0180 aloss:0.7293 eloss:2.7452 aloss2:7.4226 exploreP:0.0403
Episode:3043 meanR:9.5700 R:8.0000 rate:0.0160 aloss:0.6943 eloss:2.8445 aloss2:7.3644 exploreP:0.0403
Episode:3044 meanR:9.5700 R:10.0000 rate:0.0200 aloss:0.6969 eloss:2.8386 aloss2:7.4105 exploreP:0.0403
Episode:3045 meanR:9.5800 R:10.0000 rate:0.0200 aloss:0.7258 eloss:2.7815 aloss2:7.4482 exploreP:0.0402
Episode:3046 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.7038 eloss:2

Episode:3117 meanR:9.7800 R:12.0000 rate:0.0240 aloss:0.7626 eloss:2.8289 aloss2:8.2555 exploreP:0.0381
Episode:3118 meanR:9.7900 R:10.0000 rate:0.0200 aloss:0.7619 eloss:2.8333 aloss2:8.3018 exploreP:0.0381
Episode:3119 meanR:9.7900 R:9.0000 rate:0.0180 aloss:0.7368 eloss:2.7947 aloss2:8.3435 exploreP:0.0381
Episode:3120 meanR:9.7900 R:9.0000 rate:0.0180 aloss:0.7383 eloss:2.7342 aloss2:8.3844 exploreP:0.0381
Episode:3121 meanR:9.7800 R:9.0000 rate:0.0180 aloss:0.7238 eloss:2.8257 aloss2:8.3948 exploreP:0.0380
Episode:3122 meanR:9.7900 R:12.0000 rate:0.0240 aloss:0.7433 eloss:2.8048 aloss2:8.3847 exploreP:0.0380
Episode:3123 meanR:9.7800 R:9.0000 rate:0.0180 aloss:0.7306 eloss:2.8024 aloss2:8.4389 exploreP:0.0380
Episode:3124 meanR:9.7800 R:9.0000 rate:0.0180 aloss:0.7273 eloss:2.9005 aloss2:8.3185 exploreP:0.0380
Episode:3125 meanR:9.7700 R:9.0000 rate:0.0180 aloss:0.7413 eloss:2.7871 aloss2:8.4210 exploreP:0.0379
Episode:3126 meanR:9.7700 R:8.0000 rate:0.0160 aloss:0.7592 eloss:2.79

Episode:3199 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.7613 eloss:2.6434 aloss2:8.1227 exploreP:0.0360
Episode:3200 meanR:9.6400 R:10.0000 rate:0.0200 aloss:0.7488 eloss:2.6004 aloss2:8.1568 exploreP:0.0360
Episode:3201 meanR:9.6500 R:9.0000 rate:0.0180 aloss:0.8022 eloss:2.5509 aloss2:8.1820 exploreP:0.0360
Episode:3202 meanR:9.6500 R:9.0000 rate:0.0180 aloss:0.7388 eloss:2.4692 aloss2:8.2500 exploreP:0.0359
Episode:3203 meanR:9.6600 R:10.0000 rate:0.0200 aloss:0.7457 eloss:2.6248 aloss2:8.1496 exploreP:0.0359
Episode:3204 meanR:9.6600 R:9.0000 rate:0.0180 aloss:0.7240 eloss:2.5338 aloss2:8.2066 exploreP:0.0359
Episode:3205 meanR:9.6300 R:8.0000 rate:0.0160 aloss:0.7323 eloss:2.6343 aloss2:8.1075 exploreP:0.0359
Episode:3206 meanR:9.6600 R:11.0000 rate:0.0220 aloss:0.7455 eloss:2.6019 aloss2:8.1367 exploreP:0.0358
Episode:3207 meanR:9.6600 R:10.0000 rate:0.0200 aloss:0.7302 eloss:2.5754 aloss2:8.1831 exploreP:0.0358
Episode:3208 meanR:9.6600 R:10.0000 rate:0.0200 aloss:0.7608 eloss:2.

Episode:3281 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.7388 eloss:2.4523 aloss2:8.8234 exploreP:0.0341
Episode:3282 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.7055 eloss:2.4013 aloss2:8.8807 exploreP:0.0340
Episode:3283 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6782 eloss:2.4179 aloss2:8.7902 exploreP:0.0340
Episode:3284 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6835 eloss:2.4358 aloss2:8.8204 exploreP:0.0340
Episode:3285 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.7191 eloss:2.3937 aloss2:8.8130 exploreP:0.0340
Episode:3286 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6998 eloss:2.4270 aloss2:8.8400 exploreP:0.0340
Episode:3287 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.7492 eloss:2.4547 aloss2:8.7625 exploreP:0.0339
Episode:3288 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6888 eloss:2.3794 aloss2:8.8458 exploreP:0.0339
Episode:3289 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6965 eloss:2.3927 aloss2:8.8291 exploreP:0.0339
Episode:3290 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.7262 eloss

Episode:3361 meanR:9.5800 R:10.0000 rate:0.0200 aloss:0.7198 eloss:2.6188 aloss2:8.2181 exploreP:0.0323
Episode:3362 meanR:9.5700 R:8.0000 rate:0.0160 aloss:0.6666 eloss:2.6595 aloss2:8.2029 exploreP:0.0323
Episode:3363 meanR:9.5900 R:10.0000 rate:0.0200 aloss:0.7138 eloss:2.6694 aloss2:8.1439 exploreP:0.0322
Episode:3364 meanR:9.5900 R:9.0000 rate:0.0180 aloss:0.7250 eloss:2.7886 aloss2:8.0675 exploreP:0.0322
Episode:3365 meanR:9.6200 R:12.0000 rate:0.0240 aloss:0.7206 eloss:2.7353 aloss2:8.0378 exploreP:0.0322
Episode:3366 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.7252 eloss:2.7669 aloss2:8.0251 exploreP:0.0322
Episode:3367 meanR:9.7500 R:22.0000 rate:0.0440 aloss:0.7201 eloss:2.8361 aloss2:7.9545 exploreP:0.0321
Episode:3368 meanR:9.7700 R:11.0000 rate:0.0220 aloss:0.7231 eloss:2.9192 aloss2:7.9215 exploreP:0.0321
Episode:3369 meanR:9.8800 R:20.0000 rate:0.0400 aloss:0.7502 eloss:2.8773 aloss2:7.9293 exploreP:0.0321
Episode:3370 meanR:10.0000 R:20.0000 rate:0.0400 aloss:0.7469 elos

Episode:3442 meanR:13.8700 R:10.0000 rate:0.0200 aloss:0.7427 eloss:2.3650 aloss2:9.1798 exploreP:0.0297
Episode:3443 meanR:13.8800 R:11.0000 rate:0.0220 aloss:0.6840 eloss:2.3311 aloss2:9.1331 exploreP:0.0297
Episode:3444 meanR:13.8800 R:10.0000 rate:0.0200 aloss:0.7143 eloss:2.3106 aloss2:9.1815 exploreP:0.0297
Episode:3445 meanR:13.9000 R:10.0000 rate:0.0200 aloss:0.7051 eloss:2.3299 aloss2:9.1827 exploreP:0.0297
Episode:3446 meanR:13.9000 R:9.0000 rate:0.0180 aloss:0.6739 eloss:2.2262 aloss2:9.2295 exploreP:0.0297
Episode:3447 meanR:13.8900 R:9.0000 rate:0.0180 aloss:0.6980 eloss:2.3207 aloss2:9.1819 exploreP:0.0296
Episode:3448 meanR:13.9100 R:10.0000 rate:0.0200 aloss:0.6920 eloss:2.2001 aloss2:9.2697 exploreP:0.0296
Episode:3449 meanR:13.9000 R:9.0000 rate:0.0180 aloss:0.6593 eloss:2.2480 aloss2:9.2222 exploreP:0.0296
Episode:3450 meanR:13.8900 R:9.0000 rate:0.0180 aloss:0.6864 eloss:2.2575 aloss2:9.2203 exploreP:0.0296
Episode:3451 meanR:13.9000 R:10.0000 rate:0.0200 aloss:0.69

Episode:3522 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6881 eloss:2.5147 aloss2:7.9650 exploreP:0.0283
Episode:3523 meanR:9.4700 R:8.0000 rate:0.0160 aloss:0.6834 eloss:2.4868 aloss2:7.9652 exploreP:0.0283
Episode:3524 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.7170 eloss:2.4838 aloss2:7.9342 exploreP:0.0283
Episode:3525 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.6952 eloss:2.4842 aloss2:8.0197 exploreP:0.0282
Episode:3526 meanR:9.4600 R:8.0000 rate:0.0160 aloss:0.7045 eloss:2.5427 aloss2:7.9458 exploreP:0.0282
Episode:3527 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.7123 eloss:2.5371 aloss2:7.9578 exploreP:0.0282
Episode:3528 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.6721 eloss:2.5627 aloss2:7.9435 exploreP:0.0282
Episode:3529 meanR:9.4500 R:8.0000 rate:0.0160 aloss:0.7202 eloss:2.5401 aloss2:7.9437 exploreP:0.0282
Episode:3530 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.7100 eloss:2.5177 aloss2:7.9900 exploreP:0.0282
Episode:3531 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6975 eloss:2.448

Episode:3604 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.6591 eloss:2.5019 aloss2:8.6325 exploreP:0.0269
Episode:3605 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.7187 eloss:2.3271 aloss2:8.7010 exploreP:0.0269
Episode:3606 meanR:9.4100 R:8.0000 rate:0.0160 aloss:0.6829 eloss:2.4388 aloss2:8.6837 exploreP:0.0269
Episode:3607 meanR:9.3700 R:9.0000 rate:0.0180 aloss:0.6936 eloss:2.4488 aloss2:8.6701 exploreP:0.0269
Episode:3608 meanR:9.3800 R:10.0000 rate:0.0200 aloss:0.6851 eloss:2.3601 aloss2:8.6719 exploreP:0.0269
Episode:3609 meanR:9.3600 R:8.0000 rate:0.0160 aloss:0.6783 eloss:2.4033 aloss2:8.7298 exploreP:0.0269
Episode:3610 meanR:9.3700 R:10.0000 rate:0.0200 aloss:0.6863 eloss:2.4618 aloss2:8.6450 exploreP:0.0268
Episode:3611 meanR:9.3700 R:10.0000 rate:0.0200 aloss:0.6852 eloss:2.4632 aloss2:8.6976 exploreP:0.0268
Episode:3612 meanR:9.3600 R:9.0000 rate:0.0180 aloss:0.6669 eloss:2.5264 aloss2:8.6423 exploreP:0.0268
Episode:3613 meanR:9.3600 R:10.0000 rate:0.0200 aloss:0.6656 eloss:2

Episode:3684 meanR:9.4300 R:9.0000 rate:0.0180 aloss:0.6802 eloss:2.4001 aloss2:8.7697 exploreP:0.0257
Episode:3685 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.6700 eloss:2.3178 aloss2:8.7925 exploreP:0.0257
Episode:3686 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6992 eloss:2.4802 aloss2:8.7280 exploreP:0.0257
Episode:3687 meanR:9.4400 R:8.0000 rate:0.0160 aloss:0.6633 eloss:2.4968 aloss2:8.6892 exploreP:0.0257
Episode:3688 meanR:9.4400 R:9.0000 rate:0.0180 aloss:0.6613 eloss:2.5056 aloss2:8.6978 exploreP:0.0257
Episode:3689 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.6675 eloss:2.3713 aloss2:8.7240 exploreP:0.0256
Episode:3690 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.6282 eloss:2.4398 aloss2:8.6512 exploreP:0.0256
Episode:3691 meanR:9.4300 R:9.0000 rate:0.0180 aloss:0.6688 eloss:2.3925 aloss2:8.6625 exploreP:0.0256
Episode:3692 meanR:9.4200 R:9.0000 rate:0.0180 aloss:0.6794 eloss:2.3191 aloss2:8.7315 exploreP:0.0256
Episode:3693 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.7081 eloss:2.

Episode:3764 meanR:9.8600 R:21.0000 rate:0.0420 aloss:0.7543 eloss:2.6093 aloss2:8.1605 exploreP:0.0245
Episode:3765 meanR:10.0500 R:29.0000 rate:0.0580 aloss:0.7567 eloss:2.5239 aloss2:8.2568 exploreP:0.0245
Episode:3766 meanR:10.4700 R:50.0000 rate:0.1000 aloss:0.7381 eloss:2.4359 aloss2:8.3786 exploreP:0.0244
Episode:3767 meanR:10.4700 R:10.0000 rate:0.0200 aloss:0.7501 eloss:2.3963 aloss2:8.3757 exploreP:0.0244
Episode:3768 meanR:10.7100 R:33.0000 rate:0.0660 aloss:0.7531 eloss:2.4135 aloss2:8.3666 exploreP:0.0243
Episode:3769 meanR:10.7200 R:11.0000 rate:0.0220 aloss:0.7573 eloss:2.4628 aloss2:8.2812 exploreP:0.0243
Episode:3770 meanR:10.7100 R:9.0000 rate:0.0180 aloss:0.7641 eloss:2.4488 aloss2:8.2862 exploreP:0.0243
Episode:3771 meanR:10.7200 R:10.0000 rate:0.0200 aloss:0.7850 eloss:2.5199 aloss2:8.2532 exploreP:0.0243
Episode:3772 meanR:10.7300 R:10.0000 rate:0.0200 aloss:0.7527 eloss:2.4379 aloss2:8.2871 exploreP:0.0243
Episode:3773 meanR:10.7200 R:9.0000 rate:0.0180 aloss:0.8

Episode:3845 meanR:10.9500 R:8.0000 rate:0.0160 aloss:0.6356 eloss:2.4834 aloss2:9.0133 exploreP:0.0233
Episode:3846 meanR:10.9600 R:9.0000 rate:0.0180 aloss:0.6272 eloss:2.3671 aloss2:9.1400 exploreP:0.0233
Episode:3847 meanR:10.9700 R:10.0000 rate:0.0200 aloss:0.6111 eloss:2.5025 aloss2:9.1313 exploreP:0.0233
Episode:3848 meanR:10.9600 R:10.0000 rate:0.0200 aloss:0.6358 eloss:2.5134 aloss2:9.1311 exploreP:0.0233
Episode:3849 meanR:10.9800 R:10.0000 rate:0.0200 aloss:0.6484 eloss:2.3752 aloss2:9.1906 exploreP:0.0232
Episode:3850 meanR:10.9800 R:10.0000 rate:0.0200 aloss:0.6379 eloss:2.4059 aloss2:9.2685 exploreP:0.0232
Episode:3851 meanR:10.9700 R:9.0000 rate:0.0180 aloss:0.6225 eloss:2.4982 aloss2:9.2008 exploreP:0.0232
Episode:3852 meanR:10.9600 R:9.0000 rate:0.0180 aloss:0.6316 eloss:2.3874 aloss2:9.2218 exploreP:0.0232
Episode:3853 meanR:10.9600 R:10.0000 rate:0.0200 aloss:0.6226 eloss:2.3497 aloss2:9.2489 exploreP:0.0232
Episode:3854 meanR:10.9700 R:10.0000 rate:0.0200 aloss:0.60

Episode:3925 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.6450 eloss:2.3840 aloss2:8.7957 exploreP:0.0223
Episode:3926 meanR:9.6400 R:9.0000 rate:0.0180 aloss:0.5984 eloss:2.5082 aloss2:8.6646 exploreP:0.0223
Episode:3927 meanR:9.6300 R:9.0000 rate:0.0180 aloss:0.6525 eloss:2.5415 aloss2:8.5629 exploreP:0.0223
Episode:3928 meanR:9.6400 R:9.0000 rate:0.0180 aloss:0.6578 eloss:2.4180 aloss2:8.6828 exploreP:0.0223
Episode:3929 meanR:9.6700 R:11.0000 rate:0.0220 aloss:0.6136 eloss:2.4138 aloss2:8.7522 exploreP:0.0223
Episode:3930 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.6545 eloss:2.4485 aloss2:8.6083 exploreP:0.0222
Episode:3931 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.6297 eloss:2.5638 aloss2:8.5336 exploreP:0.0222
Episode:3932 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.6049 eloss:2.4205 aloss2:8.6695 exploreP:0.0222
Episode:3933 meanR:9.6800 R:10.0000 rate:0.0200 aloss:0.6474 eloss:2.4196 aloss2:8.6384 exploreP:0.0222
Episode:3934 meanR:9.6600 R:9.0000 rate:0.0180 aloss:0.6652 eloss:2

Episode:4006 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6405 eloss:2.4744 aloss2:8.0601 exploreP:0.0214
Episode:4007 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6513 eloss:2.2765 aloss2:8.1684 exploreP:0.0214
Episode:4008 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6443 eloss:2.3603 aloss2:8.1490 exploreP:0.0214
Episode:4009 meanR:9.5500 R:9.0000 rate:0.0180 aloss:0.6852 eloss:2.3820 aloss2:8.0797 exploreP:0.0214
Episode:4010 meanR:9.5500 R:10.0000 rate:0.0200 aloss:0.6576 eloss:2.3374 aloss2:8.1693 exploreP:0.0214
Episode:4011 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6597 eloss:2.3847 aloss2:8.1654 exploreP:0.0213
Episode:4012 meanR:9.5300 R:9.0000 rate:0.0180 aloss:0.6663 eloss:2.2362 aloss2:8.2588 exploreP:0.0213
Episode:4013 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.6818 eloss:2.4205 aloss2:8.1613 exploreP:0.0213
Episode:4014 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.6691 eloss:2.3132 aloss2:8.2038 exploreP:0.0213
Episode:4015 meanR:9.5300 R:9.0000 rate:0.0180 aloss:0.6670 eloss:2.

Episode:4085 meanR:10.1400 R:11.0000 rate:0.0220 aloss:0.6614 eloss:2.7842 aloss2:7.7251 exploreP:0.0205
Episode:4086 meanR:10.1400 R:10.0000 rate:0.0200 aloss:0.6467 eloss:2.7092 aloss2:7.7996 exploreP:0.0205
Episode:4087 meanR:10.1300 R:9.0000 rate:0.0180 aloss:0.6452 eloss:2.7020 aloss2:7.7904 exploreP:0.0205
Episode:4088 meanR:10.1500 R:10.0000 rate:0.0200 aloss:0.6340 eloss:2.7364 aloss2:7.8106 exploreP:0.0205
Episode:4089 meanR:10.1500 R:10.0000 rate:0.0200 aloss:0.6634 eloss:2.7463 aloss2:7.7852 exploreP:0.0205
Episode:4090 meanR:10.1700 R:10.0000 rate:0.0200 aloss:0.6641 eloss:2.6775 aloss2:7.8389 exploreP:0.0204
Episode:4091 meanR:10.1900 R:10.0000 rate:0.0200 aloss:0.6135 eloss:2.6766 aloss2:7.8301 exploreP:0.0204
Episode:4092 meanR:10.1900 R:10.0000 rate:0.0200 aloss:0.6799 eloss:2.6971 aloss2:7.8636 exploreP:0.0204
Episode:4093 meanR:10.2000 R:12.0000 rate:0.0240 aloss:0.6592 eloss:2.7032 aloss2:7.8565 exploreP:0.0204
Episode:4094 meanR:10.2000 R:9.0000 rate:0.0180 aloss:0.

Episode:4164 meanR:9.9500 R:10.0000 rate:0.0200 aloss:0.6044 eloss:2.5656 aloss2:8.3975 exploreP:0.0197
Episode:4165 meanR:9.9500 R:9.0000 rate:0.0180 aloss:0.6304 eloss:2.5136 aloss2:8.4283 exploreP:0.0197
Episode:4166 meanR:9.9600 R:11.0000 rate:0.0220 aloss:0.6501 eloss:2.4511 aloss2:8.5051 exploreP:0.0197
Episode:4167 meanR:9.9600 R:10.0000 rate:0.0200 aloss:0.6333 eloss:2.5251 aloss2:8.3915 exploreP:0.0197
Episode:4168 meanR:9.9600 R:10.0000 rate:0.0200 aloss:0.6340 eloss:2.4563 aloss2:8.4543 exploreP:0.0197
Episode:4169 meanR:9.9400 R:9.0000 rate:0.0180 aloss:0.6382 eloss:2.4175 aloss2:8.4471 exploreP:0.0197
Episode:4170 meanR:9.9400 R:10.0000 rate:0.0200 aloss:0.6131 eloss:2.4800 aloss2:8.4076 exploreP:0.0197
Episode:4171 meanR:9.9300 R:9.0000 rate:0.0180 aloss:0.6265 eloss:2.4140 aloss2:8.4573 exploreP:0.0196
Episode:4172 meanR:9.9200 R:10.0000 rate:0.0200 aloss:0.6666 eloss:2.3645 aloss2:8.4473 exploreP:0.0196
Episode:4173 meanR:9.9300 R:10.0000 rate:0.0200 aloss:0.6432 eloss:

Episode:4245 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6128 eloss:2.3822 aloss2:8.0877 exploreP:0.0190
Episode:4246 meanR:9.5300 R:9.0000 rate:0.0180 aloss:0.6299 eloss:2.4624 aloss2:8.0289 exploreP:0.0190
Episode:4247 meanR:9.5200 R:9.0000 rate:0.0180 aloss:0.6347 eloss:2.3648 aloss2:8.1057 exploreP:0.0190
Episode:4248 meanR:9.5200 R:10.0000 rate:0.0200 aloss:0.5958 eloss:2.4889 aloss2:8.0259 exploreP:0.0190
Episode:4249 meanR:9.5200 R:9.0000 rate:0.0180 aloss:0.6290 eloss:2.3265 aloss2:8.0758 exploreP:0.0190
Episode:4250 meanR:9.5000 R:8.0000 rate:0.0160 aloss:0.6364 eloss:2.4100 aloss2:8.0602 exploreP:0.0190
Episode:4251 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.6199 eloss:2.3901 aloss2:8.0966 exploreP:0.0189
Episode:4252 meanR:9.4800 R:9.0000 rate:0.0180 aloss:0.6357 eloss:2.3704 aloss2:8.1315 exploreP:0.0189
Episode:4253 meanR:9.4600 R:8.0000 rate:0.0160 aloss:0.6315 eloss:2.3673 aloss2:8.0717 exploreP:0.0189
Episode:4254 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.6254 eloss:2.3079

Episode:4325 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6366 eloss:2.5851 aloss2:8.2364 exploreP:0.0183
Episode:4326 meanR:9.5300 R:12.0000 rate:0.0240 aloss:0.6275 eloss:2.5629 aloss2:8.2885 exploreP:0.0183
Episode:4327 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.6440 eloss:2.5017 aloss2:8.3131 exploreP:0.0183
Episode:4328 meanR:9.5300 R:10.0000 rate:0.0200 aloss:0.6276 eloss:2.5590 aloss2:8.3501 exploreP:0.0183
Episode:4329 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6215 eloss:2.6214 aloss2:8.2922 exploreP:0.0183
Episode:4330 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.6286 eloss:2.4386 aloss2:8.3746 exploreP:0.0183
Episode:4331 meanR:9.5300 R:9.0000 rate:0.0180 aloss:0.6442 eloss:2.4747 aloss2:8.3881 exploreP:0.0183
Episode:4332 meanR:9.5300 R:10.0000 rate:0.0200 aloss:0.6319 eloss:2.5630 aloss2:8.3365 exploreP:0.0183
Episode:4333 meanR:9.5300 R:9.0000 rate:0.0180 aloss:0.6380 eloss:2.4590 aloss2:8.3524 exploreP:0.0183
Episode:4334 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.6486 eloss:

Episode:4405 meanR:9.5700 R:10.0000 rate:0.0200 aloss:0.6173 eloss:2.5394 aloss2:8.2400 exploreP:0.0177
Episode:4406 meanR:9.5700 R:10.0000 rate:0.0200 aloss:0.5970 eloss:2.4220 aloss2:8.2772 exploreP:0.0177
Episode:4407 meanR:9.5700 R:9.0000 rate:0.0180 aloss:0.6151 eloss:2.4097 aloss2:8.3268 exploreP:0.0177
Episode:4408 meanR:9.5800 R:11.0000 rate:0.0220 aloss:0.6136 eloss:2.4245 aloss2:8.3006 exploreP:0.0177
Episode:4409 meanR:9.5700 R:9.0000 rate:0.0180 aloss:0.6212 eloss:2.3812 aloss2:8.3059 exploreP:0.0177
Episode:4410 meanR:9.5700 R:10.0000 rate:0.0200 aloss:0.6171 eloss:2.4304 aloss2:8.2694 exploreP:0.0177
Episode:4411 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6232 eloss:2.4127 aloss2:8.3175 exploreP:0.0177
Episode:4412 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6221 eloss:2.3908 aloss2:8.3413 exploreP:0.0177
Episode:4413 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.6152 eloss:2.4972 aloss2:8.2970 exploreP:0.0177
Episode:4414 meanR:9.5800 R:12.0000 rate:0.0240 aloss:0.6301 eloss:

Episode:4486 meanR:9.3900 R:10.0000 rate:0.0200 aloss:0.6114 eloss:2.5554 aloss2:8.3463 exploreP:0.0172
Episode:4487 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.6050 eloss:2.4556 aloss2:8.4083 exploreP:0.0171
Episode:4488 meanR:9.3900 R:10.0000 rate:0.0200 aloss:0.6448 eloss:2.5495 aloss2:8.4098 exploreP:0.0171
Episode:4489 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.6342 eloss:2.4848 aloss2:8.3496 exploreP:0.0171
Episode:4490 meanR:9.4000 R:9.0000 rate:0.0180 aloss:0.6447 eloss:2.4361 aloss2:8.3675 exploreP:0.0171
Episode:4491 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.6142 eloss:2.4608 aloss2:8.4752 exploreP:0.0171
Episode:4492 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.6366 eloss:2.5621 aloss2:8.4094 exploreP:0.0171
Episode:4493 meanR:9.4100 R:9.0000 rate:0.0180 aloss:0.6097 eloss:2.3947 aloss2:8.5071 exploreP:0.0171
Episode:4494 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.6183 eloss:2.5448 aloss2:8.4303 exploreP:0.0171
Episode:4495 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.6484 eloss:2

Episode:4566 meanR:9.3700 R:9.0000 rate:0.0180 aloss:0.6129 eloss:2.5016 aloss2:8.4554 exploreP:0.0166
Episode:4567 meanR:9.3700 R:9.0000 rate:0.0180 aloss:0.6501 eloss:2.5068 aloss2:8.4057 exploreP:0.0166
Episode:4568 meanR:9.3900 R:10.0000 rate:0.0200 aloss:0.6371 eloss:2.3724 aloss2:8.4643 exploreP:0.0166
Episode:4569 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.6465 eloss:2.4648 aloss2:8.3992 exploreP:0.0166
Episode:4570 meanR:9.4400 R:12.0000 rate:0.0240 aloss:0.5999 eloss:2.4889 aloss2:8.4412 exploreP:0.0166
Episode:4571 meanR:9.4200 R:8.0000 rate:0.0160 aloss:0.6370 eloss:2.4869 aloss2:8.4096 exploreP:0.0166
Episode:4572 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.6118 eloss:2.4087 aloss2:8.4485 exploreP:0.0166
Episode:4573 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.6232 eloss:2.4233 aloss2:8.5139 exploreP:0.0166
Episode:4574 meanR:9.4100 R:9.0000 rate:0.0180 aloss:0.6424 eloss:2.4103 aloss2:8.5224 exploreP:0.0166
Episode:4575 meanR:9.4100 R:9.0000 rate:0.0180 aloss:0.6164 eloss:2.

Episode:4647 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6381 eloss:2.3876 aloss2:8.3334 exploreP:0.0161
Episode:4648 meanR:9.5100 R:8.0000 rate:0.0160 aloss:0.6279 eloss:2.4922 aloss2:8.3418 exploreP:0.0161
Episode:4649 meanR:9.4800 R:9.0000 rate:0.0180 aloss:0.6029 eloss:2.4289 aloss2:8.3867 exploreP:0.0161
Episode:4650 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.6278 eloss:2.4884 aloss2:8.2775 exploreP:0.0161
Episode:4651 meanR:9.5000 R:9.0000 rate:0.0180 aloss:0.6346 eloss:2.5749 aloss2:8.1920 exploreP:0.0161
Episode:4652 meanR:9.5100 R:9.0000 rate:0.0180 aloss:0.6262 eloss:2.4834 aloss2:8.2712 exploreP:0.0161
Episode:4653 meanR:9.5000 R:9.0000 rate:0.0180 aloss:0.6329 eloss:2.4332 aloss2:8.3096 exploreP:0.0161
Episode:4654 meanR:9.4800 R:9.0000 rate:0.0180 aloss:0.6369 eloss:2.4941 aloss2:8.3010 exploreP:0.0161
Episode:4655 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6314 eloss:2.3901 aloss2:8.3392 exploreP:0.0161
Episode:4656 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6422 eloss:2.52

Episode:4727 meanR:9.3500 R:10.0000 rate:0.0200 aloss:0.6208 eloss:2.4928 aloss2:8.3014 exploreP:0.0157
Episode:4728 meanR:9.3500 R:9.0000 rate:0.0180 aloss:0.5934 eloss:2.4680 aloss2:8.3246 exploreP:0.0157
Episode:4729 meanR:9.3500 R:9.0000 rate:0.0180 aloss:0.6094 eloss:2.4867 aloss2:8.3169 exploreP:0.0157
Episode:4730 meanR:9.3500 R:8.0000 rate:0.0160 aloss:0.6198 eloss:2.4529 aloss2:8.3379 exploreP:0.0157
Episode:4731 meanR:9.3600 R:10.0000 rate:0.0200 aloss:0.6319 eloss:2.4589 aloss2:8.2948 exploreP:0.0157
Episode:4732 meanR:9.3600 R:9.0000 rate:0.0180 aloss:0.6221 eloss:2.4189 aloss2:8.3608 exploreP:0.0157
Episode:4733 meanR:9.3500 R:10.0000 rate:0.0200 aloss:0.6207 eloss:2.4274 aloss2:8.3212 exploreP:0.0157
Episode:4734 meanR:9.3600 R:10.0000 rate:0.0200 aloss:0.6165 eloss:2.4852 aloss2:8.3050 exploreP:0.0157
Episode:4735 meanR:9.3500 R:9.0000 rate:0.0180 aloss:0.6119 eloss:2.4932 aloss2:8.2804 exploreP:0.0157
Episode:4736 meanR:9.3500 R:9.0000 rate:0.0180 aloss:0.6111 eloss:2.4

Episode:4808 meanR:9.4400 R:9.0000 rate:0.0180 aloss:0.6406 eloss:2.5287 aloss2:8.3614 exploreP:0.0153
Episode:4809 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.6174 eloss:2.4518 aloss2:8.4464 exploreP:0.0153
Episode:4810 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.6013 eloss:2.4576 aloss2:8.4268 exploreP:0.0153
Episode:4811 meanR:9.4500 R:9.0000 rate:0.0180 aloss:0.6063 eloss:2.5260 aloss2:8.3858 exploreP:0.0153
Episode:4812 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6384 eloss:2.4619 aloss2:8.3911 exploreP:0.0153
Episode:4813 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6095 eloss:2.5547 aloss2:8.3333 exploreP:0.0153
Episode:4814 meanR:9.4500 R:9.0000 rate:0.0180 aloss:0.5940 eloss:2.4326 aloss2:8.3904 exploreP:0.0153
Episode:4815 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.6079 eloss:2.4269 aloss2:8.3630 exploreP:0.0152
Episode:4816 meanR:9.4300 R:8.0000 rate:0.0160 aloss:0.6085 eloss:2.4984 aloss2:8.3411 exploreP:0.0152
Episode:4817 meanR:9.4300 R:9.0000 rate:0.0180 aloss:0.6183 eloss:2.

Episode:4888 meanR:9.3200 R:9.0000 rate:0.0180 aloss:0.6572 eloss:2.4410 aloss2:8.3741 exploreP:0.0149
Episode:4889 meanR:9.3100 R:9.0000 rate:0.0180 aloss:0.6291 eloss:2.4981 aloss2:8.3559 exploreP:0.0149
Episode:4890 meanR:9.3000 R:8.0000 rate:0.0160 aloss:0.6149 eloss:2.4446 aloss2:8.3707 exploreP:0.0149
Episode:4891 meanR:9.2900 R:9.0000 rate:0.0180 aloss:0.6296 eloss:2.4930 aloss2:8.3288 exploreP:0.0149
Episode:4892 meanR:9.2800 R:9.0000 rate:0.0180 aloss:0.6383 eloss:2.4407 aloss2:8.3831 exploreP:0.0149
Episode:4893 meanR:9.2900 R:10.0000 rate:0.0200 aloss:0.6187 eloss:2.5817 aloss2:8.3015 exploreP:0.0149
Episode:4894 meanR:9.2900 R:9.0000 rate:0.0180 aloss:0.5971 eloss:2.5000 aloss2:8.3151 exploreP:0.0149
Episode:4895 meanR:9.2900 R:9.0000 rate:0.0180 aloss:0.6147 eloss:2.5161 aloss2:8.3578 exploreP:0.0149
Episode:4896 meanR:9.2800 R:9.0000 rate:0.0180 aloss:0.6215 eloss:2.5728 aloss2:8.2929 exploreP:0.0149
Episode:4897 meanR:9.2900 R:11.0000 rate:0.0220 aloss:0.6157 eloss:2.475

Episode:4968 meanR:9.4000 R:9.0000 rate:0.0180 aloss:0.6508 eloss:2.3818 aloss2:8.3358 exploreP:0.0145
Episode:4969 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.5962 eloss:2.4684 aloss2:8.3903 exploreP:0.0145
Episode:4970 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.6167 eloss:2.4327 aloss2:8.3504 exploreP:0.0145
Episode:4971 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.6268 eloss:2.5228 aloss2:8.2848 exploreP:0.0145
Episode:4972 meanR:9.4000 R:9.0000 rate:0.0180 aloss:0.6259 eloss:2.4246 aloss2:8.3687 exploreP:0.0145
Episode:4973 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.6450 eloss:2.4583 aloss2:8.3488 exploreP:0.0145
Episode:4974 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.6087 eloss:2.5284 aloss2:8.3218 exploreP:0.0145
Episode:4975 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.6200 eloss:2.6004 aloss2:8.2673 exploreP:0.0145
Episode:4976 meanR:9.3800 R:9.0000 rate:0.0180 aloss:0.6244 eloss:2.5311 aloss2:8.2592 exploreP:0.0145
Episode:4977 meanR:9.3700 R:8.0000 rate:0.0160 aloss:0.6605 eloss:2.39

Episode:5048 meanR:9.3400 R:10.0000 rate:0.0200 aloss:0.6194 eloss:2.5744 aloss2:8.2240 exploreP:0.0142
Episode:5049 meanR:9.3200 R:8.0000 rate:0.0160 aloss:0.6390 eloss:2.3623 aloss2:8.3518 exploreP:0.0142
Episode:5050 meanR:9.3200 R:10.0000 rate:0.0200 aloss:0.6592 eloss:2.4557 aloss2:8.2978 exploreP:0.0142
Episode:5051 meanR:9.3100 R:9.0000 rate:0.0180 aloss:0.5917 eloss:2.4910 aloss2:8.3390 exploreP:0.0142
Episode:5052 meanR:9.3100 R:9.0000 rate:0.0180 aloss:0.6083 eloss:2.4641 aloss2:8.3178 exploreP:0.0142
Episode:5053 meanR:9.3000 R:10.0000 rate:0.0200 aloss:0.6316 eloss:2.3944 aloss2:8.4250 exploreP:0.0142
Episode:5054 meanR:9.3000 R:10.0000 rate:0.0200 aloss:0.6446 eloss:2.5818 aloss2:8.2583 exploreP:0.0142
Episode:5055 meanR:9.2900 R:8.0000 rate:0.0160 aloss:0.6138 eloss:2.5517 aloss2:8.2965 exploreP:0.0142
Episode:5056 meanR:9.2800 R:8.0000 rate:0.0160 aloss:0.6467 eloss:2.5143 aloss2:8.3156 exploreP:0.0142
Episode:5057 meanR:9.2800 R:10.0000 rate:0.0200 aloss:0.5898 eloss:2.

Episode:5129 meanR:9.4700 R:9.0000 rate:0.0180 aloss:0.6438 eloss:2.5204 aloss2:8.1979 exploreP:0.0139
Episode:5130 meanR:9.4700 R:10.0000 rate:0.0200 aloss:0.6412 eloss:2.5168 aloss2:8.2364 exploreP:0.0139
Episode:5131 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.6587 eloss:2.5113 aloss2:8.2399 exploreP:0.0139
Episode:5132 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.6762 eloss:2.4984 aloss2:8.2358 exploreP:0.0139
Episode:5133 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6156 eloss:2.5238 aloss2:8.2109 exploreP:0.0139
Episode:5134 meanR:9.4700 R:10.0000 rate:0.0200 aloss:0.6105 eloss:2.5983 aloss2:8.1623 exploreP:0.0139
Episode:5135 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.6612 eloss:2.4875 aloss2:8.2458 exploreP:0.0139
Episode:5136 meanR:9.4500 R:8.0000 rate:0.0160 aloss:0.5910 eloss:2.5699 aloss2:8.2002 exploreP:0.0139
Episode:5137 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.6085 eloss:2.5454 aloss2:8.1913 exploreP:0.0139
Episode:5138 meanR:9.4700 R:11.0000 rate:0.0220 aloss:0.6291 eloss:2.

Episode:5209 meanR:9.6900 R:10.0000 rate:0.0200 aloss:0.7277 eloss:2.6757 aloss2:8.4602 exploreP:0.0136
Episode:5210 meanR:9.7100 R:10.0000 rate:0.0200 aloss:0.6615 eloss:2.7011 aloss2:8.4697 exploreP:0.0136
Episode:5211 meanR:9.8700 R:25.0000 rate:0.0500 aloss:0.7343 eloss:2.7006 aloss2:8.4528 exploreP:0.0136
Episode:5212 meanR:9.8700 R:10.0000 rate:0.0200 aloss:0.7098 eloss:2.7544 aloss2:8.3976 exploreP:0.0136
Episode:5213 meanR:9.8700 R:10.0000 rate:0.0200 aloss:0.7070 eloss:2.8717 aloss2:8.2675 exploreP:0.0136
Episode:5214 meanR:9.8700 R:10.0000 rate:0.0200 aloss:0.7325 eloss:2.7362 aloss2:8.4136 exploreP:0.0136
Episode:5215 meanR:10.0600 R:29.0000 rate:0.0580 aloss:0.7548 eloss:2.9017 aloss2:8.1699 exploreP:0.0136
Episode:5216 meanR:10.2500 R:28.0000 rate:0.0560 aloss:0.7655 eloss:2.9637 aloss2:8.1300 exploreP:0.0136
Episode:5217 meanR:10.6700 R:52.0000 rate:0.1040 aloss:0.8054 eloss:3.1357 aloss2:7.9173 exploreP:0.0136
Episode:5218 meanR:11.2300 R:66.0000 rate:0.1320 aloss:0.8229

Episode:5290 meanR:14.0400 R:9.0000 rate:0.0180 aloss:0.6623 eloss:2.3666 aloss2:8.7387 exploreP:0.0132
Episode:5291 meanR:14.0400 R:10.0000 rate:0.0200 aloss:0.6512 eloss:2.2989 aloss2:8.7655 exploreP:0.0132
Episode:5292 meanR:14.0500 R:10.0000 rate:0.0200 aloss:0.6490 eloss:2.4583 aloss2:8.6778 exploreP:0.0132
Episode:5293 meanR:14.0600 R:10.0000 rate:0.0200 aloss:0.6410 eloss:2.5119 aloss2:8.6429 exploreP:0.0132
Episode:5294 meanR:14.0600 R:10.0000 rate:0.0200 aloss:0.6205 eloss:2.5165 aloss2:8.7153 exploreP:0.0132
Episode:5295 meanR:14.0600 R:8.0000 rate:0.0160 aloss:0.6506 eloss:2.3297 aloss2:8.7689 exploreP:0.0132
Episode:5296 meanR:14.0700 R:10.0000 rate:0.0200 aloss:0.6502 eloss:2.5075 aloss2:8.6408 exploreP:0.0132
Episode:5297 meanR:14.0500 R:8.0000 rate:0.0160 aloss:0.6259 eloss:2.4392 aloss2:8.6478 exploreP:0.0132
Episode:5298 meanR:14.0600 R:10.0000 rate:0.0200 aloss:0.6509 eloss:2.4370 aloss2:8.7085 exploreP:0.0132
Episode:5299 meanR:14.0600 R:10.0000 rate:0.0200 aloss:0.6

Episode:5370 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.6066 eloss:2.5859 aloss2:8.0546 exploreP:0.0130
Episode:5371 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.5977 eloss:2.6360 aloss2:7.9647 exploreP:0.0130
Episode:5372 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.5917 eloss:2.6370 aloss2:8.0180 exploreP:0.0130
Episode:5373 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6024 eloss:2.6107 aloss2:8.0730 exploreP:0.0130
Episode:5374 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.6227 eloss:2.5832 aloss2:8.0826 exploreP:0.0130
Episode:5375 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.5845 eloss:2.6286 aloss2:7.9727 exploreP:0.0130
Episode:5376 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.5558 eloss:2.5439 aloss2:8.0992 exploreP:0.0130
Episode:5377 meanR:9.5600 R:9.0000 rate:0.0180 aloss:0.5830 eloss:2.5002 aloss2:8.1028 exploreP:0.0129
Episode:5378 meanR:9.5400 R:9.0000 rate:0.0180 aloss:0.6025 eloss:2.5512 aloss2:8.0550 exploreP:0.0129
Episode:5379 meanR:9.5400 R:10.0000 rate:0.0200 aloss:0.5812 eloss:2.58

Episode:5450 meanR:9.7400 R:9.0000 rate:0.0180 aloss:0.5956 eloss:2.4654 aloss2:8.0802 exploreP:0.0127
Episode:5451 meanR:9.7300 R:8.0000 rate:0.0160 aloss:0.5968 eloss:2.4108 aloss2:8.1624 exploreP:0.0127
Episode:5452 meanR:9.7300 R:10.0000 rate:0.0200 aloss:0.5675 eloss:2.4166 aloss2:8.1210 exploreP:0.0127
Episode:5453 meanR:9.7400 R:11.0000 rate:0.0220 aloss:0.5806 eloss:2.4565 aloss2:8.1724 exploreP:0.0127
Episode:5454 meanR:9.7300 R:9.0000 rate:0.0180 aloss:0.6104 eloss:2.4406 aloss2:8.1603 exploreP:0.0127
Episode:5455 meanR:9.7100 R:8.0000 rate:0.0160 aloss:0.5733 eloss:2.4537 aloss2:8.1645 exploreP:0.0127
Episode:5456 meanR:9.7000 R:9.0000 rate:0.0180 aloss:0.5570 eloss:2.5406 aloss2:8.0665 exploreP:0.0127
Episode:5457 meanR:9.7000 R:10.0000 rate:0.0200 aloss:0.5649 eloss:2.4589 aloss2:8.1379 exploreP:0.0127
Episode:5458 meanR:9.7000 R:10.0000 rate:0.0200 aloss:0.5572 eloss:2.5177 aloss2:8.0969 exploreP:0.0127
Episode:5459 meanR:9.6800 R:8.0000 rate:0.0160 aloss:0.5443 eloss:2.4

Episode:5531 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.5608 eloss:2.6056 aloss2:8.0186 exploreP:0.0125
Episode:5532 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.5720 eloss:2.4315 aloss2:8.1220 exploreP:0.0125
Episode:5533 meanR:9.4000 R:9.0000 rate:0.0180 aloss:0.5777 eloss:2.5866 aloss2:7.9950 exploreP:0.0125
Episode:5534 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.6111 eloss:2.4877 aloss2:8.0624 exploreP:0.0125
Episode:5535 meanR:9.4200 R:9.0000 rate:0.0180 aloss:0.5827 eloss:2.5435 aloss2:7.9874 exploreP:0.0125
Episode:5536 meanR:9.4100 R:8.0000 rate:0.0160 aloss:0.5793 eloss:2.5076 aloss2:8.0113 exploreP:0.0125
Episode:5537 meanR:9.4100 R:9.0000 rate:0.0180 aloss:0.5722 eloss:2.5312 aloss2:8.0256 exploreP:0.0125
Episode:5538 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.5865 eloss:2.4981 aloss2:8.0206 exploreP:0.0125
Episode:5539 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.5647 eloss:2.4982 aloss2:8.0227 exploreP:0.0125
Episode:5540 meanR:9.3800 R:8.0000 rate:0.0160 aloss:0.5975 eloss:2.

Episode:5613 meanR:9.2500 R:10.0000 rate:0.0200 aloss:0.5801 eloss:2.4066 aloss2:8.2115 exploreP:0.0124
Episode:5614 meanR:9.2600 R:9.0000 rate:0.0180 aloss:0.5765 eloss:2.3882 aloss2:8.1855 exploreP:0.0124
Episode:5615 meanR:9.2700 R:10.0000 rate:0.0200 aloss:0.5905 eloss:2.4609 aloss2:8.1663 exploreP:0.0124
Episode:5616 meanR:9.2900 R:10.0000 rate:0.0200 aloss:0.5950 eloss:2.4138 aloss2:8.1759 exploreP:0.0124
Episode:5617 meanR:9.3000 R:10.0000 rate:0.0200 aloss:0.5740 eloss:2.4406 aloss2:8.1653 exploreP:0.0123
Episode:5618 meanR:9.3000 R:10.0000 rate:0.0200 aloss:0.5631 eloss:2.5213 aloss2:8.1836 exploreP:0.0123
Episode:5619 meanR:9.3000 R:10.0000 rate:0.0200 aloss:0.5869 eloss:2.4530 aloss2:8.2045 exploreP:0.0123
Episode:5620 meanR:9.3000 R:8.0000 rate:0.0160 aloss:0.5893 eloss:2.5035 aloss2:8.1631 exploreP:0.0123
Episode:5621 meanR:9.3100 R:10.0000 rate:0.0200 aloss:0.5911 eloss:2.4907 aloss2:8.1893 exploreP:0.0123
Episode:5622 meanR:9.3000 R:9.0000 rate:0.0180 aloss:0.5812 eloss:

Episode:5692 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.5779 eloss:2.5922 aloss2:7.9828 exploreP:0.0122
Episode:5693 meanR:9.4200 R:12.0000 rate:0.0240 aloss:0.6332 eloss:2.6081 aloss2:7.9336 exploreP:0.0122
Episode:5694 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.6088 eloss:2.6679 aloss2:7.9305 exploreP:0.0122
Episode:5695 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.6407 eloss:2.6017 aloss2:7.9732 exploreP:0.0122
Episode:5696 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.6442 eloss:2.6873 aloss2:7.9740 exploreP:0.0122
Episode:5697 meanR:9.4400 R:9.0000 rate:0.0180 aloss:0.6276 eloss:2.7025 aloss2:7.9076 exploreP:0.0122
Episode:5698 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.6426 eloss:2.5745 aloss2:8.0664 exploreP:0.0122
Episode:5699 meanR:9.4300 R:8.0000 rate:0.0160 aloss:0.6156 eloss:2.5854 aloss2:8.0825 exploreP:0.0122
Episode:5700 meanR:9.4200 R:9.0000 rate:0.0180 aloss:0.6295 eloss:2.6481 aloss2:8.0546 exploreP:0.0122
Episode:5701 meanR:9.4300 R:10.0000 rate:0.0200 aloss:0.6162 eloss:2

Episode:5773 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.6614 eloss:2.4724 aloss2:8.5106 exploreP:0.0120
Episode:5774 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.6169 eloss:2.4847 aloss2:8.4664 exploreP:0.0120
Episode:5775 meanR:10.0100 R:9.0000 rate:0.0180 aloss:0.6233 eloss:2.5621 aloss2:8.4431 exploreP:0.0120
Episode:5776 meanR:10.0000 R:9.0000 rate:0.0180 aloss:0.6716 eloss:2.5682 aloss2:8.4594 exploreP:0.0120
Episode:5777 meanR:10.0100 R:9.0000 rate:0.0180 aloss:0.6473 eloss:2.5448 aloss2:8.4734 exploreP:0.0120
Episode:5778 meanR:10.0000 R:9.0000 rate:0.0180 aloss:0.6633 eloss:2.4262 aloss2:8.5160 exploreP:0.0120
Episode:5779 meanR:9.9800 R:9.0000 rate:0.0180 aloss:0.6653 eloss:2.4262 aloss2:8.5426 exploreP:0.0120
Episode:5780 meanR:9.9900 R:10.0000 rate:0.0200 aloss:0.6452 eloss:2.4763 aloss2:8.4862 exploreP:0.0120
Episode:5781 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.6432 eloss:2.5288 aloss2:8.4078 exploreP:0.0120
Episode:5782 meanR:10.0100 R:9.0000 rate:0.0180 aloss:0.6168 e

Episode:5854 meanR:9.4500 R:8.0000 rate:0.0160 aloss:0.6217 eloss:2.4935 aloss2:8.4600 exploreP:0.0119
Episode:5855 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.6245 eloss:2.4354 aloss2:8.4805 exploreP:0.0119
Episode:5856 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.6715 eloss:2.4606 aloss2:8.4134 exploreP:0.0119
Episode:5857 meanR:9.4500 R:9.0000 rate:0.0180 aloss:0.5889 eloss:2.4908 aloss2:8.4436 exploreP:0.0119
Episode:5858 meanR:9.4700 R:11.0000 rate:0.0220 aloss:0.6134 eloss:2.4444 aloss2:8.4712 exploreP:0.0119
Episode:5859 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.6271 eloss:2.4898 aloss2:8.4543 exploreP:0.0119
Episode:5860 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.6337 eloss:2.5353 aloss2:8.4034 exploreP:0.0119
Episode:5861 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6190 eloss:2.4595 aloss2:8.5241 exploreP:0.0119
Episode:5862 meanR:9.4800 R:9.0000 rate:0.0180 aloss:0.6083 eloss:2.4655 aloss2:8.5567 exploreP:0.0119
Episode:5863 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.6157 eloss:

Episode:5935 meanR:9.3700 R:10.0000 rate:0.0200 aloss:0.6585 eloss:2.3625 aloss2:8.4232 exploreP:0.0117
Episode:5936 meanR:9.3900 R:11.0000 rate:0.0220 aloss:0.5990 eloss:2.4703 aloss2:8.3624 exploreP:0.0117
Episode:5937 meanR:9.4900 R:19.0000 rate:0.0380 aloss:0.6384 eloss:2.4658 aloss2:8.3922 exploreP:0.0117
Episode:5938 meanR:9.4800 R:8.0000 rate:0.0160 aloss:0.6364 eloss:2.4282 aloss2:8.3445 exploreP:0.0117
Episode:5939 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.6297 eloss:2.4993 aloss2:8.4040 exploreP:0.0117
Episode:5940 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.6152 eloss:2.4615 aloss2:8.4144 exploreP:0.0117
Episode:5941 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.6036 eloss:2.5121 aloss2:8.4209 exploreP:0.0117
Episode:5942 meanR:9.5000 R:10.0000 rate:0.0200 aloss:0.5865 eloss:2.4734 aloss2:8.4107 exploreP:0.0117
Episode:5943 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.5964 eloss:2.4603 aloss2:8.3894 exploreP:0.0117
Episode:5944 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.6477 eloss:2

Episode:6015 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.6254 eloss:2.5954 aloss2:8.3261 exploreP:0.0116
Episode:6016 meanR:9.6400 R:11.0000 rate:0.0220 aloss:0.6147 eloss:2.3810 aloss2:8.3843 exploreP:0.0116
Episode:6017 meanR:9.6300 R:9.0000 rate:0.0180 aloss:0.6280 eloss:2.5429 aloss2:8.3323 exploreP:0.0116
Episode:6018 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.5770 eloss:2.5886 aloss2:8.3132 exploreP:0.0116
Episode:6019 meanR:9.6300 R:11.0000 rate:0.0220 aloss:0.6171 eloss:2.4484 aloss2:8.3734 exploreP:0.0116
Episode:6020 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.6388 eloss:2.4622 aloss2:8.3739 exploreP:0.0116
Episode:6021 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.6217 eloss:2.4211 aloss2:8.4333 exploreP:0.0116
Episode:6022 meanR:9.6000 R:9.0000 rate:0.0180 aloss:0.5911 eloss:2.4531 aloss2:8.3809 exploreP:0.0116
Episode:6023 meanR:9.5900 R:9.0000 rate:0.0180 aloss:0.6208 eloss:2.5000 aloss2:8.4766 exploreP:0.0116
Episode:6024 meanR:9.5900 R:8.0000 rate:0.0160 aloss:0.6099 eloss:2.4

Episode:6096 meanR:10.1400 R:10.0000 rate:0.0200 aloss:0.6243 eloss:2.4928 aloss2:8.3246 exploreP:0.0115
Episode:6097 meanR:10.1400 R:8.0000 rate:0.0160 aloss:0.6186 eloss:2.4010 aloss2:8.4119 exploreP:0.0115
Episode:6098 meanR:10.1500 R:10.0000 rate:0.0200 aloss:0.6336 eloss:2.5051 aloss2:8.3282 exploreP:0.0115
Episode:6099 meanR:10.1600 R:10.0000 rate:0.0200 aloss:0.6139 eloss:2.4942 aloss2:8.3205 exploreP:0.0115
Episode:6100 meanR:10.1600 R:9.0000 rate:0.0180 aloss:0.6106 eloss:2.4462 aloss2:8.3605 exploreP:0.0115
Episode:6101 meanR:10.1500 R:9.0000 rate:0.0180 aloss:0.5974 eloss:2.4285 aloss2:8.3806 exploreP:0.0115
Episode:6102 meanR:10.1600 R:10.0000 rate:0.0200 aloss:0.6394 eloss:2.4300 aloss2:8.4504 exploreP:0.0115
Episode:6103 meanR:10.1700 R:9.0000 rate:0.0180 aloss:0.5721 eloss:2.4967 aloss2:8.4108 exploreP:0.0115
Episode:6104 meanR:10.1600 R:10.0000 rate:0.0200 aloss:0.6147 eloss:2.4970 aloss2:8.3929 exploreP:0.0115
Episode:6105 meanR:10.1700 R:10.0000 rate:0.0200 aloss:0.58

Episode:6175 meanR:9.9900 R:11.0000 rate:0.0220 aloss:0.6076 eloss:2.4708 aloss2:8.3795 exploreP:0.0114
Episode:6176 meanR:9.9900 R:10.0000 rate:0.0200 aloss:0.5893 eloss:2.4006 aloss2:8.4371 exploreP:0.0114
Episode:6177 meanR:9.9900 R:10.0000 rate:0.0200 aloss:0.6055 eloss:2.4018 aloss2:8.4265 exploreP:0.0114
Episode:6178 meanR:9.9900 R:9.0000 rate:0.0180 aloss:0.5979 eloss:2.5016 aloss2:8.4172 exploreP:0.0114
Episode:6179 meanR:10.0000 R:10.0000 rate:0.0200 aloss:0.5901 eloss:2.4243 aloss2:8.4593 exploreP:0.0114
Episode:6180 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.6239 eloss:2.5289 aloss2:8.4076 exploreP:0.0114
Episode:6181 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.5982 eloss:2.5293 aloss2:8.4157 exploreP:0.0114
Episode:6182 meanR:10.0100 R:10.0000 rate:0.0200 aloss:0.6084 eloss:2.4609 aloss2:8.4132 exploreP:0.0114
Episode:6183 meanR:10.0100 R:9.0000 rate:0.0180 aloss:0.6124 eloss:2.4234 aloss2:8.4789 exploreP:0.0114
Episode:6184 meanR:10.0200 R:10.0000 rate:0.0200 aloss:0.6337

Episode:6256 meanR:10.0600 R:9.0000 rate:0.0180 aloss:0.6065 eloss:2.3818 aloss2:8.5102 exploreP:0.0113
Episode:6257 meanR:10.0700 R:10.0000 rate:0.0200 aloss:0.6104 eloss:2.4589 aloss2:8.4127 exploreP:0.0113
Episode:6258 meanR:9.9900 R:10.0000 rate:0.0200 aloss:0.6001 eloss:2.4167 aloss2:8.3608 exploreP:0.0113
Episode:6259 meanR:9.9000 R:9.0000 rate:0.0180 aloss:0.5974 eloss:2.4433 aloss2:8.4076 exploreP:0.0113
Episode:6260 meanR:9.8900 R:9.0000 rate:0.0180 aloss:0.5783 eloss:2.5366 aloss2:8.3241 exploreP:0.0113
Episode:6261 meanR:9.8900 R:9.0000 rate:0.0180 aloss:0.6114 eloss:2.4582 aloss2:8.3459 exploreP:0.0112
Episode:6262 meanR:9.8900 R:10.0000 rate:0.0200 aloss:0.5885 eloss:2.4697 aloss2:8.4176 exploreP:0.0112
Episode:6263 meanR:9.9000 R:10.0000 rate:0.0200 aloss:0.5898 eloss:2.5205 aloss2:8.3269 exploreP:0.0112
Episode:6264 meanR:9.9000 R:9.0000 rate:0.0180 aloss:0.5729 eloss:2.4394 aloss2:8.3995 exploreP:0.0112
Episode:6265 meanR:9.9000 R:10.0000 rate:0.0200 aloss:0.5851 eloss:

Episode:6337 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.6071 eloss:2.4427 aloss2:8.4092 exploreP:0.0112
Episode:6338 meanR:9.4900 R:10.0000 rate:0.0200 aloss:0.5814 eloss:2.3869 aloss2:8.4870 exploreP:0.0112
Episode:6339 meanR:9.4900 R:9.0000 rate:0.0180 aloss:0.5881 eloss:2.5183 aloss2:8.3247 exploreP:0.0112
Episode:6340 meanR:9.4800 R:8.0000 rate:0.0160 aloss:0.6390 eloss:2.4641 aloss2:8.4597 exploreP:0.0112
Episode:6341 meanR:9.4800 R:10.0000 rate:0.0200 aloss:0.5901 eloss:2.5493 aloss2:8.3730 exploreP:0.0112
Episode:6342 meanR:9.4800 R:9.0000 rate:0.0180 aloss:0.5931 eloss:2.5454 aloss2:8.3131 exploreP:0.0112
Episode:6343 meanR:9.4600 R:8.0000 rate:0.0160 aloss:0.6011 eloss:2.4018 aloss2:8.4864 exploreP:0.0112
Episode:6344 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.5778 eloss:2.5123 aloss2:8.4291 exploreP:0.0112
Episode:6345 meanR:9.4400 R:8.0000 rate:0.0160 aloss:0.5627 eloss:2.5089 aloss2:8.4326 exploreP:0.0112
Episode:6346 meanR:9.4400 R:9.0000 rate:0.0180 aloss:0.5601 eloss:2.47

Episode:6417 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.5789 eloss:2.5489 aloss2:8.3243 exploreP:0.0111
Episode:6418 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.5966 eloss:2.5385 aloss2:8.3234 exploreP:0.0111
Episode:6419 meanR:9.4100 R:8.0000 rate:0.0160 aloss:0.5904 eloss:2.5063 aloss2:8.3262 exploreP:0.0111
Episode:6420 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.5995 eloss:2.4783 aloss2:8.2883 exploreP:0.0111
Episode:6421 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.5872 eloss:2.5633 aloss2:8.2654 exploreP:0.0111
Episode:6422 meanR:9.4400 R:11.0000 rate:0.0220 aloss:0.5851 eloss:2.5314 aloss2:8.2295 exploreP:0.0111
Episode:6423 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.5856 eloss:2.4864 aloss2:8.2375 exploreP:0.0111
Episode:6424 meanR:9.4400 R:8.0000 rate:0.0160 aloss:0.6099 eloss:2.4076 aloss2:8.3614 exploreP:0.0111
Episode:6425 meanR:9.4300 R:9.0000 rate:0.0180 aloss:0.5886 eloss:2.4480 aloss2:8.2845 exploreP:0.0111
Episode:6426 meanR:9.4200 R:9.0000 rate:0.0180 aloss:0.6074 eloss:2

Episode:6497 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.6201 eloss:2.4977 aloss2:8.4415 exploreP:0.0110
Episode:6498 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.5932 eloss:2.4038 aloss2:8.4761 exploreP:0.0110
Episode:6499 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.5834 eloss:2.5447 aloss2:8.3322 exploreP:0.0110
Episode:6500 meanR:9.6600 R:11.0000 rate:0.0220 aloss:0.5669 eloss:2.4738 aloss2:8.4080 exploreP:0.0110
Episode:6501 meanR:9.9300 R:35.0000 rate:0.0700 aloss:0.5849 eloss:2.4934 aloss2:8.3884 exploreP:0.0110
Episode:6502 meanR:9.9000 R:8.0000 rate:0.0160 aloss:0.5712 eloss:2.4643 aloss2:8.3804 exploreP:0.0110
Episode:6503 meanR:10.1600 R:34.0000 rate:0.0680 aloss:0.5821 eloss:2.5030 aloss2:8.3827 exploreP:0.0110
Episode:6504 meanR:10.1400 R:9.0000 rate:0.0180 aloss:0.5942 eloss:2.4698 aloss2:8.2923 exploreP:0.0110
Episode:6505 meanR:10.1400 R:10.0000 rate:0.0200 aloss:0.5777 eloss:2.4762 aloss2:8.3674 exploreP:0.0110
Episode:6506 meanR:10.1300 R:9.0000 rate:0.0180 aloss:0.6182 elo

Episode:6578 meanR:10.7000 R:10.0000 rate:0.0200 aloss:0.5891 eloss:2.5093 aloss2:8.2905 exploreP:0.0109
Episode:6579 meanR:10.7000 R:10.0000 rate:0.0200 aloss:0.5887 eloss:2.4160 aloss2:8.3458 exploreP:0.0109
Episode:6580 meanR:10.6900 R:9.0000 rate:0.0180 aloss:0.5772 eloss:2.4883 aloss2:8.2954 exploreP:0.0109
Episode:6581 meanR:10.7000 R:10.0000 rate:0.0200 aloss:0.5646 eloss:2.5255 aloss2:8.2773 exploreP:0.0109
Episode:6582 meanR:10.7200 R:10.0000 rate:0.0200 aloss:0.5563 eloss:2.4492 aloss2:8.4235 exploreP:0.0109
Episode:6583 meanR:10.7300 R:9.0000 rate:0.0180 aloss:0.5626 eloss:2.4644 aloss2:8.3130 exploreP:0.0109
Episode:6584 meanR:10.7200 R:9.0000 rate:0.0180 aloss:0.5900 eloss:2.4204 aloss2:8.4025 exploreP:0.0109
Episode:6585 meanR:10.7100 R:10.0000 rate:0.0200 aloss:0.5973 eloss:2.4499 aloss2:8.3021 exploreP:0.0109
Episode:6586 meanR:10.7100 R:9.0000 rate:0.0180 aloss:0.5605 eloss:2.5099 aloss2:8.3385 exploreP:0.0109
Episode:6587 meanR:10.7100 R:9.0000 rate:0.0180 aloss:0.574

Episode:6657 meanR:9.9500 R:9.0000 rate:0.0180 aloss:0.5710 eloss:2.4649 aloss2:8.4729 exploreP:0.0108
Episode:6658 meanR:9.9500 R:10.0000 rate:0.0200 aloss:0.5408 eloss:2.5225 aloss2:8.3819 exploreP:0.0108
Episode:6659 meanR:9.9500 R:8.0000 rate:0.0160 aloss:0.5518 eloss:2.4447 aloss2:8.4700 exploreP:0.0108
Episode:6660 meanR:9.9600 R:9.0000 rate:0.0180 aloss:0.5792 eloss:2.5307 aloss2:8.3582 exploreP:0.0108
Episode:6661 meanR:9.9600 R:10.0000 rate:0.0200 aloss:0.5646 eloss:2.5407 aloss2:8.3176 exploreP:0.0108
Episode:6662 meanR:9.9500 R:9.0000 rate:0.0180 aloss:0.5740 eloss:2.5223 aloss2:8.3310 exploreP:0.0108
Episode:6663 meanR:9.9500 R:10.0000 rate:0.0200 aloss:0.5337 eloss:2.5691 aloss2:8.2552 exploreP:0.0108
Episode:6664 meanR:9.9500 R:10.0000 rate:0.0200 aloss:0.5626 eloss:2.4788 aloss2:8.3428 exploreP:0.0108
Episode:6665 meanR:9.9400 R:9.0000 rate:0.0180 aloss:0.5489 eloss:2.4639 aloss2:8.4288 exploreP:0.0108
Episode:6666 meanR:9.9300 R:9.0000 rate:0.0180 aloss:0.5635 eloss:2.4

Episode:6737 meanR:10.1200 R:11.0000 rate:0.0220 aloss:0.5488 eloss:2.4400 aloss2:8.5128 exploreP:0.0108
Episode:6738 meanR:10.1200 R:10.0000 rate:0.0200 aloss:0.5467 eloss:2.6131 aloss2:8.2515 exploreP:0.0108
Episode:6739 meanR:10.1100 R:9.0000 rate:0.0180 aloss:0.5777 eloss:2.4874 aloss2:8.3769 exploreP:0.0108
Episode:6740 meanR:10.1100 R:10.0000 rate:0.0200 aloss:0.5606 eloss:2.3829 aloss2:8.4562 exploreP:0.0108
Episode:6741 meanR:10.1100 R:9.0000 rate:0.0180 aloss:0.5513 eloss:2.5291 aloss2:8.3006 exploreP:0.0108
Episode:6742 meanR:10.1200 R:10.0000 rate:0.0200 aloss:0.5375 eloss:2.4813 aloss2:8.3817 exploreP:0.0108
Episode:6743 meanR:10.1300 R:10.0000 rate:0.0200 aloss:0.5804 eloss:2.4751 aloss2:8.3219 exploreP:0.0108
Episode:6744 meanR:10.1500 R:12.0000 rate:0.0240 aloss:0.5650 eloss:2.5840 aloss2:8.2573 exploreP:0.0108
Episode:6745 meanR:10.1600 R:9.0000 rate:0.0180 aloss:0.5499 eloss:2.4721 aloss2:8.4341 exploreP:0.0108
Episode:6746 meanR:10.1700 R:10.0000 rate:0.0200 aloss:0.5

Episode:6817 meanR:10.5700 R:10.0000 rate:0.0200 aloss:0.5328 eloss:2.5035 aloss2:8.3514 exploreP:0.0107
Episode:6818 meanR:10.5800 R:10.0000 rate:0.0200 aloss:0.5286 eloss:2.4874 aloss2:8.3644 exploreP:0.0107
Episode:6819 meanR:10.5800 R:9.0000 rate:0.0180 aloss:0.5366 eloss:2.5022 aloss2:8.4092 exploreP:0.0107
Episode:6820 meanR:10.5700 R:9.0000 rate:0.0180 aloss:0.5577 eloss:2.4325 aloss2:8.3615 exploreP:0.0107
Episode:6821 meanR:10.5700 R:10.0000 rate:0.0200 aloss:0.5585 eloss:2.5117 aloss2:8.3151 exploreP:0.0107
Episode:6822 meanR:10.3100 R:10.0000 rate:0.0200 aloss:0.5396 eloss:2.5054 aloss2:8.2931 exploreP:0.0107
Episode:6823 meanR:10.3000 R:9.0000 rate:0.0180 aloss:0.5584 eloss:2.4793 aloss2:8.2963 exploreP:0.0107
Episode:6824 meanR:10.3100 R:10.0000 rate:0.0200 aloss:0.5477 eloss:2.4788 aloss2:8.3099 exploreP:0.0107
Episode:6825 meanR:10.3100 R:10.0000 rate:0.0200 aloss:0.5505 eloss:2.4580 aloss2:8.4046 exploreP:0.0107
Episode:6826 meanR:10.3100 R:9.0000 rate:0.0180 aloss:0.54

Episode:6896 meanR:10.2300 R:10.0000 rate:0.0200 aloss:0.5458 eloss:2.4662 aloss2:8.4353 exploreP:0.0107
Episode:6897 meanR:10.2400 R:10.0000 rate:0.0200 aloss:0.5485 eloss:2.4727 aloss2:8.4827 exploreP:0.0107
Episode:6898 meanR:10.2500 R:10.0000 rate:0.0200 aloss:0.5647 eloss:2.5486 aloss2:8.4097 exploreP:0.0107
Episode:6899 meanR:10.2600 R:10.0000 rate:0.0200 aloss:0.5759 eloss:2.5058 aloss2:8.3765 exploreP:0.0107
Episode:6900 meanR:10.2700 R:10.0000 rate:0.0200 aloss:0.5520 eloss:2.4315 aloss2:8.5028 exploreP:0.0107
Episode:6901 meanR:10.2800 R:9.0000 rate:0.0180 aloss:0.5686 eloss:2.4090 aloss2:8.4926 exploreP:0.0107
Episode:6902 meanR:10.2800 R:9.0000 rate:0.0180 aloss:0.5630 eloss:2.4260 aloss2:8.4205 exploreP:0.0107
Episode:6903 meanR:10.5400 R:35.0000 rate:0.0700 aloss:0.5454 eloss:2.5190 aloss2:8.3809 exploreP:0.0107
Episode:6904 meanR:10.5600 R:11.0000 rate:0.0220 aloss:0.5513 eloss:2.4980 aloss2:8.3987 exploreP:0.0107
Episode:6905 meanR:10.5800 R:10.0000 rate:0.0200 aloss:0.

Episode:6976 meanR:10.2400 R:11.0000 rate:0.0220 aloss:0.5537 eloss:2.4832 aloss2:8.3609 exploreP:0.0106
Episode:6977 meanR:10.2500 R:9.0000 rate:0.0180 aloss:0.5443 eloss:2.4775 aloss2:8.5672 exploreP:0.0106
Episode:6978 meanR:10.2400 R:8.0000 rate:0.0160 aloss:0.5401 eloss:2.5288 aloss2:8.3164 exploreP:0.0106
Episode:6979 meanR:10.2300 R:9.0000 rate:0.0180 aloss:0.5390 eloss:2.6007 aloss2:8.2813 exploreP:0.0106
Episode:6980 meanR:10.2200 R:9.0000 rate:0.0180 aloss:0.5524 eloss:2.4421 aloss2:8.4530 exploreP:0.0106
Episode:6981 meanR:10.2100 R:8.0000 rate:0.0160 aloss:0.5439 eloss:2.4504 aloss2:8.4190 exploreP:0.0106
Episode:6982 meanR:10.2100 R:10.0000 rate:0.0200 aloss:0.5535 eloss:2.5119 aloss2:8.3592 exploreP:0.0106
Episode:6983 meanR:10.2100 R:10.0000 rate:0.0200 aloss:0.5454 eloss:2.4916 aloss2:8.3971 exploreP:0.0106
Episode:6984 meanR:10.2000 R:10.0000 rate:0.0200 aloss:0.5401 eloss:2.4582 aloss2:8.4803 exploreP:0.0106
Episode:6985 meanR:10.2100 R:10.0000 rate:0.0200 aloss:0.531

Episode:7056 meanR:9.9300 R:9.0000 rate:0.0180 aloss:0.5555 eloss:2.4689 aloss2:8.3511 exploreP:0.0106
Episode:7057 meanR:9.9300 R:10.0000 rate:0.0200 aloss:0.5429 eloss:2.4114 aloss2:8.3850 exploreP:0.0106
Episode:7058 meanR:9.9100 R:8.0000 rate:0.0160 aloss:0.5117 eloss:2.4406 aloss2:8.4578 exploreP:0.0106
Episode:7059 meanR:9.9000 R:8.0000 rate:0.0160 aloss:0.5801 eloss:2.3413 aloss2:8.4657 exploreP:0.0106
Episode:7060 meanR:9.9200 R:10.0000 rate:0.0200 aloss:0.5638 eloss:2.4895 aloss2:8.2849 exploreP:0.0106
Episode:7061 meanR:9.9300 R:11.0000 rate:0.0220 aloss:0.5439 eloss:2.5205 aloss2:8.2925 exploreP:0.0106
Episode:7062 meanR:9.9300 R:10.0000 rate:0.0200 aloss:0.5241 eloss:2.5213 aloss2:8.3676 exploreP:0.0106
Episode:7063 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.5671 eloss:2.4894 aloss2:8.3572 exploreP:0.0106
Episode:7064 meanR:9.6400 R:8.0000 rate:0.0160 aloss:0.5091 eloss:2.5823 aloss2:8.3501 exploreP:0.0106
Episode:7065 meanR:9.6500 R:10.0000 rate:0.0200 aloss:0.5187 eloss:2

Episode:7136 meanR:9.7000 R:10.0000 rate:0.0200 aloss:0.5404 eloss:2.4652 aloss2:8.3169 exploreP:0.0105
Episode:7137 meanR:9.7100 R:10.0000 rate:0.0200 aloss:0.5122 eloss:2.4539 aloss2:8.4022 exploreP:0.0105
Episode:7138 meanR:9.7200 R:10.0000 rate:0.0200 aloss:0.5374 eloss:2.5368 aloss2:8.2610 exploreP:0.0105
Episode:7139 meanR:9.7100 R:9.0000 rate:0.0180 aloss:0.5158 eloss:2.4901 aloss2:8.3042 exploreP:0.0105
Episode:7140 meanR:9.7400 R:12.0000 rate:0.0240 aloss:0.5331 eloss:2.4876 aloss2:8.3305 exploreP:0.0105
Episode:7141 meanR:9.7300 R:9.0000 rate:0.0180 aloss:0.5480 eloss:2.4513 aloss2:8.3770 exploreP:0.0105
Episode:7142 meanR:9.7400 R:10.0000 rate:0.0200 aloss:0.5071 eloss:2.5331 aloss2:8.2550 exploreP:0.0105
Episode:7143 meanR:9.7400 R:9.0000 rate:0.0180 aloss:0.5178 eloss:2.4264 aloss2:8.4254 exploreP:0.0105
Episode:7144 meanR:9.7300 R:9.0000 rate:0.0180 aloss:0.5609 eloss:2.5076 aloss2:8.2432 exploreP:0.0105
Episode:7145 meanR:9.7500 R:11.0000 rate:0.0220 aloss:0.5388 eloss:2

Episode:7218 meanR:9.8000 R:9.0000 rate:0.0180 aloss:0.5338 eloss:2.4658 aloss2:8.3233 exploreP:0.0105
Episode:7219 meanR:9.7900 R:9.0000 rate:0.0180 aloss:0.5668 eloss:2.4813 aloss2:8.3011 exploreP:0.0105
Episode:7220 meanR:9.8100 R:11.0000 rate:0.0220 aloss:0.5419 eloss:2.4391 aloss2:8.3997 exploreP:0.0105
Episode:7221 meanR:9.8100 R:9.0000 rate:0.0180 aloss:0.5202 eloss:2.4294 aloss2:8.3794 exploreP:0.0105
Episode:7222 meanR:9.7100 R:9.0000 rate:0.0180 aloss:0.5133 eloss:2.5255 aloss2:8.3323 exploreP:0.0105
Episode:7223 meanR:9.7100 R:9.0000 rate:0.0180 aloss:0.5598 eloss:2.5638 aloss2:8.2280 exploreP:0.0105
Episode:7224 meanR:9.6900 R:8.0000 rate:0.0160 aloss:0.5509 eloss:2.5684 aloss2:8.3409 exploreP:0.0105
Episode:7225 meanR:9.6900 R:9.0000 rate:0.0180 aloss:0.5674 eloss:2.5240 aloss2:8.2693 exploreP:0.0105
Episode:7226 meanR:9.6800 R:9.0000 rate:0.0180 aloss:0.5402 eloss:2.4995 aloss2:8.3060 exploreP:0.0105
Episode:7227 meanR:9.6900 R:10.0000 rate:0.0200 aloss:0.5204 eloss:2.538

Episode:7298 meanR:9.9700 R:9.0000 rate:0.0180 aloss:0.5373 eloss:2.4963 aloss2:8.3277 exploreP:0.0104
Episode:7299 meanR:9.9700 R:10.0000 rate:0.0200 aloss:0.5613 eloss:2.4904 aloss2:8.3236 exploreP:0.0104
Episode:7300 meanR:9.9600 R:8.0000 rate:0.0160 aloss:0.5106 eloss:2.4652 aloss2:8.4231 exploreP:0.0104
Episode:7301 meanR:9.9700 R:10.0000 rate:0.0200 aloss:0.5336 eloss:2.3570 aloss2:8.5702 exploreP:0.0104
Episode:7302 meanR:9.9700 R:9.0000 rate:0.0180 aloss:0.5511 eloss:2.4788 aloss2:8.4849 exploreP:0.0104
Episode:7303 meanR:9.9700 R:9.0000 rate:0.0180 aloss:0.5441 eloss:2.4809 aloss2:8.6023 exploreP:0.0104
Episode:7304 meanR:9.9600 R:9.0000 rate:0.0180 aloss:0.5380 eloss:2.5131 aloss2:8.4741 exploreP:0.0104
Episode:7305 meanR:9.9700 R:10.0000 rate:0.0200 aloss:0.5534 eloss:2.4585 aloss2:8.5169 exploreP:0.0104
Episode:7306 meanR:9.9500 R:10.0000 rate:0.0200 aloss:0.5323 eloss:2.4975 aloss2:8.4688 exploreP:0.0104
Episode:7307 meanR:9.9400 R:9.0000 rate:0.0180 aloss:0.5349 eloss:2.5

Episode:7378 meanR:9.6300 R:9.0000 rate:0.0180 aloss:0.5284 eloss:2.4727 aloss2:8.4969 exploreP:0.0104
Episode:7379 meanR:9.6200 R:9.0000 rate:0.0180 aloss:0.5150 eloss:2.4545 aloss2:8.5439 exploreP:0.0104
Episode:7380 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.5306 eloss:2.4224 aloss2:8.5487 exploreP:0.0104
Episode:7381 meanR:9.6300 R:10.0000 rate:0.0200 aloss:0.5027 eloss:2.5408 aloss2:8.5124 exploreP:0.0104
Episode:7382 meanR:9.6400 R:10.0000 rate:0.0200 aloss:0.5235 eloss:2.5722 aloss2:8.4236 exploreP:0.0104
Episode:7383 meanR:9.6400 R:9.0000 rate:0.0180 aloss:0.5371 eloss:2.4732 aloss2:8.4468 exploreP:0.0104
Episode:7384 meanR:9.6600 R:10.0000 rate:0.0200 aloss:0.4990 eloss:2.4969 aloss2:8.4842 exploreP:0.0104
Episode:7385 meanR:9.6700 R:9.0000 rate:0.0180 aloss:0.5301 eloss:2.4929 aloss2:8.4328 exploreP:0.0104
Episode:7386 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.5443 eloss:2.4695 aloss2:8.4554 exploreP:0.0104
Episode:7387 meanR:9.6700 R:10.0000 rate:0.0200 aloss:0.5419 eloss:2

Episode:7459 meanR:9.5600 R:10.0000 rate:0.0200 aloss:0.4606 eloss:2.5320 aloss2:8.0702 exploreP:0.0104
Episode:7460 meanR:9.5500 R:8.0000 rate:0.0160 aloss:0.4534 eloss:2.4893 aloss2:8.1198 exploreP:0.0104
Episode:7461 meanR:9.5500 R:10.0000 rate:0.0200 aloss:0.4276 eloss:2.5326 aloss2:8.0840 exploreP:0.0104
Episode:7462 meanR:9.5500 R:9.0000 rate:0.0180 aloss:0.4528 eloss:2.5578 aloss2:8.0591 exploreP:0.0104
Episode:7463 meanR:9.5500 R:9.0000 rate:0.0180 aloss:0.4451 eloss:2.5484 aloss2:8.0684 exploreP:0.0104
Episode:7464 meanR:9.5200 R:9.0000 rate:0.0180 aloss:0.4464 eloss:2.5045 aloss2:8.1538 exploreP:0.0104
Episode:7465 meanR:9.5200 R:9.0000 rate:0.0180 aloss:0.4493 eloss:2.5341 aloss2:8.0703 exploreP:0.0104
Episode:7466 meanR:9.5200 R:9.0000 rate:0.0180 aloss:0.4171 eloss:2.5589 aloss2:8.1365 exploreP:0.0104
Episode:7467 meanR:9.5000 R:9.0000 rate:0.0180 aloss:0.4486 eloss:2.5810 aloss2:8.0205 exploreP:0.0104
Episode:7468 meanR:9.5100 R:10.0000 rate:0.0200 aloss:0.4329 eloss:2.51

Episode:7539 meanR:9.3900 R:8.0000 rate:0.0160 aloss:0.5050 eloss:2.5887 aloss2:8.2566 exploreP:0.0104
Episode:7540 meanR:9.3900 R:9.0000 rate:0.0180 aloss:0.4849 eloss:2.5970 aloss2:8.2026 exploreP:0.0104
Episode:7541 meanR:9.4000 R:10.0000 rate:0.0200 aloss:0.5037 eloss:2.6196 aloss2:8.1519 exploreP:0.0104
Episode:7542 meanR:9.4100 R:10.0000 rate:0.0200 aloss:0.4984 eloss:2.6289 aloss2:8.1433 exploreP:0.0104
Episode:7543 meanR:9.4200 R:9.0000 rate:0.0180 aloss:0.4830 eloss:2.5255 aloss2:8.3014 exploreP:0.0104
Episode:7544 meanR:9.4100 R:9.0000 rate:0.0180 aloss:0.5357 eloss:2.5644 aloss2:8.1848 exploreP:0.0104
Episode:7545 meanR:9.4200 R:10.0000 rate:0.0200 aloss:0.5166 eloss:2.6399 aloss2:8.1414 exploreP:0.0104
Episode:7546 meanR:9.4500 R:11.0000 rate:0.0220 aloss:0.4761 eloss:2.6438 aloss2:8.1161 exploreP:0.0104
Episode:7547 meanR:9.4500 R:9.0000 rate:0.0180 aloss:0.4727 eloss:2.6069 aloss2:8.2639 exploreP:0.0104
Episode:7548 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.5080 eloss:2.

Episode:7619 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.4548 eloss:2.5388 aloss2:8.4699 exploreP:0.0103
Episode:7620 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.4495 eloss:2.4983 aloss2:8.4652 exploreP:0.0103
Episode:7621 meanR:9.4600 R:9.0000 rate:0.0180 aloss:0.4413 eloss:2.5391 aloss2:8.4618 exploreP:0.0103
Episode:7622 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.4816 eloss:2.5590 aloss2:8.3302 exploreP:0.0103
Episode:7623 meanR:9.4600 R:10.0000 rate:0.0200 aloss:0.4702 eloss:2.5768 aloss2:8.2942 exploreP:0.0103
Episode:7624 meanR:9.4500 R:8.0000 rate:0.0160 aloss:0.4883 eloss:2.5050 aloss2:8.4791 exploreP:0.0103
Episode:7625 meanR:9.4400 R:9.0000 rate:0.0180 aloss:0.4658 eloss:2.5456 aloss2:8.4347 exploreP:0.0103
Episode:7626 meanR:9.4400 R:10.0000 rate:0.0200 aloss:0.4681 eloss:2.6066 aloss2:8.2414 exploreP:0.0103
Episode:7627 meanR:9.4500 R:10.0000 rate:0.0200 aloss:0.4655 eloss:2.5490 aloss2:8.3653 exploreP:0.0103
Episode:7628 meanR:9.4300 R:8.0000 rate:0.0160 aloss:0.4972 eloss:2.5

Episode:7700 meanR:10.2100 R:8.0000 rate:0.0160 aloss:0.4947 eloss:2.6041 aloss2:8.1175 exploreP:0.0103
Episode:7701 meanR:10.2100 R:10.0000 rate:0.0200 aloss:0.5083 eloss:2.5536 aloss2:8.3048 exploreP:0.0103
Episode:7702 meanR:10.2200 R:10.0000 rate:0.0200 aloss:0.5054 eloss:2.6637 aloss2:8.0980 exploreP:0.0103
Episode:7703 meanR:10.2100 R:8.0000 rate:0.0160 aloss:0.4876 eloss:2.5818 aloss2:8.2339 exploreP:0.0103
Episode:7704 meanR:10.2200 R:10.0000 rate:0.0200 aloss:0.4679 eloss:2.5839 aloss2:8.2324 exploreP:0.0103
Episode:7705 meanR:10.2400 R:12.0000 rate:0.0240 aloss:0.4770 eloss:2.6162 aloss2:8.2110 exploreP:0.0103
Episode:7706 meanR:10.2400 R:9.0000 rate:0.0180 aloss:0.4804 eloss:2.5827 aloss2:8.2308 exploreP:0.0103
Episode:7707 meanR:10.2500 R:11.0000 rate:0.0220 aloss:0.4893 eloss:2.5891 aloss2:8.2047 exploreP:0.0103
Episode:7708 meanR:10.2600 R:10.0000 rate:0.0200 aloss:0.4836 eloss:2.6442 aloss2:8.2133 exploreP:0.0103
Episode:7709 meanR:10.2700 R:10.0000 rate:0.0200 aloss:0.4

Episode:7780 meanR:10.3200 R:9.0000 rate:0.0180 aloss:0.4836 eloss:2.6368 aloss2:8.0244 exploreP:0.0103
Episode:7781 meanR:10.3200 R:9.0000 rate:0.0180 aloss:0.4557 eloss:2.6619 aloss2:7.9723 exploreP:0.0103
Episode:7782 meanR:10.3300 R:10.0000 rate:0.0200 aloss:0.4774 eloss:2.6175 aloss2:8.0658 exploreP:0.0103
Episode:7783 meanR:10.3200 R:10.0000 rate:0.0200 aloss:0.5004 eloss:2.5741 aloss2:8.0681 exploreP:0.0103
Episode:7784 meanR:10.3000 R:8.0000 rate:0.0160 aloss:0.5078 eloss:2.4890 aloss2:8.1794 exploreP:0.0103
Episode:7785 meanR:10.3100 R:10.0000 rate:0.0200 aloss:0.4894 eloss:2.5716 aloss2:8.0947 exploreP:0.0103
Episode:7786 meanR:10.3000 R:9.0000 rate:0.0180 aloss:0.4921 eloss:2.6311 aloss2:8.0835 exploreP:0.0103
Episode:7787 meanR:10.3000 R:9.0000 rate:0.0180 aloss:0.4875 eloss:2.6266 aloss2:8.0159 exploreP:0.0103
Episode:7788 meanR:10.2900 R:9.0000 rate:0.0180 aloss:0.4703 eloss:2.5962 aloss2:8.0951 exploreP:0.0103
Episode:7789 meanR:10.2900 R:10.0000 rate:0.0200 aloss:0.4820

Episode:7860 meanR:11.2900 R:10.0000 rate:0.0200 aloss:0.5064 eloss:2.5691 aloss2:8.0901 exploreP:0.0103
Episode:7861 meanR:11.2900 R:9.0000 rate:0.0180 aloss:0.4685 eloss:2.5992 aloss2:8.0359 exploreP:0.0103
Episode:7862 meanR:11.0800 R:9.0000 rate:0.0180 aloss:0.4716 eloss:2.6244 aloss2:8.0618 exploreP:0.0103
Episode:7863 meanR:11.0800 R:9.0000 rate:0.0180 aloss:0.4670 eloss:2.6359 aloss2:7.9964 exploreP:0.0103
Episode:7864 meanR:10.8400 R:10.0000 rate:0.0200 aloss:0.4909 eloss:2.5543 aloss2:8.0253 exploreP:0.0103
Episode:7865 meanR:10.8400 R:10.0000 rate:0.0200 aloss:0.4808 eloss:2.6720 aloss2:8.0093 exploreP:0.0103
Episode:7866 meanR:10.8400 R:9.0000 rate:0.0180 aloss:0.4551 eloss:2.6440 aloss2:7.9831 exploreP:0.0103
Episode:7867 meanR:10.8600 R:10.0000 rate:0.0200 aloss:0.4603 eloss:2.6023 aloss2:8.0351 exploreP:0.0103
Episode:7868 meanR:10.8600 R:10.0000 rate:0.0200 aloss:0.4666 eloss:2.5533 aloss2:8.0407 exploreP:0.0103
Episode:7869 meanR:10.8500 R:9.0000 rate:0.0180 aloss:0.494

Episode:7940 meanR:10.2400 R:34.0000 rate:0.0680 aloss:0.4867 eloss:2.6036 aloss2:7.9904 exploreP:0.0102
Episode:7941 meanR:10.2300 R:9.0000 rate:0.0180 aloss:0.5002 eloss:2.6052 aloss2:7.9335 exploreP:0.0102
Episode:7942 meanR:10.4900 R:35.0000 rate:0.0700 aloss:0.4784 eloss:2.5926 aloss2:8.0190 exploreP:0.0102
Episode:7943 meanR:10.7100 R:32.0000 rate:0.0640 aloss:0.4830 eloss:2.5680 aloss2:8.1229 exploreP:0.0102
Episode:7944 meanR:10.7100 R:10.0000 rate:0.0200 aloss:0.4483 eloss:2.5882 aloss2:8.0449 exploreP:0.0102
Episode:7945 meanR:10.7100 R:9.0000 rate:0.0180 aloss:0.4668 eloss:2.6189 aloss2:8.1380 exploreP:0.0102
Episode:7946 meanR:10.7100 R:10.0000 rate:0.0200 aloss:0.4496 eloss:2.6321 aloss2:8.0774 exploreP:0.0102
Episode:7947 meanR:10.7200 R:10.0000 rate:0.0200 aloss:0.4972 eloss:2.5497 aloss2:8.0537 exploreP:0.0102
Episode:7948 meanR:10.7300 R:11.0000 rate:0.0220 aloss:0.4699 eloss:2.6264 aloss2:8.0467 exploreP:0.0102
Episode:7949 meanR:10.7400 R:10.0000 rate:0.0200 aloss:0.

Episode:8019 meanR:11.7300 R:10.0000 rate:0.0200 aloss:0.4622 eloss:2.6259 aloss2:8.1055 exploreP:0.0102
Episode:8020 meanR:11.7200 R:8.0000 rate:0.0160 aloss:0.4702 eloss:2.5353 aloss2:8.2542 exploreP:0.0102
Episode:8021 meanR:11.7200 R:9.0000 rate:0.0180 aloss:0.4383 eloss:2.5353 aloss2:8.2401 exploreP:0.0102
Episode:8022 meanR:11.7200 R:10.0000 rate:0.0200 aloss:0.4488 eloss:2.5465 aloss2:8.2059 exploreP:0.0102
Episode:8023 meanR:11.7300 R:10.0000 rate:0.0200 aloss:0.4710 eloss:2.5662 aloss2:8.1504 exploreP:0.0102
Episode:8024 meanR:11.8900 R:25.0000 rate:0.0500 aloss:0.4490 eloss:2.5180 aloss2:8.2236 exploreP:0.0102
Episode:8025 meanR:11.8900 R:10.0000 rate:0.0200 aloss:0.4747 eloss:2.5389 aloss2:8.2504 exploreP:0.0102
Episode:8026 meanR:11.8900 R:9.0000 rate:0.0180 aloss:0.4564 eloss:2.5403 aloss2:8.1500 exploreP:0.0102
Episode:8027 meanR:11.8900 R:10.0000 rate:0.0200 aloss:0.4510 eloss:2.5339 aloss2:8.1491 exploreP:0.0102
Episode:8028 meanR:11.8900 R:9.0000 rate:0.0180 aloss:0.44

Episode:8099 meanR:11.1500 R:10.0000 rate:0.0200 aloss:0.4711 eloss:2.6404 aloss2:8.1894 exploreP:0.0102
Episode:8100 meanR:11.1500 R:10.0000 rate:0.0200 aloss:0.4539 eloss:2.6060 aloss2:8.2000 exploreP:0.0102
Episode:8101 meanR:11.4000 R:35.0000 rate:0.0700 aloss:0.4570 eloss:2.5456 aloss2:8.2607 exploreP:0.0102
Episode:8102 meanR:11.0900 R:9.0000 rate:0.0180 aloss:0.4569 eloss:2.6234 aloss2:8.1489 exploreP:0.0102
Episode:8103 meanR:11.0800 R:9.0000 rate:0.0180 aloss:0.4791 eloss:2.5600 aloss2:8.2219 exploreP:0.0102
Episode:8104 meanR:11.2100 R:22.0000 rate:0.0440 aloss:0.4583 eloss:2.5428 aloss2:8.2392 exploreP:0.0102
Episode:8105 meanR:11.1900 R:10.0000 rate:0.0200 aloss:0.4598 eloss:2.5769 aloss2:8.1583 exploreP:0.0102
Episode:8106 meanR:11.4500 R:36.0000 rate:0.0720 aloss:0.4555 eloss:2.5459 aloss2:8.1531 exploreP:0.0102
Episode:8107 meanR:11.4400 R:9.0000 rate:0.0180 aloss:0.4440 eloss:2.5294 aloss2:8.0997 exploreP:0.0102
Episode:8108 meanR:11.5700 R:22.0000 rate:0.0440 aloss:0.4

Episode:8178 meanR:13.6000 R:22.0000 rate:0.0440 aloss:0.5155 eloss:2.6919 aloss2:7.9899 exploreP:0.0102
Episode:8179 meanR:13.4900 R:10.0000 rate:0.0200 aloss:0.5311 eloss:2.7461 aloss2:7.9732 exploreP:0.0102
Episode:8180 meanR:13.5900 R:20.0000 rate:0.0400 aloss:0.5064 eloss:2.6701 aloss2:8.0530 exploreP:0.0102
Episode:8181 meanR:13.5800 R:9.0000 rate:0.0180 aloss:0.5236 eloss:2.6856 aloss2:7.9880 exploreP:0.0102
Episode:8182 meanR:13.5900 R:10.0000 rate:0.0200 aloss:0.5122 eloss:2.6660 aloss2:8.0408 exploreP:0.0102
Episode:8183 meanR:13.6000 R:10.0000 rate:0.0200 aloss:0.4845 eloss:2.6335 aloss2:8.1038 exploreP:0.0102
Episode:8184 meanR:13.7200 R:21.0000 rate:0.0420 aloss:0.4873 eloss:2.6274 aloss2:8.1443 exploreP:0.0102
Episode:8185 meanR:13.7400 R:10.0000 rate:0.0200 aloss:0.4948 eloss:2.6573 aloss2:8.1014 exploreP:0.0102
Episode:8186 meanR:13.5900 R:9.0000 rate:0.0180 aloss:0.4780 eloss:2.6343 aloss2:8.0807 exploreP:0.0102
Episode:8187 meanR:13.5900 R:10.0000 rate:0.0200 aloss:0.

Episode:8258 meanR:11.5800 R:10.0000 rate:0.0200 aloss:0.4483 eloss:2.5768 aloss2:7.9559 exploreP:0.0102
Episode:8259 meanR:11.5700 R:9.0000 rate:0.0180 aloss:0.4568 eloss:2.4854 aloss2:8.0631 exploreP:0.0102
Episode:8260 meanR:11.5700 R:10.0000 rate:0.0200 aloss:0.4230 eloss:2.5389 aloss2:8.0099 exploreP:0.0102
Episode:8261 meanR:11.4000 R:9.0000 rate:0.0180 aloss:0.4343 eloss:2.5500 aloss2:7.9476 exploreP:0.0102
Episode:8262 meanR:11.2500 R:9.0000 rate:0.0180 aloss:0.4343 eloss:2.5205 aloss2:7.9964 exploreP:0.0102
Episode:8263 meanR:11.2500 R:10.0000 rate:0.0200 aloss:0.4360 eloss:2.5927 aloss2:7.9229 exploreP:0.0102
Episode:8264 meanR:11.1100 R:10.0000 rate:0.0200 aloss:0.4345 eloss:2.5798 aloss2:7.9669 exploreP:0.0102
Episode:8265 meanR:10.9400 R:9.0000 rate:0.0180 aloss:0.4422 eloss:2.4934 aloss2:8.0292 exploreP:0.0102
Episode:8266 meanR:10.9300 R:9.0000 rate:0.0180 aloss:0.4408 eloss:2.5257 aloss2:7.9867 exploreP:0.0102
Episode:8267 meanR:10.9400 R:10.0000 rate:0.0200 aloss:0.419

Episode:8338 meanR:9.8400 R:8.0000 rate:0.0160 aloss:0.4827 eloss:2.4845 aloss2:8.2360 exploreP:0.0101
Episode:8339 meanR:9.8500 R:10.0000 rate:0.0200 aloss:0.4584 eloss:2.5462 aloss2:8.2519 exploreP:0.0101
Episode:8340 meanR:9.8600 R:10.0000 rate:0.0200 aloss:0.4469 eloss:2.5679 aloss2:8.2334 exploreP:0.0101
Episode:8341 meanR:9.8400 R:8.0000 rate:0.0160 aloss:0.4775 eloss:2.5221 aloss2:8.2299 exploreP:0.0101
Episode:8342 meanR:9.8600 R:10.0000 rate:0.0200 aloss:0.4407 eloss:2.5768 aloss2:8.1855 exploreP:0.0101
Episode:8343 meanR:9.8600 R:9.0000 rate:0.0180 aloss:0.4377 eloss:2.5870 aloss2:8.1659 exploreP:0.0101
Episode:8344 meanR:9.8500 R:9.0000 rate:0.0180 aloss:0.4515 eloss:2.5022 aloss2:8.2977 exploreP:0.0101
Episode:8345 meanR:9.8400 R:9.0000 rate:0.0180 aloss:0.4443 eloss:2.5779 aloss2:8.1847 exploreP:0.0101
Episode:8346 meanR:9.8300 R:9.0000 rate:0.0180 aloss:0.4373 eloss:2.6144 aloss2:8.1922 exploreP:0.0101
Episode:8347 meanR:9.8200 R:9.0000 rate:0.0180 aloss:0.4607 eloss:2.63

Episode:8419 meanR:10.4100 R:9.0000 rate:0.0180 aloss:0.4837 eloss:2.6042 aloss2:8.0854 exploreP:0.0101
Episode:8420 meanR:10.4300 R:10.0000 rate:0.0200 aloss:0.4683 eloss:2.6222 aloss2:8.0745 exploreP:0.0101
Episode:8421 meanR:10.5500 R:22.0000 rate:0.0440 aloss:0.4694 eloss:2.6704 aloss2:8.0439 exploreP:0.0101
Episode:8422 meanR:10.8100 R:35.0000 rate:0.0700 aloss:0.4771 eloss:2.6010 aloss2:8.0926 exploreP:0.0101
Episode:8423 meanR:10.8000 R:8.0000 rate:0.0160 aloss:0.4715 eloss:2.6365 aloss2:8.0228 exploreP:0.0101
Episode:8424 meanR:10.8000 R:10.0000 rate:0.0200 aloss:0.4858 eloss:2.6199 aloss2:8.0753 exploreP:0.0101
Episode:8425 meanR:10.8000 R:10.0000 rate:0.0200 aloss:0.4555 eloss:2.5375 aloss2:8.1847 exploreP:0.0101
Episode:8426 meanR:10.7800 R:8.0000 rate:0.0160 aloss:0.4716 eloss:2.6280 aloss2:8.0927 exploreP:0.0101
Episode:8427 meanR:10.7600 R:8.0000 rate:0.0160 aloss:0.4848 eloss:2.5227 aloss2:8.2548 exploreP:0.0101
Episode:8428 meanR:10.7600 R:9.0000 rate:0.0180 aloss:0.457

# Visualizing training

Below I'll plot the total rewards for each episode. I'm plotting the rolling average too, in blue.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def running_mean(x, N):
    """Return the simple moving average of ``x`` over a window of ``N`` samples.

    The window sums are obtained from a zero-prefixed cumulative sum: the sum
    of any ``N`` consecutive values is the difference of two running totals.

    Args:
        x: 1-D sequence or array of numbers.
        N: Window length; must be a positive integer.

    Returns:
        Array holding ``len(x) - N + 1`` window means, oldest window first.
        The result is empty when ``N`` is larger than ``len(x)``.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        # A negative N would slice from the wrong ends and silently return
        # meaningless numbers; N == 0 would fail with an opaque shape error.
        raise ValueError('N must be a positive integer, got {}'.format(N))
    # Prepend 0 so that cumsum[i] is the sum of the first i values.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

In [None]:
# Entries of episode_rewards_list are presumably (episode, value) pairs;
# transposing the array splits them into parallel x / y arrays.
eps, arr = np.array(episode_rewards_list).T
smoothed_arr = running_mean(arr, 10)
# Align the smoothed curve with the trailing episodes. An explicit start index
# is used instead of eps[-len(smoothed_arr):] because with fewer than 10 points
# smoothed_arr is empty, and a "-0" start would select every episode and make
# plt.plot fail on mismatched x / y lengths.
plt.plot(eps[len(eps) - len(smoothed_arr):], smoothed_arr)
# Raw, unsmoothed values drawn faintly behind the rolling average.
plt.plot(eps, arr, color='grey', alpha=0.3)
plt.xlabel('Episode')
plt.ylabel('Total rewards')

In [None]:
# Entries of rewards_list are presumably (episode, value) pairs; transposing
# the array splits them into parallel x / y arrays.
# NOTE(review): this plot carries the same 'Total rewards' label as the
# episode_rewards_list plot -- confirm which series each list holds.
eps, arr = np.array(rewards_list).T
smoothed_arr = running_mean(arr, 10)
# Align the smoothed curve with the trailing episodes. An explicit start index
# is used instead of eps[-len(smoothed_arr):] because with fewer than 10 points
# smoothed_arr is empty, and a "-0" start would select every episode and make
# plt.plot fail on mismatched x / y lengths.
plt.plot(eps[len(eps) - len(smoothed_arr):], smoothed_arr)
# Raw, unsmoothed values drawn faintly behind the rolling average.
plt.plot(eps, arr, color='grey', alpha=0.3)
plt.xlabel('Episode')
plt.ylabel('Total rewards')

In [None]:
# Entries of aloss_list are presumably (episode, loss) pairs; transposing the
# array splits them into parallel x / y arrays.
eps, arr = np.array(aloss_list).T
smoothed_arr = running_mean(arr, 10)
# Align the smoothed curve with the trailing episodes. An explicit start index
# is used instead of eps[-len(smoothed_arr):] because with fewer than 10 points
# smoothed_arr is empty, and a "-0" start would select every episode and make
# plt.plot fail on mismatched x / y lengths.
plt.plot(eps[len(eps) - len(smoothed_arr):], smoothed_arr)
# Raw, unsmoothed values drawn faintly behind the rolling average.
plt.plot(eps, arr, color='grey', alpha=0.3)
plt.xlabel('Episode')
plt.ylabel('Act losses')

In [None]:
# Entries of eloss_list are presumably (episode, loss) pairs; transposing the
# array splits them into parallel x / y arrays.
eps, arr = np.array(eloss_list).T
smoothed_arr = running_mean(arr, 10)
# Align the smoothed curve with the trailing episodes. An explicit start index
# is used instead of eps[-len(smoothed_arr):] because with fewer than 10 points
# smoothed_arr is empty, and a "-0" start would select every episode and make
# plt.plot fail on mismatched x / y lengths.
plt.plot(eps[len(eps) - len(smoothed_arr):], smoothed_arr)
# Raw, unsmoothed values drawn faintly behind the rolling average.
plt.plot(eps, arr, color='grey', alpha=0.3)
plt.xlabel('Episode')
plt.ylabel('Env losses')

In [None]:
# Entries of aloss2_list are presumably (episode, loss) pairs; transposing the
# array splits them into parallel x / y arrays.
eps, arr = np.array(aloss2_list).T
smoothed_arr = running_mean(arr, 10)
# Align the smoothed curve with the trailing episodes. An explicit start index
# is used instead of eps[-len(smoothed_arr):] because with fewer than 10 points
# smoothed_arr is empty, and a "-0" start would select every episode and make
# plt.plot fail on mismatched x / y lengths.
plt.plot(eps[len(eps) - len(smoothed_arr):], smoothed_arr)
# Raw, unsmoothed values drawn faintly behind the rolling average.
plt.plot(eps, arr, color='grey', alpha=0.3)
plt.xlabel('Episode')
plt.ylabel('Act losses 2')

## Testing

Let's check out how our trained agent plays the game.

In [24]:
import gym

# Only CartPole-v1 is evaluated. The previous extra gym.make('CartPole-v0')
# call was overwritten on the very next line, leaving an environment that was
# created but never used or closed.
env = gym.make('CartPole-v1')

try:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # `saver` and `model` come from earlier notebook cells; the restore
        # loads the trained parameters from the checkpoint file.
        saver.restore(sess, 'checkpoints/model.ckpt')
        #saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))

        # Episodes/epochs
        for _ in range(10):
            state = env.reset()
            total_reward = 0

            # Steps/batches
            while True:
                env.render()
                # Feed the single state as a batch of one row.
                action_logits = sess.run(model.actions_logits, feed_dict={model.states: state.reshape([1, -1])})
                # Greedy policy: pick the highest-scoring action, no exploration.
                action = np.argmax(action_logits)
                state, reward, done, _ = env.step(action)
                total_reward += reward
                if done:
                    print('total_reward: {}'.format(total_reward))
                    break
finally:
    # Close the environment even if restoring or stepping raises.
    env.close()

INFO:tensorflow:Restoring parameters from checkpoints/model.ckpt
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
total_reward: 500.0
