CartPole-v0游戏比较简单，基本要求就是控制下面的cart移动使上面的pole保持垂直不倒。这个任务只有两个离散动作，要么向左用力，要么向右用力。而state状态就是这个cart的位置和速度， pole的角度和角速度，4维的特征。坚持到200分的奖励则为过关。

In [1]:
import gym
import tensorflow as tf
import numpy as np
import random
from collections import deque

In [2]:
# 衰减因子GAMMA
GAMMA = 0.9

# EPSILON的初始值
INITIAL_EPSILON = 0.5
# EPSILON的最终值
FINAL_EPSILON = 0.01

#经验回放表的大小
REPLAY_SIZE = 10000

# 批量梯度下降的样本数m
BATCH_SIZE = 32


# 迭代轮次
EPISODE = 3000

# Step limitation in an episode
STEP = 300

TEST = 10

In [3]:
class DQN():
  def __init__(self, env):
    # 初始化经验回放表
    self.replay_buffer = deque()
    
    # 初始化参数
    self.time_step = 0
    self.epsilon = INITIAL_EPSILON
    self.state_dim = env.observation_space.shape[0]
    self.action_dim = env.action_space.n

    self.create_Q_network()
    self.create_training_method()

    # 初始化session
    self.session = tf.InteractiveSession()
    self.session.run(tf.global_variables_initializer())

  def create_Q_network(self):
    # 初始化网络参数
    # 状态网络的参数
    W1 = self.weight_variable([self.state_dim,20])
    b1 = self.bias_variable([20])
    
    # 动作网络的参数
    W2 = self.weight_variable([20,self.action_dim])
    b2 = self.bias_variable([self.action_dim])
    
    # 输入层
    self.state_input = tf.placeholder("float",[None,self.state_dim])
    # 隐藏层
    h_layer = tf.nn.relu(tf.matmul(self.state_input,W1) + b1)
    # 输出是Q值
    self.Q_value = tf.matmul(h_layer,W2) + b2

  def create_training_method(self):
    # 对动作进行one hot编码
    self.action_input = tf.placeholder("float",[None,self.action_dim])
    # 当前目标Q值y
    self.y_input = tf.placeholder("float",[None])
    
    # 动作
    Q_action = tf.reduce_sum(tf.multiply(self.Q_value,self.action_input),reduction_indices = 1)
    
    # 代价
    self.cost = tf.reduce_mean(tf.square(self.y_input - Q_action))
    # 优化
    self.optimizer = tf.train.AdamOptimizer(0.0001).minimize(self.cost)

  def perceive(self,state,action,reward,next_state,done):
    one_hot_action = np.zeros(self.action_dim)
    one_hot_action[action] = 1
    self.replay_buffer.append((state,one_hot_action,reward,next_state,done))
    if len(self.replay_buffer) > REPLAY_SIZE:
      self.replay_buffer.popleft()

    if len(self.replay_buffer) > BATCH_SIZE:
      self.train_Q_network()

  def train_Q_network(self):
    self.time_step += 1
    # Step 1: obtain random minibatch from replay memory
    minibatch = random.sample(self.replay_buffer,BATCH_SIZE)
    state_batch = [data[0] for data in minibatch]
    action_batch = [data[1] for data in minibatch]
    reward_batch = [data[2] for data in minibatch]
    next_state_batch = [data[3] for data in minibatch]

    # Step 2: calculate y
    y_batch = []
    Q_value_batch = self.Q_value.eval(feed_dict={self.state_input:next_state_batch})
    for i in range(0,BATCH_SIZE):
      done = minibatch[i][4]
      if done:
        y_batch.append(reward_batch[i])
      else :
        y_batch.append(reward_batch[i] + GAMMA * np.max(Q_value_batch[i]))

    self.optimizer.run(feed_dict={
      self.y_input:y_batch,
      self.action_input:action_batch,
      self.state_input:state_batch
      })

  def egreedy_action(self,state):
    Q_value = self.Q_value.eval(feed_dict = {
      self.state_input:[state]
      })[0]
    if random.random() <= self.epsilon:
        self.epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / 10000
        return random.randint(0,self.action_dim - 1)
    else:
        self.epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / 10000
        return np.argmax(Q_value)

  def action(self,state):
    return np.argmax(self.Q_value.eval(feed_dict = {
      self.state_input:[state]
      })[0])

  def weight_variable(self,shape):
    initial = tf.truncated_normal(shape)
    return tf.Variable(initial)

  def bias_variable(self,shape):
    initial = tf.constant(0.01, shape = shape)
    return tf.Variable(initial)

In [4]:
import gym
import matplotlib.pyplot as plt
%matplotlib inline
# 通过 gym.make 获得 CartPole 游戏的环境
env = gym.make('CartPole-v0')

#初始化DQN网络
agent = DQN(env)

for episode in range(EPISODE):
    # a) 初始化游戏
    state = env.reset()
    # 训练
    for step in range(STEP):
        # b) 贪婪法选择动作
        action = agent.egreedy_action(state)
        next_state,reward,done,_ = env.step(action)
        reward = -1 if done else 0.1
        agent.perceive(state,action,reward,next_state,done)
        state = next_state
        if done:
            break
    # 每100个episodes测试一次
    if episode % 100 == 0:
        total_reward = 0
        for i in range(TEST):
            state = env.reset()
            for j in range(STEP):
                render = lambda : plt.imshow(env.render(mode='rgb_array'))
                #env.render()
                action = agent.action(state) # direct action for test
                state,reward,done,_ = env.step(action)
                total_reward += reward
                if done:
                    break
    ave_reward = total_reward/TEST
    print ('episode: ',episode,'Evaluation Average Reward:',ave_reward)

episode:  0 Evaluation Average Reward: 25.5
episode:  1 Evaluation Average Reward: 25.5
episode:  2 Evaluation Average Reward: 25.5
episode:  3 Evaluation Average Reward: 25.5
episode:  4 Evaluation Average Reward: 25.5
episode:  5 Evaluation Average Reward: 25.5
episode:  6 Evaluation Average Reward: 25.5
episode:  7 Evaluation Average Reward: 25.5
episode:  8 Evaluation Average Reward: 25.5
episode:  9 Evaluation Average Reward: 25.5
episode:  10 Evaluation Average Reward: 25.5
episode:  11 Evaluation Average Reward: 25.5
episode:  12 Evaluation Average Reward: 25.5
episode:  13 Evaluation Average Reward: 25.5
episode:  14 Evaluation Average Reward: 25.5
episode:  15 Evaluation Average Reward: 25.5
episode:  16 Evaluation Average Reward: 25.5
episode:  17 Evaluation Average Reward: 25.5
episode:  18 Evaluation Average Reward: 25.5
episode:  19 Evaluation Average Reward: 25.5
episode:  20 Evaluation Average Reward: 25.5
episode:  21 Evaluation Average Reward: 25.5
episode:  22 Evaluat

episode:  182 Evaluation Average Reward: 16.1
episode:  183 Evaluation Average Reward: 16.1
episode:  184 Evaluation Average Reward: 16.1
episode:  185 Evaluation Average Reward: 16.1
episode:  186 Evaluation Average Reward: 16.1
episode:  187 Evaluation Average Reward: 16.1
episode:  188 Evaluation Average Reward: 16.1
episode:  189 Evaluation Average Reward: 16.1
episode:  190 Evaluation Average Reward: 16.1
episode:  191 Evaluation Average Reward: 16.1
episode:  192 Evaluation Average Reward: 16.1
episode:  193 Evaluation Average Reward: 16.1
episode:  194 Evaluation Average Reward: 16.1
episode:  195 Evaluation Average Reward: 16.1
episode:  196 Evaluation Average Reward: 16.1
episode:  197 Evaluation Average Reward: 16.1
episode:  198 Evaluation Average Reward: 16.1
episode:  199 Evaluation Average Reward: 16.1
episode:  200 Evaluation Average Reward: 51.4
episode:  201 Evaluation Average Reward: 51.4
episode:  202 Evaluation Average Reward: 51.4
episode:  203 Evaluation Average R

episode:  362 Evaluation Average Reward: 68.2
episode:  363 Evaluation Average Reward: 68.2
episode:  364 Evaluation Average Reward: 68.2
episode:  365 Evaluation Average Reward: 68.2
episode:  366 Evaluation Average Reward: 68.2
episode:  367 Evaluation Average Reward: 68.2
episode:  368 Evaluation Average Reward: 68.2
episode:  369 Evaluation Average Reward: 68.2
episode:  370 Evaluation Average Reward: 68.2
episode:  371 Evaluation Average Reward: 68.2
episode:  372 Evaluation Average Reward: 68.2
episode:  373 Evaluation Average Reward: 68.2
episode:  374 Evaluation Average Reward: 68.2
episode:  375 Evaluation Average Reward: 68.2
episode:  376 Evaluation Average Reward: 68.2
episode:  377 Evaluation Average Reward: 68.2
episode:  378 Evaluation Average Reward: 68.2
episode:  379 Evaluation Average Reward: 68.2
episode:  380 Evaluation Average Reward: 68.2
episode:  381 Evaluation Average Reward: 68.2
episode:  382 Evaluation Average Reward: 68.2
episode:  383 Evaluation Average R

episode:  541 Evaluation Average Reward: 90.4
episode:  542 Evaluation Average Reward: 90.4
episode:  543 Evaluation Average Reward: 90.4
episode:  544 Evaluation Average Reward: 90.4
episode:  545 Evaluation Average Reward: 90.4
episode:  546 Evaluation Average Reward: 90.4
episode:  547 Evaluation Average Reward: 90.4
episode:  548 Evaluation Average Reward: 90.4
episode:  549 Evaluation Average Reward: 90.4
episode:  550 Evaluation Average Reward: 90.4
episode:  551 Evaluation Average Reward: 90.4
episode:  552 Evaluation Average Reward: 90.4
episode:  553 Evaluation Average Reward: 90.4
episode:  554 Evaluation Average Reward: 90.4
episode:  555 Evaluation Average Reward: 90.4
episode:  556 Evaluation Average Reward: 90.4
episode:  557 Evaluation Average Reward: 90.4
episode:  558 Evaluation Average Reward: 90.4
episode:  559 Evaluation Average Reward: 90.4
episode:  560 Evaluation Average Reward: 90.4
episode:  561 Evaluation Average Reward: 90.4
episode:  562 Evaluation Average R

episode:  717 Evaluation Average Reward: 159.8
episode:  718 Evaluation Average Reward: 159.8
episode:  719 Evaluation Average Reward: 159.8
episode:  720 Evaluation Average Reward: 159.8
episode:  721 Evaluation Average Reward: 159.8
episode:  722 Evaluation Average Reward: 159.8
episode:  723 Evaluation Average Reward: 159.8
episode:  724 Evaluation Average Reward: 159.8
episode:  725 Evaluation Average Reward: 159.8
episode:  726 Evaluation Average Reward: 159.8
episode:  727 Evaluation Average Reward: 159.8
episode:  728 Evaluation Average Reward: 159.8
episode:  729 Evaluation Average Reward: 159.8
episode:  730 Evaluation Average Reward: 159.8
episode:  731 Evaluation Average Reward: 159.8
episode:  732 Evaluation Average Reward: 159.8
episode:  733 Evaluation Average Reward: 159.8
episode:  734 Evaluation Average Reward: 159.8
episode:  735 Evaluation Average Reward: 159.8
episode:  736 Evaluation Average Reward: 159.8
episode:  737 Evaluation Average Reward: 159.8
episode:  738

episode:  892 Evaluation Average Reward: 190.5
episode:  893 Evaluation Average Reward: 190.5
episode:  894 Evaluation Average Reward: 190.5
episode:  895 Evaluation Average Reward: 190.5
episode:  896 Evaluation Average Reward: 190.5
episode:  897 Evaluation Average Reward: 190.5
episode:  898 Evaluation Average Reward: 190.5
episode:  899 Evaluation Average Reward: 190.5
episode:  900 Evaluation Average Reward: 195.6
episode:  901 Evaluation Average Reward: 195.6
episode:  902 Evaluation Average Reward: 195.6
episode:  903 Evaluation Average Reward: 195.6
episode:  904 Evaluation Average Reward: 195.6
episode:  905 Evaluation Average Reward: 195.6
episode:  906 Evaluation Average Reward: 195.6
episode:  907 Evaluation Average Reward: 195.6
episode:  908 Evaluation Average Reward: 195.6
episode:  909 Evaluation Average Reward: 195.6
episode:  910 Evaluation Average Reward: 195.6
episode:  911 Evaluation Average Reward: 195.6
episode:  912 Evaluation Average Reward: 195.6
episode:  913

episode:  1065 Evaluation Average Reward: 195.0
episode:  1066 Evaluation Average Reward: 195.0
episode:  1067 Evaluation Average Reward: 195.0
episode:  1068 Evaluation Average Reward: 195.0
episode:  1069 Evaluation Average Reward: 195.0
episode:  1070 Evaluation Average Reward: 195.0
episode:  1071 Evaluation Average Reward: 195.0
episode:  1072 Evaluation Average Reward: 195.0
episode:  1073 Evaluation Average Reward: 195.0
episode:  1074 Evaluation Average Reward: 195.0
episode:  1075 Evaluation Average Reward: 195.0
episode:  1076 Evaluation Average Reward: 195.0
episode:  1077 Evaluation Average Reward: 195.0
episode:  1078 Evaluation Average Reward: 195.0
episode:  1079 Evaluation Average Reward: 195.0
episode:  1080 Evaluation Average Reward: 195.0
episode:  1081 Evaluation Average Reward: 195.0
episode:  1082 Evaluation Average Reward: 195.0
episode:  1083 Evaluation Average Reward: 195.0
episode:  1084 Evaluation Average Reward: 195.0
episode:  1085 Evaluation Average Reward

episode:  1236 Evaluation Average Reward: 200.0
episode:  1237 Evaluation Average Reward: 200.0
episode:  1238 Evaluation Average Reward: 200.0
episode:  1239 Evaluation Average Reward: 200.0
episode:  1240 Evaluation Average Reward: 200.0
episode:  1241 Evaluation Average Reward: 200.0
episode:  1242 Evaluation Average Reward: 200.0
episode:  1243 Evaluation Average Reward: 200.0
episode:  1244 Evaluation Average Reward: 200.0
episode:  1245 Evaluation Average Reward: 200.0
episode:  1246 Evaluation Average Reward: 200.0
episode:  1247 Evaluation Average Reward: 200.0
episode:  1248 Evaluation Average Reward: 200.0
episode:  1249 Evaluation Average Reward: 200.0
episode:  1250 Evaluation Average Reward: 200.0
episode:  1251 Evaluation Average Reward: 200.0
episode:  1252 Evaluation Average Reward: 200.0
episode:  1253 Evaluation Average Reward: 200.0
episode:  1254 Evaluation Average Reward: 200.0
episode:  1255 Evaluation Average Reward: 200.0
episode:  1256 Evaluation Average Reward

episode:  1407 Evaluation Average Reward: 200.0
episode:  1408 Evaluation Average Reward: 200.0
episode:  1409 Evaluation Average Reward: 200.0
episode:  1410 Evaluation Average Reward: 200.0
episode:  1411 Evaluation Average Reward: 200.0
episode:  1412 Evaluation Average Reward: 200.0
episode:  1413 Evaluation Average Reward: 200.0
episode:  1414 Evaluation Average Reward: 200.0
episode:  1415 Evaluation Average Reward: 200.0
episode:  1416 Evaluation Average Reward: 200.0
episode:  1417 Evaluation Average Reward: 200.0
episode:  1418 Evaluation Average Reward: 200.0
episode:  1419 Evaluation Average Reward: 200.0
episode:  1420 Evaluation Average Reward: 200.0
episode:  1421 Evaluation Average Reward: 200.0
episode:  1422 Evaluation Average Reward: 200.0
episode:  1423 Evaluation Average Reward: 200.0
episode:  1424 Evaluation Average Reward: 200.0
episode:  1425 Evaluation Average Reward: 200.0
episode:  1426 Evaluation Average Reward: 200.0
episode:  1427 Evaluation Average Reward

episode:  1578 Evaluation Average Reward: 200.0
episode:  1579 Evaluation Average Reward: 200.0
episode:  1580 Evaluation Average Reward: 200.0
episode:  1581 Evaluation Average Reward: 200.0
episode:  1582 Evaluation Average Reward: 200.0
episode:  1583 Evaluation Average Reward: 200.0
episode:  1584 Evaluation Average Reward: 200.0
episode:  1585 Evaluation Average Reward: 200.0
episode:  1586 Evaluation Average Reward: 200.0
episode:  1587 Evaluation Average Reward: 200.0
episode:  1588 Evaluation Average Reward: 200.0
episode:  1589 Evaluation Average Reward: 200.0
episode:  1590 Evaluation Average Reward: 200.0
episode:  1591 Evaluation Average Reward: 200.0
episode:  1592 Evaluation Average Reward: 200.0
episode:  1593 Evaluation Average Reward: 200.0
episode:  1594 Evaluation Average Reward: 200.0
episode:  1595 Evaluation Average Reward: 200.0
episode:  1596 Evaluation Average Reward: 200.0
episode:  1597 Evaluation Average Reward: 200.0
episode:  1598 Evaluation Average Reward

episode:  1749 Evaluation Average Reward: 200.0
episode:  1750 Evaluation Average Reward: 200.0
episode:  1751 Evaluation Average Reward: 200.0
episode:  1752 Evaluation Average Reward: 200.0
episode:  1753 Evaluation Average Reward: 200.0
episode:  1754 Evaluation Average Reward: 200.0
episode:  1755 Evaluation Average Reward: 200.0
episode:  1756 Evaluation Average Reward: 200.0
episode:  1757 Evaluation Average Reward: 200.0
episode:  1758 Evaluation Average Reward: 200.0
episode:  1759 Evaluation Average Reward: 200.0
episode:  1760 Evaluation Average Reward: 200.0
episode:  1761 Evaluation Average Reward: 200.0
episode:  1762 Evaluation Average Reward: 200.0
episode:  1763 Evaluation Average Reward: 200.0
episode:  1764 Evaluation Average Reward: 200.0
episode:  1765 Evaluation Average Reward: 200.0
episode:  1766 Evaluation Average Reward: 200.0
episode:  1767 Evaluation Average Reward: 200.0
episode:  1768 Evaluation Average Reward: 200.0
episode:  1769 Evaluation Average Reward

episode:  1920 Evaluation Average Reward: 200.0
episode:  1921 Evaluation Average Reward: 200.0
episode:  1922 Evaluation Average Reward: 200.0
episode:  1923 Evaluation Average Reward: 200.0
episode:  1924 Evaluation Average Reward: 200.0
episode:  1925 Evaluation Average Reward: 200.0
episode:  1926 Evaluation Average Reward: 200.0
episode:  1927 Evaluation Average Reward: 200.0
episode:  1928 Evaluation Average Reward: 200.0
episode:  1929 Evaluation Average Reward: 200.0
episode:  1930 Evaluation Average Reward: 200.0
episode:  1931 Evaluation Average Reward: 200.0
episode:  1932 Evaluation Average Reward: 200.0
episode:  1933 Evaluation Average Reward: 200.0
episode:  1934 Evaluation Average Reward: 200.0
episode:  1935 Evaluation Average Reward: 200.0
episode:  1936 Evaluation Average Reward: 200.0
episode:  1937 Evaluation Average Reward: 200.0
episode:  1938 Evaluation Average Reward: 200.0
episode:  1939 Evaluation Average Reward: 200.0
episode:  1940 Evaluation Average Reward

episode:  2091 Evaluation Average Reward: 200.0
episode:  2092 Evaluation Average Reward: 200.0
episode:  2093 Evaluation Average Reward: 200.0
episode:  2094 Evaluation Average Reward: 200.0
episode:  2095 Evaluation Average Reward: 200.0
episode:  2096 Evaluation Average Reward: 200.0
episode:  2097 Evaluation Average Reward: 200.0
episode:  2098 Evaluation Average Reward: 200.0
episode:  2099 Evaluation Average Reward: 200.0
episode:  2100 Evaluation Average Reward: 200.0
episode:  2101 Evaluation Average Reward: 200.0
episode:  2102 Evaluation Average Reward: 200.0
episode:  2103 Evaluation Average Reward: 200.0
episode:  2104 Evaluation Average Reward: 200.0
episode:  2105 Evaluation Average Reward: 200.0
episode:  2106 Evaluation Average Reward: 200.0
episode:  2107 Evaluation Average Reward: 200.0
episode:  2108 Evaluation Average Reward: 200.0
episode:  2109 Evaluation Average Reward: 200.0
episode:  2110 Evaluation Average Reward: 200.0
episode:  2111 Evaluation Average Reward

episode:  2262 Evaluation Average Reward: 200.0
episode:  2263 Evaluation Average Reward: 200.0
episode:  2264 Evaluation Average Reward: 200.0
episode:  2265 Evaluation Average Reward: 200.0
episode:  2266 Evaluation Average Reward: 200.0
episode:  2267 Evaluation Average Reward: 200.0
episode:  2268 Evaluation Average Reward: 200.0
episode:  2269 Evaluation Average Reward: 200.0
episode:  2270 Evaluation Average Reward: 200.0
episode:  2271 Evaluation Average Reward: 200.0
episode:  2272 Evaluation Average Reward: 200.0
episode:  2273 Evaluation Average Reward: 200.0
episode:  2274 Evaluation Average Reward: 200.0
episode:  2275 Evaluation Average Reward: 200.0
episode:  2276 Evaluation Average Reward: 200.0
episode:  2277 Evaluation Average Reward: 200.0
episode:  2278 Evaluation Average Reward: 200.0
episode:  2279 Evaluation Average Reward: 200.0
episode:  2280 Evaluation Average Reward: 200.0
episode:  2281 Evaluation Average Reward: 200.0
episode:  2282 Evaluation Average Reward

episode:  2433 Evaluation Average Reward: 200.0
episode:  2434 Evaluation Average Reward: 200.0
episode:  2435 Evaluation Average Reward: 200.0
episode:  2436 Evaluation Average Reward: 200.0
episode:  2437 Evaluation Average Reward: 200.0
episode:  2438 Evaluation Average Reward: 200.0
episode:  2439 Evaluation Average Reward: 200.0
episode:  2440 Evaluation Average Reward: 200.0
episode:  2441 Evaluation Average Reward: 200.0
episode:  2442 Evaluation Average Reward: 200.0
episode:  2443 Evaluation Average Reward: 200.0
episode:  2444 Evaluation Average Reward: 200.0
episode:  2445 Evaluation Average Reward: 200.0
episode:  2446 Evaluation Average Reward: 200.0
episode:  2447 Evaluation Average Reward: 200.0
episode:  2448 Evaluation Average Reward: 200.0
episode:  2449 Evaluation Average Reward: 200.0
episode:  2450 Evaluation Average Reward: 200.0
episode:  2451 Evaluation Average Reward: 200.0
episode:  2452 Evaluation Average Reward: 200.0
episode:  2453 Evaluation Average Reward

episode:  2604 Evaluation Average Reward: 200.0
episode:  2605 Evaluation Average Reward: 200.0
episode:  2606 Evaluation Average Reward: 200.0
episode:  2607 Evaluation Average Reward: 200.0
episode:  2608 Evaluation Average Reward: 200.0
episode:  2609 Evaluation Average Reward: 200.0
episode:  2610 Evaluation Average Reward: 200.0
episode:  2611 Evaluation Average Reward: 200.0
episode:  2612 Evaluation Average Reward: 200.0
episode:  2613 Evaluation Average Reward: 200.0
episode:  2614 Evaluation Average Reward: 200.0
episode:  2615 Evaluation Average Reward: 200.0
episode:  2616 Evaluation Average Reward: 200.0
episode:  2617 Evaluation Average Reward: 200.0
episode:  2618 Evaluation Average Reward: 200.0
episode:  2619 Evaluation Average Reward: 200.0
episode:  2620 Evaluation Average Reward: 200.0
episode:  2621 Evaluation Average Reward: 200.0
episode:  2622 Evaluation Average Reward: 200.0
episode:  2623 Evaluation Average Reward: 200.0
episode:  2624 Evaluation Average Reward

episode:  2775 Evaluation Average Reward: 200.0
episode:  2776 Evaluation Average Reward: 200.0
episode:  2777 Evaluation Average Reward: 200.0
episode:  2778 Evaluation Average Reward: 200.0
episode:  2779 Evaluation Average Reward: 200.0
episode:  2780 Evaluation Average Reward: 200.0
episode:  2781 Evaluation Average Reward: 200.0
episode:  2782 Evaluation Average Reward: 200.0
episode:  2783 Evaluation Average Reward: 200.0
episode:  2784 Evaluation Average Reward: 200.0
episode:  2785 Evaluation Average Reward: 200.0
episode:  2786 Evaluation Average Reward: 200.0
episode:  2787 Evaluation Average Reward: 200.0
episode:  2788 Evaluation Average Reward: 200.0
episode:  2789 Evaluation Average Reward: 200.0
episode:  2790 Evaluation Average Reward: 200.0
episode:  2791 Evaluation Average Reward: 200.0
episode:  2792 Evaluation Average Reward: 200.0
episode:  2793 Evaluation Average Reward: 200.0
episode:  2794 Evaluation Average Reward: 200.0
episode:  2795 Evaluation Average Reward

episode:  2946 Evaluation Average Reward: 133.6
episode:  2947 Evaluation Average Reward: 133.6
episode:  2948 Evaluation Average Reward: 133.6
episode:  2949 Evaluation Average Reward: 133.6
episode:  2950 Evaluation Average Reward: 133.6
episode:  2951 Evaluation Average Reward: 133.6
episode:  2952 Evaluation Average Reward: 133.6
episode:  2953 Evaluation Average Reward: 133.6
episode:  2954 Evaluation Average Reward: 133.6
episode:  2955 Evaluation Average Reward: 133.6
episode:  2956 Evaluation Average Reward: 133.6
episode:  2957 Evaluation Average Reward: 133.6
episode:  2958 Evaluation Average Reward: 133.6
episode:  2959 Evaluation Average Reward: 133.6
episode:  2960 Evaluation Average Reward: 133.6
episode:  2961 Evaluation Average Reward: 133.6
episode:  2962 Evaluation Average Reward: 133.6
episode:  2963 Evaluation Average Reward: 133.6
episode:  2964 Evaluation Average Reward: 133.6
episode:  2965 Evaluation Average Reward: 133.6
episode:  2966 Evaluation Average Reward