In [13]:
# 载入相关套件
import gym
from gym import envs

## 随机行动

In [None]:
# Parameter settings
no = 50        # number of episodes to play

In [14]:
# Play `no` CartPole episodes with a uniformly random policy and record,
# per episode, the total reward and the number of steps taken.
#
# Results are left in `all_rewards` and `all_steps` for the reporting cell.
# NOTE(review): this relies on reset() returning only the observation and
# step() returning a 4-tuple (classic gym API) — confirm the installed gym
# version matches.

# Load the CartPole environment
env = gym.make("CartPole-v0")

all_rewards = []  # total reward of each finished episode
all_steps = []    # step count of each finished episode

try:
    # Iterate over range(no) rather than counting `no` down to zero: the
    # configured episode count is left intact, so re-running this cell plays
    # the episodes again instead of silently doing nothing.
    for _ in range(no):
        # Start a fresh episode
        observation = env.reset()
        total_rewards = 0
        total_steps = 0
        done = False

        while not done:
            # Random action
            action = env.action_space.sample()
            total_steps += 1

            # Advance the simulation by one step and accumulate the reward
            observation, reward, done, info = env.step(action)
            total_rewards += reward

        # Episode finished: store its totals
        all_rewards.append(total_rewards)
        all_steps.append(total_steps)
finally:
    # Always release the environment, even if an episode raised
    env.close()

In [15]:
# Report every episode as a tab-separated row: index, total reward, outcome.
# An episode counts as a win when it lasted at least 200 steps.
print('回合\t报酬\t结果')
for episode, (episode_reward, episode_steps) in enumerate(zip(all_rewards, all_steps)):
    if episode_steps >= 200:
        outcome = 'Win'
    else:
        outcome = 'Loss'
    print(f'{episode}\t{episode_reward}\t{outcome}')

回合	报酬	结果
0	18.0	Loss
1	55.0	Loss
2	38.0	Loss
3	11.0	Loss
4	34.0	Loss
5	51.0	Loss
6	15.0	Loss
7	20.0	Loss
8	11.0	Loss
9	32.0	Loss
10	13.0	Loss
11	22.0	Loss
12	21.0	Loss
13	15.0	Loss
14	57.0	Loss
15	11.0	Loss
16	46.0	Loss
17	16.0	Loss
18	16.0	Loss
19	10.0	Loss
20	17.0	Loss
21	13.0	Loss
22	15.0	Loss
23	19.0	Loss
24	14.0	Loss
25	32.0	Loss
26	14.0	Loss
27	10.0	Loss
28	21.0	Loss
29	15.0	Loss
30	32.0	Loss
31	21.0	Loss
32	16.0	Loss
33	13.0	Loss
34	15.0	Loss
35	14.0	Loss
36	27.0	Loss
37	21.0	Loss
38	12.0	Loss
39	34.0	Loss
40	28.0	Loss
41	28.0	Loss
42	30.0	Loss
43	27.0	Loss
44	20.0	Loss
45	11.0	Loss
46	14.0	Loss
47	36.0	Loss
48	26.0	Loss
49	38.0	Loss


## 传统解法

In [16]:
import math 

# Parameter settings
left, right = 0, 1  # cart travel directions (action codes)
max_angle = 8       # if the pole tilts right by 8 degrees or more, move right; tilting left is handled the same way

In [17]:
class Agent:
    """Hand-written CartPole controller driven only by the pole angle.

    Reads the module-level settings ``left``, ``right`` and ``max_angle``.
    """

    def __init__(self):
        """Set the initial direction state."""
        self.direction = left
        # Starting from `right` means the first alternating move flips away
        # from it (to 0 when left/right are the 0/1 action codes).
        self.last_direction = right

    def act(self, observation):
        """Pick the next action for the given observation.

        Strategy:
        1. While the pole stays strictly within +/- ``max_angle`` degrees,
           alternate between the two directions so the cart stays near the
           centre.
        2. When the pole tilts right by ``max_angle`` degrees or more, move
           right until the tilt drops back below the threshold.
        3. Otherwise (tilted left past the threshold) move left.

        Args:
            observation: 4-item sequence of cart position, cart velocity,
                pole angle and pole velocity. The angle is compared against
                a threshold in radians.

        Returns:
            The chosen direction, which is also stored on the agent as
            ``direction`` and ``last_direction``.
        """
        # Only the third entry (pole angle) influences the decision
        _, _, pole_angle, _ = observation
        threshold = math.radians(max_angle)

        if -threshold < pole_angle < threshold:
            # Inside the tolerated band: flip the previous direction
            chosen = (self.last_direction + 1) % 2
        elif pole_angle >= threshold:
            chosen = right
        else:
            chosen = left

        self.direction = chosen
        self.last_direction = chosen
        return chosen

In [18]:
# Play `no` CartPole episodes with the rule-based Agent and record, per
# episode, the total reward and the number of steps taken.
#
# Results are left in `all_rewards` and `all_steps` for the reporting cell.
# NOTE(review): this relies on reset() returning only the observation and
# step() returning a 4-tuple (classic gym API) — confirm the installed gym
# version matches.
no = 50        # number of episodes to play

# Load the CartPole environment
env = gym.make("CartPole-v0")

all_rewards = []  # total reward of each finished episode
all_steps = []    # step count of each finished episode

# A single agent is shared by all episodes, so its direction state carries
# over from one episode to the next.
agent = Agent()

try:
    # Iterate over range(no) rather than counting `no` down to zero, so the
    # episode count is still available (and correct) after the cell has run.
    for _ in range(no):
        # Start a fresh episode
        observation = env.reset()
        total_rewards = 0
        total_steps = 0
        done = False

        while not done:
            # Let the agent choose the action from the latest observation
            action = agent.act(observation)
            total_steps += 1

            # Advance the simulation by one step and accumulate the reward
            observation, reward, done, info = env.step(action)
            total_rewards += reward

        # Episode finished: store its totals
        all_rewards.append(total_rewards)
        all_steps.append(total_steps)
finally:
    # Always release the environment, even if an episode raised
    env.close()

In [19]:
# Report every episode as a tab-separated row: index, total reward, outcome.
# Lasting at least 200 steps is treated as a win.
print('回合\t报酬\t结果')
episode = 0
for episode_reward, episode_steps in zip(all_rewards, all_steps):
    outcome = 'Loss' if episode_steps < 200 else 'Win'
    print(f'{episode}\t{episode_reward}\t{outcome}')
    episode += 1

回合	报酬	结果
0	103.0	Loss
1	86.0	Loss
2	116.0	Loss
3	125.0	Loss
4	119.0	Loss
5	117.0	Loss
6	165.0	Loss
7	55.0	Loss
8	200.0	Win
9	88.0	Loss
10	99.0	Loss
11	45.0	Loss
12	90.0	Loss
13	69.0	Loss
14	75.0	Loss
15	70.0	Loss
16	48.0	Loss
17	107.0	Loss
18	98.0	Loss
19	51.0	Loss
20	51.0	Loss
21	93.0	Loss
22	122.0	Loss
23	91.0	Loss
24	100.0	Loss
25	92.0	Loss
26	121.0	Loss
27	65.0	Loss
28	128.0	Loss
29	104.0	Loss
30	77.0	Loss
31	110.0	Loss
32	71.0	Loss
33	167.0	Loss
34	108.0	Loss
35	69.0	Loss
36	46.0	Loss
37	131.0	Loss
38	105.0	Loss
39	43.0	Loss
40	121.0	Loss
41	68.0	Loss
42	81.0	Loss
43	52.0	Loss
44	52.0	Loss
45	174.0	Loss
46	42.0	Loss
47	73.0	Loss
48	95.0	Loss
49	68.0	Loss
