/
toy_2d_2g.py
188 lines (162 loc) · 6.09 KB
/
toy_2d_2g.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Maze environment with the deceptive reward
"""
import numpy as np
#Toy 2D Grid environment adapted from: https://github.com/dtak/hip-mdp-public/blob/master/grid_simulator/grid.py
# 目标区域的中心坐标
goals = [[26, 26],
[15, 0],
[0, 15]]
# 目标的奖励值
goal_reward = [100, 5, 8]
# 目标区域的半径d
goal_radii = [[2.5, 2.5],
[2.5, 2.5,],
[2.5, 2.5,],]
# 迷宫的边长
x_range = [0, 26]
y_range = [0, 26]
# 动作的数量:上下左右
num_actions = 4
# 格子的边长
step_size = 0.4
class Grid(object):
"""
This is a 2D Grid environment for simple RL tasks.
"""
def __init__(
self, start_state=[0, 0], step_size=step_size, goal_reward=goal_radii,
x_range=x_range, y_range=y_range, num_actions=num_actions, **kwargs
):
"""
Initialize the environment: creating the box, setting a start and goal region
Later -- might include other obstacles etc.
"""
# 动作的数量:上下左右
self.num_actions = num_actions
# 迷宫的边长
self.x_range = x_range
self.y_range = y_range
# self.goal_radius = 0.5
# self.goal = [1.0,0]
self.spec = "gridworld"
self.reset(start_state, step_size, goal_reward, **kwargs)
def reset(self, start_state=[0, 0], step_size=step_size, goal_reward=goal_reward,
goal=goals, goal_radius=goal_radii, x_std=0, y_std=0, **kwargs
):
"""
Reset Environment.
"""
self.t = 0
self.step_size = step_size
self.start_state = start_state
self.state = start_state
self.goal = goal
self.x_std = x_std
self.y_std = y_std
self.goal_reward = goal_reward
self.goal_radius = goal_radius
# else:
# self.goal_radius = [[4, 4],
# [1, 1],]
def print_goals(self):
return self.goal
def observe(self):
"""Return the agent's position."""
return self.state
def get_action_effect(self, action):
"""
Set the effect direction of the action -- actual movement will
involve step size and possibly involve action error.
"""
if action == 0:
return [1, 0]
elif action == 1:
return [-1, 0]
elif action == 2:
return [0, 1]
elif action == 3:
return [0, -1]
def get_next_state(self, state, action):
"""
Take action from state, and return the next state.
"""
action_effect = self.get_action_effect(action)
new_x = state[0] + (self.step_size * action_effect[0]) + np.random.normal(0, self.x_std)
new_y = state[1] + (self.step_size * action_effect[1]) + np.random.normal(0, self.y_std)
next_state = [new_x, new_y]
return next_state
def _valid_crossing(self, state=None, next_state=None, action=None):
"""Determine if the agent has crossed the maze boundary."""
if state is None:
state = self.state
action = self.action
if next_state is None:
next_state = self.get_next_state(state, action)
#Check for moving out of box in x direction
if next_state[0] < np.min(self.x_range) or next_state[0] > np.max(self.x_range):
# print "Going out of x bounds"
return False
elif next_state[1] < np.min(self.y_range) or next_state[1] > np.max(self.y_range):
# print "Going out of y bounds"
return False
else:
return True
def _in_goal(self, state=None):
"""Determine if the agent has arrived the goal."""
if state is None:
state = self.state
each_goal = []
for goal_i, radius_i in zip(self.goal, self.goal_radius):
if (abs(np.array(state[0]) - np.array(goal_i[0])) <= radius_i[0]):
if (abs(np.array(state[1]) - np.array(goal_i[1])) <= radius_i[1]):
print("Reached goal: ", goal_i)
each_goal.append(1)
else:
each_goal.append(0)
# if np.sum(each_goal) >= 1:
# return True
# else:
# return False
return each_goal
def which_goal(self, state=None):
"""Determine which the goal the agent has arrived at."""
if state is None:
state = self.state
each_goal = []
for goal_i, radius_i in zip(self.goal, self.goal_radius):
if (abs(np.array(state[0]) - np.array(goal_i[0])) <= radius_i[0]):
if (abs(np.array(state[1]) - np.array(goal_i[1])) <= radius_i[1]):
return goal_i
return None
def calc_reward(self, state=None, action=None, next_state=None, **kw):
"""Calculate the reward of each step."""
each_goal = self._in_goal(state=next_state)
if state is None:
state = self.state
action = self.action
if next_state is None:
next_state = self.get_next_state(state, action)
if self._valid_crossing(state=state, next_state=next_state, action=action) and \
np.sum(each_goal):
for idx, goal in enumerate(each_goal):
if goal != 0:
return self.goal_reward[idx]
elif self._valid_crossing(state=state, next_state=next_state, action=action) and \
not np.sum(each_goal):
return -0.5
else:
return -1.
def step(self, action, **kwargs):
"""Perform actions in the maze."""
info = {}
self.t += 1
self.action = action
reward = self.calc_reward()
if self._valid_crossing():
self.state = self.get_next_state(self.state, action)
goal = self.which_goal()
info["goal_idx"] = goal
return self.observe(), reward, np.sum(self._in_goal()), info