/
gym_forward_walker_servo.py
192 lines (163 loc) · 8.57 KB
/
gym_forward_walker_servo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# This files is forked from
# https://github.com/openai/roboschool/blob/63209bf0d0ab08a8872787307a116ab44f40d90d/roboschool/gym_forward_walker.py
# LICENSE: https://github.com/openai/roboschool/blob/63209bf0d0ab08a8872787307a116ab44f40d90d/LICENSE.md
from roboschool.scene_abstract import cpp_household
from roboschool.scene_stadium import SinglePlayerStadiumScene
from roboschool.multiplayer import SharedMemoryClientEnv
from roboschool.gym_mujoco_xml_env import RoboschoolMujocoXmlEnv
import gym, gym.spaces, gym.utils, gym.utils.seeding
import numpy as np
import os, sys
from env.stadium_scene import CustomStadiumScene
class RoboschoolForwardWalkerServo(SharedMemoryClientEnv):
def __init__(self, power):
self.power = power
self.camera_x = 0
self.walk_target_x = 1e3 # kilometer away
self.walk_target_y = 0
self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, -0.5
self.camera_x = 0
self.camera_y = 4.3
self.camera_z = 45.0
self.camera_follow = 0
def create_single_player_scene(self):
return CustomStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
def robot_specific_reset(self):
for j in self.ordered_joints:
j.reset_current_position(self.np_random.uniform( low=-0.1, high=0.1 ), 0)
self.feet = [self.parts[f] for f in self.foot_list]
self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
self.scene.actor_introduce(self)
self.initial_z = None
def move_robot(self, init_x, init_y, init_z):
"Used by multiplayer stadium to move sideways, to another running lane."
self.cpp_robot.query_position()
pose = self.cpp_robot.root_part.pose()
pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact
self.cpp_robot.set_pose(pose)
self.start_pos_x, self.start_pos_y, self.start_pos_z = init_x, init_y, init_z
def apply_action(self, a):
assert( np.isfinite(a).all() )
for n,j in enumerate(self.ordered_joints):
#j.set_motor_torque( self.power*j.power_coef*float(np.clip(a[n], -1, +1)) )
lower, upper = j.limits()[0:2]
#j.set_servo_target(self.power*j.power_coef*float(np.clip(a[n], -1, +1)), 0.1, 0.1, 1)
j.set_servo_target(float(np.clip(a[n], lower, upper)), 0.1, 3.0, 40)
def calc_state(self):
j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
# even elements [0::2] position, scaled to -1..+1 between limits
# odd elements [1::2] angular speed, scaled to show -1..+1
self.joint_speeds = j[1::2]
self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)
body_pose = self.robot_body.pose()
parts_xyz = np.array( [p.pose().xyz() for p in self.parts.values()] ).flatten()
self.body_xyz = (parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z
self.body_rpy = body_pose.rpy()
z = self.body_xyz[2]
r, p, yaw = self.body_rpy
if self.initial_z==None:
self.initial_z = z
self.walk_target_theta = np.arctan2( self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0] )
self.walk_target_dist = np.linalg.norm( [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]] )
self.angle_to_target = self.walk_target_theta - yaw
self.rot_minus_yaw = np.array(
[[np.cos(-yaw), -np.sin(-yaw), 0],
[np.sin(-yaw), np.cos(-yaw), 0],
[ 0, 0, 1]]
)
vx, vy, vz = np.dot(self.rot_minus_yaw, self.robot_body.speed()) # rotate speed back to body point of view
more = np.array([
z-self.initial_z,
np.sin(self.angle_to_target), np.cos(self.angle_to_target),
0.3*vx, 0.3*vy, 0.3*vz, # 0.3 is just scaling typical speed into -1..+1, no physical sense here
r, p], dtype=np.float32)
return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)
def calc_potential(self):
# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
# all rewards have rew/frame units and close to 1.0
return - self.walk_target_dist / self.scene.dt
electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small
foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects
joints_at_limit_cost = -0.2 # discourage stuck joints
def _step(self, a):
if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
self.apply_action(a)
self.scene.global_step()
state = self.calc_state() # also calculates self.joints_at_limit
alive = float(self.alive_bonus(state[0]+self.initial_z, self.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch
done = alive < 0
if not np.isfinite(state).all():
print("~INF~", state)
done = True
potential_old = self.potential
self.potential = self.calc_potential()
progress = float(self.potential - potential_old)
feet_collision_cost = 0.0
for i,f in enumerate(self.feet):
contact_names = set(x.name for x in f.contact_list())
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
self.feet_contact[i] = 1.0 if (self.foot_ground_object_names & contact_names) else 0.0
if contact_names - self.foot_ground_object_names:
feet_collision_cost += self.foot_collision_cost
electricity_cost = self.electricity_cost * float(np.abs(a*self.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)
self.rewards = [
alive,
progress,
electricity_cost,
joints_at_limit_cost,
feet_collision_cost
]
self.frame += 1
if (done and not self.done) or self.frame==self.spec.timestep_limit:
self.episode_over(self.frame)
self.done += done # 2 == 1+True
self.reward += sum(self.rewards)
self.HUD(state, a, done)
return state, sum(self.rewards), bool(done), {}
def episode_over(self, frames):
pass
def camera_adjust(self):
#self.camera_dramatic()
self.camera_simple_follow()
def camera_simple_follow(self):
x, y, z = self.body_xyz
self.camera_x = 0.98*self.camera_x + (1-0.98)*x
self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0)
def camera_dramatic(self):
pose = self.robot_body.pose()
speed = self.robot_body.speed()
x, y, z = pose.xyz()
if 1:
camx, camy, camz = speed[0], speed[1], 2.2
else:
camx, camy, camz = self.walk_target_x - x, self.walk_target_y - y, 2.2
n = np.linalg.norm([camx, camy])
if n > 2.0 and self.frame > 50:
self.camera_follow = 1
if n < 0.5:
self.camera_follow = 0
if self.camera_follow:
camx /= 0.1 + n
camx *= 2.2
camy /= 0.1 + n
camy *= 2.8
if self.frame < 1000:
camx *= -1
camy *= -1
camx += x
camy += y
camz = 1.8
else:
camx = x
camy = y + 4.3
camz = 2.2
#print("%05i" % self.frame, self.camera_follow, camy)
smoothness = 0.97
self.camera_x = smoothness*self.camera_x + (1-smoothness)*camx
self.camera_y = smoothness*self.camera_y + (1-smoothness)*camy
self.camera_z = smoothness*self.camera_z + (1-smoothness)*camz
self.camera.move_and_look_at(self.camera_x, self.camera_y, self.camera_z, x, y, 0.6)