Fast Forward Gym Solo #55

Merged · 25 commits · Mar 20, 2021
Commits
a87084c
Committing for push
goobta Feb 16, 2021
2cb8ac6
TorsoIMU should be normalized?
goobta Feb 16, 2021
e13091e
Write tests for motor encoder observation space
goobta Feb 16, 2021
c7f9d3f
Normalize MotorEncoders
goobta Feb 16, 2021
2c8c407
Updates rewards tests to account for tolerance change
goobta Feb 16, 2021
a8cda83
Add more cases for normalized actions
goobta Feb 16, 2021
dbcbcec
Remove useless imports
goobta Feb 16, 2021
9cd677c
Create runner for functional testing the normalization
goobta Feb 16, 2021
bfda81d
Currently satisfied with how observations/actions/rewards work
goobta Feb 17, 2021
57dacb7
Merge branch 'master' into goobta-normalized-obs
goobta Feb 17, 2021
9159d67
Merge branch 'master' into goobta-normalized-obs
goobta Feb 17, 2021
a728e4a
Remove hardcoded assets folder
goobta Mar 13, 2021
c73f0cf
Make assets a submodule
goobta Mar 13, 2021
2812a8d
Update assets submodule to have v3 models
goobta Mar 13, 2021
43f4f30
Try to get CI working?
goobta Mar 13, 2021
df0085a
Try to use public endpoint instead
goobta Mar 13, 2021
2617705
Try to use git ssh keys instead
goobta Mar 13, 2021
18abda1
Update realtime script
goobta Mar 13, 2021
2815217
Change bounds on normalization script
goobta Mar 13, 2021
b79bb26
Revert back to normal
goobta Mar 13, 2021
486d1f0
Add interactive pos control for solo8mmr
goobta Mar 13, 2021
b7de171
Add realtime script for MMR model
goobta Mar 13, 2021
38f3402
Fully reset the simulation because of memory leaks
goobta Mar 13, 2021
57abd87
Merge all conflicts
goobta Mar 13, 2021
5714b51
Clean up realtime file
goobta Mar 13, 2021
2 changes: 2 additions & 0 deletions .github/workflows/python-test-coverage.yml
@@ -15,6 +15,8 @@ jobs:

     steps:
     - uses: actions/checkout@v2
+      with:
+        submodules: true
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "gym_solo/assets"]
	path = gym_solo/assets
	url = git@github.com:WPI-MMR/assets.git
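
Note: with `gym_solo/assets` tracked as a submodule, a plain `git clone` leaves that directory empty; clone with `git clone --recursive`, or run `git submodule update --init` in an existing checkout, before using the examples below. The SSH URL (`git@github.com:...`) also requires SSH credentials, which is presumably what the CI-related commits above ("Try to use public endpoint instead", "Try to use git ssh keys instead") were working around; the workflow change above pulls the submodule in CI via `submodules: true` on `actions/checkout@v2`.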
88 changes: 88 additions & 0 deletions examples/solo8_mmr/interactive_pos_control.py
@@ -0,0 +1,88 @@
"""A demo for the Solo8 MMR with configurable position control on the
the joints.
"""

import gym
import numpy as np

from gym_solo.envs import solo8v2vanilla
from gym_solo.core import obs
from gym_solo.core import rewards
from gym_solo.core import termination as terms


if __name__ == '__main__':
config = solo8v2vanilla.Solo8VanillaConfig()
config.urdf_path = 'assets/solo8_URDF_v3/solo8_URDF_v3.urdf'
env: solo8v2vanilla.Solo8VanillaEnv = gym.make('solo8vanilla-v0', use_gui=True,
realtime=True, config=config)

env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
env.termination_factory.register_termination(terms.PerpetualTermination())

flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=np.pi)
height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.025, 0.15)

small_control = rewards.SmallControlReward(env.robot, margin=10)
no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5,
soft_margin=3)

stand = rewards.AdditiveReward()
stand.client = env.client
stand.add_term(0.5, flat)
stand.add_term(0.5, height)

home_pos = rewards.MultiplicitiveReward(1, stand, small_control, no_move)
env.reward_factory.register_reward(1, home_pos)

joint_params = []
num_joints = env.client.getNumJoints(env.robot)

for joint in range(num_joints):
joint_params.append(env.client.addUserDebugParameter(
'Joint {}'.format(
env.client.getJointInfo(env.robot, joint)[1].decode('UTF-8')),
-2 * np.pi, 2 * np.pi, 0))

camera_params = {
'fov': env.client.addUserDebugParameter('fov', 30, 150, 80),
'distance': env.client.addUserDebugParameter('distance', .1, 5, 1.5),
'yaw': env.client.addUserDebugParameter('yaw', -90, 90, 0),
'pitch': env.client.addUserDebugParameter('pitch', -90, 90, -10),
'roll': env.client.addUserDebugParameter('roll', -90, 90, 0),
}


try:
print("""\n
=============================================
Solo 8 MMR Position Control

Simulation Active.

Exit with ^C.
=============================================
""")

done = False
cnt = 0
while not done:
user_joints = [env.client.readUserDebugParameter(param)
for param in joint_params]
obs, reward, done, info = env.step(user_joints)

if cnt % 100 == 0:
config.render_fov = env.client.readUserDebugParameter(
camera_params['fov'])
config.render_cam_distance = env.client.readUserDebugParameter(
camera_params['distance'])
config.render_yaw = env.client.readUserDebugParameter(
camera_params['yaw'])
config.render_pitch = env.client.readUserDebugParameter(
camera_params['pitch'])
config.render_roll = env.client.readUserDebugParameter(
camera_params['roll'])
env.render()
cnt += 1
except KeyboardInterrupt:
pass
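
For readers skimming the diff: the reward registered above bottoms out in a single scalar. A minimal sketch of the arithmetic, assuming each term's `compute()` returns a float (as the `normalization_debugging.py` example below uses it) and that `AdditiveReward`/`MultiplicitiveReward` combine their terms by weighted sum and product respectively:

def home_pos_value(flat, height, small_control, no_move):
  """Hypothetical sketch of the composite reward's arithmetic.

  stand is an AdditiveReward with two 0.5-weighted terms; home_pos is
  MultiplicitiveReward(1, stand, small_control, no_move).
  """
  stand = 0.5 * flat.compute() + 0.5 * height.compute()
  return 1 * stand * small_control.compute() * no_move.compute()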
42 changes: 42 additions & 0 deletions examples/solo8_mmr/realtime.py
@@ -0,0 +1,42 @@
""" A demo for the Solo MMR with realtime control.

This environment is designed to act like a real robot would. There is no concept
of a "timestep"; rather the step command should be interprated as sending
position values to the robot's PID controllers.
"""
import gym
import numpy as np

import gym_solo
from gym_solo.envs import solo8v2vanilla_realtime
from gym_solo.core import obs


if __name__ == '__main__':
config = solo8v2vanilla_realtime.RealtimeSolo8VanillaConfig()
config.urdf_path = 'assets/solo8_URDF_v3/solo8_URDF_v3.urdf'

env = gym.make('solo8vanilla-realtime-v0', config=config)
env.obs_factory.register_observation(obs.TorsoIMU(env.robot))

try:
print("""\n
=============================================
Solo 8 v2 Vanilla Realtime Simulation

Simulation Active.

Exit with ^C.
=============================================
""")

while True:
pos = float(input('Which position do you want to set all the joints to?: '))
if pos == 69.:
Contributor: What is up with this xD?

Member Author: oh that's the value to reset the robot lol. I'll open up another PR and get rid of it lmfao

        num_bodies = env.client.getNumBodies()
      else:
        action = np.full(env.action_space.shape, pos)
        env.step(action)

  except KeyboardInterrupt:
    pass
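
As the review thread above notes, `69.` is a magic sentinel for resetting the robot, slated for removal in a follow-up PR. A hypothetical, sentinel-free version of the same loop (the `'r'` command and parsing are illustrative, not part of this PR; assumes the script's `env` and `np` are in scope):

# Hypothetical replacement for the input loop above; 'r' resets the robot
# explicitly instead of relying on the 69. sentinel value.
while True:
  raw = input("Position for all joints (or 'r' to reset): ")
  if raw.strip().lower() == 'r':
    env.reset()
  else:
    env.step(np.full(env.action_space.shape, float(raw)))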
22 changes: 14 additions & 8 deletions examples/solo8_vanilla/interactive_pos_control.py
@@ -19,14 +19,20 @@
   env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
   env.termination_factory.register_termination(terms.PerpetualTermination())

-  flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=.1)
-  small_control = rewards.SmallControlReward(env.robot, margin=.1)
-  no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.1,
-                                              soft_margin=.1)
+  flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=np.pi)
+  height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.025, 0.15)
+
+  small_control = rewards.SmallControlReward(env.robot, margin=10)
+  no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5,
+                                              soft_margin=3)
+
+  stand = rewards.AdditiveReward()
+  stand.client = env.client
+  stand.add_term(0.5, flat)
+  stand.add_term(0.5, height)

-  env.reward_factory.register_reward(.33, flat)
-  env.reward_factory.register_reward(.33, small_control)
-  env.reward_factory.register_reward(.33, no_move)
+  home_pos = rewards.MultiplicitiveReward(1, stand, small_control, no_move)
+  env.reward_factory.register_reward(1, home_pos)

   joint_params = []
   num_joints = env.client.getNumJoints(env.robot)
@@ -63,7 +69,7 @@
       user_joints = [env.client.readUserDebugParameter(param)
                      for param in joint_params]
       obs, reward, done, info = env.step(user_joints)
-
+
       if cnt % 100 == 0:
         config.render_fov = env.client.readUserDebugParameter(
           camera_params['fov'])
114 changes: 114 additions & 0 deletions examples/solo8_vanilla/normalization_debugging.py
@@ -0,0 +1,114 @@
"""A demo for the Solo8 v2 Vanilla to mess around with normalized vs
unnormalized actions and observations.
"""

import gym
import numpy as np

import matplotlib.pyplot as plt

from gym_solo.envs import solo8v2vanilla
from gym_solo.core import obs as solo_obs
from gym_solo.core import rewards
from gym_solo.core import termination as terms


fig = plt.figure()
obs_ax = fig.add_subplot(1, 2, 1)
rewards_ax = fig.add_subplot(1, 2, 2)


if __name__ == '__main__':
config = solo8v2vanilla.Solo8VanillaConfig()
config.max_motor_rotation = np.pi / 2

env: solo8v2vanilla.Solo8VanillaEnv = gym.make('solo8vanilla-v0', use_gui=True,
config=config,
normalize_observations=True)

env.obs_factory.register_observation(solo_obs.TorsoIMU(env.robot))
env.obs_factory.register_observation(solo_obs.MotorEncoder(
env.robot, max_rotation=config.max_motor_rotation))
env.termination_factory.register_termination(terms.PerpetualTermination())

flat = rewards.FlatTorsoReward(env.robot, hard_margin=.05, soft_margin=np.pi)
height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.01, 0.15)

small_control = rewards.SmallControlReward(env.robot, margin=10)
no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5,
soft_margin=3)

stand = rewards.AdditiveReward()
stand.client = env.client
stand.add_term(0.5, flat)
stand.add_term(0.5, height)

home_pos = rewards.MultiplicitiveReward(1, stand, small_control, no_move)
env.reward_factory.register_reward(1, home_pos)

joint_params = []
num_joints = env.client.getNumJoints(env.robot)

for joint in range(num_joints):
joint_params.append(env.client.addUserDebugParameter(
'Joint {}'.format(
env.client.getJointInfo(env.robot, joint)[1].decode('UTF-8')),
-config.max_motor_rotation, config.max_motor_rotation, 0))

camera_params = {
'fov': env.client.addUserDebugParameter('fov', 30, 150, 80),
'distance': env.client.addUserDebugParameter('distance', .1, 5, 1.5),
'yaw': env.client.addUserDebugParameter('yaw', -90, 90, 0),
'pitch': env.client.addUserDebugParameter('pitch', -90, 90, -10),
'roll': env.client.addUserDebugParameter('roll', -90, 90, 0),
}

try:
print("""\n
=============================================
Solo 8 v2 Vanilla Normalization Debugging

Simulation Active.

Exit with ^C.
=============================================
""")

done = False
cnt = 0
while not done:
user_joints = [env.client.readUserDebugParameter(param)
for param in joint_params]
obs, reward, done, info = env.step(user_joints)

if cnt % 100 == 0:
config.render_fov = env.client.readUserDebugParameter(
camera_params['fov'])
config.render_cam_distance = env.client.readUserDebugParameter(
camera_params['distance'])
config.render_yaw = env.client.readUserDebugParameter(
camera_params['yaw'])
config.render_pitch = env.client.readUserDebugParameter(
camera_params['pitch'])
config.render_roll = env.client.readUserDebugParameter(
camera_params['roll'])
env.render()

obs_ax.cla()
obs_ax.set_title('Observations')
obs_ax.bar(np.arange(len(obs)), obs, tick_label=info['labels'])

rewards_ax.cla()
rewards_ax.set_title('Rewards')
rewards_ax.bar(np.arange(5), [flat.compute(),
height.compute(),
small_control.compute(),
no_move.compute(),
reward],
tick_label=('flat', 'height', 'small control', 'no move',
'overall'))

plt.pause(1e-8)
cnt += 1
except KeyboardInterrupt:
pass
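
This demo exists to eyeball the normalized observation vector: with `normalize_observations=True`, each motor encoder reading should land in [-1, 1] given `config.max_motor_rotation`. The actual scaling lives in `gym_solo.core.obs`; a minimal sketch of the assumed linear mapping:

import numpy as np

def normalize_angle(value: float, max_rotation: float) -> float:
  """Map a joint angle from [-max_rotation, max_rotation] to [-1, 1].

  A sketch of the scaling this demo visualizes; the clip is an assumption
  for out-of-range readings, not necessarily gym_solo's exact behavior.
  """
  return float(np.clip(value / max_rotation, -1.0, 1.0))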
13 changes: 8 additions & 5 deletions examples/solo8_vanilla/realtime.py
@@ -8,12 +8,13 @@
 import numpy as np

 import gym_solo
-from gym_solo.envs import solo8v2vanilla
+from gym_solo.envs import solo8v2vanilla_realtime
 from gym_solo.core import obs


 if __name__ == '__main__':
-  env = gym.make('solo8vanilla-realtime-v0')
+  config = solo8v2vanilla_realtime.RealtimeSolo8VanillaConfig()
+  env = gym.make('solo8vanilla-realtime-v0', config=config)
   env.obs_factory.register_observation(obs.TorsoIMU(env.robot))

   try:
@@ -27,11 +28,13 @@
     =============================================
     """)

-    env.reset()
     while True:
       pos = float(input('Which position do you want to set all the joints to?: '))
-      action = np.full(env.action_space.shape, pos)
-      env.step(action)
+      if pos == 69.:
+        env.reset()
+      else:
+        action = np.full(env.action_space.shape, pos)
+        env.step(action)

   except KeyboardInterrupt:
     pass
1 change: 1 addition & 0 deletions gym_solo/assets
Submodule assets added at ddb88d