53 python/gps/agent/agent.py 100644 → 100755
@@ -26,6 +26,7 @@ def __init__(self, hyperparams):

self.x_data_types = self._hyperparams['state_include']
self.obs_data_types = self._hyperparams['obs_include']
self.meta_data_types = self._hyperparams['meta_include']

# List of indices for each data type in state X.
self._state_idx, i = [], 0
@@ -42,10 +43,21 @@ def __init__(self, hyperparams):
self._obs_idx.append(list(range(i, i+dim)))
i += dim
self.dO = i

# List of indices for each data type in meta data.
self._meta_idx, i = [], 0
for sensor in self.meta_data_types:
dim = self._hyperparams['sensor_dims'][sensor]
self._meta_idx.append(list(range(i, i+dim)))
i += dim
self.dM = i

self._x_data_idx = {d: i for d, i in zip(self.x_data_types,
self._state_idx)}
self._obs_data_idx = {d: i for d, i in zip(self.obs_data_types,
self._obs_idx)}
self._meta_data_idx = {d: i for d, i in zip(self.meta_data_types,
self._meta_idx)}

@abc.abstractmethod
def sample(self, policy, condition, verbose=True, save=True):
@@ -130,6 +142,47 @@ def pack_data_obs(self, existing_mat, data_to_insert, data_types,
self._obs_data_idx[data_types[i]][-1] + 1)
existing_mat[index] = data_to_insert

def pack_data_meta(self, existing_mat, data_to_insert, data_types,
axes=None):
"""
Update the meta data matrix with new data.
Args:
existing_mat: Current meta data matrix.
data_to_insert: New data to insert into the existing matrix.
data_types: Name of the sensors to insert data for.
axes: Which axes to insert data. Defaults to the last axes.
"""
num_sensor = len(data_types)
if axes is None:
# If axes not specified, assume indexing on last dimensions.
axes = list(range(-1, -num_sensor - 1, -1))
else:
# Make sure number of sensors and axes are consistent.
if num_sensor != len(axes):
raise ValueError(
'Length of sensors (%d) must equal length of axes (%d)',
num_sensor, len(axes)
)

# Shape checks.
insert_shape = list(existing_mat.shape)
for i in range(num_sensor):
# Make sure to slice along X.
if existing_mat.shape[axes[i]] != self.dM:
raise ValueError('Axes must be along an dX=%d dimensional axis',
self.dM)
insert_shape[axes[i]] = len(self._meta_data_idx[data_types[i]])
if tuple(insert_shape) != data_to_insert.shape:
raise ValueError('Data has shape %s. Expected %s',
data_to_insert.shape, tuple(insert_shape))

# Actually perform the slice.
index = [slice(None) for _ in range(len(existing_mat.shape))]
for i in range(num_sensor):
index[axes[i]] = slice(self._meta_data_idx[data_types[i]][0],
self._meta_data_idx[data_types[i]][-1] + 1)
existing_mat[index] = data_to_insert

def pack_data_x(self, existing_mat, data_to_insert, data_types, axes=None):
"""
Update the state matrix with new data.
5 python/gps/agent/config.py 100644 → 100755
@@ -65,6 +65,11 @@
# AgentMuJoCo
AGENT_MUJOCO = {
'substeps': 1,
'camera_pos': np.array([2., 3., 2., 0., 0., 0.]),
'image_width': 640,
'image_height': 480,
'image_channels': 3,
'meta_include': []
}

AGENT_BOX2D = {
121 python/gps/agent/mjc/agent_mjc.py 100644 → 100755
@@ -10,10 +10,15 @@
from gps.agent.config import AGENT_MUJOCO
from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, \
END_EFFECTOR_POINT_JACOBIANS, ACTION
END_EFFECTOR_POINT_JACOBIANS, ACTION, RGB_IMAGE, RGB_IMAGE_SIZE, \
DOMAIN_IMAGE, DOMAIN_IMAGE_SIZE
from gps.sample.sample import Sample


import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# IMAGE_WIDTH = 40
# IMAGE_HEIGHT = 32
# IMAGE_CHANNELS = 3

class AgentMuJoCo(Agent):
"""
All communication between the algorithms and MuJoCo is done through
@@ -33,7 +38,7 @@ def _setup_conditions(self):
"""
conds = self._hyperparams['conditions']
for field in ('x0', 'x0var', 'pos_body_idx', 'pos_body_offset',
'noisy_body_idx', 'noisy_body_var'):
'noisy_body_idx', 'noisy_body_var', 'filename'):
self._hyperparams[field] = setup(self._hyperparams[field], conds)

def _setup_world(self, filename):
@@ -42,39 +47,55 @@ def _setup_world(self, filename):
Args:
filename: Path to XML file containing the world information.
"""
self._world = mjcpy.MJCWorld(filename)
self._world = []
self._model = []

self._model = self._world.get_model()
self._model = [copy.deepcopy(self._model)
for _ in range(self._hyperparams['conditions'])]
# Initialize Mujoco worlds. If there's only one xml file, create a single world object,
# otherwise create a different world for each condition.
if not isinstance(filename, list):
self._world = mjcpy.MJCWorld(filename)
self._model = self._world.get_model()
self._world = [self._world
for _ in range(self._hyperparams['conditions'])]
self._model = [copy.deepcopy(self._model)
for _ in range(self._hyperparams['conditions'])]
else:
for i in range(self._hyperparams['conditions']):
self._world.append(mjcpy.MJCWorld(self._hyperparams['filename'][i]))
self._model.append(self._world[i].get_model())

for i in range(self._hyperparams['conditions']):
for j in range(len(self._hyperparams['pos_body_idx'][i])):
idx = self._hyperparams['pos_body_idx'][i][j]
self._model[i]['body_pos'][idx, :] += \
self._hyperparams['pos_body_offset'][i]
self._world.set_model(self._model[i])
self._world[i].set_model(self._model[i])
x0 = self._hyperparams['x0'][i]
idx = len(x0) // 2
data = {'qpos': x0[:idx], 'qvel': x0[idx:]}
self._world.set_data(data)
self._world.kinematics()

# Initialize x0.
self._data = self._world.get_data()
eepts = self._data['site_xpos'].flatten()
self._world[i].set_data(data)
self._world[i].kinematics()

self._joint_idx = list(range(self._model[0]['nq']))
self._vel_idx = [i + self._model[0]['nq'] for i in self._joint_idx]

# Initialize x0.
self.x0 = []
for x0 in self._hyperparams['x0']:
for i in range(self._hyperparams['conditions']):
if END_EFFECTOR_POINTS in self.x_data_types:
eepts = self._world[i].get_data()['site_xpos'].flatten()
self.x0.append(
np.concatenate([x0, eepts, np.zeros_like(eepts)])
np.concatenate([self._hyperparams['x0'][i], eepts, np.zeros_like(eepts)])
)
else:
self.x0.append(x0)
self.x0.append(self._hyperparams['x0'][i])

cam_pos = self._hyperparams['camera_pos']
for i in range(self._hyperparams['conditions']):
self._world[i].init_viewer(640,
480,
cam_pos[0], cam_pos[1], cam_pos[2],
cam_pos[3], cam_pos[4], cam_pos[5])

def sample(self, policy, condition, verbose=True, save=True):
"""
@@ -102,19 +123,23 @@ def sample(self, policy, condition, verbose=True, save=True):
var = self._hyperparams['noisy_body_var'][condition][i]
self._model[condition]['body_pos'][idx, :] += \
var * np.random.randn(1, 3)
self._world.set_model(self._model[condition])
self._world[condition].set_model(self._model[condition])
for t in range(self.T):
X_t = new_sample.get_X(t=t)
obs_t = new_sample.get_obs(t=t)
# img = self._get_image_from_obs(obs_t)
# plt.imsave('/home/coline/exp_data/gps_images/test'+str(t)+'.png', img)
# meta = new_sample.get_meta()
mj_U = policy.act(X_t, obs_t, t, noise[t, :])
U[t, :] = mj_U
if verbose:
self._world.plot(mj_X)
self._world[condition].plot(mj_X)
# obs = self._world[condition].get_image()
if (t + 1) < self.T:
for _ in range(self._hyperparams['substeps']):
mj_X, _ = self._world.step(mj_X, mj_U)
mj_X, _ = self._world[condition].step(mj_X, mj_U)
#TODO: Some hidden state stuff will go here.
self._data = self._world.get_data()
self._data = self._world[condition].get_data()
self._set_sample(new_sample, mj_X, t, condition)
new_sample.set(ACTION, U)
if save:
@@ -132,15 +157,37 @@ def _init_sample(self, condition):
self._hyperparams['x0'][condition][self._joint_idx], t=0)
sample.set(JOINT_VELOCITIES,
self._hyperparams['x0'][condition][self._vel_idx], t=0)
self._data = self._world.get_data()
self._data = self._world[condition].get_data()
eepts = self._data['site_xpos'].flatten()
sample.set(END_EFFECTOR_POINTS, eepts, t=0)
sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
for site in range(eepts.shape[0] // 3):
idx = site * 3
jac[idx:(idx+3), :] = self._world.get_jac_site(site)
jac[idx:(idx+3), :] = self._world[condition].get_jac_site(site)
sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)

# save initial image to meta data
self._world[condition].plot(self._hyperparams['x0'][condition])
img = self._world[condition].get_image_scaled(self._hyperparams['image_width'],
self._hyperparams['image_height'])
# mjcpy image shape is [height, width, channels], dim-shuffle it for later conv-net processing,
# and flatten for storage
img_data = np.transpose(img["img"], (2, 1, 0)).flatten()
# if initial image is an observation, replicate it for each time step
if DOMAIN_IMAGE in self.obs_data_types:
sample.set(DOMAIN_IMAGE, np.tile(img_data, (self.T, 1)), t=None)
else:
sample.set(DOMAIN_IMAGE, img_data, t=None)
sample.set(DOMAIN_IMAGE_SIZE, np.array([self._hyperparams['image_channels'],
self._hyperparams['image_width'],
self._hyperparams['image_height']]), t=None)
# only save subsequent images if image is part of observation
if RGB_IMAGE in self.obs_data_types:
sample.set(RGB_IMAGE, img_data, t=0)
sample.set(RGB_IMAGE_SIZE, [self._hyperparams['image_channels'],
self._hyperparams['image_width'],
self._hyperparams['image_height']], t=None)
return sample

def _set_sample(self, sample, mj_X, t, condition):
@@ -162,5 +209,29 @@ def _set_sample(self, sample, mj_X, t, condition):
jac = np.zeros([curr_eepts.shape[0], self._model[condition]['nq']])
for site in range(curr_eepts.shape[0] // 3):
idx = site * 3
jac[idx:(idx+3), :] = self._world.get_jac_site(site)
jac[idx:(idx+3), :] = self._world[condition].get_jac_site(site)
sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=t+1)
if RGB_IMAGE in self.obs_data_types:
img = self._world[condition].get_image_scaled(self._hyperparams['image_width'],
self._hyperparams['image_height'])
sample.set(RGB_IMAGE, np.transpose(img["img"], (2, 1, 0)).flatten(), t=t+1)

def _get_image_from_obs(self, obs):
imstart = 0
imend = 0
IMAGE_CHANNELS = self._hyperparams['image_channels']
IMAGE_WIDTH = self._hyperparams['image_width']
IMAGE_HEIGHT = self._hyperparams['image_height']
for sensor in self._hyperparams['obs_include']:
# Assumes only one of RGB_IMAGE or DOMAIN_IMAGE is present
if sensor == RGB_IMAGE or sensor == DOMAIN_IMAGE:
imend = imstart + self._hyperparams['sensor_dims'][sensor]
break
else:
imstart += self._hyperparams['sensor_dims'][sensor]
img = obs[imstart:imend]
# import pdb
# pdb.set_trace()
img = img.reshape((IMAGE_CHANNELS,IMAGE_WIDTH, IMAGE_HEIGHT))
img = np.transpose(img, [1,2,0])
return img
6 python/gps/algorithm/algorithm.py 100644 → 100755
@@ -23,7 +23,9 @@ def __init__(self, hyperparams):
config.update(hyperparams)
self._hyperparams = config

self.M = hyperparams['conditions']
# self.M = hyperparams['conditions']
self._cond_idx = hyperparams['train_conditions']
self.M = len(self._cond_idx)
self.iteration_count = 0

# Grab a few values from the agent.
@@ -48,7 +50,7 @@ def __init__(self, hyperparams):
dynamics = self._hyperparams['dynamics']
self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
init_traj_distr = extract_condition(
self._hyperparams['init_traj_distr'], m
self._hyperparams['init_traj_distr'], self._cond_idx[m]
)
self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

@@ -18,3 +18,11 @@ def act(self, x, obs, t, noise):
A dU dimensional action vector.
"""
raise NotImplementedError("Must be implemented in subclass.")

def set_meta_data(self, meta):
"""
Set meta data for policy (e.g., domain image, multi modal observation sizes)
Args:
meta: meta data.
"""
return
@@ -44,7 +44,7 @@ def __init__(self, hyperparams):
self._max_clusters = self._hyperparams['max_clusters']
self._strength = self._hyperparams['strength']

def update(self, samples, policy_opt, all_samples, retrain=True):
def update(self, samples, policy_opt, all_samples, retrain=True, m=None):
""" Update prior with additional data. """
X, obs = samples.get_X(), samples.get_obs()
all_X, all_obs = all_samples.get_X(), all_samples.get_obs()
@@ -65,6 +65,8 @@ def update(self, samples, policy_opt, all_samples, retrain=True):
self.X = self.X[start:, :, :]
self.obs = self.obs[start:, :, :]
# Evaluate policy at samples to get mean policy action.
if m is not None:
Upol = policy_opt.prob(self.obs.copy(), m)[0]
Upol = policy_opt.prob(self.obs.copy())[0]
# Create dataset.
N = self.X.shape[0]
127 python/gps/gps_main.py 100644 → 100755
@@ -27,6 +27,10 @@ class GPSMain(object):
def __init__(self, config):
self._hyperparams = config
self._conditions = config['common']['conditions']
# TODO: add default
self._train_idx = config['common']['train_conditions']
self._test_idx = config['common']['test_conditions']

self._data_files_dir = config['common']['data_files_dir']

self.agent = config['agent']['type'](config['agent'])
@@ -47,51 +51,20 @@ def run(self, itr_load=None):
itr_start = self._initialize(itr_load)

for itr in range(itr_start, self._hyperparams['iterations']):
for cond in range(self._conditions):
for cond in self._train_idx:
for i in range(self._hyperparams['num_samples']):
self._take_sample(itr, cond, i)

traj_sample_lists = [
self.agent.get_samples(cond, -self._hyperparams['num_samples'])
for cond in range(self._conditions)
for cond in self._train_idx
]
self._take_iteration(itr, traj_sample_lists)
pol_sample_lists = self._take_policy_samples()
self._log_data(itr, traj_sample_lists, pol_sample_lists)

self._end()

def test_policy(self, itr, N):
"""
Take N policy samples of the algorithm state at iteration itr,
for testing the policy to see how it is behaving.
(Called directly from the command line --policy flag).
Args:
itr: the iteration from which to take policy samples
N: the number of policy samples to take
Returns: None
"""
algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr
self.algorithm = self.data_logger.unpickle(algorithm_file)
if self.algorithm is None:
print("Error: cannot find '%s.'" % algorithm_file)
os._exit(1) # called instead of sys.exit(), since t
traj_sample_lists = self.data_logger.unpickle(self._data_files_dir +
('traj_sample_itr_%02d.pkl' % itr))

pol_sample_lists = self._take_policy_samples(N)
self.data_logger.pickle(
self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
copy.copy(pol_sample_lists)
)

if self.gui:
self.gui.update(itr, self.algorithm, self.agent,
traj_sample_lists, pol_sample_lists)
self.gui.set_status_text(('Took %d policy sample(s) from ' +
'algorithm state at iteration %d.\n' +
'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr))

def _initialize(self, itr_load):
"""
Initialize from the specified iteration.
@@ -111,7 +84,7 @@ def _initialize(self, itr_load):
if self.algorithm is None:
print("Error: cannot find '%s.'" % algorithm_file)
os._exit(1) # called instead of sys.exit(), since this is in a thread

if self.gui:
traj_sample_lists = self.data_logger.unpickle(self._data_files_dir +
('traj_sample_itr_%02d.pkl' % itr_load))
@@ -182,29 +155,20 @@ def _take_iteration(self, itr, sample_lists):
"""
if self.gui:
self.gui.set_status_text('Calculating.')
self.gui.start_display_calculating()
self.algorithm.iteration(sample_lists)
if self.gui:
self.gui.stop_display_calculating()

def _take_policy_samples(self, N=None):
"""
Take samples from the policy to see how it's doing.
Args:
N : number of policy samples to take per condition
Returns: None
"""
def _take_policy_samples(self):
""" Take samples from the policy to see how it's doing. """
if 'verbose_policy_trials' not in self._hyperparams:
return None
if not N:
N = self._hyperparams['verbose_policy_trials']
if self.gui:
self.gui.set_status_text('Taking policy samples.')
pol_samples = [[None for _ in range(N)] for _ in range(self._conditions)]
for cond in range(self._conditions):
for i in range(N):
pol_samples = [[None for _ in range(self._hyperparams['verbose_policy_trials'])]
for _ in self._test_idx]
for cond in range(len(self._test_idx)):
for i in range(self._hyperparams['verbose_policy_trials']):
pol_samples[cond][i] = self.agent.sample(
self.algorithm.policy_opt.policy, cond,
self.algorithm.policy_opt.policy, self._test_idx[cond],
verbose=True, save=False)
return [SampleList(samples) for samples in pol_samples]

@@ -224,6 +188,8 @@ def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
self.gui.save_figure(
self._data_files_dir + ('figure_itr_%02d.png' % itr)
)
if 'no_sample_logging' in self._hyperparams['common']:
return
self.data_logger.pickle(
self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
copy.copy(self.algorithm)
@@ -238,13 +204,8 @@ def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
copy.copy(pol_sample_lists)
)


def _end(self):
""" Finish running and exit. """
target = self._hyperparams['algorithm']['cost']['data_types'].values()[0]['target_state']
self.algorithm.policy_opt.policy.pickle_policy(self.agent.dO, self.agent.dU,
self._data_files_dir + 'policies/',
goal_state=target)
if self.gui:
self.gui.set_status_text('Training complete.')
self.gui.end_mode()
@@ -261,45 +222,22 @@ def main():
help='run target setup')
parser.add_argument('-r', '--resume', metavar='N', type=int,
help='resume training from iter N')
parser.add_argument('-p', '--policy', metavar='N', type=int,
help='take N policy samples (for BADMM only)')
args = parser.parse_args()

exp_name = args.experiment
resume_training_itr = args.resume
test_policy_N = args.policy

exp_dir = 'experiments/' + exp_name + '/'
hyperparams_file = exp_dir + 'hyperparams.py'

if args.new:
from shutil import copy

if os.path.exists(exp_dir):
sys.exit("Experiment '%s' already exists.\nPlease remove '%s'." %
(exp_name, exp_dir))
os.makedirs(exp_dir)

prev_exp_file = '.previous_experiment'
prev_exp_dir = None
try:
with open(prev_exp_file, 'r') as f:
prev_exp_dir = f.readline()
copy(prev_exp_dir + 'hyperparams.py', exp_dir)
if os.path.exists(prev_exp_dir + 'targets.npz'):
copy(prev_exp_dir + 'targets.npz', exp_dir)
except IOError as e:
with open(hyperparams_file, 'w') as f:
f.write('# To get started, copy over hyperparams from another experiment.\n' +
'# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.')
with open(prev_exp_file, 'w') as f:
f.write(exp_dir)

exit_msg = ("Experiment '%s' created.\nhyperparams file: '%s'" %
(exp_name, hyperparams_file))
if prev_exp_dir and os.path.exists(prev_exp_dir):
exit_msg += "\ncopied from : '%shyperparams.py'" % prev_exp_dir
sys.exit(exit_msg)
open(hyperparams_file, 'w')
sys.exit("Experiment '%s' created.\nhyperparams file: '%s'" %
(exp_name, hyperparams_file))

if not os.path.exists(hyperparams_file):
sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" %
@@ -319,33 +257,6 @@ def main():
plt.show()
except ImportError:
sys.exit('ROS required for target setup.')
elif test_policy_N:
import random
import numpy as np
import matplotlib.pyplot as plt

random.seed(0)
np.random.seed(0)

data_files_dir = exp_dir + 'data_files/'
data_filenames = os.listdir(data_files_dir)
algorithm_prefix = 'algorithm_itr_'
algorithm_filenames = [f for f in data_filenames if f.startswith(algorithm_prefix)]
current_algorithm = sorted(algorithm_filenames, reverse=True)[0]
current_itr = int(current_algorithm[len(algorithm_prefix):len(algorithm_prefix)+2])

gps = GPSMain(hyperparams.config)
if hyperparams.config['gui_on']:
test_policy = threading.Thread(
target=lambda: gps.test_policy(itr=current_itr, N=test_policy_N)
)
test_policy.daemon = True
test_policy.start()

plt.ioff()
plt.show()
else:
gps.test_policy(itr=current_itr, N=test_policy_N)
else:
import random
import numpy as np
17 python/gps/sample/sample.py 100644 → 100755
@@ -17,6 +17,7 @@ def __init__(self, agent):
self.dX = agent.dX
self.dU = agent.dU
self.dO = agent.dO
self.dM = agent.dM

# Dictionary containing the sample data from various sensors.
self._data = {}
@@ -25,13 +26,16 @@ def __init__(self, agent):
self._X.fill(np.nan)
self._obs = np.empty((self.T, self.dO))
self._obs.fill(np.nan)
self._meta = np.empty(self.dM)
self._meta.fill(np.nan)

def set(self, sensor_name, sensor_data, t=None):
""" Set trajectory data for a particular sensor. """
if t is None:
self._data[sensor_name] = sensor_data
self._X.fill(np.nan) # Invalidate existing X.
self._obs.fill(np.nan) # Invalidate existing obs.
self._meta.fill(np.nan) # Invalidate existing meta data.
else:
if sensor_name not in self._data:
self._data[sensor_name] = \
@@ -69,11 +73,24 @@ def get_obs(self, t=None):
for data_type in self._data:
if data_type not in self.agent.obs_data_types:
continue
if data_type in self.agent.meta_data_types:
continue
data = (self._data[data_type] if t is None
else self._data[data_type][t, :])
self.agent.pack_data_obs(obs, data, data_types=[data_type])
return obs

def get_meta(self):
""" Get the meta data. Put it together if not precomputed. """
meta = self._meta
if np.any(np.isnan(meta)):
for data_type in self._data:
if data_type not in self.agent.meta_data_types:
continue
data = self._data[data_type]
self.agent.pack_data_meta(meta, data, data_types=[data_type])
return meta

# For pickling.
def __getstate__(self):
state = self.__dict__.copy()
81 src/3rdparty/mjcpy2/mjcpy2.cpp 100644 → 100755
@@ -54,7 +54,9 @@ class PyMJCWorld2 {

PyMJCWorld2(const std::string& loadfile);
bp::object Step(const bn::ndarray& x, const bn::ndarray& u);
void Plot(const bn::ndarray& x);
void Plot(const bn::ndarray& x);
void InitCam(float cx,float cy,float cz,float px,float py,float pz);
void InitViewer(int width, int height, float cx,float cy,float cz,float px,float py,float pz);
void Idle(const bn::ndarray& x);
bn::ndarray GetCOMMulti(const bn::ndarray& x);
bn::ndarray GetJacSite(int site);
@@ -63,14 +65,18 @@ class PyMJCWorld2 {
void SetModel(bp::dict d);
bp::dict GetData();
void SetData(bp::dict d);
bn::ndarray GetImage(const bn::ndarray& x);
bp::dict GetImage();
bp::dict GetImageScaled(int width, int height);
void SetNumSteps(int n) {m_numSteps=n;}
void SetCamera(float x, float y, float z, float px, float py, float pz);

~PyMJCWorld2();
private:
// PyMJCWorld(const PyMJCWorld&) {}

void _PlotInit();
void _PlotInit(float x, float y, float z, float px, float py, float pz);
void _PlotInit(int width, int height, float x, float y, float z, float px, float py, float pz);

// void _SetState(const mjtNum* xdata) {mju_copy(m_data->qpos, (xdata), NQ); mju_copy(m_data->qvel, (xdata)+NQ, NV); }
// void _SetControl(const mjtNum* udata) {for (int i=0; i < m_actuatedDims.size(); ++i) m_u[m_actuatedDims[i]] = (udata)[i];}
@@ -213,6 +219,20 @@ void PyMJCWorld2::_PlotInit() {
}
}

void PyMJCWorld2::_PlotInit(float x, float y, float z, float px, float py, float pz) {
if (m_viewer == NULL) {
m_viewer = new MujocoOSGViewer(osg::Vec3(x, y, z),osg::Vec3(px, py, pz));
m_viewer->SetModel(m_model);
}
}

void PyMJCWorld2::_PlotInit(int width, int height, float x, float y, float z, float px, float py, float pz) {
if (m_viewer == NULL) {
m_viewer = new MujocoOSGViewer(width, height, osg::Vec3(x, y, z),osg::Vec3(px, py, pz));
m_viewer->SetModel(m_model);
}
}

void PyMJCWorld2::Plot(const bn::ndarray& x) {
FAIL_IF_FALSE(x.get_dtype() == MJTNUM_DTYPE && x.get_nd() == 1 && x.get_flags() & bn::ndarray::C_CONTIGUOUS);
_PlotInit();
@@ -221,6 +241,14 @@ void PyMJCWorld2::Plot(const bn::ndarray& x) {
m_viewer->RenderOnce();
}

void PyMJCWorld2::InitCam(float cx, float cy, float cz, float px, float py, float pz) {
_PlotInit(cx, cy, cz, px, py, pz);
}

void PyMJCWorld2::InitViewer(int width, int height, float cx, float cy, float cz, float px, float py, float pz) {
_PlotInit(width, height, cx, cy, cz, px, py, pz);
}

void PyMJCWorld2::Idle(const bn::ndarray& x) {
FAIL_IF_FALSE(x.get_dtype() == MJTNUM_DTYPE && x.get_nd() == 1 && x.get_flags() & bn::ndarray::C_CONTIGUOUS);
_PlotInit();
@@ -281,6 +309,49 @@ void PyMJCWorld2::SetData(bp::dict d) {

}

// grabs current image, returns an array of shape [height, width, channels]
bp::dict PyMJCWorld2::GetImage() {
m_viewer->RenderOnce();
bp::dict out;
const unsigned char* tmp = static_cast<const unsigned char*>(m_viewer->m_image->getDataPointer());
//out["pixel_data"] = toNdarray1<unsigned char>(tmp, m_viewer->m_image->getTotalDataSize ());
out["num_pixels"] = m_viewer->m_image->getTotalDataSize();
out["width"] = m_viewer->m_image->s();
out["height"] = m_viewer->m_image->t();
int num_channels = 0;
if (m_viewer->m_image->getTotalDataSize() > 0)
{
num_channels = m_viewer->m_image->getTotalDataSize() / m_viewer->m_image->s() / m_viewer->m_image->t();
}
out["img"] = toNdarray3<unsigned char>(tmp, m_viewer->m_image->t(), m_viewer->m_image->s(), num_channels);
out["num_channels"] = num_channels;
return out;
}

// grabs current image, returns an array of shape [height, width, channels]
bp::dict PyMJCWorld2::GetImageScaled(int width, int height) {
m_viewer->RenderOnce();
m_viewer->m_image->scaleImage(width,height,m_viewer->m_image->r());
bp::dict out;
const unsigned char* tmp = static_cast<const unsigned char*>(m_viewer->m_image->getDataPointer());
//out["pixel_data"] = toNdarray1<unsigned char>(tmp, m_viewer->m_image->getTotalDataSize ());
out["num_pixels"] = m_viewer->m_image->getTotalDataSize();
out["width"] = m_viewer->m_image->s();
out["height"] = m_viewer->m_image->t();
int num_channels = 0;
if (m_viewer->m_image->getTotalDataSize() > 0)
{
num_channels = m_viewer->m_image->getTotalDataSize() / m_viewer->m_image->s() / m_viewer->m_image->t();
}
out["img"] = toNdarray3<unsigned char>(tmp, m_viewer->m_image->t(), m_viewer->m_image->s(), num_channels);
out["num_channels"] = num_channels;
return out;
}

void PyMJCWorld2::SetCamera(float x, float y, float z, float px, float py, float pz){
// place camera at (x,y,z) pointing to (px,py,pz)
m_viewer->SetCamera(x,y,z,px,py,pz);
}

BOOST_PYTHON_MODULE(mjcpy) {
bn::initialize();
@@ -300,13 +371,17 @@ BOOST_PYTHON_MODULE(mjcpy) {
.def("get_data",&PyMJCWorld2::GetData)
.def("set_data",&PyMJCWorld2::SetData)
.def("plot",&PyMJCWorld2::Plot)
.def("init_cam",&PyMJCWorld2::InitCam)
.def("init_viewer",&PyMJCWorld2::InitViewer)
.def("idle",&PyMJCWorld2::Idle)
.def("get_COM_multi",&PyMJCWorld2::GetCOMMulti)
.def("get_jac_site",&PyMJCWorld2::GetJacSite)
.def("kinematics",&PyMJCWorld2::Kinematics)
// .def("SetModel",&PyMJCWorld::SetModel)
// .def("GetImage",&PyMJCWorld::GetImage)
.def("get_image",&PyMJCWorld2::GetImage)
.def("get_image_scaled",&PyMJCWorld2::GetImageScaled)
.def("set_num_steps",&PyMJCWorld2::SetNumSteps)
.def("set_camera",&PyMJCWorld2::SetCamera)
;


@@ -22,6 +22,32 @@

#define PLOT_JOINTS 0

class PhotoCallback : public osg::Camera::DrawCallback
{
public:
PhotoCallback( osg::Image* img )
: _image(img), _fileIndex(0) {}

virtual void operator()( osg::RenderInfo& renderInfo ) const
{
if ( _image.valid() )
{
osg::GraphicsContext* gc = renderInfo.getState()->getGraphicsContext();
if ( gc->getTraits() )
{
int width = gc->getTraits()->width;
int height = gc->getTraits()->height;
GLenum pixelFormat = (gc->getTraits()->alpha ? GL_RGBA : GL_RGB);
_image->readPixels( 0, 0, width, height, pixelFormat, GL_UNSIGNED_BYTE );
}
}
}

protected:
osg::ref_ptr<osg::Image> _image;
mutable int _fileIndex;
};

struct EventHandler : public osgGA::GUIEventHandler
{
EventHandler( MujocoOSGViewer* parent ) :
@@ -279,6 +305,11 @@ MujocoOSGViewer::MujocoOSGViewer()
m_root = new osg::Group;
m_robot = new osg::Group;
m_root->addChild(m_robot);

m_image = new osg::Image;
osg::ref_ptr<PhotoCallback> pcb = new PhotoCallback( m_image.get() );
m_viewer.getCamera()->setPostDrawCallback( pcb.get() );

m_viewer.setSceneData(m_root.get());
m_viewer.setUpViewInWindow(0, 0, 640, 480);
m_viewer.realize();
@@ -291,6 +322,64 @@ MujocoOSGViewer::MujocoOSGViewer()
m_viewer.addEventHandler(m_handler.get());
}

MujocoOSGViewer::MujocoOSGViewer(int width, int height, osg::Vec3 cam_pos, osg::Vec3 cam_target)
: m_data(NULL), m_model(NULL)
{
m_viewer.setThreadingModel(osgViewer::ViewerBase::SingleThreaded);

m_root = new osg::Group;
m_robot = new osg::Group;
m_root->addChild(m_robot);

m_image = new osg::Image;
osg::ref_ptr<PhotoCallback> pcb = new PhotoCallback( m_image.get() );
m_viewer.getCamera()->setPostDrawCallback( pcb.get() );

m_viewer.setSceneData(m_root.get());
m_viewer.setUpViewInWindow(0, 0, width, height);
m_viewer.realize();

osg::ref_ptr<osgGA::TrackballManipulator> man = new osgGA::TrackballManipulator;
man->setHomePosition(cam_pos, cam_target, cam_pos);
m_viewer.setCameraManipulator(man);

m_handler = new EventHandler(this);
m_viewer.addEventHandler(m_handler.get());
}

MujocoOSGViewer::MujocoOSGViewer(osg::Vec3 cam_pos, osg::Vec3 cam_target)
: m_data(NULL), m_model(NULL)
{
m_viewer.setThreadingModel(osgViewer::ViewerBase::SingleThreaded);

m_root = new osg::Group;
m_robot = new osg::Group;
m_root->addChild(m_robot);

m_image = new osg::Image;
osg::ref_ptr<PhotoCallback> pcb = new PhotoCallback( m_image.get() );
m_viewer.getCamera()->setPostDrawCallback( pcb.get() );

m_viewer.setSceneData(m_root.get());
m_viewer.setUpViewInWindow(0, 0, 640, 480);
m_viewer.realize();

osg::ref_ptr<osgGA::TrackballManipulator> man = new osgGA::TrackballManipulator;
man->setHomePosition(cam_pos, cam_target, osg::Vec3(-1, -1.5, 2));
m_viewer.setCameraManipulator(man);

m_handler = new EventHandler(this);
m_viewer.addEventHandler(m_handler.get());
}

void MujocoOSGViewer::SetCamera(float x, float y, float z, float px, float py, float pz){
// place camera at (x,y,z) pointing to (px,py,pz)
m_viewer.getCameraManipulator()->setHomePosition(osg::Vec3(x, y, z), osg::Vec3(px, py, pz), osg::Vec3(x, y, z),false);
//osg::ref_ptr<osgGA::CameraManipulator> man = m_viewer.getCameraManipulator();
//man->setHomePosition(osg::Vec3(0.5, -1.5, 5), osg::Vec3(0.5, -1.5, 0), osg::Vec3(-1, -1.5, 2));
//m_viewer.setCameraManipulator(man);
}

void MujocoOSGViewer::Idle() {
EventHandler* handler = dynamic_cast<EventHandler*>(m_handler.get());
handler->m_idling = true;
@@ -9,19 +9,23 @@
class MujocoOSGViewer {
public:
MujocoOSGViewer();
MujocoOSGViewer(osg::Vec3, osg::Vec3);
MujocoOSGViewer(int, int, osg::Vec3, osg::Vec3);
void Idle(); // Block and draw in a loop until the 'p' key is pressed
void RenderOnce();
void HandleInput();
void StartAsyncRendering();
void StopAsyncRendering(); //
void SetModel(const mjModel*);
void SetData(const mjData*);
void _UpdateTransforms();
void _UpdateTransforms();
void SetCamera(float x, float y, float z, float px, float py, float pz);

// osg::ref_ptr<EventHandler> m_handler;
mjData* m_data;
const mjModel* m_model;
osg::ref_ptr<osg::Group> m_root, m_robot;
osg::ref_ptr<osg::Image> m_image;
osgViewer::Viewer m_viewer;
std::vector<osg::MatrixTransform*> m_geomTFs, m_axTFs;
osg::ref_ptr<osgGA::GUIEventHandler> m_handler;
9 src/proto/gps.proto 100644 → 100755
@@ -20,6 +20,9 @@ enum SampleType {
TOTAL_DATA_TYPES = 13;
POSITION = 14;
LINEAR_VELOCITY = 15;
RGB_IMAGE_SIZE = 16;
DOMAIN_IMAGE = 17;
DOMAIN_IMAGE_SIZE = 18;
}

// Message containing the data for a single sample.
@@ -30,10 +33,11 @@ message Sample {
optional uint32 dU = 3; // dimension of action U
optional uint32 dO = 4; // dimension of observation

// Data arrays holding X, U, and obs.
// Data arrays holding X, U, obs, and meta data.
repeated float X = 5 [packed = true];
repeated float U = 6 [packed = true];
repeated float obs = 7 [packed = true];
repeated float meta = 8 [packed = true];
}

// Enum for actuator types (Specified in Relax/PositionCommand msgs)
@@ -54,6 +58,5 @@ enum PositionControlMode {
enum ControllerType {
LIN_GAUSS_CONTROLLER = 0;
CAFFE_CONTROLLER = 1;
TF_CONTROLLER = 2;
TOTAL_CONTROLLER_TYPES = 3;
TOTAL_CONTROLLER_TYPES = 2;
}