Merge pull request #1 from tianheyu927/kevin_update

Kevin update
cbfinn · Jul 16, 2016 · 6b57b6d · 6b57b6d
2 parents a270d18 + c021862
commit 6b57b6d
Show file tree

Hide file tree

Showing 31 changed files with 2,332 additions and 97 deletions.
diff --git a/experiments/mjc_badmm_example/hyperparams.py b/experiments/mjc_badmm_example/hyperparams.py
@@ -61,8 +61,10 @@
     'substeps': 5,
     'conditions': common['conditions'],
     'pos_body_idx': np.array([1]),
-    'pos_body_offset': [np.array([0.1, 0.1, 0]), np.array([0.1, -0.1, 0]),
-                        np.array([-0.1, -0.1, 0]), np.array([-0.1, 0.1, 0])],
+    # 'pos_body_offset': [np.array([0.1, 0.1, 0]), np.array([0.1, -0.1, 0]),
+    #                     np.array([-0.1, -0.1, 0]), np.array([-0.1, 0.1, 0])],
+    'pos_body_offset': [np.array([0.08, 0.08, 0]), np.array([0.08, -0.08, 0]),
+                         np.array([-0.08, -0.08, 0]), np.array([-0.08, 0.08, 0])],
     'T': 100,
     'sensor_dims': SENSOR_DIMS,
     'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,

diff --git a/experiments/mjc_peg_example/hyperparams.py b/experiments/mjc_peg_example/hyperparams.py
@@ -0,0 +1,139 @@
+""" Hyperparameters for MJC peg insertion trajectory optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.algorithm_traj_opt import AlgorithmTrajOpt
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.utility.demo_utils import generate_pos_body_offset, generate_x0, generate_pos_idx
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
+EXP_DIR = BASE_DIR + '/../experiments/mjc_peg_example/'
+
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': 25,
+}
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': generate_x0(np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                      np.zeros(7)]), common['conditions']),
+    'dt': 0.05,
+    'substeps': 5,
+    'conditions': common['conditions'],
+    'pos_body_idx': generate_pos_idx(common['conditions']),
+    # 'pos_body_offset': [np.array([0, 0.2, 0]), np.array([0, 0.1, 0]),
+    #                     np.array([0, -0.1, 0]), np.array([0, -0.2, 0])],
+    'pos_body_offset': generate_pos_body_offset(common['conditions']),
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+}
+
+algorithm = {
+    'type': AlgorithmTrajOpt,
+    'conditions': common['conditions'],
+    'iterations': 10,
+    'max_ent_traj': 0.0,  # NOTE - this was not set to 1 when initial demos were generated
+    'agent_x0': agent['x0'],
+    'agent_pos_body_idx': agent['pos_body_idx'],
+    'agent_pos_body_offset': agent['pos_body_offset'],
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 5e-5 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([1, 1, 1, 1, 1, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost],
+    'weights': [1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 20,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'num_samples': 5,
+    'verbose_trials': 0,
+    'common': common,
+    'agent': agent,
+    'gui_on': True,
+    'algorithm': algorithm,
+}
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/mjc_peg_ioc_example/hyperparams.py b/experiments/mjc_peg_ioc_example/hyperparams.py
@@ -0,0 +1,191 @@
+""" Hyperparameters for MJC peg insertion trajectory optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.algorithm_traj_opt import AlgorithmTrajOpt
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_ioc_nn import CostIOCNN
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.utility.demo_utils import generate_pos_body_offset, generate_x0, generate_pos_idx
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
+EXP_DIR = BASE_DIR + '/../experiments/mjc_peg_ioc_example/'
+DEMO_DIR = BASE_DIR + '/../experiments/mjc_peg_example/'
+
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'demo_controller_file': DEMO_DIR + 'data_files/algorithm_itr_09.pkl',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': 1,
+    # 'demo_conditions': 20,
+    # 'demo_conditions': 25,
+}
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                          np.zeros(7)]),
+    'dt': 0.05,
+    'substeps': 5,
+    'conditions': common['conditions'],
+    'pos_body_idx': np.array([1]),
+    # 'pos_body_offset': [np.array([0, 0.2, 0]), np.array([0, 0.1, 0]),
+    #                     np.array([0, -0.1, 0]), np.array([0, -0.2, 0])],
+    'pos_body_offset': [np.array([0, 0.0, 0])],
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+}
+
+demo_agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': generate_x0(np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                      np.zeros(7)]), 25),
+    'dt': 0.05,
+    'substeps': 5,
+    'conditions': 25,
+    'pos_body_idx': generate_pos_idx(25),
+    # 'pos_body_offset': [np.array([0, 0.2, 0]), np.array([0, 0.1, 0]),
+    #                     np.array([0, -0.1, 0]), np.array([0, -0.2, 0])],
+    'pos_body_offset': generate_pos_body_offset(25),
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+}
+
+algorithm = {
+    'type': AlgorithmTrajOpt,
+    'conditions': common['conditions'],
+    'ioc' : True,
+    'kl_step': 0.5,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.01,
+    'max_ent_traj': 1.0,
+    'demo_distr_empest': True,
+    'demo_cond': 15,
+    # 'demo_cond': 25,
+    'num_demos': 3,
+    'iterations': 20,
+    'synthetic_cost_samples': 100,
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 5.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'final_weight': 50.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 5e-5 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([1, 1, 1, 1, 1, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+algorithm['gt_cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost],
+    'weights': [1.0, 1.0],
+}
+
+algorithm['cost'] = {
+    'type': CostIOCNN,
+    'wu': 5e-5 / PR2_GAINS,
+    'T': 100,
+    'dO': 26,
+    'iterations': 5000,
+}
+
+# algorithm['gt_cost'] = {
+#     'type': CostFK,
+#     'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+#     'wp': np.array([1, 1, 1, 1, 1, 1]),
+#     'l1': 0.1,
+#     'l2': 10.0,
+#     'alpha': 1e-5,
+# }
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 20,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'num_samples': 5,
+    'verbose_trials': 1,
+    'common': common,
+    'agent': agent,
+    'demo_agent': demo_agent,
+    'gui_on': True,
+    'algorithm': algorithm,
+}
+
+common['info'] = generate_experiment_info(config)