Commit 6d67939

so many things... trying to fix reacher3 now
wmontgomery4 committed May 19, 2016
1 parent f285901 commit 6d67939
Showing 45 changed files with 2,079 additions and 44 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -89,6 +89,7 @@ MUJOCO_LOG.TXT

# Data files
*.pkl
*.csv

# Experiment files
experiments/**/data_files/
110 changes: 110 additions & 0 deletions analyze.py
@@ -0,0 +1,110 @@
import sys, imp
import os.path
import cPickle
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.debugger import Tracer; debug_here = Tracer()

# Add gps/python to path so that imports work.
# (should be run from ~/gps)
sys.path.append(os.path.abspath('python'))
from gps.sample.sample_list import SampleList

tasks = ['peg_blind_big']
expts = ['lqr', 'badmm', 'mdgps_lqr', 'mdgps_nn', 'mdgps_lqr_new', 'mdgps_nn_new']
seeds = [0, 1, 2]
iters = range(30)
colors = ['k', 'r', 'b', 'c', 'm', 'g']

SUCCESS_THRESHOLD = -0.5 + 0.06

def pickle_final_eepts(task, expt, seed, itr):
print "Pickling task %s, expt %s, seed %s, itr %s" % (task, expt, seed, itr)

# extract samples
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)
if expt == 'lqr':
fname = "%s/traj_sample_itr_%02d.pkl" % (dirname, itr)
else:
fname = "%s/pol_sample_itr_%02d.pkl" % (dirname, itr)

with open(fname, 'rb') as f:
samples = cPickle.load(f)

# extract final end-effector points (entry 3 of each sample's _data, taken at the last timestep)
final_eepts = np.array([[s._data[3][-1] for s in ss] for ss in samples]) #(M, N, num_eepts)

# save final_eepts for faster replotting
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
with open(fname, 'wb') as f:
cPickle.dump(final_eepts, f, -1)

def unpickle_final_eepts(task, expt, seed, itr):
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
with open(fname, 'rb') as f:
final_eepts = cPickle.load(f)
return final_eepts

def get_final_eepts(task, expt, itr):
print "Processing task %s, expt %s, itr %s" % (task, expt, itr)

eepts = []
for seed in seeds:
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)

# skip if the seed hasn't been run
fname = "%s/algorithm_itr_%02d.pkl" % (dirname, itr)
if not os.path.exists(fname):
continue

# pickle if the seed hasn't been pickled
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
if not os.path.exists(fname):
pickle_final_eepts(task, expt, seed, itr)

eepts.append(unpickle_final_eepts(task, expt, seed, itr))

return np.array(eepts) # (num_seeds, M, N, num_eepts)

task = tasks[0]
csv = open('peg_blind_big.csv', 'w')
csv.write("iteration\t")
[csv.write("%12s\t" % expt) for expt in expts]
csv.write("\n")
for i in [10, 20, 30]:
csv.write("%8s\t" % i)
for expt in expts:
eepts = get_final_eepts(task, expt, i-1)
if eepts.shape[0] == 0:
csv.write("%12s\t" % "N/A")
continue
zpos = eepts[:, :, :, 2].flatten()
pct = np.mean(zpos < SUCCESS_THRESHOLD)
csv.write("%12f\t" % pct)
csv.write('\n')

#def plot_expts(expts, colors):
# for expt, color in zip(expts, colors):
# print "Plotting %s" % expt
#
# Z = np.zeros((iters, 2))
# for itr in range(iters):
# Z[itr] = process_z(expt, itr)
#
# plt.errorbar(range(iters), Z[:,0] + 0.5, Z[:,1], c=color, label=expt)
#
# height = 0.1*np.ones(iters)
# plt.plot(range(iters), height, 'k--')
# plt.legend()
# plt.xlabel('Iterations')
# plt.xlim((0, iters-1))
# plt.ylabel('Distance to target')
# plt.ylim((0, 0.5))
#
#
#
#plot_expts(expts, colors)
#plt.title("Blind Peg Insertion")
#plt.savefig("peg_blind_big.png")
#plt.clf()
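
The commented-out plotting block above relies on a process_z helper that is not part of this commit, so as a lighter-weight check of the script's actual output, here is a minimal sketch (not from the repository) for reading back the tab-separated table that analyze.py writes to peg_blind_big.csv. It assumes exactly the layout produced by the write loop above: a header row with "iteration" followed by one column per expt, one data row per reported iteration (10, 20, 30), a trailing tab at the end of every row, and the string N/A wherever no seed has been run.

import csv

with open('peg_blind_big.csv') as f:
    # drop the empty trailing field left by the final tab on each row
    rows = [[c.strip() for c in row if c.strip()]
            for row in csv.reader(f, delimiter='\t')]

expt_names = rows[0][1:]   # header row: "iteration", then one column per expt
for row in rows[1:]:
    itr, rates = row[0], row[1:]
    for expt, rate in zip(expt_names, rates):
        print("iter %s  %-13s %s" % (itr, expt, rate))
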
2 changes: 1 addition & 1 deletion experiments/debug_peg_blind/hyperparams.py
@@ -60,7 +60,7 @@

algorithm = {
'conditions': common['conditions'],
-    'iterations': 20,
+    'iterations': 30,
'kl_step': 2.0,
'min_step_mult': 0.01,
'max_step_mult': 1.0,
51 changes: 51 additions & 0 deletions experiments/peg/mdgps_lqr_new/0/hyperparams.py
@@ -0,0 +1,51 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
})

common['info'] = generate_experiment_info(config)
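
Each of the per-seed configs in this commit follows the same composition pattern: imp.load_source pulls in the shared defaults at BASE_DIR + '/hyperparams.py', the common / algorithm / config dicts are copied, and only a handful of keys are overridden (the experiment paths, the MDGPS algorithm class, the step rule, and the seed). As a rough, hypothetical sketch of the minimum such a shared defaults module has to expose for these overrides to work (the real file in the repository defines much more: agent, cost, sample counts, and so on), with the algorithm values borrowed from the debug_peg_blind hunk earlier in this diff and the condition count chosen arbitrarily:

""" Hypothetical, stripped-down defaults standing in for experiments/peg/hyperparams.py. """

common = {
    'conditions': 4,  # assumed number of initial conditions, not taken from the repo
}

algorithm = {
    'conditions': common['conditions'],
    'iterations': 30,
    'kl_step': 2.0,
    'min_step_mult': 0.01,
    'max_step_mult': 1.0,
}

config = {
    'common': common,
    'algorithm': algorithm,
}
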
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_lqr_new/1/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 1,
})

common['info'] = generate_experiment_info(config)
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_lqr_new/2/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 2,
})

common['info'] = generate_experiment_info(config)
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_nn_new/0/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from IPython.core.debugger import Tracer; debug_here = Tracer()

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': True,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
})

common['info'] = generate_experiment_info(config)
53 changes: 53 additions & 0 deletions experiments/peg/mdgps_nn_new/1/hyperparams.py
@@ -0,0 +1,53 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from IPython.core.debugger import Tracer; debug_here = Tracer()

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': True,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 1,
})

common['info'] = generate_experiment_info(config)
