Commit 6d67939

so many things... trying to fix reacher3 now
wmontgomery4 committed May 19, 2016
1 parent f285901 commit 6d67939
Showing 45 changed files with 2,079 additions and 44 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -89,6 +89,7 @@ MUJOCO_LOG.TXT

# Data files
*.pkl
*.csv

# Experiment files
experiments/**/data_files/
110 changes: 110 additions & 0 deletions analyze.py
@@ -0,0 +1,110 @@
import sys, imp
import os.path
import cPickle
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.debugger import Tracer; debug_here = Tracer()

# Add gps/python to path so that imports work.
# (should be run from ~/gps)
sys.path.append(os.path.abspath('python'))
from gps.sample.sample_list import SampleList

tasks = ['peg_blind_big']
expts = ['lqr', 'badmm', 'mdgps_lqr', 'mdgps_nn', 'mdgps_lqr_new', 'mdgps_nn_new']
seeds = [0, 1, 2]
iters = range(30)
colors = ['k', 'r', 'b', 'c', 'm', 'g']

SUCCESS_THRESHOLD = -0.5 + 0.06

def pickle_final_eepts(task, expt, seed, itr):
print "Pickling task %s, expt %s, seed %s, itr %s" % (task, expt, seed, itr)

# extract samples
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)
if expt == 'lqr':
fname = "%s/traj_sample_itr_%02d.pkl" % (dirname, itr)
else:
fname = "%s/pol_sample_itr_%02d.pkl" % (dirname, itr)

with open(fname, 'rb') as f:
samples = cPickle.load(f)

# extract final end-effector points (entry 3 of each sample's _data, taken at the last timestep)
final_eepts = np.array([[s._data[3][-1] for s in ss] for ss in samples]) #(M, N, num_eepts)

# save final_eepts for faster replotting
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
with open(fname, 'wb') as f:
cPickle.dump(final_eepts, f, -1)

def unpickle_final_eepts(task, expt, seed, itr):
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
with open(fname, 'rb') as f:
final_eepts = cPickle.load(f)
return final_eepts

def get_final_eepts(task, expt, itr):
print "Processing task %s, expt %s, itr %s" % (task, expt, itr)

eepts = []
for seed in seeds:
dirname = "experiments/%s/%s/%s/data_files" % (task, expt, seed)

# skip if the seed hasn't been run
fname = "%s/algorithm_itr_%02d.pkl" % (dirname, itr)
if not os.path.exists(fname):
continue

# pickle if the seed hasn't been pickled
fname = "%s/final_eepts_itr_%02d.pkl" % (dirname, itr)
if not os.path.exists(fname):
pickle_final_eepts(task, expt, seed, itr)

eepts.append(unpickle_final_eepts(task, expt, seed, itr))

return np.array(eepts) # (num_seeds, M, N, num_eepts)

task = tasks[0]
csv = open('peg_blind_big.csv', 'w')
csv.write("iteration\t")
[csv.write("%12s\t" % expt) for expt in expts]
csv.write("\n")
for i in [10, 20, 30]:
csv.write("%8s\t" % i)
for expt in expts:
eepts = get_final_eepts(task, expt, i-1)
if eepts.shape[0] == 0:
csv.write("%12s\t" % "N/A")
continue
zpos = eepts[:, :, :, 2].flatten()
pct = np.mean(zpos < SUCCESS_THRESHOLD)
csv.write("%12f\t" % pct)
csv.write('\n')

#def plot_expts(expts, colors):
# for expt, color in zip(expts, colors):
# print "Plotting %s" % expt
#
# Z = np.zeros((iters, 2))
# for itr in range(iters):
# Z[itr] = process_z(expt, itr)
#
# plt.errorbar(range(iters), Z[:,0] + 0.5, Z[:,1], c=color, label=expt)
#
# height = 0.1*np.ones(iters)
# plt.plot(range(iters), height, 'k--')
# plt.legend()
# plt.xlabel('Iterations')
# plt.xlim((0, iters-1))
# plt.ylabel('Distance to target')
# plt.ylim((0, 0.5))
#
#
#
#plot_expts(expts, colors)
#plt.title("Blind Peg Insertion")
#plt.savefig("peg_blind_big.png")
#plt.clf()
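
The commented-out plotting block above relies on a process_z helper that is not part of this commit, so as a lighter-weight check of the script's actual output, here is a minimal sketch (not from the repository) for reading back the tab-separated table that analyze.py writes to peg_blind_big.csv. It assumes exactly the layout produced by the write loop above: a header row with "iteration" followed by one column per expt, one data row per reported iteration (10, 20, 30), a trailing tab at the end of every row, and the string N/A wherever no seed has been run.

import csv

with open('peg_blind_big.csv') as f:
    # drop the empty trailing field left by the final tab on each row
    rows = [[c.strip() for c in row if c.strip()]
            for row in csv.reader(f, delimiter='\t')]

expt_names = rows[0][1:]   # header row: "iteration", then one column per expt
for row in rows[1:]:
    itr, rates = row[0], row[1:]
    for expt, rate in zip(expt_names, rates):
        print("iter %s  %-13s %s" % (itr, expt, rate))
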
2 changes: 1 addition & 1 deletion experiments/debug_peg_blind/hyperparams.py
@@ -60,7 +60,7 @@

algorithm = {
'conditions': common['conditions'],
-    'iterations': 20,
+    'iterations': 30,
'kl_step': 2.0,
'min_step_mult': 0.01,
'max_step_mult': 1.0,
51 changes: 51 additions & 0 deletions experiments/peg/mdgps_lqr_new/0/hyperparams.py
@@ -0,0 +1,51 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
})

common['info'] = generate_experiment_info(config)
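
Each of the per-seed configs in this commit follows the same composition pattern: imp.load_source pulls in the shared defaults at BASE_DIR + '/hyperparams.py', the common / algorithm / config dicts are copied, and only a handful of keys are overridden (the experiment paths, the MDGPS algorithm class, the step rule, and the seed). As a rough, hypothetical sketch of the minimum such a shared defaults module has to expose for these overrides to work (the real file in the repository defines much more: agent, cost, sample counts, and so on), with the algorithm values borrowed from the debug_peg_blind hunk earlier in this diff and the condition count chosen arbitrarily:

""" Hypothetical, stripped-down defaults standing in for experiments/peg/hyperparams.py. """

common = {
    'conditions': 4,  # assumed number of initial conditions, not taken from the repo
}

algorithm = {
    'conditions': common['conditions'],
    'iterations': 30,
    'kl_step': 2.0,
    'min_step_mult': 0.01,
    'max_step_mult': 1.0,
}

config = {
    'common': common,
    'algorithm': algorithm,
}
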
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_lqr_new/1/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 1,
})

common['info'] = generate_experiment_info(config)
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_lqr_new/2/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': False,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 2,
})

common['info'] = generate_experiment_info(config)
52 changes: 52 additions & 0 deletions experiments/peg/mdgps_nn_new/0/hyperparams.py
@@ -0,0 +1,52 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from IPython.core.debugger import Tracer; debug_here = Tracer()

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': True,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
})

common['info'] = generate_experiment_info(config)
53 changes: 53 additions & 0 deletions experiments/peg/mdgps_nn_new/1/hyperparams.py
@@ -0,0 +1,53 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.traj_opt.traj_opt_lqr_python_mdgps import TrajOptLQRPythonMDGPS
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from IPython.core.debugger import Tracer; debug_here = Tracer()

BASE_DIR = '/'.join(str.split(__file__, '/')[:-3])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

# Algorithm
algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'agent_use_nn_policy': True,
'step_rule': 'new',
})

algorithm['traj_opt'] = {
'type': TrajOptLQRPythonMDGPS,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
'iterations': 4000,
}

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'verbose_policy_trials': 1,
'seed': 1,
})

common['info'] = generate_experiment_info(config)
