In [11]:
# python imports
import os
import yaml
import pickle
import json
import numpy as np
import tensorflow as tf

# algo imports
from policy_random import Policy_Random
from data_manipulation import get_indices
from helper_funcs import create_env

prt = False

# load params file
yaml_path = os.path.abspath('yaml_files/ant_trajfollow.yaml')
with open(yaml_path, 'r') as f:
    params = yaml.load(f)

# print values in param file    
for key, value in params.iteritems():
    if prt:
        print '\n', key, value
    

In [2]:
# Unpack the loaded params dict into notebook-level variables, one section at a time.
which_agent = params['which_agent']
follow_trajectories = params['follow_trajectories']

# data collection
# threading is forced off here rather than read from params['data_collection']['use_threading']
use_threading = False
collection_cfg = params['data_collection']
num_rollouts_train = collection_cfg['num_rollouts_train']
num_rollouts_val = collection_cfg['num_rollouts_val']

# dynamics model
dyn_cfg = params['dyn_model']
num_fc_layers = dyn_cfg['num_fc_layers']
depth_fc_layers = dyn_cfg['depth_fc_layers']
batchsize = dyn_cfg['batchsize']
lr = dyn_cfg['lr']
nEpoch = dyn_cfg['nEpoch']
fraction_use_new = dyn_cfg['fraction_use_new']

# controller
controller_cfg = params['controller']
horizon = controller_cfg['horizon']
num_control_samples = controller_cfg['num_control_samples']
if which_agent == 1:
    # The script version additionally required args.desired_traj_type=='straight';
    # that condition is dropped in this notebook, so agent 1 always uses 3000
    # samples regardless of the value in the params file.
    num_control_samples = 3000

# aggregation
aggregation_cfg = params['aggregation']
num_aggregation_iters = aggregation_cfg['num_aggregation_iters']
num_trajectories_for_aggregation = aggregation_cfg['num_trajectories_for_aggregation']
rollouts_forTraining = aggregation_cfg['rollouts_forTraining']

# noise
noise_cfg = params['noise']
make_aggregated_dataset_noisy = noise_cfg['make_aggregated_dataset_noisy']
make_training_dataset_noisy = noise_cfg['make_training_dataset_noisy']
noise_actions_during_MPC_rollouts = noise_cfg['noise_actions_during_MPC_rollouts']

# steps
steps_cfg = params['steps']
dt_steps = steps_cfg['dt_steps']
steps_per_episode = steps_cfg['steps_per_episode']
steps_per_rollout_train = steps_cfg['steps_per_rollout_train']
steps_per_rollout_val = steps_cfg['steps_per_rollout_val']

# saving
min_rew_for_saving = params['saving']['min_rew_for_saving']

# generic
generic_cfg = params['generic']
visualize_True = generic_cfg['visualize_True']
visualize_False = generic_cfg['visualize_False']

In [3]:
def make_run_dirs(root, subdirs):
    """Create `root` and every name in `subdirs` directly beneath it.

    Directories that already exist are left untouched, so the call is safe to
    repeat and also repairs a run directory that is missing some subfolders.

    Args:
        root: path of the run directory.
        subdirs: iterable of subdirectory names to create inside `root`.

    Raises:
        OSError: if a path cannot be created (e.g. it exists as a regular file).
    """
    for path in [root] + [root + '/' + name for name in subdirs]:
        if not os.path.isdir(path):
            os.makedirs(path)


# build directory in order to save all collected data
# The run number is hard-coded (the script version used str(args.run_num));
# increase it by hand (0, 1, 2, ...) to start a separate run directory.
save_dir = 'run_' + str(0)
# Each subdirectory is checked on its own: previously nothing was created when
# save_dir already existed, even if some of its subfolders were missing.
make_run_dirs(save_dir, ['losses', 'models', 'saved_forwardsim',
                         'saved_trajfollow', 'training_data'])
    

In [4]:
# Seed NumPy's global RNG and TensorFlow's graph-level RNG with the same value.
# (param_dict['seed'] records 0 separately; keep the two in sync if this changes.)
rng_seed = 0
np.random.seed(rng_seed)
tf.set_random_seed(rng_seed)

In [5]:
# Index lookups for the selected agent (get_indices returns 11 values;
# presumably positions within the state vector -- confirm in data_manipulation).
(x_index, y_index, z_index, yaw_index,
 joint1_index, joint2_index,
 frontleg_index, frontshin_index, frontfoot_index,
 xvel_index, orientation_index) = get_indices(which_agent)

# TensorFlow dtype and noise-to-signal ratio used later in the notebook
tf_datatype = tf.float64
noiseToSignal = 0.01

# Two-letter code, n = noisy and c = clean:
# 1st letter is the action that is executed, 2nd letter is the action that is aggregated.
actions_ag = 'nc'

In [6]:
# Sanity check: show the x/y/z indices returned by get_indices (Python 2 print statement).
print x_index, y_index, z_index

29 30 31


In [7]:
#################################################
######## save param values to a file ############
#################################################

# Snapshot of the run configuration. Entries marked "fixed" replace values the
# script version took from command-line args. Flags wrapped in str() are stored
# as their string form, the remaining values are stored as-is.
param_dict = {
    'which_agent': which_agent,
    'use_existing_training_data': False,  # fixed (was str(args.use_existing_training_data))
    'desired_traj_type': 'straight',  # fixed (was args.desired_traj_type)
    'visualize_MPC_rollout': False,  # fixed (was str(args.visualize_MPC_rollout))
    'num_rollouts_save_for_mf': 60,  # fixed (was args.num_rollouts_save_for_mf)
    'seed': 0,  # fixed (was args.seed)
    'follow_trajectories': str(follow_trajectories),
    'use_threading': str(use_threading),
    'num_rollouts_train': num_rollouts_train,
    'num_fc_layers': num_fc_layers,
    'depth_fc_layers': depth_fc_layers,
    'batchsize': batchsize,
    'lr': lr,
    'nEpoch': nEpoch,
    'fraction_use_new': fraction_use_new,
    'horizon': horizon,
    'num_control_samples': num_control_samples,
    'num_aggregation_iters': num_aggregation_iters,
    'num_trajectories_for_aggregation': num_trajectories_for_aggregation,
    'rollouts_forTraining': rollouts_forTraining,
    'make_aggregated_dataset_noisy': str(make_aggregated_dataset_noisy),
    'make_training_dataset_noisy': str(make_training_dataset_noisy),
    'noise_actions_during_MPC_rollouts': str(noise_actions_during_MPC_rollouts),
    'dt_steps': dt_steps,
    'steps_per_episode': steps_per_episode,
    'steps_per_rollout_train': steps_per_rollout_train,
    'steps_per_rollout_val': steps_per_rollout_val,
    'min_rew_for_saving': min_rew_for_saving,
    'x_index': x_index,
    'y_index': y_index,
    'tf_datatype': str(tf_datatype),
    'noiseToSignal': noiseToSignal,
}

In [8]:
# Write param_dict into save_dir twice: once as a binary pickle, once as JSON text.
with open(save_dir + '/params.pkl', 'wb') as pkl_file:
    pickle.dump(param_dict, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)
with open(save_dir + '/params.txt', 'w') as txt_file:
    serialized = json.dumps(param_dict)
    txt_file.write(serialized)

In [9]:
# create environment for the selected agent
# create_env returns two values; the second is kept as dt_from_xml
# (presumably the simulation timestep read from the model XML -- confirm in helper_funcs).
env, dt_from_xml= create_env(which_agent)

('\n\n the dt is: ', 0.02, '\n\n')
('--------------------------------- \nState space dimension: ', (125,))
('Action space dimension: ', (8,), '\n -----------------------------------')


In [12]:
#create random policy for data collection
# (per the recorded cell output, actions are selected between -1 and 1 on each of the 8 action dimensions)
random_policy = Policy_Random(env)

('Created a random policy, where actions are selected between ', array([-1., -1., -1., -1., -1., -1., -1., -1.]), ', and ', array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]))
