In [1]:
# Source code for paper "Learning modular robot control policies" in Transactions on Robotics
# MLP comparisons
# Julian Whitman, Dec. 2022. 
## Apply the shared trunk policy. It can only be applied to designs seen during training.

import torch
import numpy as np
from robot_env import robot_env
from utils import to_tensors, combine_state, wrap_to_pi, rotate
import os, sys
sys.path.insert(0,'..') # print_xacros and urdfs are in the parent directory
from print_xacros import get_names, compile_to_urdf
from apply_policy_MLP import apply_policy, make_goal_memory
from planning_utils import compare_velocities

# Collect the name of every available design.
all_name_list = get_names()
print(len(all_name_list), ' designs')

# Split the design indices into "seen" and "unseen" groups.
# A design is treated as seen when its first three characters equal its
# next three characters reversed (for a six-character name: it reads the
# same forwards and backwards).
seen_inds, unseen_inds = [], []
for design_idx, design_name in enumerate(all_name_list):
    front_half = design_name[0:3]
    back_half_reversed = design_name[3:6][::-1]
    target_list = seen_inds if front_half == back_half_reversed else unseen_inds
    target_list.append(design_idx)

print(len(seen_inds), ' seen designs')
print(len(unseen_inds), ' unseen designs')


144  designs
12  seen designs
132  unseen designs


In [2]:
from shared_MLP_utils import get_in_out_lens
from shared_MLP_policy import shared_trunk_policy

# Everything in this notebook runs on the CPU.
device = torch.device('cpu')

# make some direction goals
goal_memory = make_goal_memory(41) # 10*4 + 1

# load up a learned policy to test
folder = 'saved/shared_trunk_tripod1'
control_fname = os.path.join(folder, 'shared_trunk_control_iter3.pt')

print('Loading weights from ' + control_fname)
# NOTE(review): torch.load unpickles the file, so only point this at
# checkpoints from a trusted source.
# The identity map_location returns each storage as deserialized, without
# moving it to the device it was saved from.
save_dict = torch.load(control_fname, map_location=lambda storage, loc: storage)

# The checkpoint records which designs the policy was trained on; the
# per-design input/output lengths are derived from those names.
urdf_names = save_dict['urdf_names']
(fd_input_lens, fd_output_lens, policy_input_lens,
 action_lens, limb_types) = get_in_out_lens(urdf_names)

# Collapse each per-design list of lengths into a single total per design.
fd_input_lens_sums = [sum(s) for s in fd_input_lens]
fd_output_lens_sums = [sum(s) for s in fd_output_lens]
action_lens_sums = [sum(a) for a in action_lens]
policy_input_lens_sums = [sum(s) for s in policy_input_lens]
print('fd_input_lens_sums, action_lens_sums, policy_input_lens_sums,fd_output_lens_sums: ' +
    str(fd_input_lens_sums) + ', ' +
    str(action_lens_sums) +', ' +
    str(policy_input_lens_sums) +', ' +
    str(fd_output_lens_sums))

# Network hyper-parameters are read from the checkpoint so the rebuilt
# network matches the saved weights.
state_dict = save_dict['state_dict']
n_hidden_layers = save_dict['n_hidden_layers']
hidden_layer_size = save_dict['hidden_layer_size']
# NOTE(review): goal_len is hard-coded rather than read from the checkpoint;
# presumably it must equal the goal size used in training -- confirm.
goal_len = 3

print(save_dict['comment'])

# Rebuild the shared-trunk policy with the per-design totals computed above.
policy_network = shared_trunk_policy(
    policy_input_lens_sums, action_lens_sums,
    goal_len, n_hidden_layers, hidden_layer_size)

# Kept as the last expression of the cell so the key-matching report
# returned by load_state_dict is displayed.
policy_network.load_state_dict(state_dict)


Loading weights from saved/shared_trunk_tripod1/shared_trunk_control_iter3.pt
fd_input_lens_sums, action_lens_sums, policy_input_lens_sums,fd_output_lens_sums: [45, 27, 39, 33, 39, 33, 33, 33, 27, 27, 39, 21], [18, 10, 16, 12, 16, 14, 14, 14, 12, 10, 16, 8], [41, 23, 35, 29, 35, 29, 29, 29, 23, 23, 35, 17], [48, 30, 42, 36, 42, 36, 36, 36, 30, 30, 42, 24]



<All keys matched successfully>

In [3]:

## Note these must be in the same order as seen in training, since they are stored by index
# Remember the list that was in scope before the hard-coded one replaces it.
# On a fresh top-to-bottom run that is the list read from the checkpoint, so
# comparing the two catches an accidental reordering or edit of the names.
urdf_names_before = list(urdf_names)
urdf_names = ['llllll', 'lnwwnl', 'llwwll', 'lnllnl',
              'lwllwl', 'lwwwwl', 'wlwwlw', 'wwllww',
              'wwwwww', 'wnllnw', 'wllllw', 'wnwwnw']
print(urdf_names)
if urdf_names_before != urdf_names:
    # Only warn: the run is still allowed to continue, as it did before.
    print('WARNING: hard-coded urdf_names differ from the list previously in scope: '
          + str(urdf_names_before))


# Per-design results of this notebook are stored next to the checkpoint.
data_subfolder = os.path.join(folder,'transfer_data')

# Check first so the right message is printed; makedirs with exist_ok=True
# does not raise when the directory already exists.
folder_already_there = os.path.exists(data_subfolder)
os.makedirs(data_subfolder, exist_ok=True)
if folder_already_there:
    print('Using folder ' + data_subfolder)
else:
    print('Created folder ' + data_subfolder)


CREATE_VIDEOS= False
# CREATE_VIDEOS= True

# Make sure a URDF file exists for every design; missing ones are compiled.
# The urdf folder is looked up in the parent of the current working directory.
for urdf_name in urdf_names:
    urdf_file =  os.path.join(os.path.split(os.getcwd())[0], 'urdf/' + urdf_name  + '.urdf')
    if not os.path.exists(urdf_file):
        compile_to_urdf(urdf_name)

['llllll', 'lnwwnl', 'llwwll', 'lnllnl', 'lwllwl', 'lwwwwl', 'wlwwlw', 'wwllww', 'wwwwww', 'wnllnw', 'wllllw', 'wnwwnw']
Created folder saved/shared_trunk_tripod1/transfer_data


In [4]:
# Run the policy on every design in urdf_names and score each rollout with
# compare_velocities, collecting the metric and its baseline per design.
T = 20
vel_metric_list, vel_baseline_list, video_names = [], [], []
n_designs = len(urdf_names)

for i_urdf, urdf in enumerate(urdf_names):
    # Per-design output locations inside the transfer-data folder.
    save_path = os.path.join(data_subfolder, urdf + '_apply_policy.ptx')
    video_name = os.path.join(data_subfolder, urdf + '_goal')
    video_names.append(video_name)

    # The design index is passed together with the name. The rollout data
    # is read back from save_path right below, so apply_policy is expected
    # to write its results there.
    apply_policy(urdf, i_urdf, goal_memory,
                 policy_network, device, save_path, show_GUI=False)

    save_dict = torch.load(save_path, map_location=lambda storage, loc: storage)

    # NOTE(review): the meaning of the literal 10 and of T is defined by
    # compare_velocities -- confirm against planning_utils.
    vel_metric, vel_metric_baseline = compare_velocities(
        save_dict['states_memory'],
        save_dict['goal_memory'],
        save_dict['run_lens'],
        10, T)
    vel_metric_list.append(vel_metric)
    vel_baseline_list.append(vel_metric_baseline)

    # One summary line per design: metric, baseline, zero-based progress.
    progress_text = str(i_urdf) + '/' + str(n_designs)
    print(' '.join([urdf + ':',
                    str(np.round(vel_metric, 2)),
                    'baseline',
                    str(np.round(vel_metric_baseline, 2)),
                    progress_text]))

llllll: 0.12 baseline 0.12 0/12
lnwwnl: 0.08 baseline 0.12 1/12
llwwll: 0.09 baseline 0.12 2/12
lnllnl: 0.1 baseline 0.12 3/12
lwllwl: 0.1 baseline 0.12 4/12
lwwwwl: 0.08 baseline 0.12 5/12
wlwwlw: 0.07 baseline 0.12 6/12
wwllww: 0.1 baseline 0.12 7/12
wwwwww: 0.06 baseline 0.12 8/12
wnllnw: 0.09 baseline 0.12 9/12
wllllw: 0.1 baseline 0.12 10/12
wnwwnw: 0.04 baseline 0.12 11/12


In [5]:
# Persist the velocity metrics twice: as a torch-serialized dict and as a
# human-readable text report. Only the designs in urdf_names (reported as
# "seen") are written.
seen_names = urdf_names

# --- torch-serialized results ---
vel_data_path = os.path.join(data_subfolder, 'transfer_results.ptx')
vel_dict = {
    'urdf_names': urdf_names,
    # list(...) keeps the stored type a list even when this cell is re-run
    # after the names below have been rebound to numpy arrays.
    'vel_metric_list': list(vel_metric_list),
    'vel_baseline_list': list(vel_baseline_list),
}
torch.save(vel_dict, vel_data_path)
print('wrote file  ' + vel_data_path)

# --- text report ---
vel_save_path = os.path.join(data_subfolder, 'transfer_results.csv')

# Arrays are needed for the element-wise arithmetic and np.savetxt below.
vel_metric_list = np.array(vel_metric_list)
vel_baseline_list = np.array(vel_baseline_list)

with open(vel_save_path, 'w') as fp:
    # Every name is followed by a comma, including the last one.
    names_text = ''.join(urdf + ',' for urdf in seen_names)

    fp.write('--- Seen Names: ---\n')
    fp.write(names_text + '\n')
    fp.write('Metric Mean: ' + str(np.mean(vel_metric_list))+'\n')
    fp.write('Metric Min: ' + str(np.min(vel_metric_list))+'\n')
    fp.write('Metric Max: ' + str(np.max(vel_metric_list))+'\n')
    # Mean relative difference of the metric from the baseline.
    # NOTE(review): a zero baseline entry would divide by zero here.
    fp.write('Metric Rescaled: ' + str(
        np.mean( (vel_baseline_list - vel_metric_list)
                /vel_baseline_list)
        )+'\n')
    fp.write('--- Metric: ---\n')
    np.savetxt(fp, vel_metric_list, delimiter=',')
    fp.write('--- Baseline: ---\n')
    np.savetxt(fp, vel_baseline_list, delimiter=',')

# Reported only after the with-block has closed the file.
print('wrote file  ' + vel_save_path)

wrote file  saved/shared_trunk_tripod1/transfer_data/transfer_results.ptx
