In [1]:
# Source code for paper "Learning modular robot control policies" in Transactions on Robotics
# MLP comparisons
# Julian Whitman, Dec. 2022. 
## Apply the hardware conditioned policy to both seen and unseen designs

import torch
import numpy as np
from robot_env import robot_env
from utils import to_tensors, combine_state, wrap_to_pi, rotate
import os, sys
sys.path.insert(0,'..') # print_xacros and urdfs are in the parent directory
from print_xacros import get_names, compile_to_urdf
from apply_policy_MLP import apply_policy, make_goal_memory
from planning_utils import compare_velocities

# Names of every available robot design (e.g. 'llllll').
all_name_list = get_names()
print(len(all_name_list), ' designs')


def _is_seen(design_name):
    """Return True when characters 0-2 equal characters 3-5 read backwards."""
    return design_name[0:3] == design_name[3:6][::-1]


# Split the design indices into seen and unseen: a design counts as "seen"
# when its name is mirror-symmetric, and as "unseen" otherwise.
seen_inds = [idx for idx, design_name in enumerate(all_name_list)
             if _is_seen(design_name)]
unseen_inds = [idx for idx, design_name in enumerate(all_name_list)
               if not _is_seen(design_name)]
print(len(seen_inds), ' seen designs')
print(len(unseen_inds), ' unseen designs')




144  designs
12  seen designs
132  unseen designs


In [2]:
from shared_MLP_utils import get_in_out_lens
from shared_MLP_policy import hardware_conditioned_policy

# All computation in this notebook is placed on the CPU.
device = torch.device('cpu')

# make some direction goals
goal_memory = make_goal_memory(41) # 10*4 + 1

# Location of the saved hardware-conditioned policy checkpoint.
folder = 'saved/hc_tripod1'
control_fname = os.path.join(folder, 'hardware_conditioned_control_iter3.pt')

print('Loading weights from ' + control_fname)
# The identity map_location hands every storage back unchanged (the
# CPU-loading pattern described in the torch.load documentation).
save_dict = torch.load(
    control_fname,
    map_location=lambda storage, loc: storage)

# Evaluate on every design by default; later cells iterate over urdf_names.
urdf_names = all_name_list

# Per-design input/output lengths can be inspected with
# get_in_out_lens(urdf_names) if needed; fixed maximum lengths are used below.

# Network weights and architecture hyper-parameters stored in the checkpoint.
state_dict, n_hidden_layers, hidden_layer_size = (
    save_dict[key]
    for key in ('state_dict', 'n_hidden_layers', 'hidden_layer_size'))
goal_len = 3

print(save_dict['comment'])

# Maximum per-module lengths: the first entry differs from the remaining six,
# which all share the same size.
max_state_input_lens = [9] + [6] * 6
max_state_output_lens = [12] + [6] * 6
max_action_lens = [0] + [3] * 6

policy_network = hardware_conditioned_policy(
    max_state_input_lens,
    max_action_lens,
    goal_len,
    n_hidden_layers,
    hidden_layer_size)

# Kept as the final expression so the key-matching report is displayed.
policy_network.load_state_dict(state_dict)


Loading weights from saved/hc_tripod1/hardware_conditioned_control_iter3.pt



<All keys matched successfully>

In [3]:
# Alternative design selections, kept for reference (urdf_names currently
# holds every design in all_name_list):
# random_choice = np.random.choice(len(unseen_inds), 8*6, replace=False)
# random_choice = np.random.choice(len(unseen_inds), 4*3, replace=False)
# urdf_names = [all_name_list[unseen_inds[ind]] for ind in random_choice]

# selected for best, middle, worst
# urdf_names =['wnwwlw', 'wwwwnw', 'lnwwll', 'wwwwll', 
#              'llwlwl', 'wwllnw', 'lnwlwl', 'llllww', 
#              'lnlwwl', 'wnwlnl', 'lnllwl', 'lwllnl']
# urdf_names = [all_name_list[ind] for ind in unseen_inds]

print(urdf_names)

# urdf_names = ['llllll', 'wnwwnw', 'llwwll', 'lnwwnl']
# urdf_names += [ 'lnllnl', 'lwllwl', 'lwwwwl', 'wlwwlw', 
#                'wwllww', 'wwwwww', 'wnllnw', 'wllllw']

# Results for this run go into a sub-folder of the checkpoint folder.
# (A separate 'data_seen' folder was previously used when 'llllll' was in
# urdf_names; that switch is disabled.)
data_subfolder = os.path.join(folder,'transfer_data')

if os.path.exists(data_subfolder):
    print('Using folder ' + data_subfolder)
else:
    os.mkdir(data_subfolder)
    print('Created folder ' + data_subfolder)


CREATE_VIDEOS= False
# CREATE_VIDEOS= True

# Make sure a compiled URDF exists for every design; the files are looked up
# in the 'urdf' folder next to the current working directory's parent.
for urdf_name in urdf_names:
    parent_dir = os.path.split(os.getcwd())[0]
    relative_urdf = 'urdf/' + urdf_name  + '.urdf'
    urdf_file = os.path.join(parent_dir, relative_urdf)
    if os.path.exists(urdf_file):
        continue
    compile_to_urdf(urdf_name)

['llllll', 'lllllw', 'llllwl', 'llllww', 'llllnl', 'llllnw', 'lllwll', 'lllwlw', 'lllwwl', 'lllwww', 'lllwnl', 'lllwnw', 'llwlll', 'llwllw', 'llwlwl', 'llwlww', 'llwlnl', 'llwlnw', 'llwwll', 'llwwlw', 'llwwwl', 'llwwww', 'llwwnl', 'llwwnw', 'lwllll', 'lwlllw', 'lwllwl', 'lwllww', 'lwllnl', 'lwllnw', 'lwlwll', 'lwlwlw', 'lwlwwl', 'lwlwww', 'lwlwnl', 'lwlwnw', 'lwwlll', 'lwwllw', 'lwwlwl', 'lwwlww', 'lwwlnl', 'lwwlnw', 'lwwwll', 'lwwwlw', 'lwwwwl', 'lwwwww', 'lwwwnl', 'lwwwnw', 'lnllll', 'lnlllw', 'lnllwl', 'lnllww', 'lnllnl', 'lnllnw', 'lnlwll', 'lnlwlw', 'lnlwwl', 'lnlwww', 'lnlwnl', 'lnlwnw', 'lnwlll', 'lnwllw', 'lnwlwl', 'lnwlww', 'lnwlnl', 'lnwlnw', 'lnwwll', 'lnwwlw', 'lnwwwl', 'lnwwww', 'lnwwnl', 'lnwwnw', 'wlllll', 'wllllw', 'wlllwl', 'wlllww', 'wlllnl', 'wlllnw', 'wllwll', 'wllwlw', 'wllwwl', 'wllwww', 'wllwnl', 'wllwnw', 'wlwlll', 'wlwllw', 'wlwlwl', 'wlwlww', 'wlwlnl', 'wlwlnw', 'wlwwll', 'wlwwlw', 'wlwwwl', 'wlwwww', 'wlwwnl', 'wlwwnw', 'wwllll', 'wwlllw', 'wwllwl', 'wwllww',

In [4]:
# Sanity check: show how many designs are in urdf_names (the cell output).
len(urdf_names)

144

In [5]:
# Run the policy on each design in urdf_names and score the resulting
# velocities against the goals with compare_velocities.
T = 20
vel_metric_list, vel_baseline_list, video_names = [], [], []
n_designs = len(urdf_names)

for i_urdf, urdf in enumerate(urdf_names):
    # Per-design output locations inside the transfer-data folder.
    save_path = os.path.join(data_subfolder, urdf + '_apply_policy.ptx')
    video_name = os.path.join(data_subfolder, urdf + '_goal')
    video_names.append(video_name)

    # The rollout data is read back from save_path right after this call.
    apply_policy(urdf, i_urdf, goal_memory,
                 policy_network, device, save_path, show_GUI=False)

    # NOTE: this reuses the name save_dict, replacing the checkpoint
    # dictionary that was loaded when the policy network was built.
    save_dict = torch.load(save_path,
                           map_location=lambda storage, loc: storage)

    # The literal 10 is passed through unchanged; its meaning is defined by
    # compare_velocities (TODO: replace with a named constant once confirmed).
    vel_metric, vel_metric_baseline = compare_velocities(
        save_dict['states_memory'],
        save_dict['goal_memory'],
        save_dict['run_lens'],
        10,
        T)
    vel_metric_list.append(vel_metric)
    vel_baseline_list.append(vel_metric_baseline)

    # Progress line: "<name>: <metric> baseline <baseline> <index>/<total>".
    print(urdf, ': ', np.round(vel_metric, 2),
          ' baseline ', np.round(vel_metric_baseline, 2),
          ' ', i_urdf, '/', n_designs, sep='')

llllll: 0.11 baseline 0.12 0/144
lllllw: 0.12 baseline 0.12 1/144
llllwl: 0.13 baseline 0.12 2/144
llllww: 0.13 baseline 0.12 3/144
llllnl: 0.12 baseline 0.12 4/144
llllnw: 0.12 baseline 0.12 5/144
lllwll: 0.12 baseline 0.12 6/144
lllwlw: 0.11 baseline 0.12 7/144
lllwwl: 0.12 baseline 0.12 8/144
lllwww: 0.12 baseline 0.12 9/144
lllwnl: 0.11 baseline 0.12 10/144
lllwnw: 0.12 baseline 0.12 11/144
llwlll: 0.11 baseline 0.12 12/144
llwllw: 0.11 baseline 0.12 13/144
llwlwl: 0.12 baseline 0.12 14/144
llwlww: 0.12 baseline 0.12 15/144
llwlnl: 0.12 baseline 0.12 16/144
llwlnw: 0.11 baseline 0.12 17/144
llwwll: 0.1 baseline 0.12 18/144
llwwlw: 0.11 baseline 0.12 19/144
llwwwl: 0.12 baseline 0.12 20/144
llwwww: 0.11 baseline 0.12 21/144
llwwnl: 0.11 baseline 0.12 22/144
llwwnw: 0.11 baseline 0.12 23/144
lwllll: 0.12 baseline 0.12 24/144
lwlllw: 0.11 baseline 0.12 25/144
lwllwl: 0.09 baseline 0.12 26/144
lwllww: 0.1 baseline 0.12 27/144
lwllnl: 0.11 baseline 0.12 28/144
lwllnw: 0.11 baseline 0.12

In [6]:
# Display the velocity metrics collected so far, one entry per evaluated
# design, in the same order as urdf_names.
vel_metric_list

[0.11056033932801229,
 0.1156227686543649,
 0.132822944385215,
 0.13240942516291018,
 0.11624411999935795,
 0.1200258433962128,
 0.11586662131139457,
 0.11427168516884954,
 0.1187599332519937,
 0.12358137947773536,
 0.11337911152482257,
 0.11601155669586911,
 0.1149183211111875,
 0.10589065607485383,
 0.11788341546494362,
 0.11510638020920394,
 0.1188013524412399,
 0.11105576394640777,
 0.09820373328548594,
 0.10962171538978023,
 0.1191627538232136,
 0.10614427393745883,
 0.10644812794838117,
 0.10534737274762429,
 0.11739058444776347,
 0.11178981987746607,
 0.09403684497373095,
 0.10222182979184087,
 0.10531463203798846,
 0.10687581456315738,
 0.11171164202653311,
 0.10815162150318845,
 0.09547008976279125,
 0.09808027610858921,
 0.10447909994285659,
 0.10113172855380442,
 0.11088268205223763,
 0.11380271184132358,
 0.11625819791162027,
 0.1073868635634696,
 0.11588263939606679,
 0.11214594013485783,
 0.10661447834458009,
 0.10338402478631244,
 0.09469411169838042,
 0.0951737027595092

In [7]:
# Persist the transfer results twice: as a torch archive holding the raw
# values, and as a human-readable summary file split into unseen and seen
# designs.


def _write_group_summary(fp, label, names, metric, baseline):
    """Write one group's section of the summary file.

    Args:
        fp: Open, writable text file handle.
        label: Group label used in the section header ('Unseen' or 'Seen').
        names: Design names belonging to the group.
        metric: 1-D numpy array of velocity metrics, aligned with ``names``.
        baseline: 1-D numpy array of baseline metrics, aligned with ``names``.
    """
    fp.write('--- ' + label + ' Names: ---\n')
    # Every name is followed by a comma (including the last one), which
    # keeps the established file layout.
    fp.write(''.join(name + ',' for name in names) + '\n')

    if len(names) == 0:
        # np.min / np.max raise on empty input, so there is nothing to report.
        fp.write('(no designs in this group)\n')
        return

    fp.write('Metric Mean: ' + str(np.mean(metric)) + '\n')
    fp.write('Metric Min: ' + str(np.min(metric)) + '\n')
    fp.write('Metric Max: ' + str(np.max(metric)) + '\n')
    # Mean relative difference between the baseline and the achieved metric.
    fp.write('Metric Rescaled: ' + str(
        np.mean((baseline - metric) / baseline)
        ) + '\n')

    fp.write('--- Metric: ---\n')
    np.savetxt(fp, metric, delimiter=',')
    fp.write('--- Baseline: ---\n')
    np.savetxt(fp, baseline, delimiter=',')


# Raw results. seen_inds / unseen_inds are positions in all_name_list.
vel_data_path = os.path.join(data_subfolder, 'transfer_results.ptx')
vel_dict = {
    'unseen_inds': unseen_inds,
    'seen_inds': seen_inds,
    'urdf_names': urdf_names,
    'vel_metric_list': vel_metric_list,
    'vel_baseline_list': vel_baseline_list,
}
torch.save(vel_dict, vel_data_path)

# The summary needs exactly one metric and one baseline per design.
if not (len(vel_metric_list) == len(vel_baseline_list) == len(urdf_names)):
    raise ValueError(
        'Expected one metric and one baseline per design in urdf_names, got '
        + str(len(vel_metric_list)) + ' metrics and '
        + str(len(vel_baseline_list)) + ' baselines for '
        + str(len(urdf_names)) + ' designs; re-run the evaluation loop.')

vel_save_path = os.path.join(data_subfolder, 'transfer_results.csv')

# Classify by name and derive positions relative to urdf_names. The metric
# lists are ordered like urdf_names, so indexing them with positions from
# all_name_list is only valid when urdf_names is the complete list.
seen_name_set = {all_name_list[ind] for ind in seen_inds}
seen_positions = [pos for pos, name in enumerate(urdf_names)
                  if name in seen_name_set]
unseen_positions = [pos for pos, name in enumerate(urdf_names)
                    if name not in seen_name_set]
seen_names = [urdf_names[pos] for pos in seen_positions]
unseen_names = [urdf_names[pos] for pos in unseen_positions]

# Converted to arrays for fancy indexing; the names are rebound (as before)
# so that later code can keep indexing them as arrays.
vel_metric_list = np.array(vel_metric_list)
vel_baseline_list = np.array(vel_baseline_list)

with open(vel_save_path, 'w') as fp:
    _write_group_summary(fp, 'Unseen', unseen_names,
                         vel_metric_list[unseen_positions],
                         vel_baseline_list[unseen_positions])
    _write_group_summary(fp, 'Seen', seen_names,
                         vel_metric_list[seen_positions],
                         vel_baseline_list[seen_positions])
    print('wrote file ' + vel_save_path)

wrote file saved/hc_tripod1/transfer_data/transfer_results.csv
