In [1]:
import numpy as np
import robosuite as suite
from robosuite.environments.manipulation.empty import Empty
from scipy import interpolate
from robosuite.utils.mjmod import DynamicsModder

In [2]:
# Episode length in control steps. Passed to suite.make(horizon=...) and to
# plan(), which samples one trajectory point per step.
horizon = 200

In [3]:
def plan(start_pose, middle_pose, end_pose, horizon):
    """Sample a clamped cubic spline through three poses.

    The spline interpolates ``start_pose`` at t=0, ``middle_pose`` at
    t=``horizon // 2`` and ``end_pose`` at t=``horizon``, using SciPy's
    ``bc_type='clamped'`` boundary condition at both ends.

    Returns:
        The spline evaluated at t = 0, 1, ..., horizon - 1 (one row per
        step). The final knot at t=horizon itself is not sampled.
    """
    knot_times = [0, horizon // 2, horizon]
    waypoints = [start_pose, middle_pose, end_pose]
    spline = interpolate.CubicSpline(knot_times, waypoints, axis=0, bc_type='clamped')
    sample_times = range(horizon)
    return spline(sample_times)
def difference(traj):
    """Turn a sequence of poses into its first pose followed by per-step deltas.

    Element 0 of the returned list is ``traj[0]`` unchanged; element ``k``
    (k >= 1) is ``traj[k] - traj[k - 1]``. The result is a Python list with
    the same number of entries as ``traj``.
    """
    poses = np.array(traj)
    deltas = [poses[0]]
    # Pair each pose with its successor and keep the step between them.
    deltas.extend(curr - prev for prev, curr in zip(poses[:-1], poses[1:]))
    return deltas

In [4]:
# Register the custom Empty environment class with robosuite
# (presumably so that suite.make("Empty", ...) can resolve it by name -- confirm).
suite.environments.base.register_env(Empty)

In [5]:
# Start from robosuite's default JOINT_POSITION controller settings, then
# override damping_ratio and ramp_ratio before printing the final config.
controller_config = suite.load_controller_config(default_controller="JOINT_POSITION")
controller_config.update({"damping_ratio": 0.1, "ramp_ratio": 0.1})
print(controller_config)

{'type': 'JOINT_POSITION', 'input_max': 1, 'input_min': -1, 'output_max': 0.05, 'output_min': -0.05, 'kp': 50, 'damping_ratio': 0.1, 'impedance_mode': 'fixed', 'kp_limits': [0, 300], 'damping_ratio_limits': [0, 10], 'qpos_limits': None, 'interpolation': None, 'ramp_ratio': 0.1}


In [6]:
# Build the "Empty" environment used for data collection, with on-screen rendering enabled.
env = suite.make(
    "Empty",
    robots="IIWA",                          # load a single IIWA robot
    gripper_types="ClothGripper",           # use the ClothGripper gripper type
    controller_configs=controller_config,   # JOINT_POSITION config built in the previous cell
    has_renderer=True,                      # on-screen rendering
    render_camera="sideview",               # visualize the "sideview" camera
    has_offscreen_renderer=False,           # no off-screen rendering
    render_collision_mesh=True,
    control_freq=20,                        # 20 hz control for applied actions
    horizon=horizon,                        # each episode terminates after `horizon` steps
    use_object_obs=False,                   # no observations needed
    use_camera_obs=False,                   # no observations needed
)

Creating window glfw


In [7]:
import tqdm
import random

num_cloth_joints = 11
num_episodes = 5

# The commanded motion is identical in every episode, so build it once:
# the first pose followed by per-step deltas along a spline that swings from
# the rest pose out to the swing pose and back.
rest_pose = [0, 0, 0, 0, 0, 0, 0]
swing_pose = [0, -10, 0, 10, 0, -10, 0]
traj = difference(plan(rest_pose, swing_pose, rest_pose, horizon))

all_geom_positions = []
all_parameters = []
for _ in tqdm.tqdm(range(num_episodes)):
    done = False
    obs = env.reset()

    # Draw this episode's joint damping and stiffness uniformly from [0, 20);
    # they are recorded as the regression targets for the rollout.
    modder = DynamicsModder(sim=env.sim)
    damping = random.random() * 20
    stiffness = random.random() * 20
    all_parameters.append([damping, stiffness])

    # NOTE(review): robosuite's modder examples call modder.update() after
    # modifying parameters -- confirm whether it is needed for damping/stiffness.
    for joint_idx in range(num_cloth_joints):
        joint_name = "gripper0_joint" + str(joint_idx)
        modder.mod(joint_name, "damping", damping)
        modder.mod(joint_name, "stiffness", stiffness)

    # Resolve geom ids once per episode (after reset) instead of on every step.
    geom_ids = [
        env.sim.model.geom_name2id("gripper0_g{}_col".format(geom_idx))
        for geom_idx in range(num_cloth_joints)
    ]

    geom_positions = []
    # Step through the planned trajectory one action per control step.
    # (Previously the action was indexed with the stale joint-loop variable,
    # so the same trajectory entry was applied on every step.)
    for action in traj:
        # Record components 0 and 2 of every cloth geom position before stepping.
        for geom_id in geom_ids:
            pos = env.sim.data.geom_xpos[geom_id]
            geom_positions.append(pos[0])
            geom_positions.append(pos[2])
        obs, reward, done, info = env.step(action)
#         env.render()
        if done:
            break
    all_geom_positions.append(geom_positions)

all_geom_positions = np.array(all_geom_positions)
all_parameters = np.array(all_parameters)

  0%|          | 0/5 [00:00<?, ?it/s]

Creating window glfw


 20%|██        | 1/5 [00:03<00:13,  3.44s/it]

Creating window glfw


 40%|████      | 2/5 [00:06<00:10,  3.40s/it]

Creating window glfw


 60%|██████    | 3/5 [00:10<00:06,  3.42s/it]

Creating window glfw


 80%|████████  | 4/5 [00:13<00:03,  3.30s/it]

Creating window glfw


100%|██████████| 5/5 [00:16<00:00,  3.29s/it]


In [8]:
# Have some prior distribution of magazine parameters
# Model predicts parameters distribution from actions and movement
# Each cycle
# Sample parameters and choose actions to minimize entropy of predictions

In [9]:
# Use DIAYN for trajectories

In [10]:
# Sanity check: one row per episode. Each row holds 2 coordinates (components 0 and 2
# of geom_xpos) for each of the 11 cloth geoms at every one of the 200 steps -> 4400.
all_geom_positions.shape, all_parameters.shape

((5, 4400), (5, 2))

In [11]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

# Fraction of episodes used for training; the remainder is held out for validation.
train_split = 0.8
total_length = len(all_geom_positions)
train_length = int(train_split * total_length)
val_length = total_length - train_length

# Pair each rollout's recorded positions with the parameters that produced it,
# then split the pairs at random into train and validation subsets.
position_tensor = torch.from_numpy(all_geom_positions)
parameter_tensor = torch.from_numpy(all_parameters)
all_dataset = TensorDataset(position_tensor, parameter_tensor)
train_dataset, val_dataset = torch.utils.data.random_split(
    all_dataset, [train_length, val_length]
)

In [12]:
# Flattened rollout length: 200 steps x 11 geoms x 2 coordinates.
input_dim = 4400
# Number of physical parameters regressed per rollout (damping, stiffness).
num_params = 2

# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MLP that maps a flattened rollout to 2 * num_params outputs: the first half is
# read as the predicted means, the second half as the predicted log-variances.
model = torch.nn.Sequential(
    torch.nn.Linear(input_dim, 2048),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.1),
    torch.nn.Linear(2048, 1024),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.1),
    torch.nn.Linear(1024, 1024),
    torch.nn.ReLU(),
    torch.nn.Linear(1024, 2 * num_params),
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [13]:
loss_fn = torch.nn.GaussianNLLLoss()
# Follow whatever device the model lives on instead of hard-coding one.
device = next(model.parameters()).device


def _gaussian_nll(position, target_params):
    """Return the Gaussian NLL of ``target_params`` under the model's prediction.

    The model output for one rollout is split into two halves of size 2:
    predicted means and predicted log-variances. The log-variances are
    exponentiated before being passed to the loss as variances.
    """
    position = position.to(device).float()
    target_params = target_params.to(device).float()
    pred_params_mu, pred_params_logvar = torch.split(model(position), [2, 2])
    # NOTE(review): exp() of an unbounded log-variance can overflow/underflow and
    # produce very large losses; consider clamping the log-variance if spikes appear.
    return loss_fn(pred_params_mu, target_params, torch.exp(pred_params_logvar))


for epoch in range(100):
    # One optimizer step per training sample (no batching).
    model.train()
    train_losses = []
    for position, target_params in train_dataset:
        loss = _gaussian_nll(position, target_params)
        train_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate on the held-out samples with dropout disabled and no gradients.
    model.eval()
    with torch.no_grad():
        val_losses = [
            _gaussian_nll(position, target_params).item()
            for position, target_params in val_dataset
        ]

    # Report NaN explicitly for an empty split instead of letting np.mean warn.
    train_loss_mean = np.mean(train_losses) if train_losses else float("nan")
    val_loss_mean = np.mean(val_losses) if val_losses else float("nan")
    print("Epoch: {}, Train Loss: {}, Val Loss: {}".format(epoch, train_loss_mean, val_loss_mean))

Epoch: 0, Train Loss: 36.99732756614685, Val Loss: 13.162463188171387
Epoch: 1, Train Loss: 8.291607648134232, Val Loss: 4986111.0
Epoch: 2, Train Loss: 407657.73228788376, Val Loss: 5.507568359375
Epoch: 3, Train Loss: 6.2811033725738525, Val Loss: 6.350114345550537
Epoch: 4, Train Loss: 5.041744589805603, Val Loss: 3.5206222534179688
Epoch: 5, Train Loss: 3.3673874735832214, Val Loss: 2.759005069732666
Epoch: 6, Train Loss: 2.9817898273468018, Val Loss: 3.1587002277374268
Epoch: 7, Train Loss: 3.4947081208229065, Val Loss: 2.5191245079040527
Epoch: 8, Train Loss: 2.7493642568588257, Val Loss: 1.9828379154205322
Epoch: 9, Train Loss: 2.4707754850387573, Val Loss: 2.1543312072753906
Epoch: 10, Train Loss: 2.491608589887619, Val Loss: 2.3578391075134277
Epoch: 11, Train Loss: 2.4580265879631042, Val Loss: 2.4599709510803223
Epoch: 12, Train Loss: 2.7011000514030457, Val Loss: 1.8945189714431763
Epoch: 13, Train Loss: 3.1196702122688293, Val Loss: 1.6788885593414307
Epoch: 14, Train Loss