In [8]:
import robot
from torch import nn
import torch

In [218]:
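# Shape conventions (b = batch size, ndim = number of coordinates in q):
# net_L: q (b x ndim) -> l (b x ndim*(ndim+1)/2), the entries of the lower triangle of L
# net_g: q (b x ndim) -> g (b x ndim)
# q (b x ndim) --net_L--> l (b x ndim*(ndim+1)/2) --reshape--> L (b x ndim x ndim, lower triangular),
#   dLdq (b x ndim x ndim x ndim, lower triangular in its two matrix axes; the last axis indexes q)
# dLdt (b x ndim x ndim)
# dHdt (b x ndim x ndim)
# dHdq (b x ndim x ndim x ndim)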

def get_batch_jacobian(net, x, noutputs, create_graph=False):
    """Evaluate ``net`` on a batch and return its outputs and per-sample Jacobian.

    Every sample is replicated ``noutputs`` times so that one vector-Jacobian
    product against a stack of one-hot vectors yields all Jacobian rows at
    once. This requires ``net`` to act independently on each row of its input.

    Args:
        net: callable mapping a ``(n, noutputs, dim)`` tensor to a
            ``(n, noutputs, noutputs)`` tensor, row by row.
        x: input batch of shape ``(n, dim)``.
        noutputs: size of the last axis of ``net``'s output.
        create_graph: when True the returned Jacobian stays attached to the
            autograd graph (needed to train through it). The default, False,
            returns a Jacobian without gradient history, as before.

    Returns:
        Tuple ``(y, jac)`` where ``y`` has shape ``(n, noutputs)`` and ``jac``
        has shape ``(n, noutputs, dim)`` with ``jac[b, i, j] = dy[b, i] / dx[b, j]``.
    """
    n = x.size(0)
    x_rep = x.unsqueeze(1).repeat(1, noutputs, 1)
    x_rep.requires_grad_(True)
    y = net(x_rep)
    # Build the one-hot seeds on the output's device/dtype so CUDA and
    # float64 inputs work (a plain torch.eye is always CPU/float32).
    seeds = torch.eye(noutputs, dtype=y.dtype, device=y.device)
    seeds = seeds.unsqueeze(0).repeat(n, 1, 1)
    # autograd.grad (unlike y.backward) does not accumulate into the .grad of
    # net's parameters, so calling this never pollutes a later optimizer step.
    # retain_graph=True keeps the returned y usable in a subsequent backward.
    (jac,) = torch.autograd.grad(
        y, x_rep, grad_outputs=seeds, create_graph=create_graph, retain_graph=True
    )
    return y[:, 0, :], jac


def get_LdLdq(net, q):
    """Build the lower-triangular factor L(q) and its derivative with respect to q.

    Args:
        net: network mapping ``q`` to the ``ndim*(ndim+1)/2`` entries of the
            lower triangle of L, in ``torch.tril_indices`` (row-major) order.
        q: tensor of shape ``(nbatch, ndim)``.

    Returns:
        Tuple ``(L, dLdq)``: ``L`` has shape ``(nbatch, ndim, ndim)`` and is
        lower triangular; ``dLdq`` has shape ``(nbatch, ndim, ndim, ndim)``
        with ``dLdq[b, i, j, k] = dL[b, i, j] / dq[b, k]``.
    """
    nbatch, ndim = q.shape
    # Integer arithmetic: ndim*(ndim+1) is always even.
    l, l_jac = get_batch_jacobian(net, q, ndim * (ndim + 1) // 2)
    rows, cols = torch.tril_indices(row=ndim, col=ndim, offset=0, device=l.device)
    # Allocate on the device/dtype of the network output; a bare torch.zeros
    # is CPU/float32 and fails as soon as the inputs live on a GPU.
    L = l.new_zeros((nbatch, ndim, ndim))
    L[:, rows, cols] = l
    dLdq = l_jac.new_zeros((nbatch, ndim, ndim, ndim))
    dLdq[:, rows, cols, :] = l_jac
    return L, dLdq

def inverse_model(net_L, net_g, q, dq, ddq):
    """Compute tau = H ddq + dH/dt dq - 1/2 d(dq^T H dq)/dq + g(q) with H = L L^T.

    Args:
        net_L: network mapping ``q`` to the ``ndim*(ndim+1)/2`` entries of the
            lower triangle of L, in ``torch.tril_indices`` order.
        net_g: network mapping ``q`` to a ``(nbatch, ndim)`` tensor.
        q, dq, ddq: tensors of shape ``(nbatch, ndim)``.

    Returns:
        ``tau`` of shape ``(nbatch, ndim)``.
    """
    nbatch, ndim = q.shape
    l, l_jac = get_batch_jacobian(net_L, q, ndim * (ndim + 1) // 2)
    rows, cols = torch.tril_indices(row=ndim, col=ndim, offset=0, device=l.device)
    # Allocate on the device/dtype of the network output rather than on CPU.
    L = l.new_zeros((nbatch, ndim, ndim))
    L[:, rows, cols] = l
    dLdq = l_jac.new_zeros((nbatch, ndim, ndim, ndim))
    dLdq[:, rows, cols, :] = l_jac

    # All contractions below name the batch axis explicitly. Mixing 4-D and
    # 3-D operands in a plain `@` broadcasts the 3-D operand's batch axis
    # against the coordinate axis of the 4-D one, which errors unless
    # nbatch == ndim and is silently wrong when they happen to match.
    Lt = L.transpose(1, 2)
    dLdt = torch.einsum('bijk,bk->bij', dLdq, dq)
    H = L @ Lt
    dHdt = L @ dLdt.transpose(1, 2) + dLdt @ Lt
    dL_k = dLdq.permute(0, 3, 1, 2)  # (b, k, i, j) = dL_ij / dq_k
    dHdq = dL_k @ Lt.unsqueeze(1) + L.unsqueeze(1) @ dL_k.transpose(-1, -2)
    quad = torch.einsum('bi,bkij,bj->bk', dq, dHdq, dq)  # d(dq^T H dq)/dq
    # Matrix-vector products via einsum keep the (nbatch, ndim) shape even for
    # nbatch == 1 or ndim == 1, where a bare .squeeze() would drop an axis.
    tau = (
        torch.einsum('bij,bj->bi', H, ddq)
        + torch.einsum('bij,bj->bi', dHdt, dq)
        - 0.5 * quad
        + net_g(q)
    )
    return tau

In [184]:
class TestModule(nn.Module):
    """Parameter-free stand-in for net_L with a known closed-form Jacobian.

    The output is ``[exp(x), exp(2x)]``, i.e. twice as wide as the input, which
    equals ``ndim*(ndim+1)/2`` only for ``ndim == 3``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        first_half = torch.exp(x)
        second_half = torch.exp(2 * x)
        return torch.cat((first_half, second_half), dim=-1)

In [216]:
# Step-by-step version of inverse_model that keeps every intermediate tensor
# available for inspection.
model=TestModule()
# model_g is otherwise only created in a later cell; define it here so this
# cell also runs on a fresh kernel.
model_g=nn.Linear(3,3)

q=torch.stack([torch.log(torch.arange(1,4).float()),torch.log(torch.arange(2,5).float()),torch.log(torch.arange(5,8).float())])
dq=torch.ones(q.shape)
ddq=torch.ones(q.shape)


L,dLdq=get_LdLdq(model,q)
# Contractions name the batch axis explicitly: a plain `@` between the 4-D
# derivative tensors and 3-D operands would broadcast the batch axis against
# the coordinate axis (only shape-compatible here because batch == ndim == 3).
dLdt=torch.einsum('bijk,bk->bij', dLdq, dq)
H=L@L.transpose(1,2)
dHdt=L@dLdt.transpose(1,2)+dLdt@L.transpose(1,2)
dHdq=dLdq.permute(0,3,1,2)@L.transpose(1,2).unsqueeze(1)+L.unsqueeze(1)@dLdq.permute(0,3,2,1)
quad=torch.einsum('bi,bkij,bj->bk', dq, dHdq, dq) # d(dqHdq)dq

tau=torch.einsum('bij,bj->bi', H, ddq)+torch.einsum('bij,bj->bi', dHdt, dq)-0.5*quad+model_g(q)

In [219]:
# Smoke test of inverse_model on three hand-picked configurations whose
# exponentials are small integers.
model_L = TestModule()
model_g = nn.Linear(3, 3)
q = torch.stack(
    [torch.log(torch.arange(start, start + 3).float()) for start in (1, 2, 5)]
)
dq = torch.ones_like(q)
ddq = torch.ones_like(q)

tau = inverse_model(model_L, model_g, q, dq, ddq)

In [112]:
# Scratch network mapping a 3-vector to 6 outputs (6 = 3*(3+1)/2, the number of
# lower-triangular entries for ndim = 3). The nested Sequential mixes Softplus
# and ReLU activations; layer order is left untouched because it determines
# both the parameter names and the order of random initialisation.
model = nn.Sequential(
    nn.Linear(3, 256),
    nn.ReLU(),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Sequential(
        nn.Softplus(),
        nn.Linear(256, 256),
        nn.Linear(256, 256),  # two Linear layers back to back, no activation in between
        nn.ReLU(),
    ),
    nn.Linear(256, 256),
    nn.Softplus(),
    nn.Linear(256, 6),
)

# Single random input that tracks gradients, so derivatives of y w.r.t. x can be taken.
x = torch.rand((1, 3)).requires_grad_(True)

# Forward pass; y has shape (1, 6).
y = model(x)

#### RobotArm test

In [1]:
from gym.wrappers import TimeLimit
from robot.envs.hyrule.rl_env import ArmReachWithXYZ
import numpy as np
from robot.model.arm.extra import lagrangian_v2 as lg
from torch import nn
import torch
from robot.model.arm.dataset import *

Using default glsl path /home/derek/anaconda3/lib/python3.7/site-packages/sapien/glsl_shader/130
USE sapien core


In [2]:
# Generate the arm dataset. NOTE(review): the recorded output below shows a
# 5000-step progress bar that took ~56 minutes and wrote ./dataset/arm/*.pkl;
# consider guarding this call so a full notebook re-run does not regenerate it.
make_dataset('arm')

Using default glsl path /home/derek/anaconda3/lib/python3.7/site-packages/sapien/glsl_shader/130
USE sapien core


  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity


  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
  delta = np.linalg.lstsq(jac[:3], goal-achieved)[0] * 10 # desired_velocity
 76%|███████▋  | 3824/5000 [44:15<12:38,  1.55it/s] 

saving...  ./dataset/arm/11.pkl


 77%|███████▋  | 3828/5000 [44:17<10:50,  1.80it/s]

saving...  ./dataset/arm/13.pkl
saving...  ./dataset/arm/10.pkl


 77%|███████▋  | 3832/5000 [44:19<12:29,  1.56it/s]

saving...  ./dataset/arm/14.pkl


 77%|███████▋  | 3835/5000 [44:21<10:51,  1.79it/s]

saving...  ./dataset/arm/12.pkl


 79%|███████▉  | 3945/5000 [45:36<11:35,  1.52it/s]

saving...  ./dataset/arm/9.pkl


 81%|████████▏ | 4066/5000 [46:57<11:25,  1.36it/s]

saving...  ./dataset/arm/8.pkl


 84%|████████▎ | 4179/5000 [48:14<09:17,  1.47it/s]

saving...  ./dataset/arm/7.pkl


 86%|████████▌ | 4293/5000 [49:27<07:32,  1.56it/s]

saving...  ./dataset/arm/6.pkl


 87%|████████▋ | 4359/5000 [50:10<06:57,  1.54it/s]

saving...  ./dataset/arm/5.pkl


 88%|████████▊ | 4421/5000 [50:45<06:13,  1.55it/s]

saving...  ./dataset/arm/4.pkl


 90%|█████████ | 4517/5000 [51:44<04:51,  1.66it/s]

saving...  ./dataset/arm/3.pkl


 92%|█████████▏| 4596/5000 [52:32<04:23,  1.53it/s]

saving...  ./dataset/arm/2.pkl


 94%|█████████▍| 4694/5000 [53:30<02:52,  1.77it/s]

saving...  ./dataset/arm/1.pkl


 96%|█████████▌| 4792/5000 [54:27<02:17,  1.51it/s]

saving...  ./dataset/arm/0.pkl


 98%|█████████▊| 4915/5000 [55:33<00:30,  2.76it/s]

saving...  ./dataset/arm/18.pkl


 98%|█████████▊| 4925/5000 [55:38<00:40,  1.87it/s]

saving...  ./dataset/arm/17.pkl


 99%|█████████▉| 4938/5000 [55:44<00:26,  2.36it/s]

saving...  ./dataset/arm/15.pkl


 99%|█████████▉| 4948/5000 [55:49<00:22,  2.33it/s]

saving...  ./dataset/arm/16.pkl


100%|██████████| 5000/5000 [56:16<00:00,  1.48it/s]


saving...  ./dataset/arm/19.pkl


In [59]:
# Wrap the arm-reach environment in a TimeLimit and show the initial observation.
# NOTE(review): the positional 50 is presumably the per-episode step limit —
# confirm against the installed gym version's TimeLimit signature.
env=TimeLimit(ArmReachWithXYZ(), 50)
obs=env.reset()
# The observation is a dict; only its 'observation' entry is printed here.
print(obs['observation'])

[ 0.0000000e+00 -1.3825793e+00 -2.9427279e-02  5.7432674e-02
 -9.7876757e-01  3.7991413e-01  6.0942268e-01  2.4935515e+00
  8.0001003e-01  8.0001003e-01  8.0000019e-01  1.1211396e-13
  3.5619316e-12 -1.8626451e-09 -6.7977220e-02 -1.1120176e+00
  2.8683582e-01 -1.0252906e+00 -2.1472794e-01  7.6243766e-02
  2.1023731e-01  1.1284603e-05  1.0477379e-09 -1.4394755e-06
  4.8441251e-12  0.0000000e+00 -1.2245178e-03 -1.9696816e+00
 -2.1866995e+01  6.0620375e+00 -1.8629780e+01 -4.7180300e+00
  2.4600406e+00  5.1464810e+00  1.8125772e-03 -5.9604645e-06
  1.0341406e-05  3.6397523e-08  5.2817632e-07  8.8724220e-01
  3.2058075e-02  1.2223468e+00]




In [None]:
# List the name of every joint exposed by the environment's agent.
for joint in env.agent.get_joints():
    print(joint.name)

# Names of the seven right-arm joints, from shoulder to wrist.
actuator = [
    'right_shoulder_pan_joint',
    'right_shoulder_lift_joint',
    'right_arm_half_joint',
    'right_elbow_joint',
    'right_wrist_spherical_1_joint',
    'right_wrist_spherical_2_joint',
    'right_wrist_3_joint',
]

In [2]:
from gym.wrappers import TimeLimit
from robot.envs.hyrule.rl_env import ArmReachWithXYZ
import numpy as np
from robot.model.arm.extra import lagrangian_v2 as lg
from torch import nn
import torch
from robot.model.arm.dataset import *
# Load the previously generated arm dataset.
# NOTE(review): the device is hard-coded to 'cuda:3'; this fails on machines
# with fewer than four GPUs — consider a single configurable DEVICE constant.
dataset=Dataset('./dataset/arm', device='cuda:3')

def get_info(data):
    """Slice q, dq and ddq out of a sampled batch.

    Only ``data[0]`` is read. Index 1 is taken along its second axis, then
    columns 1:8, 14:21 and 27:34 of the last axis are returned as
    ``(q, dq, ddq)``, seven columns each.
    """
    # NOTE(review): presumably the second axis is time and the column ranges
    # are joint position / velocity / acceleration — confirm against Dataset.
    frame = data[0][:, 1]
    return frame[:, 1:8], frame[:, 14:21], frame[:, 27:34]

MAX ACTION [1. 1. 1. 1. 1. 1. 1.]
MAX Q [6.28318214 2.47949553 6.28318214 6.13466597 6.28318501 2.6306982
 6.28317213]
MAX DQ [ 494.97387695  102.62372589  481.76843262  259.27737427 1409.93457031
  554.1619873  1217.09179688]
num train 80000
num valid 20000


In [9]:
# Make GPU 3 the current CUDA device so the bare .cuda() calls below land on it.
# NOTE(review): hard-coded device index; requires at least four visible GPUs.
torch.cuda.set_device('cuda:3')

# model_l = nn.Sequential(
#     nn.Linear(7, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 28),
# )

# model_g = nn.Sequential(
#     nn.Linear(7, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 7),
# )

class LagModel(nn.Module):
    """Shared-trunk network with three linear heads on a 256-wide feature.

    ``forward(q)`` returns ``(out, gravity)`` where ``out`` concatenates the
    ``ndim*(ndim-1)/2`` outputs of the ``tril`` head followed by the ``ndim``
    outputs of the ``diag`` head, and ``gravity`` has ``ndim`` outputs.
    """

    def __init__(self, ndim):
        super().__init__()
        width = 256
        # Trunk: four Linear+ReLU stages, built in the same order as a literal
        # listing so parameter names and initialisation order are unchanged.
        stages = []
        fan_in = ndim
        for _ in range(4):
            stages.append(nn.Linear(fan_in, width))
            stages.append(nn.ReLU())
            fan_in = width
        self.feat = nn.Sequential(*stages)
        # The diag head is a bare Linear layer: no activation is applied to it.
        self.diag = nn.Sequential(nn.Linear(width, ndim))
        self.tril = nn.Linear(width, ndim * (ndim - 1) // 2)
        self.gravity = nn.Linear(width, ndim)

    def forward(self, q):
        feature = self.feat(q)
        off_diagonal = self.tril(feature)
        diagonal = self.diag(feature)
        return torch.cat((off_diagonal, diagonal), dim=-1), self.gravity(feature)
        
# Models are created in a fixed order (model_lag, model_naive, model_generator)
# so random initialisation is consumed in the same sequence on every run.
model_lag = LagModel(7)
# Baseline: a plain MLP on the concatenated (q, dq, ddq) -> 7 outputs.
model_naive = nn.Sequential(
    nn.Linear(21, 256),
    nn.ReLU(),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Linear(256, 7),
)

# A second LagModel, used by the training cell to produce regression targets.
model_generator = LagModel(7).cuda()

model_lag = model_lag.cuda()
model_naive = model_naive.cuda()
crit = nn.MSELoss()

# Loss histories.
train_loss_lag, train_loss_naive = [], []
val_loss_lag, val_loss_naive = [], []

In [10]:
from importlib import reload
reload(lg)
# Train the structured model (model_lag) and the plain MLP (model_naive) side by
# side against targets produced by model_generator, a LagModel that no
# optimizer in this cell updates.
optimizer_lag=torch.optim.AdamW(model_lag.parameters(), lr=1e-3, weight_decay=1e-6)
optimizer_naive=torch.optim.Adam(model_naive.parameters(), lr=5e-4)
for t in range(5):
    # NOTE(review): presumably dataset.sample() returns a whole batch, so
    # num_train iterations revisit the data many times per outer pass — confirm.
    for i in range(dataset.num_train):
        data=dataset.sample()
        q,dq,ddq=get_info(data)
        # Targets are detached so no gradient flows into model_generator's graph.
        target=lg.inverse_model(model_generator,q,dq,ddq).detach()
        
        # Lagrangian network
        tau=lg.inverse_model(model_lag,q,dq,ddq)
        # NOTE(review): zero_grad is called after the forward pass; if
        # lg.inverse_model runs a backward pass internally this ordering
        # matters — confirm before moving it.
        optimizer_lag.zero_grad()
        loss_lag=crit(tau,target)
        loss_lag.backward() 
        optimizer_lag.step()
        
        # Naive network
        tau=model_naive(torch.cat([q,dq,ddq], axis=1))
        optimizer_naive.zero_grad()
        loss_naive=crit(tau,target)
        loss_naive.backward()
        optimizer_naive.step()
        
        # Prints one line per step (outer pass, step, both losses); this floods
        # the cell output on long runs.
        print(t, i, loss_lag.data.item(), loss_naive.data.item())
#         if i%1000==0:
#             data=dataset.sample('valid')
            
#             q,dq,ddq=get_info(data)
#             tau=lg.inverse_model(model_lag,q,dq,ddq)
#             loss_val_lag=crit(tau,data[1].squeeze()*50)
# #             train_loss_lag.append(loss_lag.data.item())
# #             val_loss_lag.append(loss_val_lag.item())
            
#             # Naive network
#             tau=model_naive(torch.cat([q,dq,ddq], axis=1))
#             loss_val_naive=crit(tau,data[1].squeeze()*50)
# #             train_loss_naive.append(loss_naive.data.item())
# #             val_loss_naive.append(loss_val_naive.item())   
            
#             print('Val:', t, i, loss_val_lag.data.item() ,loss_val_naive.data.item())


0 0 1689.92822265625 21548.4375
0 1 1149.251708984375 14343.0556640625
0 2 2594.150146484375 5165.333984375
0 3 667.4290771484375 4364.5166015625
0 4 826.2850952148438 3985.76171875
0 5 980.0744018554688 3315.215087890625
0 6 556.5008544921875 5556.27685546875
0 7 415.4665222167969 2671.2265625
0 8 1224.758544921875 4887.533203125
0 9 417.32720947265625 3339.4755859375
0 10 793.4598388671875 3376.645751953125
0 11 1125.57470703125 13526.5693359375
0 12 686.3563232421875 2505.206787109375
0 13 1097.1552734375 3078.85400390625
0 14 324.2840576171875 1599.1400146484375
0 15 498.485107421875 1888.528076171875
0 16 543.4044799804688 2317.162841796875
0 17 382.64349365234375 2502.919677734375
0 18 434.55023193359375 2146.846435546875
0 19 305.61627197265625 3263.9716796875
0 20 162.92697143554688 1495.4925537109375
0 21 363.69195556640625 2054.06396484375
0 22 304.6307373046875 2277.99658203125
0 23 408.9666748046875 2235.634765625
0 24 397.4864501953125 1627.51806640625
0 25 310.61221313476

0 201 393.8295593261719 751.3467407226562
0 202 281.01885986328125 595.609375
0 203 292.3800964355469 979.6925048828125
0 204 348.90704345703125 658.7974853515625
0 205 201.24624633789062 701.2838134765625
0 206 184.7581024169922 671.334716796875
0 207 175.81675720214844 790.6422119140625
0 208 155.28648376464844 598.1217041015625
0 209 159.01846313476562 466.3292236328125
0 210 241.9447479248047 691.3192138671875
0 211 176.1152801513672 617.8717041015625
0 212 215.55162048339844 636.6962280273438
0 213 234.29225158691406 903.234130859375
0 214 154.02273559570312 708.1536254882812
0 215 262.8282775878906 860.488037109375
0 216 185.61061096191406 558.8157348632812
0 217 162.712646484375 447.9344482421875
0 218 219.66116333007812 862.0879516601562
0 219 191.2963104248047 522.5612182617188
0 220 246.8496551513672 1028.738037109375
0 221 172.50436401367188 868.4924926757812
0 222 119.51495361328125 461.76507568359375
0 223 98.54203033447266 419.6011047363281
0 224 226.71450805664062 823.80

KeyboardInterrupt: 

In [73]:
# Print the qualified name of every submodule of model_lag (the root's name is empty).
for module_name, _ in model_lag.named_modules():
    print(module_name)


feat
feat.0
feat.1
feat.2
feat.3
diag
diag.0
tril
gravity


In [78]:
# Print each submodule of the feature trunk together with its name.
for layer_name, layer in model_lag.feat.named_modules():
    print(layer_name, layer)

 Sequential(
  (0): Linear(in_features=7, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
)
0 Linear(in_features=7, out_features=256, bias=True)
1 ReLU()
2 Linear(in_features=256, out_features=256, bias=True)
3 ReLU()


In [96]:
# Inspect the gradient currently stored on the diag head's weight matrix
# (whatever the most recent backward pass left there).
model_lag.diag[0].weight.grad

tensor([[ 0.0000e+00, -7.7214e+01,  1.3162e+01,  ...,  5.9595e+01,
         -6.8283e-02, -2.4130e+02],
        [ 0.0000e+00, -2.2030e+01,  4.2801e+01,  ...,  8.9456e+00,
         -2.9239e-02, -1.8897e+01],
        [ 0.0000e+00, -3.2994e+01, -5.4616e+01,  ..., -2.1125e+01,
          1.3532e-01,  1.5908e+02],
        ...,
        [ 0.0000e+00, -7.7974e+02, -7.8320e+02,  ..., -1.6180e+02,
         -5.2150e+00, -7.1043e+02],
        [ 0.0000e+00, -3.9131e+01, -5.8152e+02,  ..., -5.6040e+01,
         -5.8999e-03, -6.0449e+01],
        [ 0.0000e+00, -1.1678e+02, -8.9093e+01,  ..., -3.7642e+01,
         -1.3136e-04,  3.2812e+01]], device='cuda:3')

In [28]:
    
class TestModule(nn.Module):
    """Two-output test network: a closed-form first output plus a linear second output.

    The first output is ``[exp(x), exp(2x)]`` (twice the input width, which
    equals ``ndim*(ndim+1)/2`` only for ``ndim == 3``); the second is a
    ``Linear(3, 3)`` applied to ``x``.
    """

    def __init__(self):
        super().__init__()
        self.g = nn.Linear(3, 3)

    def forward(self, x):
        first_half = torch.exp(x)
        second_half = torch.exp(2 * x)
        stacked = torch.cat((first_half, second_half), dim=-1)
        return stacked, self.g(x)
# Pick up any edits made to the lagrangian module since it was imported.
reload(lg)

torch.cuda.set_device('cuda:3')

model_L = TestModule().cuda()
model_g = nn.Linear(3, 3).cuda()
# Four configurations whose exponentials are small consecutive integers.
q = torch.stack(
    [torch.log(torch.arange(start, start + 3).float()) for start in (1, 2, 5, 7)]
).cuda()
dq = torch.ones_like(q)
ddq = torch.ones_like(q)

# The trailing True asks lg.inverse_model for all intermediates, not just tau.
L, dLdq, dLdt, H, dHdq, dHdt, quad, tau = lg.inverse_model(model_L, q, dq, ddq, True)
# Show every quantity for the first sample only, one per line.
print(L[0], dLdq[0], dLdt[0], H[0], dHdq[0], dHdt[0], quad[0], tau[0], sep='\n')

tensor([[1., 0., 0.],
        [1., 4., 0.],
        [2., 3., 9.]], device='cuda:3', grad_fn=<SelectBackward>)
tensor([[[ 2.,  0.,  0.],
         [ 0.,  0.,  0.],
         [ 0.,  0.,  0.]],

        [[ 1.,  0.,  0.],
         [ 0.,  8.,  0.],
         [ 0.,  0.,  0.]],

        [[ 0.,  2.,  0.],
         [ 0.,  0.,  3.],
         [ 0.,  0., 18.]]], device='cuda:3', grad_fn=<SelectBackward>)
tensor([[ 2.,  0.,  0.],
        [ 1.,  8.,  0.],
        [ 2.,  3., 18.]], device='cuda:3', grad_fn=<SelectBackward>)
tensor([[ 1.,  1.,  2.],
        [ 1., 17., 14.],
        [ 2., 14., 94.]], device='cuda:3', grad_fn=<SelectBackward>)
tensor([[[  4.,   3.,   4.],
         [  3.,   2.,   2.],
         [  4.,   2.,   0.]],

        [[  0.,   0.,   2.],
         [  0.,  64.,  26.],
         [  2.,  26.,   8.]],

        [[  0.,   0.,   0.],
         [  0.,   0.,  12.],
         [  0.,  12., 342.]]], device='cuda:3', grad_fn=<SelectBackward>)
tensor([[  4.,   3.,   6.],
        [  3.,  66.,  40.],
   