In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
c.switchToNumpyArray()

In [3]:
# Build the training set: for random start states, solve a unicycle
# shooting problem with DDP and record the residual vector `r` held by the
# last data object the solver returns.
N_SAMPLES = 1000
T = 30  # number of running models in every shooting problem

model = c.ActionModelUnicycle()
positions = []
residuals = []

for _ in range(N_SAMPLES):
    # Random start state: first two entries in [-2.1, 2.1), third in [0, 1).
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0, 1)])
    problem = c.ShootingProblem(x0, [model] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    # Only the last entry of ddp.datas() is kept (presumably the terminal
    # node -- TODO confirm against the crocoddyl version in use).
    last_data = list(ddp.datas())[-1]
    positions.append(x0)
    # Copy so the stored sample cannot alias solver-owned memory that is
    # released or overwritten once `ddp` is rebound on the next iteration.
    residuals.append(np.array(last_data.r, copy=True))

positions = np.asarray(positions)
residuals = np.asarray(residuals)
# The evaluation cell further down builds its own model instance.
del model

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Inputs are the sampled start states, targets are the recorded residuals;
# both are placed on `device` as float32 tensors.
x_train = torch.as_tensor(positions, dtype=torch.float32, device=device)
y_train = torch.as_tensor(residuals, dtype=torch.float32, device=device)
print(y_train.shape)

torch.Size([1000, 5])


In [6]:
class Net(nn.Module):
    """Fully connected network with two tanh hidden layers and a linear output.

    With the default sizes it maps a 3-dimensional input to a 5-dimensional
    output through two hidden layers of width 8.

    Args:
        in_features: size of each input sample.
        hidden_features: width shared by both hidden layers.
        out_features: size of each output sample.
    """

    def __init__(self, in_features=3, hidden_features=8, out_features=5):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the network output for ``x`` (last dimension ``in_features``)."""
        hidden = torch.tanh(self.fc1(x))
        hidden = torch.tanh(self.fc2(hidden))
        # The output layer has no activation.
        return self.fc3(hidden)


# Create the network on the same device as the training tensors. Using
# .to(device) instead of .cuda() keeps this cell runnable on CPU-only
# machines, matching the CPU fallback used when `device` was chosen.
net = Net().to(device).float()
criterion = nn.MSELoss()
# The optimizer holds references to the parameter tensors, so in-place
# re-initialisation of the weights afterwards is still seen by it.
optimizer = optim.Adam(net.parameters(), lr=0.001)
def weights_init_normal(m):
    '''Initialise a linear layer in place; leave every other module untouched.

    Intended for use with ``module.apply(weights_init_normal)``.

    For an ``nn.Linear`` layer the weights are drawn from a normal
    distribution with mean 0 and standard deviation ``1 / sqrt(in_features)``,
    and the bias, when the layer has one, is set to 0.
    '''
    # Type check instead of matching 'Linear' in the class name: unrelated
    # classes whose name merely contains 'Linear' are no longer touched.
    if not isinstance(m, nn.Linear):
        return

    fan_in = m.in_features
    # m.weight should be taken from a normal distribution
    nn.init.normal_(m.weight, mean=0.0, std=float(1 / np.sqrt(fan_in)))
    # m.bias should be 0; layers built with bias=False have m.bias set to None
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Run weights_init_normal on every submodule of the network (in-place
# re-initialisation); apply() returns the module, whose repr is the cell output.
net.apply(weights_init_normal)

Net(
  (fc1): Linear(in_features=3, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=5, bias=True)
)

In [7]:
# Full-batch training with Adam.
# NOTE(review): every optimizer step uses the WHOLE training set, and
# len(x_train) such steps are taken per epoch. The per-sample loop variables
# that used to be unpacked here were never used, so they are gone; the number
# of updates is unchanged. Moving to true per-sample or mini-batch updates
# would change the trained network and should be a deliberate decision.
N_EPOCHS = 100
steps_per_epoch = len(x_train)

for epoch in range(N_EPOCHS):
    for _ in range(steps_per_epoch):
        optimizer.zero_grad()
        # x_train is already float32, so no extra cast is needed.
        y_hat = net(x_train)
        loss = criterion(y_hat, y_train)
        loss.backward()
        optimizer.step()

In [8]:
def a2m(a):
    """Build a ``numpy.matrix`` from ``a`` and return its transpose.

    A 1-D sequence of length n therefore becomes an (n, 1) column matrix.
    """
    as_matrix = np.matrix(a)
    return as_matrix.transpose()

def m2a(m):
    """Convert ``m`` to a plain ``numpy.ndarray`` with all length-1 axes removed."""
    as_array = np.array(m)
    return np.squeeze(as_array)
# NOTE(review): presumably makes crocoddyl exchange numpy.matrix objects from
# here on (the import cell selected switchToNumpyArray) -- confirm against the
# crocoddyl version in use. The m2a/a2m helpers convert between both forms.
c.switchToNumpyMatrix()
class UnicycleTerminal(c.ActionModelAbstract):
    """Unicycle action model whose residual and cost are fixed at construction.

    ``calc`` evaluates no cost function: it copies the stored ``r`` and
    ``cost`` into the data object. ``calcDiff`` fills in weighted quadratic
    cost derivatives and the unicycle dynamics Jacobians.

    NOTE(review): the cost derivatives written by ``calcDiff`` are those of a
    weighted quadratic in ``x`` and ``u``; they are not derived from the
    stored ``r``/``cost`` -- confirm this is intended.
    """

    def __init__(self, r, cost):
        """Store the residual ``r`` and scalar ``cost`` that ``calc`` reports.

        The base class is initialised with a 3-dimensional state vector and
        the sizes 2 and 5 (presumably nu and nr -- TODO confirm).
        """
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.r = r
        self.cost = cost
        self.dt = .1
        self.costWeights = [10., 1.]

    def calc(self, data, x, u=None):
        """Write the stored residual and cost into ``data``; ``x``/``u`` are ignored."""
        data.r = self.r
        data.cost = self.cost

    def calcDiff(self, data, x, u=None, recalc=True):
        """Fill ``data`` with cost derivatives and dynamics Jacobians at ``(x, u)``.

        When ``u`` is None, ``self.unone`` is used. When ``recalc`` is true,
        ``calc`` is run first.
        """
        if u is None:
            u = self.unone
        if recalc:
            self.calc(data, x, u)

        # Unpacking also checks that x has three entries and u has two.
        _, _, theta = m2a(x)
        v, _ = m2a(u)

        # Cost derivatives
        state_weight = self.costWeights[0] ** 2
        control_weight = self.costWeights[1] ** 2
        data.Lx = a2m(m2a(x) * ([state_weight] * self.state.nx))
        data.Lu = a2m(m2a(u) * ([control_weight] * self.nu))
        data.Lxx = np.diag([state_weight] * self.state.nx)
        data.Luu = np.diag([control_weight] * self.nu)

        # Dynamic derivatives. The locals are named cos_t/sin_t so they do not
        # shadow the module-level crocoddyl alias `c`.
        cos_t, sin_t, dt = np.cos(theta), np.sin(theta), self.dt
        data.Fx = np.matrix([[1, 0, -sin_t * v * dt], [0, 1, cos_t * v * dt], [0, 0, 1]])
        data.Fu = np.matrix([[cos_t * dt, 0], [sin_t * dt, 0], [0, dt]])



In [None]:
from time import perf_counter

In [14]:
# Time DDP solves that use the network prediction as the terminal model's
# residual and cost.
N_TRIALS = 100
T = 30  # number of running models in every shooting problem

model = c.ActionModelUnicycle()
start = perf_counter()
for _ in range(N_TRIALS):
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0, 1)])
    x = torch.as_tensor(x0.reshape(1, -1), device=device, dtype=torch.float32)
    # Inference only: no_grad avoids building an autograd graph inside the
    # timed region and lets the result go straight to numpy.
    with torch.no_grad():
        prediction = net(x).view(5, 1)
        # 0.5 * r^T r of the predicted residual.
        cost = 0.5 * float(torch.matmul(prediction.T, prediction))
    r = prediction.cpu().numpy()
    print(cost)
    terminal_model = UnicycleTerminal(r, cost)
    problem = c.ShootingProblem(x0, [model] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    # Empty initial guesses; the third argument (1000) is presumably the
    # iteration limit -- TODO confirm against the crocoddyl API.
    ddp.solve([], [], 1000)
    print(ddp.cost)
end = perf_counter()

0.006832639686763287
289.97471583473055
0.15769615769386292
820.9182095273733
0.17169751226902008
435.67316735974066
0.19839900732040405
534.5062136411034
0.1414482444524765
440.34663534356605
0.1753014475107193
624.7543022204886
0.162895068526268
929.5148985039737
0.1529536247253418
170.36758265973688
0.1739140897989273
729.3758044350878
0.09297199547290802
627.7124009502083
0.07953260093927383
193.75965637152265
0.1942114531993866
500.95008538559017
0.1990814357995987
266.8373987559319
0.191854327917099
470.4791005802483
2.0836983821936883e-05
567.5560755127806
0.1648905724287033
994.7830586889813
0.17051434516906738
787.3189361702559
0.21959547698497772
198.56497761020537
0.15425559878349304
520.6484657182333
0.22920012474060059
184.88274262712494
0.17786835134029388
534.5361924999154
0.16885267198085785
183.6074357730502
0.15959398448467255
222.73441200932803
0.12886328995227814
587.2685069471877
0.1780531108379364
540.9546894120236
0.10768949240446091
607.739837734873
0.1817679405

In [13]:
# Elapsed seconds between the `start` and `end` perf_counter() readings taken
# around the evaluation loop.
print(end-start)

55.36551141399832
