In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
c.switchToNumpyArray()

In [3]:
# Build the training set: random start states for the unicycle and the cost
# that the DDP solver reports after solving from each of them.
SAMPLING_BOUNDS = [(-2.1, 2.1), (-2.1, 2.1), (0, 1)]  # one (low, high) pair per state component
T = 30  # number of running nodes in each shooting problem
model = c.ActionModelUnicycle()
positions, cost = [], []

for _ in range(1000):
    # Components are drawn in order, one uniform sample per (low, high) pair.
    x0 = np.array([np.random.uniform(low, high) for low, high in SAMPLING_BOUNDS])

    # The same model instance is used for all T running nodes and for the terminal node.
    problem = c.ShootingProblem(x0, [model] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    positions.append(x0)
    cost.append(np.array([ddp.cost]))

# Stack the per-sample records into arrays with one row per sample.
positions = np.asarray(positions)
cost = np.asarray(cost)
del model

In [4]:
# Display the collected DDP costs, one row per sampled start state.
cost

array([[ 296.28887758],
       [ 571.56313353],
       [ 196.36692581],
       [ 696.51728657],
       [ 144.55735257],
       [ 285.94236543],
       [ 417.02780126],
       [ 580.4140015 ],
       [ 193.38183562],
       [ 274.18713787],
       [ 465.75222449],
       [ 261.70433207],
       [ 186.65879611],
       [ 685.70925761],
       [ 591.37129124],
       [ 706.72381603],
       [ 167.73250689],
       [ 454.52172957],
       [  92.20636939],
       [ 486.71631725],
       [ 534.11815529],
       [ 572.47392544],
       [ 139.11699083],
       [ 635.74784669],
       [ 334.11868216],
       [ 315.93868782],
       [ 484.28816087],
       [ 825.47639888],
       [ 517.59726767],
       [ 485.58284395],
       [ 545.36152042],
       [ 280.99967593],
       [ 589.10959581],
       [ 568.92694551],
       [ 239.84454943],
       [ 594.29781376],
       [ 135.95322084],
       [  35.34127887],
       [ 168.06867542],
       [ 321.52939547],
       [ 916.36274064],
       [ 241.262

In [5]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
# Everything in this notebook runs on the CPU: the network is never moved to a
# GPU, so the tensors fed to it must stay on the CPU as well.
device = torch.device('cpu')

In [6]:
# Convert the sampled start states (network inputs) and the DDP costs
# (regression targets) to float32 tensors on the selected device.
_tensor_options = dict(device=device, dtype=torch.float32)
x_train = torch.as_tensor(positions, **_tensor_options)
y_train = torch.as_tensor(cost, **_tensor_options)

In [13]:
class Net(nn.Module):
    """Small fully connected network: 3 inputs -> 16 -> 16 -> 1 output.

    Both hidden layers use a tanh activation; the output layer is linear.
    """

    def __init__(self):
        super().__init__()

        # Layers are registered in forward order under the names fc1, fc2, fc3.
        self.fc1 = nn.Linear(3, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        """Run `x` (last dimension of size 3) through the three layers."""
        hidden = self.fc1(x).tanh()
        hidden = self.fc2(hidden).tanh()
        return self.fc3(hidden)


# Create the network (float32 parameters) together with the regression loss
# and the optimiser that updates the network's parameters.
net = Net().float()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
def weights_init_normal(m):
    '''Initialise an ``nn.Linear`` layer in place; leave any other module untouched.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation ``1 / sqrt(in_features)``. The bias, when the layer has one, is
    set to 0. Designed to be passed to ``Module.apply``, which calls it once
    for every submodule.

    Args:
        m: any ``nn.Module``.
    '''
    # Check the type rather than the class name: a substring match on the name
    # would also fire on unrelated classes that merely contain "Linear" and
    # that need not have `in_features`, `weight` or `bias`.
    if not isinstance(m, nn.Linear):
        return

    std = m.in_features ** -0.5
    nn.init.normal_(m.weight, mean=0.0, std=std)
    # Layers built with bias=False carry `bias is None`.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Run the initialiser on every submodule of `net`; the value of this call is shown as the cell output.
net.apply(weights_init_normal)

Net(
  (fc1): Linear(in_features=3, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)

In [14]:
# Full-batch training: every optimiser step evaluates the loss on the whole
# training set, so there is no per-sample data to iterate over. One step is
# taken per training sample in each epoch, i.e. N_EPOCHS * len(x_train)
# full-batch steps in total.
N_EPOCHS = 100
steps_per_epoch = len(x_train)

for epoch in range(N_EPOCHS):
    for _ in range(steps_per_epoch):
        optimizer.zero_grad()
        y_hat = net(x_train)
        loss = criterion(y_hat, y_train)
        loss.backward()
        optimizer.step()

In [15]:
def a2m(a):
    """Convert an array-like to a ``numpy.matrix`` and return its transpose.

    A 1-D input of length n therefore comes back as an (n, 1) column matrix.
    """
    as_matrix = np.matrix(a)
    return as_matrix.transpose()

def m2a(m):
    """Copy `m` into a plain ``numpy.ndarray`` and drop every axis of length 1."""
    return np.squeeze(np.array(m))


# NOTE(review): presumably makes the crocoddyl bindings exchange numpy.matrix
# objects from this point on - confirm against the crocoddyl version in use.
c.switchToNumpyMatrix()
class UnicycleTerminal(c.ActionModelAbstract):
    """Action model that reports a fixed cost and fixed state derivatives.

    The cost, its gradient and its Hessian are supplied at construction time
    and written to the data object unchanged, whatever state is passed in.

    NOTE(review): because nothing here depends on `x`, the reported values are
    those of the point they were computed at, not of the state being
    evaluated - confirm this is the intended approximation.
    """

    def __init__(self, cost, lx, lxx):
        """Store the values later written by `calc` and `calcDiff`.

        Args:
            cost: value assigned to `data.cost`.
            lx: value assigned to `data.Lx`.
            lxx: value assigned to `data.Lxx`.
        """
        # NOTE(review): the arguments 3, 2, 5 are presumably the state,
        # control and residual dimensions - confirm against the base class.
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.cost, self.lx, self.lxx = cost, lx, lxx

    def calc(self, data, x, u=None):
        """Write the stored cost to `data.cost`; `x` and `u` are not used."""
        data.cost = self.cost

    def calcDiff(self, data, x, u=None, recalc=True):
        """Write the stored derivatives to `data.Lx` and `data.Lxx`.

        When `recalc` is true, `calc` is run first so that `data.cost` is
        filled in as well. A missing `u` is replaced by `self.unone`.
        """
        control = self.unone if u is None else u
        if recalc:
            self.calc(data, x, control)

        data.Lx, data.Lxx = self.lx, self.lxx

        


In [16]:
def _dense_params(layer):
    """Return the (weight, bias) pair of a torch layer as numpy arrays."""
    return layer.weight.detach().cpu().numpy(), layer.bias.detach().cpu().numpy()


# Pull the trained parameters out of the network, one (weight, bias) pair per layer.
w1, b1 = _dense_params(net.fc1)
w2, b2 = _dense_params(net.fc2)
w3, b3 = _dense_params(net.fc3)

def value_function(y):
    """Evaluate the network with plain numpy.

    Applies the same computation as `Net.forward`: two tanh hidden layers
    followed by a linear output layer, using the module-level arrays
    `w1, b1, w2, b2, w3, b3`.
    """
    hidden = np.tanh(np.dot(y, w1.T) + b1)
    hidden = np.tanh(np.dot(hidden, w2.T) + b2)
    return np.dot(hidden, w3.T) + b3

# Numerical derivative objects from numdifftools, built on the numpy value
# function; they are called on a start state to obtain the terminal lx / lxx.
j = nd.Jacobian(value_function)
h = nd.Hessian(value_function)


In [17]:
from time import perf_counter

In [18]:
# Time 10 DDP solves whose terminal node is the network-based UnicycleTerminal
# model instead of the plain unicycle model.
model = c.ActionModelUnicycle()
T = 30  # number of running nodes; the same for every problem
start = perf_counter()
for _ in range(10):

    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    x = torch.as_tensor(x0.reshape(1, -1), device = device, dtype = torch.float32)
    # Use a dedicated name for the prediction: `cost` already holds the
    # training targets, and overwriting it would break a re-run of the cell
    # that builds y_train. No gradients are needed for inference.
    with torch.no_grad():
        predicted_cost = float(net(x).view(1, 1))
    lx = a2m(j(x0))
    lxx = a2m(h(x0))

    terminal_model = UnicycleTerminal(predicted_cost, lx, lxx)
    problem = c.ShootingProblem(x0.T, [ model ] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    ddp.solve([], [], 1000)
    print(predicted_cost, ddp.cost)
end = perf_counter()

288.38092041015625 717.0010967071714 [288.38107254]
39.01960372924805 82.1243925869176 [39.01964898]
733.360595703125 2199.6170188194783 [733.36070695]
815.4467163085938 2548.1085137107984 [815.44676119]
693.6716918945312 2292.8069156377214 [693.67194255]
99.19132995605469 204.5358553395633 [99.1912869]
245.0181121826172 556.3089224453674 [245.01804804]
847.3269653320312 2698.799862853303 [847.32712175]
239.103515625 496.22945775749133 [239.103545]
430.0514831542969 1164.1076879182374 [430.05159235]


In [None]:
# Elapsed perf_counter time (seconds) between `start` and `end`, i.e. for the timed loop of solves.
print(end - start)