In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
c.switchToNumpyArray()

In [3]:
# get the cost
positions = []
cost = []
model = c.ActionModelUnicycle()


for _ in range(1000):
    
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    T = 30
    problem = c.ShootingProblem(x0.T, [ model ] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    
    positions.append(x0)
    cost.append(np.array([ddp.cost]))

positions = np.asarray(positions)
cost = np.asarray(cost)
del model

In [5]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
device = torch.device('cpu' if torch.cuda.is_available() else 'cpu')

In [6]:
x_train = torch.as_tensor(positions, device = device, dtype = torch.float32)
y_train = torch.as_tensor(cost, device = device, dtype = torch.float32)

In [7]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(3, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
    
        return x


net = Net()
#net.cuda()
net = net.float()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
def weights_init_normal(m):
    '''Takes in a module and initializes all linear layers with weight
       values taken from a normal distribution.'''

    classname = m.__class__.__name__
    # for every Linear layer in a model
    if classname.find('Linear') != -1:
        y = m.in_features
    # m.weight.data shoud be taken from a normal distribution
        m.weight.data.normal_(0.0,1/np.sqrt(y))
    # m.bias.data should be 0
        m.bias.data.fill_(0)

net.apply(weights_init_normal)

Net(
  (fc1): Linear(in_features=3, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)

In [8]:
for epoch in range(100):
    #total_loss = 0
    for inputs, target in zip(x_train, y_train):   
        optimizer.zero_grad()
        y_hat = net(x_train.float())
        loss = criterion(y_hat, y_train)
        loss.backward()
        optimizer.step()
        
        #total_loss += loss.item()
    #print("Epoch : ", epoch , "Loss : ", total_loss )

In [9]:
def a2m(a):
    return np.matrix(a).T

def m2a(m):
    return np.array(m).squeeze()


c.switchToNumpyMatrix()
class UnicycleTerminal(c.ActionModelAbstract):
    def __init__(self, cost, lx, lxx):
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.cost = cost
        self.lx = lx
        self.lxx = lxx
        
        
    def calc(self, data, x, u=None):        
        data.cost = self.cost
            
        
    def calcDiff(self, data, x, u=None, recalc=True):
        if u is None:
            u = self.unone
        if recalc:
            self.calc(data, x, u)
            
        data.Lx = self.lx
        data.Lxx = self.lxx

        


In [10]:
w1 = net.fc1.weight.data.cpu().numpy()
b1 = net.fc1.bias.data.cpu().numpy()

w2 = net.fc2.weight.data.cpu().numpy()
b2 = net.fc2.bias.data.cpu().numpy()

w3 = net.fc3.weight.data.cpu().numpy()
b3 = net.fc3.bias.data.cpu().numpy()

def value_function(y):
    out1 = np.tanh(y.dot(w1.T) + b1)
    out2 = np.tanh(out1.dot(w2.T) + b2)
    out3 = out2.dot(w3.T) + b3
    
    return out3

j = nd.Jacobian(value_function)
h = nd.Hessian(value_function)


In [11]:
from time import perf_counter

In [12]:
model = c.ActionModelUnicycle()
start = perf_counter()
for _ in range(10):
    
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    x = torch.as_tensor(x0.reshape(1, -1), device = device, dtype = torch.float32)
    cost = float(net(x).view(1, 1))
    lx = a2m(j(x0))
    lxx = a2m(h(x0))
    
    terminal_model = UnicycleTerminal(cost, lx, lxx)
    T = 30
    problem = c.ShootingProblem(x0.T, [ model ] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    ddp.solve([], [], 1000)
    print(cost, ddp.cost)
end = perf_counter()

441.9880676269531 984.6124323667799
441.9880676269531 1357.134989352498
441.9880676269531 1215.2737078100658
441.9880676269531 444.8051725800973
441.9880676269531 1165.5644261785594
441.9880676269531 917.362850435374
441.9880676269531 1198.170112704945
441.9880676269531 1338.5674010218877
441.9880676269531 1468.2752816801956
441.9880676269531 1012.9699018350611


In [28]:
x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
x = torch.as_tensor(x0.reshape(1, -1), device = device, dtype=torch.float32)
x.requires_grad=True    

In [29]:
torch.autograd.gradcheck(net, x)

  'At least one of the inputs that requires gradient '


True