In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
c.switchToNumpyArray()

In [3]:
# get the residual data
positions = []
residuals = []
model = c.ActionModelUnicycle()


for _ in range(1000):
    
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    T = 30
    problem = c.ShootingProblem(x0.T, [ model ] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    for d in ddp.datas():
        residual = d.r
    positions.append(x0)
    residuals.append(residual)

positions = np.asarray(positions)
residuals = np.asarray(residuals)
del model

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [15]:
x_train = torch.as_tensor(positions, device = device, dtype = torch.float32)
y_train = torch.as_tensor(residuals, device = device, dtype = torch.float32)
print(y_train.size())

torch.Size([1000, 5])


In [16]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(3, 8)
        self.fc2 = nn.Linear(8, 8)
        self.fc3 = nn.Linear(8, 5)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
net.cuda()
net = net.float()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
def weights_init_normal(m):
    '''Takes in a module and initializes all linear layers with weight
       values taken from a normal distribution.'''

    classname = m.__class__.__name__
    # for every Linear layer in a model
    if classname.find('Linear') != -1:
        y = m.in_features
    # m.weight.data shoud be taken from a normal distribution
        m.weight.data.normal_(0.0,1/np.sqrt(y))
    # m.bias.data should be 0
        m.bias.data.fill_(0)

net.apply(weights_init_normal)

Net(
  (fc1): Linear(in_features=3, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=5, bias=True)
)

In [17]:
epochs = 100

In [18]:
for epoch in range(epochs):
    total_loss = 0
    for inputs, target in zip(x_train, y_train):   
        optimizer.zero_grad()
        y_hat = net(x_train.float())
        loss = criterion(y_hat, y_train)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    print("Epoch : ", epoch , "Loss : ", total_loss )

Epoch :  0 Loss :  30.94287591986358
Epoch :  1 Loss :  4.403426186647266
Epoch :  2 Loss :  3.352968043880537
Epoch :  3 Loss :  1.7456619532313198
Epoch :  4 Loss :  0.7955069093150087
Epoch :  5 Loss :  0.5252071935683489
Epoch :  6 Loss :  0.3610587641596794
Epoch :  7 Loss :  0.24468998490192462
Epoch :  8 Loss :  0.18974918192543555
Epoch :  9 Loss :  0.1600945186946774
Epoch :  10 Loss :  0.13214549219992477
Epoch :  11 Loss :  0.10875453901826404
Epoch :  12 Loss :  0.09284755276166834
Epoch :  13 Loss :  0.08137349266326055
Epoch :  14 Loss :  0.07356259170046542
Epoch :  15 Loss :  0.0690423114792793
Epoch :  16 Loss :  0.0655575795826735
Epoch :  17 Loss :  0.062085748664685525
Epoch :  18 Loss :  0.05900180100434227
Epoch :  19 Loss :  0.05714824477036018
Epoch :  20 Loss :  0.05618440467878827
Epoch :  21 Loss :  0.05551967022984172
Epoch :  22 Loss :  0.05491925545720733
Epoch :  23 Loss :  0.05435348484388669
Epoch :  24 Loss :  0.05383762993005803
Epoch :  25 Loss :  0.

In [79]:
def a2m(a):
    return np.matrix(a).T

def m2a(m):
    return np.array(m).squeeze()
c.switchToNumpyMatrix()
class UnicycleTerminal(c.ActionModelAbstract):
    def __init__(self, net):
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.net = net
        self.dt = .1
        self.costWeights = [10., 1.]
        
    def calc(self, data, x, u=None):
        x = torch.as_tensor(x.reshape(1, -1), device = device, dtype = torch.float32)
        prediction = self.net(x).view(5, 1)
        r_ = prediction.cpu()
        data.r = r_.detach().numpy()
        #data.r = a2m(prediction.cpu().detach().numpy())
        data.cost = 0.5 * float(torch.matmul(prediction.T, prediction))
        #print(float(prediction.T @ prediction))
            
        
    def calcDiff(self, data, x, u=None, recalc=True):
        if u is None:
            u = self.unone
        if recalc:
            self.calc(data, x, u)

        v, w = m2a(u)
        px, py, theta = m2a(x)
        # Cost derivatives
        data.Lx = a2m(m2a(x) * ([self.costWeights[0]**2] * self.state.nx))
        data.Lu = a2m(m2a(u) * ([self.costWeights[1]**2] * self.nu))
        data.Lxx = np.diag([self.costWeights[0]**2] * self.state.nx)
        data.Luu = np.diag([self.costWeights[1]**2] * self.nu)
        # Dynamic derivatives
        c, s, dt = np.cos(theta), np.sin(theta), self.dt
        v, w = m2a(u)
        data.Fx = np.matrix([[1, 0, -s * v * dt], [0, 1, c * v * dt], [0, 0, 1]])
        data.Fu = np.matrix([[c * self.dt, 0], [s * self.dt, 0], [0, self.dt]])



In [81]:
terminal_model = UnicycleTerminal(net)
model = c.ActionModelUnicycle()
start = perf_counter()
for _ in range(1000):
    x0 = np.array([np.random.uniform(-2.1, 2.1), np.random.uniform(-2.1, 2.1), np.random.uniform(0,1)])
    T = 30
    problem = c.ShootingProblem(x0.T, [ model ] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    ddp.solve([], [], 1000)
end = perf_counter()

In [82]:
print(end - start)

859.6277446029999
