In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np
import crocoddyl as c
import numdifftools as nd
from sklearn.neural_network import MLPRegressor
c.switchToNumpyArray()

In [2]:
# Build the training set for the regressor: each sample pairs a random start
# state x0 with the residual vector `r` of the LAST data object returned by
# ddp.datas() once the shooting problem started at x0 has been solved.
N_SAMPLES = 1000
T = 30  # number of running nodes in every shooting problem

# Dedicated, seeded generator so Restart & Run All regenerates the same
# dataset without touching numpy's global random state.
data_rng = np.random.RandomState(0)

model = c.ActionModelUnicycle()
positions = []
residuals = []

for _ in range(N_SAMPLES):
    # Start state: two coordinates drawn from [-2.1, 2.1), the third from [0, 1).
    x0 = np.array([data_rng.uniform(-2.1, 2.1), data_rng.uniform(-2.1, 2.1), data_rng.uniform(0, 1)])
    problem = c.ShootingProblem(x0.T, [model] * T, model)
    ddp = c.SolverDDP(problem)
    ddp.solve()

    # Only the last data object is used (presumably the terminal node — TODO
    # confirm the ordering of ddp.datas()). Indexing the materialised list
    # raises IndexError when it is empty instead of silently reusing the
    # residual left over from the previous sample.
    last_data = list(ddp.datas())[-1]

    positions.append(x0)
    # Copy so the stored sample cannot alias memory owned by the solver data
    # (whether `r` is returned as a view depends on the bindings in use).
    residuals.append(np.array(last_data.r, copy=True))

positions = np.asarray(positions)
residuals = np.asarray(residuals)
del model

In [3]:
# Regressor mapping a start state (row of `positions`) to the matching row of
# `residuals`: two hidden layers of 8 tanh units, trained with the 'lbfgs' solver.
nn = MLPRegressor(
    hidden_layer_sizes=(8, 8),
    activation='tanh',
    solver='lbfgs',
    alpha=0.001,
    max_iter=1000,
    tol=0.0001,
    random_state=9,
    warm_start=False,
    verbose=True,
    # NOTE(review): according to the scikit-learn documentation the settings
    # below are only consulted by the 'sgd'/'adam' solvers; they are kept
    # verbatim so the estimator's parameters are unchanged — verify against
    # the installed version before pruning them.
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.01,
    power_t=0.5,
    shuffle=True,
    momentum=0.9,
    nesterovs_momentum=True,
    early_stopping=False,
    validation_fraction=0.01,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)

nn.fit(positions, residuals)

MLPRegressor(activation='tanh', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(8, 8), learning_rate='constant',
             learning_rate_init=0.01, max_fun=15000, max_iter=1000,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=9, shuffle=True, solver='lbfgs',
             tol=0.0001, validation_fraction=0.01, verbose=True,
             warm_start=False)

In [20]:
def a2m(a):
    """Return ``a`` as a transposed ``np.matrix``.

    A flat sequence of n values becomes an (n, 1) column matrix; a 2-D input
    comes back transposed.
    """
    as_matrix = np.matrix(a)
    return as_matrix.transpose()

def m2a(m):
    """Return ``m`` as a plain ``np.ndarray`` with every length-1 axis removed.

    An (n, 1) or (1, n) input comes back as a 1-D array of n values; a
    single-element input collapses to a 0-d array.
    """
    return np.squeeze(np.array(m))
# NOTE(review): global switch on the crocoddyl bindings, issued after the
# c.switchToNumpyArray() call made in the import cell. Presumably it makes the
# bindings exchange np.matrix values, which is what a2m()/m2a() and the
# UnicycleTerminal model below work with — confirm for the crocoddyl version
# in use, and be aware that it affects every cell executed afterwards.
c.switchToNumpyMatrix()
class UnicycleTerminal(c.ActionModelAbstract):
    """Unicycle action model implemented in Python.

    The state is ``(px, py, theta)`` and the control is ``(v, w)``. ``calc``
    integrates one step of length ``dt`` and evaluates a quadratic cost on the
    weighted state and control; ``calcDiff`` fills in the matching cost and
    dynamics derivatives.

    TODO: the learned terminal cost is not wired in yet. The intended variant
    stores the fitted regressor (``self.net = nn``) and, in ``calc``, sets
    ``data.r = a2m(self.net.predict(x.reshape(1, -1)))`` and
    ``data.cost = 0.5 * np.dot(data.r.T, data.r)``.
    """

    def __init__(self, dt=.1, costWeights=(10., 1.)):
        """Create the model.

        :param dt: integration step used by the rollout (default 0.1).
        :param costWeights: ``(state_weight, control_weight)`` multiplying x
            and u in the residual (default ``(10., 1.)``).
        """
        # 3-dimensional vector state; 2 and 5 match the control size and the
        # 3 + 2 rows of the residual assembled in calc().
        c.ActionModelAbstract.__init__(self, c.StateVector(3), 2, 5)
        self.dt = dt
        self.costWeights = list(costWeights)

    def calc(self, data, x, u=None):
        """Write the next state, residual and cost for ``(x, u)`` into ``data``.

        :param data: data object receiving ``xnext``, ``r`` and ``cost``.
        :param x: state ``(px, py, theta)``.
        :param u: control ``(v, w)``; ``self.unone`` is used when omitted.
        """
        if u is None:
            u = self.unone
        v, w = m2a(u)
        px, py, theta = m2a(x)
        # Named cos_th/sin_th so the module alias `c` (crocoddyl) is not shadowed.
        cos_th, sin_th = np.cos(theta), np.sin(theta)
        # Rollout the dynamics
        data.xnext = a2m([px + cos_th * v * self.dt, py + sin_th * v * self.dt, theta + w * self.dt])
        # Residual: weighted state stacked on top of the weighted control.
        data.r = np.vstack([self.costWeights[0] * x, self.costWeights[1] * u])
        # Cost: half the squared norm of the residual.
        data.cost = .5 * np.sum(m2a(data.r)**2)

    def calcDiff(self, data, x, u=None, recalc=True):
        """Write the cost and dynamics derivatives at ``(x, u)`` into ``data``.

        :param data: data object receiving ``Lx``, ``Lu``, ``Lxx``, ``Luu``,
            ``Fx`` and ``Fu``.
        :param x: state ``(px, py, theta)``.
        :param u: control ``(v, w)``; ``self.unone`` is used when omitted.
        :param recalc: when true, ``calc`` is run first on the same inputs.
        """
        if u is None:
            u = self.unone
        if recalc:
            self.calc(data, x, u)

        v, _ = m2a(u)
        _, _, theta = m2a(x)
        state_w2 = self.costWeights[0]**2
        control_w2 = self.costWeights[1]**2
        # Cost derivatives: the cost is 0.5 * (w_x^2 |x|^2 + w_u^2 |u|^2).
        data.Lx = a2m(m2a(x) * state_w2)
        data.Lu = a2m(m2a(u) * control_w2)
        data.Lxx = np.diag([state_w2] * self.state.nx)
        data.Luu = np.diag([control_w2] * self.nu)
        # Dynamic derivatives of the rollout written in calc().
        cos_th, sin_th, dt = np.cos(theta), np.sin(theta), self.dt
        data.Fx = np.matrix([[1, 0, -sin_th * v * dt], [0, 1, cos_th * v * dt], [0, 0, 1]])
        data.Fu = np.matrix([[cos_th * dt, 0], [sin_th * dt, 0], [0, dt]])



In [21]:
from time import perf_counter

In [23]:
# Benchmark: wall-clock time of N_SOLVES DDP solves whose terminal node is the
# Python UnicycleTerminal model (running nodes use c.ActionModelUnicycle).
N_SOLVES = 1000
T = 30  # number of running nodes in every shooting problem

# Seeded, dedicated generator: re-running the cell solves the same start
# states, so the timing is reproducible and can be compared fairly against
# any other benchmark that uses the same seed.
bench_rng = np.random.RandomState(0)

terminal_model = UnicycleTerminal()
model = c.ActionModelUnicycle()
start = perf_counter()
for _ in range(N_SOLVES):
    x0 = np.array([bench_rng.uniform(-2.1, 2.1), bench_rng.uniform(-2.1, 2.1), bench_rng.uniform(0, 1)])
    problem = c.ShootingProblem(x0.T, [model] * T, terminal_model)
    ddp = c.SolverDDP(problem)
    # No initial guess for states/controls; 1000 is presumably the iteration
    # cap — confirm against SolverDDP.solve's signature.
    ddp.solve([], [], 1000)
end = perf_counter()

In [24]:
# Elapsed wall-clock seconds for the 1000 solves timed in the previous cell,
# which use the Python UnicycleTerminal as terminal model.
print(end - start)


9.138099836000038


In [25]:
# Benchmark: wall-clock time of N_SOLVES DDP solves that use
# c.ActionModelUnicycle for both the running nodes and the terminal node.
N_SOLVES = 1000
T = 30  # number of running nodes in every shooting problem

# Seeded, dedicated generator: re-running the cell solves the same start
# states, so the timing is reproducible and can be compared fairly against
# any other benchmark that uses the same seed.
bench_rng = np.random.RandomState(0)

model = c.ActionModelUnicycle()
start = perf_counter()
for _ in range(N_SOLVES):
    x0 = np.array([bench_rng.uniform(-2.1, 2.1), bench_rng.uniform(-2.1, 2.1), bench_rng.uniform(0, 1)])
    problem = c.ShootingProblem(x0.T, [model] * T, model)
    ddp = c.SolverDDP(problem)
    # No initial guess for states/controls; 1000 is presumably the iteration
    # cap — confirm against SolverDDP.solve's signature.
    ddp.solve([], [], 1000)
end = perf_counter()

In [26]:
# Elapsed wall-clock seconds for the 1000 solves timed in the previous cell,
# which use c.ActionModelUnicycle as both running and terminal model.
print(end-start)

1.3816586909997568
